bagit 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
+ require 'rake'
2
+ require 'rake/rdoctask'
3
+ require 'spec/rake/spectask'
4
+
5
+ Spec::Rake::SpecTask.new('spec') do |t|
6
+ t.libs << 'lib'
7
+ t.libs << 'spec'
8
+ t.spec_opts << "--color"
9
+ # t.warning = true
10
+ # t.rcov = true
11
+ # t.rcov_opts += ["-x /Library", "-x spec"]
12
+ end
13
+
14
+ task :default => [:spec]
@@ -0,0 +1,16 @@
1
# Gem specification for the bagit library (BagIt package generation and
# validation).
Gem::Specification.new do |spec|
  spec.name     = "bagit"
  spec.version  = '0.0.2'
  spec.summary  = "BagIt package generation and validation"
  spec.email    = "flazzarino@gmail.com"
  spec.homepage = 'http://bagit.rubyforge.org'
  spec.authors  = ["Francesco Lazzarino"]

  spec.files = ["Rakefile", "bagit.gemspec", "lib/bagit/bag.rb",
                "lib/bagit/fetch.rb", "lib/bagit/file.rb",
                "lib/bagit/info.rb", "lib/bagit/manifest.rb",
                "lib/bagit/string.rb", "lib/bagit/valid.rb",
                "lib/bagit.rb"]

  # lib/bagit/valid.rb does `require 'validatable'`; without declaring the
  # dependency an installed gem fails at require time on a clean system.
  spec.add_dependency 'validatable'

  # has_rdoc= only exists on older RubyGems; skip it where it was removed.
  spec.has_rdoc = true if spec.respond_to?(:has_rdoc=)
end
@@ -0,0 +1,11 @@
1
+ # == About bagit.rb
2
+ # Author:: Francesco Lazzarino (mailto:flazzarino@gmail.com)
3
+ # Functionality conforms to the BagIt Spec v0.95:
4
+ # http://www.cdlib.org/inside/diglib/bagit/bagitspec.html
5
+
6
+ require 'bagit/bag'
7
+
8
module BagIt
  # The version of the BagIt specification the code is conforming to.
  # Frozen so that no caller can mutate the shared constant in place.
  SPEC_VERSION = '0.95'.freeze
end
@@ -0,0 +1,91 @@
1
require 'fileutils'

require 'bagit/fetch'
require 'bagit/file'
require 'bagit/info'
require 'bagit/manifest'
require 'bagit/string'
require 'bagit/valid'
7
+
8
+ module BagIt
9
+
10
+ # Represents the state of a bag on a filesystem
11
+ class Bag
12
+
13
+ attr_reader :bag_dir
14
+
15
+ include Validity # Validity functionality
16
+ include Info # package & bag info functionality
17
+ include Manifest # manifest related functionality
18
+ include Fetch # fetch related functionality
19
+
20
+ # Make a new Bag based at path
21
+ def initialize(path)
22
+ @bag_dir = path
23
+
24
+ # make the dir structure if it doesn't exist
25
+ FileUtils::mkdir bag_dir unless File.directory? bag_dir
26
+ FileUtils::mkdir data_dir unless File.directory? data_dir
27
+
28
+ # write some tag info if its not there
29
+ unless File.exist? bagit_txt_file
30
+ write_bag_info("BagIt-Version" => SPEC_VERSION, "Tag-File-Character-Encoding" => "UTF-8")
31
+ end
32
+
33
+ unless File.exist? package_info_txt_file
34
+ write_package_info('Bag-Software-Agent' => "BagIt Ruby Gem (http://bagit.rubyforge.org)")
35
+ end
36
+
37
+ end
38
+
39
+ # Return the path to the data directory
40
+ def data_dir
41
+ File.join @bag_dir, 'data'
42
+ end
43
+
44
+ # Return the paths to each bag file relative to bag_dir
45
+ def bag_files
46
+ Dir[File.join(data_dir, '**', '*')].select { |f| File.file? f }
47
+ end
48
+
49
+ # Return the paths to each tag file relative to bag_dir
50
+ def tag_files
51
+ Dir[File.join(@bag_dir, '*')].select { |f| File.file? f }
52
+ end
53
+
54
+ # Add a bag file
55
+ def add_file(base_path, src_path=nil)
56
+ path = File.join(data_dir, base_path)
57
+ raise "Bag file exists: #{base_path}" if File.exist? path
58
+ FileUtils::mkdir_p File.dirname(path)
59
+
60
+ if src_path.nil?
61
+ open(path, 'w') { |io| yield io }
62
+ else
63
+ FileUtils::cp src_path, path
64
+ end
65
+
66
+ end
67
+
68
+ # Remove a bag file
69
+ def remove_file(base_path)
70
+ path = File.join(data_dir, base_path)
71
+ raise "Bag file does not exist: #{base_path}" unless File.exist? path
72
+ FileUtils::rm path
73
+ end
74
+
75
+ # Remove all empty directory trees from the bag
76
+ def gc!
77
+
78
+ Dir.entries(data_dir).each do |f|
79
+
80
+ unless %w{.. .}.include? f
81
+ abs_path = File.join data_dir, f
82
+ File.clean abs_path
83
+ end
84
+
85
+ end
86
+
87
+ end
88
+
89
+ end
90
+
91
+ end
@@ -0,0 +1,50 @@
1
+ require 'open-uri'
2
+
3
+ module BagIt
4
+
5
+ module Fetch
6
+
7
+ def fetch_txt_file
8
+ File.join @bag_dir, 'fetch.txt'
9
+ end
10
+
11
+ def add_remote_file(url, path, size, sha1, md5)
12
+ open(fetch_txt_file, 'a') { |io| io.puts "#{url} #{size || '-'} #{path}" }
13
+ open(manifest_file('sha1'), 'a') { |io| io.puts "#{sha1} #{File.join 'data', path}" }
14
+ open(manifest_file('md5'), 'a') { |io| io.puts "#{md5} #{File.join 'data', path}" }
15
+ end
16
+
17
+ # feth all remote files
18
+ def fetch!
19
+
20
+ open(fetch_txt_file) do |io|
21
+
22
+ io.readlines.each do |line|
23
+
24
+ (url, length, path) = line.chomp.split(/\s+/, 3)
25
+
26
+ add_file(path) do |io|
27
+ io.write open(url)
28
+ end
29
+
30
+ end
31
+
32
+ end
33
+
34
+ # rename the old fetch.txt
35
+ Dir["#{fetch_txt_file}.?*"].sort.reverse.each do |f|
36
+
37
+ if f =~ /fetch.txt.(\d+)$/
38
+ new_f = File.join File.dirname(f), "fetch.txt.#{$1.to_i + 1}"
39
+ FileUtils::mv f, new_f
40
+ end
41
+
42
+ end
43
+
44
+ # move the current fetch_txt
45
+ FileUtils::mv fetch_txt_file, "#{fetch_txt_file}.0"
46
+ end
47
+
48
+ end
49
+
50
+ end
@@ -0,0 +1,19 @@
1
# Extension of the core File class with an empty-directory sweeper.
class File

  # Clean out all the empty dirs at and below file_name.
  #
  # Recurses depth-first, deleting every directory that is (or becomes)
  # empty, including file_name itself. Regular files are never removed,
  # and calling this on a non-directory is a no-op.
  #
  # Symbolic links are left alone: following a link could delete empty
  # directories outside the tree being cleaned, and Dir.delete cannot
  # remove the link itself.
  def self.clean(file_name)
    return if File.symlink?(file_name) || !File.directory?(file_name)

    # clean everything below first (non-directories are ignored)
    (Dir.entries(file_name) - %w{. ..}).each do |entry|
      clean File.join(file_name, entry)
    end

    # re-list: children may have been removed by the recursion above
    remaining = Dir.entries(file_name) - %w{. ..}
    Dir.delete file_name if remaining.empty?
  end

end
@@ -0,0 +1,77 @@
1
+ require 'set'
2
+
3
+ module BagIt
4
+
5
+ module Info
6
+
7
+ def package_info_txt_file
8
+ File.join bag_dir, 'package-info.txt'
9
+ end
10
+
11
+ def package_info
12
+ read_info_file package_info_txt_file
13
+ end
14
+
15
+ def write_package_info(hash)
16
+ write_info_file package_info_txt_file, hash
17
+ end
18
+
19
+ def bagit_txt_file
20
+ File.join bag_dir, 'bagit.txt'
21
+ end
22
+
23
+ def bag_info
24
+ read_info_file bagit_txt_file
25
+ end
26
+
27
+ def write_bag_info(hash)
28
+ write_info_file bagit_txt_file, hash
29
+ end
30
+
31
+ protected
32
+
33
+ def read_info_file(file)
34
+
35
+ open(file) do |io|
36
+
37
+ entries = io.read.split /\n(?=[^\s])/
38
+
39
+ entries.inject({}) do |hash, line|
40
+ name, value = line.chomp.split /\s*:\s*/
41
+ hash.merge({name => value})
42
+ end
43
+
44
+ end
45
+
46
+ end
47
+
48
+ def write_info_file(file, hash)
49
+
50
+ dups = hash.keys.inject(Set.new) do |acc, key|
51
+ a = hash.keys.grep(/#{key}/i)
52
+ acc + (a.size > 1 ? a : [])
53
+ end
54
+
55
+ raise "Multiple labels (#{dups.to_a.join ', '}) in #{file}" unless dups.empty?
56
+
57
+ open(file, 'w') do |io|
58
+
59
+ hash.each do |name, value|
60
+ simple_entry = "#{name}: #{value.gsub /\s+/, ' '}"
61
+
62
+ entry = if simple_entry.length > 79
63
+ simple_entry.wrap(77).indent(2)
64
+ else
65
+ simple_entry
66
+ end
67
+
68
+ io.puts entry
69
+ end
70
+
71
+ end
72
+
73
+ end
74
+
75
+ end
76
+
77
+ end
@@ -0,0 +1,111 @@
1
+ require 'pathname'
2
+ require 'digest/sha1'
3
+ require 'digest/md5'
4
+
5
+ module BagIt
6
+
7
+ # Requires response to bag_dir, tag_files, bag_files
8
+ module Manifest
9
+
10
+ # All tag files that are bag manifest files (manifest-[algorithm].txt)
11
+ def manifest_files
12
+ tag_files.select { |f| File.basename(f) =~ /^manifest-.*.txt/ }
13
+ end
14
+
15
+ # A path to a manifest file of the specified algorithm
16
+ def manifest_file(algo)
17
+ File.join bag_dir, "manifest-#{algo}.txt"
18
+ end
19
+
20
+ # Generate manifest files for all the bag files
21
+ def manifest!
22
+
23
+ # nuke all the existing manifest files
24
+ manifest_files.each { |f| FileUtils::rm f }
25
+
26
+ # manifest each tag file for each algorithm
27
+ bag_files.each do |f|
28
+ rel_path = Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s
29
+ data = open(f) { |io| io.read }
30
+
31
+ # sha1
32
+ sha1 = Digest::SHA1.hexdigest data
33
+ open(manifest_file(:sha1), 'a') { |io| io.puts "#{sha1} #{rel_path}" }
34
+
35
+ # md5
36
+ md5 = Digest::MD5.hexdigest data
37
+ open(manifest_file(:md5), 'a') { |io| io.puts "#{md5} #{rel_path}" }
38
+ end
39
+
40
+ end
41
+
42
+ # All tag files that are bag manifest files (tagmanifest-[algorithm].txt)
43
+ def tagmanifest_files
44
+ tag_files.select { |f| File.basename(f) =~ /^tagmanifest-.*.txt/ }
45
+ end
46
+
47
+ # A path to a tagmanifest file of the specified algorithm
48
+ def tagmanifest_file(algo)
49
+ File.join bag_dir, "tagmanifest-#{algo}.txt"
50
+ end
51
+
52
+ # Generate manifest files for all the tag files (except the tag
53
+ # manifest files)
54
+ def tagmanifest!
55
+
56
+ # nuke all the existing tagmanifest files
57
+ tagmanifest_files.each { |f| FileUtils::rm f }
58
+
59
+ # manifest each (non tagmanifest) tag file for each algorithm
60
+ (tag_files - tagmanifest_files).each do |f|
61
+ data = open(f) { |io| io.read }
62
+ rel_path = File.basename f
63
+
64
+ # sha1
65
+ sha1 = Digest::SHA1.hexdigest data
66
+ open(tagmanifest_file(:sha1), 'a') { |io| io.puts "#{sha1} #{rel_path}" }
67
+
68
+ # md5
69
+ md5 = Digest::MD5.hexdigest data
70
+ open(tagmanifest_file(:md5), 'a') { |io| io.puts "#{md5} #{rel_path}" }
71
+ end
72
+
73
+ end
74
+
75
+ # Returns true if all present manifested files' message digests
76
+ # match the actual message digest
77
+ def fixed?
78
+ (manifest_files + tagmanifest_files).all? do |mf|
79
+ # extract the algorithm
80
+ mf =~ /manifest-(.+).txt$/
81
+
82
+ algo = case $1
83
+ when /sha1/i
84
+ Digest::SHA1
85
+ when /md5/i
86
+ Digest::MD5
87
+ else
88
+ :unknown
89
+ end
90
+
91
+ # check it, an unknown algorithm is always true
92
+ unless algo == :unknown
93
+ lines = open(mf) { |io| io.readlines }
94
+
95
+ lines.all? do |line|
96
+ manifested_digest, path = line.chomp.split /\s+/, 2
97
+ actual_digest = open(File.join(@bag_dir, path)) { |io| algo.hexdigest io.read }
98
+ actual_digest == manifested_digest
99
+ end
100
+
101
+ else
102
+ true
103
+ end
104
+
105
+ end
106
+
107
+ end
108
+
109
+ end
110
+
111
+ end
@@ -0,0 +1,23 @@
1
+ # Some mixed in functionality for String
2
class String

  # Hard-wrap a string to lines of at most width characters, joined by
  # newlines. Every run of whitespace (including existing newlines) is
  # first collapsed to a single space, so existing line breaks are not
  # preserved. Lines are cut at exactly width characters, not at word
  # boundaries.
  #
  # Raises ArgumentError if width is smaller than 1.
  def wrap(width)
    raise ArgumentError, "width must be at least 1 (got #{width})" if width < 1

    rest = gsub(/\s+/, ' ').strip
    lines = []

    while rest.length > width
      lines << rest[0...width]
      rest = rest[width..-1].strip # a space at the cut point is dropped
    end

    (lines << rest).join("\n")
  end

  # Indent every line after the first by n spaces, i.e. insert n spaces
  # after each newline. The first line is left as is.
  def indent(n)
    padding = ' ' * n
    gsub "\n", "\n#{padding}"
  end

end
@@ -0,0 +1,104 @@
1
+ require 'validatable'
2
+
3
+ module BagIt
4
+
5
+ class Bag
6
+ include Validatable
7
+ validates_true_for :consistency, :logic => lambda { complete? }
8
+ validates_true_for :completeness, :logic => lambda { consistent? }
9
+ end
10
+
11
+ module Validity
12
+
13
+ # Return true if the manifest cover all files and all files are
14
+ # covered.
15
+ def complete?
16
+
17
+ unmanifested_files.each do |file|
18
+ errors.add :completeness, "#{file} is present but not manifested"
19
+ end
20
+
21
+ empty_manifests.each do |file|
22
+ errors.add :completeness, "#{file} is manifested but not present"
23
+ end
24
+
25
+ errors.on(:completeness).nil?
26
+ end
27
+
28
+ # Return true if all manifested files message digests match.
29
+ def consistent?
30
+
31
+ manifest_files.each do |mf|
32
+
33
+ # get the algorithm implementation
34
+ File.basename(mf) =~ /^manifest-(.+).txt$/
35
+
36
+ algo = case $1
37
+ when /sha1/i
38
+ Digest::SHA1
39
+ when /md5/i
40
+ Digest::MD5
41
+ else
42
+ :unknown
43
+ end
44
+
45
+ # Check every file in the manifest
46
+ open(mf) do |io|
47
+
48
+ io.each_line do |line|
49
+ expected, path = line.chomp.split /\s+/, 2
50
+ file = File.join(bag_dir, path)
51
+
52
+ if File.exist? file
53
+ actual = open(file) { |fio| algo.hexdigest(fio.read) }
54
+
55
+ if expected != actual
56
+ errors.add :consistency, "expected #{file} to have #{algo}: #{expected}, actual is #{actual}"
57
+ end
58
+
59
+ end
60
+ end
61
+
62
+ end
63
+
64
+ end
65
+
66
+ errors.on(:consistency).nil?
67
+ end
68
+
69
+ protected
70
+
71
+ # Returns all files in the instance that are not manifested
72
+ def unmanifested_files
73
+ mfs = manifested_files.map { |f| File.join bag_dir, f }
74
+ bag_files.reject { |f| mfs.member? f }
75
+ end
76
+
77
+ # Returns a list of manifested files that are not present
78
+ def empty_manifests
79
+ bfs = bag_files
80
+ manifested_files.reject { |f| bfs.member? File.join(bag_dir, f) }
81
+ end
82
+
83
+ # Returns a list of all files present in the manifest files
84
+ def manifested_files
85
+
86
+ manifest_files.inject([]) do |acc, mf|
87
+
88
+ files = open(mf) do |io|
89
+
90
+ io.readlines.map do |line|
91
+ digest, path = line.chomp.split /\s+/, 2
92
+ path
93
+ end
94
+
95
+ end
96
+
97
+ (acc + files).uniq
98
+ end
99
+
100
+ end
101
+
102
+ end
103
+
104
+ end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bagit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Francesco Lazzarino
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-02 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: flazzarino@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - Rakefile
26
+ - bagit.gemspec
27
+ - lib/bagit/bag.rb
28
+ - lib/bagit/fetch.rb
29
+ - lib/bagit/file.rb
30
+ - lib/bagit/info.rb
31
+ - lib/bagit/manifest.rb
32
+ - lib/bagit/string.rb
33
+ - lib/bagit/valid.rb
34
+ - lib/bagit.rb
35
+ has_rdoc: true
36
+ homepage: http://bagit.rubyforge.org
37
+ post_install_message:
38
+ rdoc_options: []
39
+
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ version:
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ requirements: []
55
+
56
+ rubyforge_project:
57
+ rubygems_version: 1.3.1
58
+ signing_key:
59
+ specification_version: 2
60
+ summary: BagIt package generation and validation
61
+ test_files: []
62
+