bagit 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ require 'rake'
2
+ require 'rake/rdoctask'
3
+ require 'spec/rake/spectask'
4
+
5
# Define the `spec` task: run the RSpec examples with both the library and
# the spec directory on the load path, using coloured output.
Spec::Rake::SpecTask.new('spec') do |spec_task|
  %w[lib spec].each { |dir| spec_task.libs << dir }
  spec_task.spec_opts << "--color"
  # spec_task.warning = true
  # spec_task.rcov = true
  # spec_task.rcov_opts += ["-x /Library", "-x spec"]
end

# Running `rake` without arguments runs the specs.
task :default => [:spec]
@@ -0,0 +1,16 @@
1
# Gem specification for the bagit library.
Gem::Specification.new do |spec|
  spec.name = "bagit"
  spec.version = '0.0.2'
  spec.summary = "BagIt package generation and validation"
  spec.email = "flazzarino@gmail.com"
  spec.homepage = 'http://bagit.rubyforge.org'
  spec.authors = ["Francesco Lazzarino"]

  spec.files = ["Rakefile", "bagit.gemspec", "lib/bagit/bag.rb",
                "lib/bagit/fetch.rb", "lib/bagit/file.rb",
                "lib/bagit/info.rb", "lib/bagit/manifest.rb",
                "lib/bagit/string.rb", "lib/bagit/valid.rb",
                "lib/bagit.rb"]

  # lib/bagit/valid.rb does `require 'validatable'`; declare it so that
  # installing this gem also installs what it needs at load time.
  spec.add_dependency 'validatable'

  # has_rdoc= is deprecated in later RubyGems releases; only set it where the
  # installed RubyGems still understands it.
  spec.has_rdoc = true if spec.respond_to?(:has_rdoc=)
end
@@ -0,0 +1,11 @@
1
+ # == About bagit.rb
2
+ # Author:: Francesco Lazzarino (mailto:flazzarino@gmail.com)
3
+ # Functionality conforms to the BagIt Spec v0.95:
4
+ # http://www.cdlib.org/inside/diglib/bagit/bagitspec.html
5
+
6
+ require 'bagit/bag'
7
+
8
module BagIt
  # The version of the BagIt specification the code is conforming to.
  # Frozen so that no caller can modify the shared string in place.
  SPEC_VERSION = '0.95'.freeze
end
@@ -0,0 +1,91 @@
1
require 'fileutils'

require 'bagit/fetch'
require 'bagit/file'
require 'bagit/info'
require 'bagit/manifest'
require 'bagit/string'
require 'bagit/valid'
7
+
8
module BagIt

  # Represents the state of a bag on a filesystem.
  #
  # A bag is a directory (bag_dir) holding tag files directly inside it and
  # the payload ("bag files") below its data/ sub-directory.
  class Bag

    attr_reader :bag_dir

    include Validity            # Validity functionality
    include Info                # package & bag info functionality
    include Manifest            # manifest related functionality
    include Fetch               # fetch related functionality

    # Make a new Bag based at path.
    #
    # Creates the bag directory, its data directory and the bagit.txt /
    # package-info.txt tag files when they do not exist yet. An existing bag
    # is left untouched.
    def initialize(path)
      @bag_dir = path

      # mkdir_p creates bag_dir on the way to data_dir and, unlike mkdir,
      # also works when parent directories of the bag are missing
      FileUtils::mkdir_p data_dir unless File.directory? data_dir

      # write some tag info if it is not there
      unless File.exist? bagit_txt_file
        write_bag_info("BagIt-Version" => SPEC_VERSION, "Tag-File-Character-Encoding" => "UTF-8")
      end

      unless File.exist? package_info_txt_file
        write_package_info('Bag-Software-Agent' => "BagIt Ruby Gem (http://bagit.rubyforge.org)")
      end
    end

    # Return the path to the data directory
    def data_dir
      File.join @bag_dir, 'data'
    end

    # Return the path of every bag (payload) file. Each path starts with
    # bag_dir, it is not relative to it.
    #
    # Files and directories whose names start with a dot are included, so
    # that they get manifested like any other payload file.
    def bag_files
      Dir.glob(File.join(data_dir, '**', '*'), File::FNM_DOTMATCH).select { |f| File.file? f }
    end

    # Return the path of every tag file, i.e. every regular file directly
    # inside bag_dir. Each path starts with bag_dir.
    def tag_files
      Dir[File.join(@bag_dir, '*')].select { |f| File.file? f }
    end

    # Add a bag file.
    #
    # base_path:: destination of the file, relative to the data directory
    # src_path::  file to copy; when nil the new file is opened for writing
    #             and yielded to the given block instead
    #
    # Raises ArgumentError when base_path points outside the data directory
    # or when neither src_path nor a block is given, and RuntimeError when
    # the bag file already exists.
    def add_file(base_path, src_path=nil)
      path = data_path(base_path)
      raise "Bag file exists: #{base_path}" if File.exist? path

      # check before touching the filesystem, otherwise an empty file would
      # be left behind when the missing block is finally yielded to
      if src_path.nil? && !block_given?
        raise ArgumentError, "add_file needs a source path or a block"
      end

      FileUtils::mkdir_p File.dirname(path)

      if src_path.nil?
        File.open(path, 'w') { |io| yield io }
      else
        FileUtils::cp src_path, path
      end
    end

    # Remove a bag file.
    #
    # Raises ArgumentError when base_path points outside the data directory
    # and RuntimeError when the bag file does not exist.
    def remove_file(base_path)
      path = data_path(base_path)
      raise "Bag file does not exist: #{base_path}" unless File.exist? path
      FileUtils::rm path
    end

    # Remove all empty directory trees from the bag
    def gc!
      Dir.entries(data_dir).each do |entry|
        next if %w{.. .}.include? entry
        File.clean File.join(data_dir, entry)
      end
    end

    protected

    # Return the path of base_path below the data directory. Raises
    # ArgumentError when the path would end up outside of it (for example
    # through ".." segments), so payload operations cannot touch tag files
    # or anything outside the bag.
    def data_path(base_path)
      path = File.join(data_dir, base_path)
      root = File.expand_path(data_dir) + File::SEPARATOR

      unless File.expand_path(path).index(root) == 0
        raise ArgumentError, "Bag file path is outside the data directory: #{base_path}"
      end

      path
    end

  end

end
@@ -0,0 +1,50 @@
1
+ require 'open-uri'
2
+
3
module BagIt

  # Support for payload files that are listed in fetch.txt and retrieved
  # later. Requires the including class to provide @bag_dir, manifest_file
  # and add_file.
  module Fetch

    # Return the path to the bag's fetch.txt
    def fetch_txt_file
      File.join @bag_dir, 'fetch.txt'
    end

    # Register a remote file: append it to fetch.txt and add its digests to
    # the sha1 and md5 manifests.
    #
    # url::  where the file can be retrieved from
    # path:: destination of the file, relative to the data directory
    # size:: size of the file, nil is recorded as '-'
    # sha1:: hex SHA1 digest of the file
    # md5::  hex MD5 digest of the file
    def add_remote_file(url, path, size, sha1, md5)
      File.open(fetch_txt_file, 'a') { |io| io.puts "#{url} #{size || '-'} #{path}" }
      File.open(manifest_file('sha1'), 'a') { |io| io.puts "#{sha1} #{File.join 'data', path}" }
      File.open(manifest_file('md5'), 'a') { |io| io.puts "#{md5} #{File.join 'data', path}" }
    end

    # Fetch all remote files listed in fetch.txt and add them to the bag,
    # then archive fetch.txt as fetch.txt.0 (older archives move up by one).
    #
    # Raises ArgumentError for an entry whose URL has no scheme.
    def fetch!
      File.open(fetch_txt_file) do |fetch_io|
        fetch_io.each_line do |line|
          url, _length, path = line.chomp.split(/\s+/, 3)
          next if path.nil? # blank or incomplete line

          # retrieve first, so a failed download leaves no empty bag file
          content = read_remote(url)
          add_file(path) { |io| io.write content }
        end
      end

      rotate_fetch_txt
    end

    protected

    # Return the content found at url.
    #
    # fetch.txt is external input: only values that start with a URL scheme
    # are opened. Anything else is refused, because open would treat it as a
    # local path or, with a leading "|", as a command to run.
    def read_remote(url)
      unless url =~ /\A[A-Za-z][A-Za-z0-9+\-.]*:\/\//
        raise ArgumentError, "Not a fetchable URL: #{url}"
      end

      # URI.open is public where open-uri provides it; older versions of
      # open-uri extend Kernel#open instead
      remote = URI.respond_to?(:open) ? URI.open(url) : open(url)

      begin
        remote.read
      ensure
        remote.close if remote.respond_to?(:close)
      end
    end

    # Rename fetch.txt to fetch.txt.0 after moving every fetch.txt.N to
    # fetch.txt.N+1.
    def rotate_fetch_txt
      archived = Dir["#{fetch_txt_file}.?*"].map do |f|
        f =~ /fetch\.txt\.(\d+)\z/ ? [$1.to_i, f] : nil
      end

      # highest number first (compared as numbers, not as text), so that no
      # move overwrites an archive that has not been moved yet
      archived.compact.sort.reverse.each do |number, f|
        FileUtils::mv f, File.join(File.dirname(f), "fetch.txt.#{number + 1}")
      end

      # move the current fetch.txt
      FileUtils::mv fetch_txt_file, "#{fetch_txt_file}.0"
    end

  end

end
@@ -0,0 +1,19 @@
1
class File

  # Clean out all the empty dirs: delete file_name when it is a directory
  # that contains nothing but (recursively) empty directories, and delete
  # every such directory below it.
  #
  # Anything that is not a directory is left alone. Symbolic links are
  # neither followed nor removed: a link is not a directory that can be
  # deleted, and its target may be outside the tree being cleaned.
  def File.clean(file_name)
    return unless File.directory?(file_name) && !File.symlink?(file_name)

    # clean everything below first, directories may become empty by that
    Dir.entries(file_name).each do |entry|
      next if %w{. ..}.include? entry
      File.clean File.join(file_name, entry)
    end

    # if it is empty now then delete it
    remaining = Dir.entries(file_name).reject { |entry| %w{. ..}.include? entry }
    Dir.delete file_name if remaining.empty?
  end

end
+ end
@@ -0,0 +1,77 @@
1
+ require 'set'
2
+
3
module BagIt

  # Reading and writing of the "label: value" tag files of a bag
  # (package-info.txt and bagit.txt). Requires response to bag_dir.
  module Info

    # Return the path to package-info.txt
    def package_info_txt_file
      File.join bag_dir, 'package-info.txt'
    end

    # Return the content of package-info.txt as a hash of label => value
    def package_info
      read_info_file package_info_txt_file
    end

    # Replace the content of package-info.txt with the entries of hash
    def write_package_info(hash)
      write_info_file package_info_txt_file, hash
    end

    # Return the path to bagit.txt
    def bagit_txt_file
      File.join bag_dir, 'bagit.txt'
    end

    # Return the content of bagit.txt as a hash of label => value
    def bag_info
      read_info_file bagit_txt_file
    end

    # Replace the content of bagit.txt with the entries of hash
    def write_bag_info(hash)
      write_info_file bagit_txt_file, hash
    end

    protected

    # Parse file into a hash of label => value.
    #
    # An entry starts at a line that begins with a non-blank character;
    # lines starting with whitespace continue the previous entry and are
    # joined to its value with a single space.
    def read_info_file(file)

      File.open(file) do |io|
        entries = io.read.split(/\n(?=[^\s])/)

        entries.inject({}) do |hash, entry|
          next hash if entry.strip.empty?

          # split at the first colon only, values may contain colons
          # themselves (URLs, times, ...)
          name, value = entry.chomp.split(/\s*:\s*/, 2)
          value = value.gsub(/\s*\n\s+/, ' ') unless value.nil?
          hash.merge({name => value})
        end

      end

    end

    # Write hash to file, one "label: value" entry per pair. Runs of
    # whitespace in a value become a single space; entries longer than 79
    # characters are wrapped onto indented continuation lines.
    #
    # Raises RuntimeError when two labels differ only in case.
    def write_info_file(file, hash)

      # labels are compared as a whole and ignoring case, a label that is
      # merely contained in another one ("Name", "Bag-Name") is no duplicate
      by_label = hash.keys.inject({}) do |groups, key|
        (groups[key.to_s.downcase] ||= []) << key
        groups
      end

      dups = by_label.values.select { |keys| keys.size > 1 }.flatten

      raise "Multiple labels (#{dups.join ', '}) in #{file}" unless dups.empty?

      File.open(file, 'w') do |io|

        hash.each do |name, value|
          # to_s: values such as numbers are written as they print
          simple_entry = "#{name}: #{value.to_s.gsub(/\s+/, ' ')}"

          entry = if simple_entry.length > 79
                    simple_entry.wrap(77).indent(2)
                  else
                    simple_entry
                  end

          io.puts entry
        end

      end

    end

  end

end
@@ -0,0 +1,111 @@
1
+ require 'pathname'
2
+ require 'digest/sha1'
3
+ require 'digest/md5'
4
+
5
module BagIt

  # Generation and checking of manifest and tagmanifest files.
  # Requires response to bag_dir, tag_files, bag_files
  module Manifest

    # All tag files that are bag manifest files (manifest-[algorithm].txt)
    def manifest_files
      tag_files.select { |f| File.basename(f) =~ /\Amanifest-.+\.txt\z/ }
    end

    # A path to a manifest file of the specified algorithm
    def manifest_file(algo)
      File.join bag_dir, "manifest-#{algo}.txt"
    end

    # Generate manifest files (sha1 and md5) for all the bag files,
    # replacing every existing manifest file.
    def manifest!

      # nuke all the existing manifest files
      manifest_files.each { |f| FileUtils::rm f }

      # manifest each bag file for each algorithm
      bag_files.each do |f|
        rel_path = Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s
        append_digests(f, rel_path) { |algo| manifest_file(algo) }
      end

    end

    # All tag files that are tag manifest files (tagmanifest-[algorithm].txt)
    def tagmanifest_files
      tag_files.select { |f| File.basename(f) =~ /\Atagmanifest-.+\.txt\z/ }
    end

    # A path to a tagmanifest file of the specified algorithm
    def tagmanifest_file(algo)
      File.join bag_dir, "tagmanifest-#{algo}.txt"
    end

    # Generate manifest files (sha1 and md5) for all the tag files (except
    # the tag manifest files), replacing every existing tagmanifest file.
    def tagmanifest!

      # nuke all the existing tagmanifest files
      tagmanifest_files.each { |f| FileUtils::rm f }

      # manifest each (non tagmanifest) tag file for each algorithm
      (tag_files - tagmanifest_files).each do |f|
        append_digests(f, File.basename(f)) { |algo| tagmanifest_file(algo) }
      end

    end

    # Returns true if all present manifested files' message digests
    # match the actual message digest.
    #
    # Manifested files that are not present are skipped, and so are blank
    # manifest lines. A manifest of an unknown algorithm always passes.
    def fixed?
      (manifest_files + tagmanifest_files).all? do |mf|
        algo = manifest_digest(mf)

        # check it, an unknown algorithm is always true
        next true if algo.nil?

        lines = File.open(mf) { |io| io.readlines }

        lines.all? do |line|
          manifested_digest, path = line.chomp.split(/\s+/, 2)
          next true if path.nil? # blank line

          file = File.join(bag_dir, path)
          next true unless File.file? file

          algo.file(file).hexdigest == manifested_digest.downcase
        end

      end

    end

    protected

    # Return the digest class for the algorithm named in the file name of a
    # (tag)manifest, nil when the algorithm is unknown.
    def manifest_digest(manifest_path)
      case File.basename(manifest_path)[/manifest-(.+)\.txt\z/, 1]
      when /sha1/i
        Digest::SHA1
      when /md5/i
        Digest::MD5
      end
    end

    # Append "<digest> <listed_path>" for file to the sha1 and the md5
    # manifest. The block is given the algorithm name and returns the path
    # of the manifest to append to. The file is digested from disk rather
    # than read into a string first.
    def append_digests(file, listed_path)
      [[:sha1, Digest::SHA1], [:md5, Digest::MD5]].each do |name, digest|
        line = "#{digest.file(file).hexdigest} #{listed_path}"
        File.open(yield(name), 'a') { |io| io.puts line }
      end
    end

  end

end
@@ -0,0 +1,23 @@
1
# Some mixed in functionality for String
class String

  # Wrap a string to lines of at most width characters, separated by
  # newlines. Existing whitespace (newlines included) is collapsed to single
  # spaces first, so existing newlines are not preserved.
  #
  # Lines are broken at spaces; only a word that is longer than width is
  # split inside the word. Raises ArgumentError unless width is positive.
  def wrap(width)
    raise ArgumentError, "width must be positive" unless width > 0

    rest = gsub(/\s+/, ' ').strip
    lines = []

    while rest.length > width
      # last space that still leaves at most width characters in front of it
      cut = rest.rindex(' ', width)

      if cut.nil?
        lines << rest[0...width]
        rest = rest[width..-1]
      else
        lines << rest[0...cut]
        rest = rest[(cut + 1)..-1]
      end
    end

    lines << rest
    lines.join("\n")
  end

  # Indent each line of a string but the first by n spaces, i.e. turn the
  # lines following the first one into indented continuation lines.
  def indent(n)
    indent = ' ' * n
    gsub "\n", "\n#{indent}"
  end

end
+ end
@@ -0,0 +1,104 @@
1
+ require 'validatable'
2
+
3
+ module BagIt
4
+
5
class Bag
  include Validatable

  # Each validation runs the check that records its errors under the same
  # name: consistent? adds :consistency errors, complete? adds
  # :completeness errors.
  validates_true_for :consistency, :logic => lambda { consistent? }
  validates_true_for :completeness, :logic => lambda { complete? }
end
10
+
11
# Completeness and consistency checks of a bag. Requires response to
# bag_dir, bag_files, manifest_files and errors (add / on).
module Validity

  # Return true if the manifest cover all files and all files are
  # covered. Every file found to be missing on either side is added to
  # errors under :completeness.
  def complete?

    unmanifested_files.each do |file|
      errors.add :completeness, "#{file} is present but not manifested"
    end

    empty_manifests.each do |file|
      errors.add :completeness, "#{file} is manifested but not present"
    end

    errors.on(:completeness).nil?
  end

  # Return true if all manifested files message digests match. Every
  # mismatch is added to errors under :consistency.
  #
  # Manifests of an unknown algorithm cannot be checked and are skipped, as
  # are blank manifest lines and manifested files that are not present.
  def consistent?

    manifest_files.each do |mf|

      # get the algorithm implementation
      algo = case File.basename(mf)[/\Amanifest-(.+)\.txt\z/, 1]
             when /sha1/i
               Digest::SHA1
             when /md5/i
               Digest::MD5
             end

      next if algo.nil?

      # Check every file in the manifest
      File.open(mf) do |io|

        io.each_line do |line|
          expected, path = line.chomp.split(/\s+/, 2)
          next if path.nil? # blank line

          file = File.join(bag_dir, path)
          next unless File.file? file

          actual = algo.file(file).hexdigest

          if expected.downcase != actual
            errors.add :consistency, "expected #{file} to have #{algo}: #{expected}, actual is #{actual}"
          end

        end

      end

    end

    errors.on(:consistency).nil?
  end

  protected

  # Returns all files in the instance that are not manifested
  def unmanifested_files
    mfs = manifested_files.map { |f| File.join bag_dir, f }
    bag_files.reject { |f| mfs.member? f }
  end

  # Returns a list of manifested files that are not present
  def empty_manifests
    bfs = bag_files
    manifested_files.reject { |f| bfs.member? File.join(bag_dir, f) }
  end

  # Returns a list of all files present in the manifest files, each path
  # once and as written in the manifest. Blank manifest lines are ignored.
  def manifested_files

    manifest_files.inject([]) do |acc, mf|

      files = File.open(mf) do |io|

        io.readlines.map do |line|
          _digest, path = line.chomp.split(/\s+/, 2)
          path
        end

      end

      (acc + files.compact).uniq
    end

  end

end
103
+
104
+ end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bagit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Francesco Lazzarino
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-02 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: flazzarino@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - Rakefile
26
+ - bagit.gemspec
27
+ - lib/bagit/bag.rb
28
+ - lib/bagit/fetch.rb
29
+ - lib/bagit/file.rb
30
+ - lib/bagit/info.rb
31
+ - lib/bagit/manifest.rb
32
+ - lib/bagit/string.rb
33
+ - lib/bagit/valid.rb
34
+ - lib/bagit.rb
35
+ has_rdoc: true
36
+ homepage: http://bagit.rubyforge.org
37
+ post_install_message:
38
+ rdoc_options: []
39
+
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ version:
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ requirements: []
55
+
56
+ rubyforge_project:
57
+ rubygems_version: 1.3.1
58
+ signing_key:
59
+ specification_version: 2
60
+ summary: BagIt package generation and validation
61
+ test_files: []
62
+