bagit 0.2.0 → 0.3.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NjFkNDZjYTUzYmMyOGZlMWZiOGM5YTJiMGU3ODM1ODRkNzYzZTY1OA==
5
+ data.tar.gz: !binary |-
6
+ NzU1ODcwNmZhZTI4NmE1YWJmMjNhN2FiNmYwNTFlMzhmMDFjZjY5OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZWJjMjYyYjk5NTMzNDgzMmYyNDE3NjM2YWRjNWUxMzk2NzM4NzAwMWVkY2Qy
10
+ ODRiODllOGRkZmE5YTYxN2Q5NGZjNjdjZTMwZDdjNGJlMTY5NTFkOWVhMGMy
11
+ MGJhYjFlNzAwNWQ2NmI5ZDk5ZDUxN2VkNDQxNjY0YTNmYjI1YTc=
12
+ data.tar.gz: !binary |-
13
+ YWFhNTcwZWU3OGU4Y2NiNzQ0NjYzM2ZjZmZjMmVlZDRiNTk2YjBlOWE4ZTkx
14
+ NTM0Y2MxMmI1ZDhkMzYxOWY2OTNkOTQ1NGJmYWIwY2YwNzYwZjNiNmE3YWM4
15
+ YmYwODE4Yzc5MWVmMjhmZDM4YzY4OWZlZmQ3Y2RiODdhYjA4Mjc=
data/README.md CHANGED
@@ -1,7 +1,9 @@
1
1
  BagIt (for ruby)
2
2
  ================
3
3
 
4
- Based on the [BagItspec v0.96](https://confluence.ucop.edu/display/Curation/BagIt).
4
+ [![Build Status](https://secure.travis-ci.org/no-reply/bagit.png)](http://travis-ci.org/no-reply/bagit)
5
+
6
+ Based on the [BagItspec v0.97](https://confluence.ucop.edu/display/Curation/BagIt).
5
7
 
6
8
  Supported Features:
7
9
  -------------------
@@ -48,12 +50,13 @@ TODO
48
50
  * command line tools for common tasks
49
51
  * better holy bag (fetch.txt) generation
50
52
  * better error reporting.
51
- * poor mans' checksum
52
53
 
53
54
  ---
54
55
 
55
56
  Copyright © 2009, [Francesco Lazzarino](mailto:flazzarino@gmail.com).
56
57
 
58
+ Current Maintainer: [Tom Johnson](mailto:thomas.johnson@oregonstate.edu).
59
+
57
60
  Sponsored by [Florida Center for Library Automation](http://www.fcla.edu).
58
61
 
59
62
  See LICENSE.txt for terms.
data/bagit.gemspec CHANGED
@@ -1,11 +1,12 @@
1
+
1
2
  BAGIT_SPEC = Gem::Specification.new do |spec|
2
3
  spec.name = "bagit"
3
- spec.version = '0.2.0'
4
+ spec.version = '0.3.0.pre'
4
5
  spec.summary = "BagIt package generation and validation"
5
6
  spec.description = "Ruby Library and Command Line tools for bagit"
6
- spec.email = "flazzarino@gmail.com"
7
- spec.homepage = 'http://github.com/flazz/bagit'
8
- spec.authors = ["Francesco Lazzarino"]
7
+ spec.email = "johnson.tom@gmail.com"
8
+ spec.homepage = 'http://github.com/tipr/bagit'
9
+ spec.authors = ["Tom Johnson, Francesco Lazzarino"]
9
10
 
10
11
  spec.add_dependency 'validatable', '~> 1.6'
11
12
 
data/lib/bagit.rb CHANGED
@@ -4,8 +4,10 @@
4
4
  # http://www.cdlib.org/inside/diglib/bagit/bagitspec.html
5
5
 
6
6
  require 'bagit/bag'
7
+ require 'fileutils'
8
+ require 'date'
7
9
 
8
10
  module BagIt
9
11
  # The version of the BagIt specification the code is conforming to.
10
- SPEC_VERSION = '0.96'
12
+ SPEC_VERSION = '0.97'
11
13
  end
data/lib/bagit/bag.rb CHANGED
@@ -17,7 +17,7 @@ module BagIt
17
17
  include Fetch # fetch related functionality
18
18
 
19
19
  # Make a new Bag based at path
20
- def initialize(path)
20
+ def initialize(path, info={})
21
21
  @bag_dir = path
22
22
 
23
23
  # make the dir structure if it doesn't exist
@@ -30,7 +30,7 @@ module BagIt
30
30
  end
31
31
 
32
32
  unless File.exist? bag_info_txt_file
33
- write_bag_info('Bag-Software-Agent' => "BagIt Ruby Gem (http://bagit.rubyforge.org)")
33
+ write_bag_info(info)
34
34
  end
35
35
  end
36
36
 
@@ -46,7 +46,13 @@ module BagIt
46
46
 
47
47
  # Return the paths to each tag file relative to bag_dir
48
48
  def tag_files
49
- Dir[File.join(@bag_dir, '*')].select { |f| File.file? f }
49
+ files = []
50
+ if tagmanifest_files != []
51
+ File.open(tagmanifest_files.first) do |f|
52
+ f.each_line{|line| files << File.join(@bag_dir, line.split(' ')[1])}
53
+ end
54
+ end
55
+ files
50
56
  end
51
57
 
52
58
  # Add a bag file
@@ -56,10 +62,12 @@ module BagIt
56
62
  FileUtils::mkdir_p File.dirname(path)
57
63
 
58
64
  if src_path.nil?
59
- open(path, 'w') { |io| yield io }
65
+ f = open(path, 'w') { |io| yield io }
60
66
  else
61
- FileUtils::cp src_path, path
67
+ f = FileUtils::cp src_path, path
62
68
  end
69
+ write_bag_info
70
+ return f
63
71
  end
64
72
 
65
73
  # Remove a bag file
@@ -85,6 +93,16 @@ module BagIt
85
93
  def paths
86
94
  self.bag_files.collect { |f| f.sub(data_dir + '/', '') }
87
95
  end
96
+
97
+ # Get the Oxum for the payload files
98
+ def payload_oxum
99
+ bytes = 0
100
+ bag_files.each do |f|
101
+ #TODO: filesystem quirks? Are we getting the stream size or the size on disk?
102
+ bytes += File.size(f)
103
+ end
104
+ return bytes.to_s + '.' + bag_files.count.to_s
105
+ end
88
106
 
89
107
  # Remove all empty directory trees from the bag
90
108
  def gc!
data/lib/bagit/info.rb CHANGED
@@ -4,15 +4,41 @@ module BagIt
4
4
 
5
5
  module Info
6
6
 
7
+ @@bag_info_headers = {
8
+ :agent => 'Bag-Software-Agent',
9
+ :org => 'Source-Organization',
10
+ :org_addr => 'Organization-Address',
11
+ :contact_name => 'Contact-Name',
12
+ :contact_phone => 'Contact-Phone',
13
+ :contact_email => 'Contact-Email',
14
+ :ext_desc => 'External-Description',
15
+ :ext_id => 'External-Identifier',
16
+ :size => 'Bag-Size',
17
+ :group_id => 'Bag-Group-Identifier',
18
+ :group_count => 'Bag-Count',
19
+ :sender_id => 'Internal-Sender-Identifier',
20
+ :int_desc => 'Internal-Sender-Description',
21
+ :date => 'Bagging-Date',
22
+ :oxum => 'Payload-Oxum'
23
+ }
24
+
7
25
  def bag_info_txt_file
8
26
  File.join bag_dir, 'bag-info.txt'
9
27
  end
10
28
 
11
29
  def bag_info
12
- read_info_file bag_info_txt_file
30
+ begin
31
+ read_info_file bag_info_txt_file
32
+ rescue
33
+ {}
34
+ end
13
35
  end
14
36
 
15
- def write_bag_info(hash)
37
+ def write_bag_info(hash={})
38
+ hash = bag_info.merge(hash)
39
+ hash[@@bag_info_headers[:agent]] = "BagIt Ruby Gem (http://bagit.rubyforge.org)" if hash[@@bag_info_headers[:agent]].nil?
40
+ hash[@@bag_info_headers[:date]] = Date.today.strftime('%Y-%m-%d') if hash[@@bag_info_headers[:date]].nil?
41
+ hash[@@bag_info_headers[:oxum]] = payload_oxum
16
42
  write_info_file bag_info_txt_file, hash
17
43
  end
18
44
 
@@ -28,6 +54,11 @@ module BagIt
28
54
  write_info_file bagit_txt_file, hash
29
55
  end
30
56
 
57
+ def update_bag_date
58
+ hash["Bagging-Date"] = Date.today.strftime('%Y-%m-%d')
59
+ write_bag_info(hash)
60
+ end
61
+
31
62
  protected
32
63
 
33
64
  def read_info_file(file)
@@ -9,7 +9,10 @@ module BagIt
9
9
 
10
10
  # All tag files that are bag manifest files (manifest-[algorithm].txt)
11
11
  def manifest_files
12
- tag_files.select { |f| File.basename(f) =~ /^manifest-.*.txt/ }
12
+ files = Dir[File.join(@bag_dir, '*')].select { |f|
13
+ File.file? f and File.basename(f) =~ /^manifest-.*.txt/
14
+ }
15
+ files
13
16
  end
14
17
 
15
18
  # A path to a manifest file of the specified algorithm
@@ -26,14 +29,13 @@ module BagIt
26
29
  # manifest each tag file for each algorithm
27
30
  bag_files.each do |f|
28
31
  rel_path = Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s
29
- data = open(f) { |io| io.read }
30
32
 
31
33
  # sha1
32
- sha1 = Digest::SHA1.hexdigest data
34
+ sha1 = Digest::SHA1.file f
33
35
  open(manifest_file(:sha1), 'a') { |io| io.puts "#{sha1} #{rel_path}" }
34
36
 
35
37
  # md5
36
- md5 = Digest::MD5.hexdigest data
38
+ md5 = Digest::MD5.file f
37
39
  open(manifest_file(:md5), 'a') { |io| io.puts "#{md5} #{rel_path}" }
38
40
  end
39
41
 
@@ -41,7 +43,10 @@ module BagIt
41
43
 
42
44
  # All tag files that are bag manifest files (tagmanifest-[algorithm].txt)
43
45
  def tagmanifest_files
44
- tag_files.select { |f| File.basename(f) =~ /^tagmanifest-.*.txt/ }
46
+ files = Dir[File.join(@bag_dir, '*')].select { |f|
47
+ File.file? f and File.basename(f) =~ /^tagmanifest-.*.txt/
48
+ }
49
+ files
45
50
  end
46
51
 
47
52
  # A path to a tagmanifest file of the specified algorithm
@@ -51,25 +56,77 @@ module BagIt
51
56
 
52
57
  # Generate manifest files for all the tag files (except the tag
53
58
  # manifest files)
54
- def tagmanifest!
59
+ def tagmanifest!(tags=nil)
60
+
61
+ tags = tag_files if tags == nil
55
62
 
56
63
  # nuke all the existing tagmanifest files
57
64
  tagmanifest_files.each { |f| FileUtils::rm f }
65
+
66
+ # ensure presence of manfiest files
67
+ manifest_files.each do |manifest|
68
+ tags << manifest unless tags.include?(manifest)
69
+ end
70
+
71
+ # ensure presence of bag info files
72
+ tags << bag_info_txt_file unless tags.include?(bag_info_txt_file)
73
+ tags << bagit_txt_file unless tags.include?(bagit_txt_file)
58
74
 
59
75
  # manifest each (non tagmanifest) tag file for each algorithm
60
- (tag_files - tagmanifest_files).each do |f|
61
- data = open(f) { |io| io.read }
62
- rel_path = File.basename f
76
+ tags.each do |f|
77
+ add_tag_file(Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s)
78
+ end
79
+ tag_files
80
+ end
63
81
 
64
- # sha1
65
- sha1 = Digest::SHA1.hexdigest data
66
- open(tagmanifest_file(:sha1), 'a') { |io| io.puts "#{sha1} #{rel_path}" }
82
+ def add_tag_file(path, src_path=nil)
67
83
 
68
- # md5
69
- md5 = Digest::MD5.hexdigest data
70
- open(tagmanifest_file(:md5), 'a') { |io| io.puts "#{md5} #{rel_path}" }
84
+ f = File.join(@bag_dir, path)
85
+ raise "Tag file already in manifest: #{path}" if tag_files.include?(f)
86
+
87
+ if not File.exist? f
88
+ FileUtils::mkdir_p File.dirname(f)
89
+
90
+ # write file
91
+ if src_path.nil?
92
+ open(f, 'w') { |io| yield io }
93
+ else
94
+ FileUtils::cp src_path, f
95
+ end
96
+ # this adds the manifest and bag info files on initial creation
97
+ # it must only run when the manifest doesn't already exist or it will
98
+ # infinitely recall add_tag_file. Better way of doing this?
99
+ tagmanifest!
100
+ elsif not src_path.nil?
101
+ raise "Tag file already exists, will not overwrite: #{path}\n Use add_tag_file(path) to add an existing tag file."
71
102
  end
72
103
 
104
+ data = open(f) { |io| io.read }
105
+ rel_path = Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s
106
+
107
+ # sha1
108
+ sha1 = Digest::SHA1.hexdigest data
109
+ open(tagmanifest_file(:sha1), 'a') { |io| io.puts "#{sha1} #{rel_path}" }
110
+
111
+ # md5
112
+ md5 = Digest::MD5.hexdigest data
113
+ open(tagmanifest_file(:md5), 'a') { |io| io.puts "#{md5} #{rel_path}" }
114
+ tag_files
115
+ end
116
+
117
+ def remove_tag_file(path)
118
+ tags = tag_files
119
+ raise "Tag file is not in manifest: #{path}" unless tags.include?(File.join(@bag_dir, path))
120
+ tags.delete(File.join(@bag_dir, path))
121
+ tagmanifest!(tags)
122
+ end
123
+
124
+ def delete_tag_file(path)
125
+ filepath = File.join(@bag_dir, path)
126
+ raise "Tag file does not exist: #{path}" unless File.exist? filepath
127
+ #TODO: delete tags even when they are not in the manifest
128
+ remove_tag_file(path)
129
+ FileUtils::rm filepath
73
130
  end
74
131
 
75
132
  # Returns true if all present manifested files' message digests
data/lib/bagit/valid.rb CHANGED
@@ -21,18 +21,18 @@ module BagIt
21
21
  empty_manifests.each do |file|
22
22
  errors.add :completeness, "#{file} is manifested but not present"
23
23
  end
24
-
24
+ tag_empty_manifests.each do |file|
25
+ errors.add :completeness, "#{file} is a manifested tag but not present"
26
+ end
27
+
25
28
  errors.on(:completeness).nil?
26
29
  end
27
30
 
28
31
  # Return true if all manifested files message digests match.
29
32
  def consistent?
30
-
31
- manifest_files.each do |mf|
32
-
33
+ (manifest_files|tagmanifest_files).each do |mf|
33
34
  # get the algorithm implementation
34
- File.basename(mf) =~ /^manifest-(.+).txt$/
35
-
35
+ File.basename(mf) =~ /manifest-(.+).txt$/
36
36
  algo = case $1
37
37
  when /sha1/i
38
38
  Digest::SHA1
@@ -41,31 +41,30 @@ module BagIt
41
41
  else
42
42
  :unknown
43
43
  end
44
-
45
44
  # Check every file in the manifest
46
45
  open(mf) do |io|
47
-
48
46
  io.each_line do |line|
49
47
  expected, path = line.chomp.split /\s+/, 2
50
48
  file = File.join(bag_dir, path)
51
-
52
49
  if File.exist? file
53
- actual = open(file) { |fio| algo.hexdigest(fio.read) }
54
-
50
+ actual = algo.file(file).hexdigest
55
51
  if expected != actual
56
52
  errors.add :consistency, "expected #{file} to have #{algo}: #{expected}, actual is #{actual}"
57
53
  end
58
-
59
54
  end
60
55
  end
61
-
62
56
  end
63
-
64
57
  end
58
+
65
59
 
66
60
  errors.on(:consistency).nil?
67
61
  end
68
62
 
63
+ # Checks for validity against Payload-Oxum
64
+ def valid_oxum?
65
+ bag_info["Payload-Oxum"] == payload_oxum
66
+ end
67
+
69
68
  protected
70
69
 
71
70
  # Returns all files in the instance that are not manifested
@@ -79,7 +78,16 @@ module BagIt
79
78
  bfs = bag_files
80
79
  manifested_files.reject { |f| bfs.member? File.join(bag_dir, f) }
81
80
  end
82
-
81
+ # Returns a list of tag manifested files that are not present
82
+ def tag_empty_manifests
83
+ empty = []
84
+ tag_manifested_files.each do |f|
85
+ if !File.exists?(File.join(bag_dir,f))
86
+ empty.push f
87
+ end
88
+ end
89
+ return empty
90
+ end
83
91
  # Returns a list of all files present in the manifest files
84
92
  def manifested_files
85
93
 
@@ -98,6 +106,18 @@ module BagIt
98
106
  end
99
107
 
100
108
  end
109
+ # Returns a list of all files in the tag manifest files
110
+ def tag_manifested_files
111
+ tagmanifest_files.inject([]) do |acc, mf|
112
+ files = open(mf) do |io|
113
+ io.readlines.map do |line|
114
+ digest, path = line.chomp.split /\s+/, 2
115
+ path
116
+ end
117
+ end
118
+ (acc+files).uniq
119
+ end
120
+ end
101
121
 
102
122
  end
103
123
 
metadata CHANGED
@@ -1,75 +1,69 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bagit
3
- version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.2.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0.pre
6
5
  platform: ruby
7
- authors:
8
- - Francesco Lazzarino
6
+ authors:
7
+ - Tom Johnson, Francesco Lazzarino
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
-
13
- date: 2011-06-08 00:00:00 Z
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
11
+ date: 2013-04-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
16
14
  name: validatable
17
- prerelease: false
18
- requirement: &id001 !ruby/object:Gem::Requirement
19
- none: false
20
- requirements:
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
21
17
  - - ~>
22
- - !ruby/object:Gem::Version
23
- version: "1.6"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
24
20
  type: :runtime
25
- version_requirements: *id001
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
26
27
  description: Ruby Library and Command Line tools for bagit
27
- email: flazzarino@gmail.com
28
+ email: johnson.tom@gmail.com
28
29
  executables: []
29
-
30
30
  extensions: []
31
-
32
31
  extra_rdoc_files: []
33
-
34
- files:
32
+ files:
35
33
  - Rakefile
36
34
  - README.md
37
35
  - LICENSE.txt
38
36
  - bagit.gemspec
37
+ - lib/bagit.rb
39
38
  - lib/bagit/bag.rb
40
- - lib/bagit/fetch.rb
41
- - lib/bagit/file.rb
42
- - lib/bagit/info.rb
43
- - lib/bagit/manifest.rb
44
39
  - lib/bagit/string.rb
40
+ - lib/bagit/manifest.rb
41
+ - lib/bagit/info.rb
42
+ - lib/bagit/file.rb
45
43
  - lib/bagit/valid.rb
46
- - lib/bagit.rb
47
- homepage: http://github.com/flazz/bagit
44
+ - lib/bagit/fetch.rb
45
+ homepage: http://github.com/tipr/bagit
48
46
  licenses: []
49
-
47
+ metadata: {}
50
48
  post_install_message:
51
49
  rdoc_options: []
52
-
53
- require_paths:
50
+ require_paths:
54
51
  - lib
55
- required_ruby_version: !ruby/object:Gem::Requirement
56
- none: false
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- version: "0"
61
- required_rubygems_version: !ruby/object:Gem::Requirement
62
- none: false
63
- requirements:
64
- - - ">="
65
- - !ruby/object:Gem::Version
66
- version: "0"
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>'
60
+ - !ruby/object:Gem::Version
61
+ version: 1.3.1
67
62
  requirements: []
68
-
69
63
  rubyforge_project:
70
- rubygems_version: 1.8.3
64
+ rubygems_version: 2.0.0
71
65
  signing_key:
72
- specification_version: 3
66
+ specification_version: 4
73
67
  summary: BagIt package generation and validation
74
68
  test_files: []
75
-
69
+ has_rdoc: