bagit 0.2.0 → 0.3.0.pre

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NjFkNDZjYTUzYmMyOGZlMWZiOGM5YTJiMGU3ODM1ODRkNzYzZTY1OA==
5
+ data.tar.gz: !binary |-
6
+ NzU1ODcwNmZhZTI4NmE1YWJmMjNhN2FiNmYwNTFlMzhmMDFjZjY5OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZWJjMjYyYjk5NTMzNDgzMmYyNDE3NjM2YWRjNWUxMzk2NzM4NzAwMWVkY2Qy
10
+ ODRiODllOGRkZmE5YTYxN2Q5NGZjNjdjZTMwZDdjNGJlMTY5NTFkOWVhMGMy
11
+ MGJhYjFlNzAwNWQ2NmI5ZDk5ZDUxN2VkNDQxNjY0YTNmYjI1YTc=
12
+ data.tar.gz: !binary |-
13
+ YWFhNTcwZWU3OGU4Y2NiNzQ0NjYzM2ZjZmZjMmVlZDRiNTk2YjBlOWE4ZTkx
14
+ NTM0Y2MxMmI1ZDhkMzYxOWY2OTNkOTQ1NGJmYWIwY2YwNzYwZjNiNmE3YWM4
15
+ YmYwODE4Yzc5MWVmMjhmZDM4YzY4OWZlZmQ3Y2RiODdhYjA4Mjc=
data/README.md CHANGED
@@ -1,7 +1,9 @@
1
1
  BagIt (for ruby)
2
2
  ================
3
3
 
4
- Based on the [BagItspec v0.96](https://confluence.ucop.edu/display/Curation/BagIt).
4
+ [![Build Status](https://secure.travis-ci.org/no-reply/bagit.png)](http://travis-ci.org/no-reply/bagit)
5
+
6
+ Based on the [BagIt spec v0.97](https://confluence.ucop.edu/display/Curation/BagIt).
5
7
 
6
8
  Supported Features:
7
9
  -------------------
@@ -48,12 +50,13 @@ TODO
48
50
  * command line tools for common tasks
49
51
  * better holy bag (fetch.txt) generation
50
52
  * better error reporting.
51
- * poor mans' checksum
52
53
 
53
54
  ---
54
55
 
55
56
  Copyright © 2009, [Francesco Lazzarino](mailto:flazzarino@gmail.com).
56
57
 
58
+ Current Maintainer: [Tom Johnson](mailto:thomas.johnson@oregonstate.edu).
59
+
57
60
  Sponsored by [Florida Center for Library Automation](http://www.fcla.edu).
58
61
 
59
62
  See LICENSE.txt for terms.
data/bagit.gemspec CHANGED
@@ -1,11 +1,12 @@
1
+
1
2
  BAGIT_SPEC = Gem::Specification.new do |spec|
2
3
  spec.name = "bagit"
3
- spec.version = '0.2.0'
4
+ spec.version = '0.3.0.pre'
4
5
  spec.summary = "BagIt package generation and validation"
5
6
  spec.description = "Ruby Library and Command Line tools for bagit"
6
- spec.email = "flazzarino@gmail.com"
7
- spec.homepage = 'http://github.com/flazz/bagit'
8
- spec.authors = ["Francesco Lazzarino"]
7
+ spec.email = "johnson.tom@gmail.com"
8
+ spec.homepage = 'http://github.com/tipr/bagit'
9
+ spec.authors = ["Tom Johnson, Francesco Lazzarino"]
9
10
 
10
11
  spec.add_dependency 'validatable', '~> 1.6'
11
12
 
data/lib/bagit.rb CHANGED
@@ -4,8 +4,10 @@
4
4
  # http://www.cdlib.org/inside/diglib/bagit/bagitspec.html
5
5
 
6
6
  require 'bagit/bag'
7
+ require 'fileutils'
8
+ require 'date'
7
9
 
8
10
  module BagIt
9
11
  # The version of the BagIt specification the code is conforming to.
10
- SPEC_VERSION = '0.96'
12
+ SPEC_VERSION = '0.97'
11
13
  end
data/lib/bagit/bag.rb CHANGED
@@ -17,7 +17,7 @@ module BagIt
17
17
  include Fetch # fetch related functionality
18
18
 
19
19
  # Make a new Bag based at path
20
- def initialize(path)
20
+ def initialize(path, info={})
21
21
  @bag_dir = path
22
22
 
23
23
  # make the dir structure if it doesn't exist
@@ -30,7 +30,7 @@ module BagIt
30
30
  end
31
31
 
32
32
  unless File.exist? bag_info_txt_file
33
- write_bag_info('Bag-Software-Agent' => "BagIt Ruby Gem (http://bagit.rubyforge.org)")
33
+ write_bag_info(info)
34
34
  end
35
35
  end
36
36
 
@@ -46,7 +46,13 @@ module BagIt
46
46
 
47
47
  # Return the paths to each tag file relative to bag_dir
48
48
  def tag_files
49
- Dir[File.join(@bag_dir, '*')].select { |f| File.file? f }
49
+ files = []
50
+ if tagmanifest_files != []
51
+ File.open(tagmanifest_files.first) do |f|
52
+ f.each_line{|line| files << File.join(@bag_dir, line.split(' ')[1])}
53
+ end
54
+ end
55
+ files
50
56
  end
51
57
 
52
58
  # Add a bag file
@@ -56,10 +62,12 @@ module BagIt
56
62
  FileUtils::mkdir_p File.dirname(path)
57
63
 
58
64
  if src_path.nil?
59
- open(path, 'w') { |io| yield io }
65
+ f = open(path, 'w') { |io| yield io }
60
66
  else
61
- FileUtils::cp src_path, path
67
+ f = FileUtils::cp src_path, path
62
68
  end
69
+ write_bag_info
70
+ return f
63
71
  end
64
72
 
65
73
  # Remove a bag file
@@ -85,6 +93,16 @@ module BagIt
85
93
  def paths
86
94
  self.bag_files.collect { |f| f.sub(data_dir + '/', '') }
87
95
  end
96
+
97
+ # Get the Oxum for the payload files
98
+ def payload_oxum
99
+ bytes = 0
100
+ bag_files.each do |f|
101
+ #TODO: filesystem quirks? Are we getting the stream size or the size on disk?
102
+ bytes += File.size(f)
103
+ end
104
+ return bytes.to_s + '.' + bag_files.count.to_s
105
+ end
88
106
 
89
107
  # Remove all empty directory trees from the bag
90
108
  def gc!
data/lib/bagit/info.rb CHANGED
@@ -4,15 +4,41 @@ module BagIt
4
4
 
5
5
  module Info
6
6
 
7
+ @@bag_info_headers = {
8
+ :agent => 'Bag-Software-Agent',
9
+ :org => 'Source-Organization',
10
+ :org_addr => 'Organization-Address',
11
+ :contact_name => 'Contact-Name',
12
+ :contact_phone => 'Contact-Phone',
13
+ :contact_email => 'Contact-Email',
14
+ :ext_desc => 'External-Description',
15
+ :ext_id => 'External-Identifier',
16
+ :size => 'Bag-Size',
17
+ :group_id => 'Bag-Group-Identifier',
18
+ :group_count => 'Bag-Count',
19
+ :sender_id => 'Internal-Sender-Identifier',
20
+ :int_desc => 'Internal-Sender-Description',
21
+ :date => 'Bagging-Date',
22
+ :oxum => 'Payload-Oxum'
23
+ }
24
+
7
25
  def bag_info_txt_file
8
26
  File.join bag_dir, 'bag-info.txt'
9
27
  end
10
28
 
11
29
  def bag_info
12
- read_info_file bag_info_txt_file
30
+ begin
31
+ read_info_file bag_info_txt_file
32
+ rescue
33
+ {}
34
+ end
13
35
  end
14
36
 
15
- def write_bag_info(hash)
37
+ def write_bag_info(hash={})
38
+ hash = bag_info.merge(hash)
39
+ hash[@@bag_info_headers[:agent]] = "BagIt Ruby Gem (http://bagit.rubyforge.org)" if hash[@@bag_info_headers[:agent]].nil?
40
+ hash[@@bag_info_headers[:date]] = Date.today.strftime('%Y-%m-%d') if hash[@@bag_info_headers[:date]].nil?
41
+ hash[@@bag_info_headers[:oxum]] = payload_oxum
16
42
  write_info_file bag_info_txt_file, hash
17
43
  end
18
44
 
@@ -28,6 +54,11 @@ module BagIt
28
54
  write_info_file bagit_txt_file, hash
29
55
  end
30
56
 
57
+ def update_bag_date
58
+ hash["Bagging-Date"] = Date.today.strftime('%Y-%m-%d')
59
+ write_bag_info(hash)
60
+ end
61
+
31
62
  protected
32
63
 
33
64
  def read_info_file(file)
@@ -9,7 +9,10 @@ module BagIt
9
9
 
10
10
  # All tag files that are bag manifest files (manifest-[algorithm].txt)
11
11
  def manifest_files
12
- tag_files.select { |f| File.basename(f) =~ /^manifest-.*.txt/ }
12
+ files = Dir[File.join(@bag_dir, '*')].select { |f|
13
+ File.file? f and File.basename(f) =~ /^manifest-.*.txt/
14
+ }
15
+ files
13
16
  end
14
17
 
15
18
  # A path to a manifest file of the specified algorithm
@@ -26,14 +29,13 @@ module BagIt
26
29
  # manifest each tag file for each algorithm
27
30
  bag_files.each do |f|
28
31
  rel_path = Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s
29
- data = open(f) { |io| io.read }
30
32
 
31
33
  # sha1
32
- sha1 = Digest::SHA1.hexdigest data
34
+ sha1 = Digest::SHA1.file f
33
35
  open(manifest_file(:sha1), 'a') { |io| io.puts "#{sha1} #{rel_path}" }
34
36
 
35
37
  # md5
36
- md5 = Digest::MD5.hexdigest data
38
+ md5 = Digest::MD5.file f
37
39
  open(manifest_file(:md5), 'a') { |io| io.puts "#{md5} #{rel_path}" }
38
40
  end
39
41
 
@@ -41,7 +43,10 @@ module BagIt
41
43
 
42
44
  # All tag files that are bag manifest files (tagmanifest-[algorithm].txt)
43
45
  def tagmanifest_files
44
- tag_files.select { |f| File.basename(f) =~ /^tagmanifest-.*.txt/ }
46
+ files = Dir[File.join(@bag_dir, '*')].select { |f|
47
+ File.file? f and File.basename(f) =~ /^tagmanifest-.*.txt/
48
+ }
49
+ files
45
50
  end
46
51
 
47
52
  # A path to a tagmanifest file of the specified algorithm
@@ -51,25 +56,77 @@ module BagIt
51
56
 
52
57
  # Generate manifest files for all the tag files (except the tag
53
58
  # manifest files)
54
- def tagmanifest!
59
+ def tagmanifest!(tags=nil)
60
+
61
+ tags = tag_files if tags == nil
55
62
 
56
63
  # nuke all the existing tagmanifest files
57
64
  tagmanifest_files.each { |f| FileUtils::rm f }
65
+
66
+ # ensure presence of manifest files
67
+ manifest_files.each do |manifest|
68
+ tags << manifest unless tags.include?(manifest)
69
+ end
70
+
71
+ # ensure presence of bag info files
72
+ tags << bag_info_txt_file unless tags.include?(bag_info_txt_file)
73
+ tags << bagit_txt_file unless tags.include?(bagit_txt_file)
58
74
 
59
75
  # manifest each (non tagmanifest) tag file for each algorithm
60
- (tag_files - tagmanifest_files).each do |f|
61
- data = open(f) { |io| io.read }
62
- rel_path = File.basename f
76
+ tags.each do |f|
77
+ add_tag_file(Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s)
78
+ end
79
+ tag_files
80
+ end
63
81
 
64
- # sha1
65
- sha1 = Digest::SHA1.hexdigest data
66
- open(tagmanifest_file(:sha1), 'a') { |io| io.puts "#{sha1} #{rel_path}" }
82
+ def add_tag_file(path, src_path=nil)
67
83
 
68
- # md5
69
- md5 = Digest::MD5.hexdigest data
70
- open(tagmanifest_file(:md5), 'a') { |io| io.puts "#{md5} #{rel_path}" }
84
+ f = File.join(@bag_dir, path)
85
+ raise "Tag file already in manifest: #{path}" if tag_files.include?(f)
86
+
87
+ if not File.exist? f
88
+ FileUtils::mkdir_p File.dirname(f)
89
+
90
+ # write file
91
+ if src_path.nil?
92
+ open(f, 'w') { |io| yield io }
93
+ else
94
+ FileUtils::cp src_path, f
95
+ end
96
+ # this adds the manifest and bag info files on initial creation
97
+ # it must only run when the manifest doesn't already exist or it will
98
+ # recurse infinitely into add_tag_file. Better way of doing this?
99
+ tagmanifest!
100
+ elsif not src_path.nil?
101
+ raise "Tag file already exists, will not overwrite: #{path}\n Use add_tag_file(path) to add an existing tag file."
71
102
  end
72
103
 
104
+ data = open(f) { |io| io.read }
105
+ rel_path = Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s
106
+
107
+ # sha1
108
+ sha1 = Digest::SHA1.hexdigest data
109
+ open(tagmanifest_file(:sha1), 'a') { |io| io.puts "#{sha1} #{rel_path}" }
110
+
111
+ # md5
112
+ md5 = Digest::MD5.hexdigest data
113
+ open(tagmanifest_file(:md5), 'a') { |io| io.puts "#{md5} #{rel_path}" }
114
+ tag_files
115
+ end
116
+
117
+ def remove_tag_file(path)
118
+ tags = tag_files
119
+ raise "Tag file is not in manifest: #{path}" unless tags.include?(File.join(@bag_dir, path))
120
+ tags.delete(File.join(@bag_dir, path))
121
+ tagmanifest!(tags)
122
+ end
123
+
124
+ def delete_tag_file(path)
125
+ filepath = File.join(@bag_dir, path)
126
+ raise "Tag file does not exist: #{path}" unless File.exist? filepath
127
+ #TODO: delete tags even when they are not in the manifest
128
+ remove_tag_file(path)
129
+ FileUtils::rm filepath
73
130
  end
74
131
 
75
132
  # Returns true if all present manifested files' message digests
data/lib/bagit/valid.rb CHANGED
@@ -21,18 +21,18 @@ module BagIt
21
21
  empty_manifests.each do |file|
22
22
  errors.add :completeness, "#{file} is manifested but not present"
23
23
  end
24
-
24
+ tag_empty_manifests.each do |file|
25
+ errors.add :completeness, "#{file} is a manifested tag but not present"
26
+ end
27
+
25
28
  errors.on(:completeness).nil?
26
29
  end
27
30
 
28
31
  # Return true if all manifested files message digests match.
29
32
  def consistent?
30
-
31
- manifest_files.each do |mf|
32
-
33
+ (manifest_files|tagmanifest_files).each do |mf|
33
34
  # get the algorithm implementation
34
- File.basename(mf) =~ /^manifest-(.+).txt$/
35
-
35
+ File.basename(mf) =~ /manifest-(.+).txt$/
36
36
  algo = case $1
37
37
  when /sha1/i
38
38
  Digest::SHA1
@@ -41,31 +41,30 @@ module BagIt
41
41
  else
42
42
  :unknown
43
43
  end
44
-
45
44
  # Check every file in the manifest
46
45
  open(mf) do |io|
47
-
48
46
  io.each_line do |line|
49
47
  expected, path = line.chomp.split /\s+/, 2
50
48
  file = File.join(bag_dir, path)
51
-
52
49
  if File.exist? file
53
- actual = open(file) { |fio| algo.hexdigest(fio.read) }
54
-
50
+ actual = algo.file(file).hexdigest
55
51
  if expected != actual
56
52
  errors.add :consistency, "expected #{file} to have #{algo}: #{expected}, actual is #{actual}"
57
53
  end
58
-
59
54
  end
60
55
  end
61
-
62
56
  end
63
-
64
57
  end
58
+
65
59
 
66
60
  errors.on(:consistency).nil?
67
61
  end
68
62
 
63
+ # Checks for validity against Payload-Oxum
64
+ def valid_oxum?
65
+ bag_info["Payload-Oxum"] == payload_oxum
66
+ end
67
+
69
68
  protected
70
69
 
71
70
  # Returns all files in the instance that are not manifested
@@ -79,7 +78,16 @@ module BagIt
79
78
  bfs = bag_files
80
79
  manifested_files.reject { |f| bfs.member? File.join(bag_dir, f) }
81
80
  end
82
-
81
+ # Returns a list of tag manifested files that are not present
82
+ def tag_empty_manifests
83
+ empty = []
84
+ tag_manifested_files.each do |f|
85
+ if !File.exists?(File.join(bag_dir,f))
86
+ empty.push f
87
+ end
88
+ end
89
+ return empty
90
+ end
83
91
  # Returns a list of all files present in the manifest files
84
92
  def manifested_files
85
93
 
@@ -98,6 +106,18 @@ module BagIt
98
106
  end
99
107
 
100
108
  end
109
+ # Returns a list of all files in the tag manifest files
110
+ def tag_manifested_files
111
+ tagmanifest_files.inject([]) do |acc, mf|
112
+ files = open(mf) do |io|
113
+ io.readlines.map do |line|
114
+ digest, path = line.chomp.split /\s+/, 2
115
+ path
116
+ end
117
+ end
118
+ (acc+files).uniq
119
+ end
120
+ end
101
121
 
102
122
  end
103
123
 
metadata CHANGED
@@ -1,75 +1,69 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bagit
3
- version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.2.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0.pre
6
5
  platform: ruby
7
- authors:
8
- - Francesco Lazzarino
6
+ authors:
7
+ - Tom Johnson, Francesco Lazzarino
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
-
13
- date: 2011-06-08 00:00:00 Z
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
11
+ date: 2013-04-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
16
14
  name: validatable
17
- prerelease: false
18
- requirement: &id001 !ruby/object:Gem::Requirement
19
- none: false
20
- requirements:
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
21
17
  - - ~>
22
- - !ruby/object:Gem::Version
23
- version: "1.6"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
24
20
  type: :runtime
25
- version_requirements: *id001
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
26
27
  description: Ruby Library and Command Line tools for bagit
27
- email: flazzarino@gmail.com
28
+ email: johnson.tom@gmail.com
28
29
  executables: []
29
-
30
30
  extensions: []
31
-
32
31
  extra_rdoc_files: []
33
-
34
- files:
32
+ files:
35
33
  - Rakefile
36
34
  - README.md
37
35
  - LICENSE.txt
38
36
  - bagit.gemspec
37
+ - lib/bagit.rb
39
38
  - lib/bagit/bag.rb
40
- - lib/bagit/fetch.rb
41
- - lib/bagit/file.rb
42
- - lib/bagit/info.rb
43
- - lib/bagit/manifest.rb
44
39
  - lib/bagit/string.rb
40
+ - lib/bagit/manifest.rb
41
+ - lib/bagit/info.rb
42
+ - lib/bagit/file.rb
45
43
  - lib/bagit/valid.rb
46
- - lib/bagit.rb
47
- homepage: http://github.com/flazz/bagit
44
+ - lib/bagit/fetch.rb
45
+ homepage: http://github.com/tipr/bagit
48
46
  licenses: []
49
-
47
+ metadata: {}
50
48
  post_install_message:
51
49
  rdoc_options: []
52
-
53
- require_paths:
50
+ require_paths:
54
51
  - lib
55
- required_ruby_version: !ruby/object:Gem::Requirement
56
- none: false
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- version: "0"
61
- required_rubygems_version: !ruby/object:Gem::Requirement
62
- none: false
63
- requirements:
64
- - - ">="
65
- - !ruby/object:Gem::Version
66
- version: "0"
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>'
60
+ - !ruby/object:Gem::Version
61
+ version: 1.3.1
67
62
  requirements: []
68
-
69
63
  rubyforge_project:
70
- rubygems_version: 1.8.3
64
+ rubygems_version: 2.0.0
71
65
  signing_key:
72
- specification_version: 3
66
+ specification_version: 4
73
67
  summary: BagIt package generation and validation
74
68
  test_files: []
75
-
69
+ has_rdoc: