bagit 0.3.5 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +2 -1
- data/.travis.yml +1 -2
- data/Gemfile +4 -2
- data/README.md +1 -1
- data/Rakefile +11 -8
- data/bagit.gemspec +25 -21
- data/bin/bagit +59 -63
- data/lib/bagit.rb +8 -6
- data/lib/bagit/bag.rb +43 -47
- data/lib/bagit/fetch.rb +23 -27
- data/lib/bagit/file.rb +11 -14
- data/lib/bagit/info.rb +39 -51
- data/lib/bagit/manifest.rb +72 -49
- data/lib/bagit/string.rb +6 -6
- data/lib/bagit/valid.rb +51 -57
- data/lib/bagit/version.rb +3 -1
- data/spec/bagit_spec.rb +59 -54
- data/spec/fetch_spec.rb +33 -38
- data/spec/manifest_spec.rb +107 -111
- data/spec/spec_helper.rb +12 -12
- data/spec/tag_info_spec.rb +101 -108
- data/spec/tag_spec.rb +47 -49
- data/spec/util/bagit_matchers.rb +5 -14
- data/spec/validation_spec.rb +108 -110
- metadata +50 -9
data/lib/bagit/fetch.rb
CHANGED
@@ -1,50 +1,46 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "open-uri"
|
2
4
|
|
3
5
|
module BagIt
  # Mixin that maintains a bag's fetch.txt and downloads the files it lists.
  #
  # The including object must provide @bag_dir, manifest_file(algorithm) and
  # add_file(path) { |io| ... }.
  module Fetch
    # Path of the bag's fetch.txt.
    def fetch_txt_file
      File.join @bag_dir, "fetch.txt"
    end

    # Registers a remote payload file without downloading it: appends a line
    # to fetch.txt and the given checksums to the sha1 and md5 manifests.
    #
    # size may be nil, in which case "-" is recorded.
    def add_remote_file(url, path, size, sha1, md5)
      payload_path = File.join "data", path
      File.open(fetch_txt_file, "a") { |io| io.puts "#{url} #{size || "-"} #{path}" }
      File.open(manifest_file("sha1"), "a") { |io| io.puts "#{sha1} #{payload_path}" }
      File.open(manifest_file("md5"), "a") { |io| io.puts "#{md5} #{payload_path}" }
    end

    # Fetch all remote files listed in fetch.txt, then archive fetch.txt as
    # fetch.txt.0 (shifting older archives up by one).
    def fetch!
      File.readlines(fetch_txt_file).each do |line|
        url, _length, path = line.chomp.split(/\s+/, 3)
        next if url.nil? || path.nil? # blank or malformed line

        add_file(path) do |file_io|
          # Copy the response body (not the IO object's #to_s) and close the
          # remote handle when done.
          URI.open(url) { |remote| IO.copy_stream(remote, file_io) }
        end
      end

      rename_old_fetch_txt(fetch_txt_file)
      move_current_fetch_txt(fetch_txt_file)
    end

    # Renames every fetch.txt.N next to fetch_txt_file to fetch.txt.(N + 1).
    def rename_old_fetch_txt(fetch_txt_file)
      generations = Dir["#{fetch_txt_file}.?*"].map { |f|
        match = /fetch\.txt\.(\d+)\z/.match(f)
        [match[1].to_i, f] if match
      }.compact

      # Highest generation first, compared numerically, so that no rename
      # lands on a file that has not been moved out of the way yet.
      generations.sort_by(&:first).reverse_each do |number, f|
        FileUtils.mv f, File.join(File.dirname(f), "fetch.txt.#{number + 1}")
      end
    end

    # Archives the current fetch.txt as fetch.txt.0.
    def move_current_fetch_txt(fetch_txt_file)
      FileUtils.mv fetch_txt_file, "#{fetch_txt_file}.0"
    end
  end
end
|
data/lib/bagit/file.rb
CHANGED
@@ -1,19 +1,16 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
class File
  # Recursively removes empty directories at and below file_name.
  #
  # Does nothing when file_name is not a directory. Symbolic links are never
  # followed or removed: the link target may live outside the tree being
  # cleaned, and Dir.delete cannot remove a link.
  def self.clean(file_name)
    return if File.symlink?(file_name) || !File.directory?(file_name)

    # Clean children first so a directory holding only empty directories
    # becomes empty itself. Non-directories return immediately.
    Dir.children(file_name).each { |entry| clean File.join(file_name, entry) }

    # if it's empty then delete it
    Dir.delete file_name if Dir.empty?(file_name)
  end
end
|
data/lib/bagit/info.rb
CHANGED
@@ -1,49 +1,47 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
require "set"
|
4
4
|
|
5
|
+
module BagIt
|
5
6
|
module Info
|
6
|
-
|
7
7
|
@@bag_info_headers = {
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
16
|
-
:
|
17
|
-
:
|
18
|
-
:
|
19
|
-
:
|
20
|
-
:
|
21
|
-
:
|
22
|
-
:
|
8
|
+
agent: "Bag-Software-Agent",
|
9
|
+
org: "Source-Organization",
|
10
|
+
org_addr: "Organization-Address",
|
11
|
+
contact_name: "Contact-Name",
|
12
|
+
contact_phone: "Contact-Phone",
|
13
|
+
contact_email: "Contact-Email",
|
14
|
+
ext_desc: "External-Description",
|
15
|
+
ext_id: "External-Identifier",
|
16
|
+
size: "Bag-Size",
|
17
|
+
group_id: "Bag-Group-Identifier",
|
18
|
+
group_count: "Bag-Count",
|
19
|
+
sender_id: "Internal-Sender-Identifier",
|
20
|
+
int_desc: "Internal-Sender-Description",
|
21
|
+
date: "Bagging-Date",
|
22
|
+
oxum: "Payload-Oxum"
|
23
23
|
}
|
24
24
|
|
25
25
|
# Path of bag-info.txt inside the bag directory.
def bag_info_txt_file
  File.join(bag_dir, "bag-info.txt")
end
|
28
28
|
|
29
29
|
# Contents of bag-info.txt as returned by read_info_file, or an empty Hash
# when reading raises any StandardError.
def bag_info
  begin
    read_info_file(bag_info_txt_file)
  rescue StandardError
    {}
  end
end
|
36
34
|
|
37
|
-
# Merges hash over the current bag-info.txt contents and writes the result
# back. The agent and bagging-date entries are filled in only when absent;
# the payload oxum entry is always replaced with payload_oxum.
def write_bag_info(hash = {})
  labels = @@bag_info_headers
  hash = bag_info.merge(hash)

  if hash[labels[:agent]].nil?
    hash[labels[:agent]] = "BagIt Ruby Gem (https://github.com/tipr/bagit)"
  end
  if hash[labels[:date]].nil?
    hash[labels[:date]] = Date.today.strftime("%Y-%m-%d")
  end
  hash[labels[:oxum]] = payload_oxum

  write_info_file(bag_info_txt_file, hash)
end
|
44
42
|
|
45
43
|
# Path of bagit.txt inside the bag directory.
def bagit_txt_file
  File.join(bag_dir, "bagit.txt")
end
|
48
46
|
|
49
47
|
def bagit
|
@@ -55,54 +53,44 @@ module BagIt
|
|
55
53
|
end
|
56
54
|
|
57
55
|
# Sets Bagging-Date in bag-info.txt to today's date (YYYY-MM-DD).
#
# Only the date entry is passed on; write_bag_info is relied upon to combine
# it with the entries already stored in bag-info.txt.
def update_bag_date
  write_bag_info("Bagging-Date" => Date.today.strftime("%Y-%m-%d"))
end
|
61
59
|
|
62
60
|
protected
|
63
61
|
|
64
62
|
# Parses a "Label: value" tag file into a Hash of label => value.
#
# A new entry starts at every line that begins with a non-whitespace
# character; lines starting with whitespace stay attached to the previous
# entry, so their newline and indentation remain part of the value. Each
# entry is split at its first colon only. When a label occurs more than once
# the last value wins.
def read_info_file(file)
  entries = File.read(file).split(/\n(?=[^\s])/)

  entries.each_with_object({}) do |entry, info|
    name, value = entry.chomp.split(/\s*:\s*/, 2)
    next if name.nil? # blank entry, e.g. from a leading empty line

    info[name] = value
  end
end
|
78
72
|
|
79
73
|
# Writes hash to file as "Label: value" lines, replacing the file's contents.
#
# Raises RuntimeError, before the file is touched, when two labels differ
# only by case. Runs of whitespace inside a value are collapsed to a single
# space. Entries longer than 79 characters are passed through String#wrap and
# String#indent, which this method expects to be defined elsewhere.
def write_info_file(file, hash)
  # Compare whole labels case-insensitively; labels are never treated as
  # patterns, so substrings and regexp metacharacters are harmless.
  dups = hash.keys
    .group_by { |label| label.to_s.downcase }
    .values
    .select { |labels| labels.size > 1 }
    .flatten

  raise "Multiple labels (#{dups.join ", "}) in #{file}" unless dups.empty?

  File.open(file, "w") do |io|
    hash.each do |name, value|
      # to_s lets non-String values (numbers, nil) be written as well
      simple_entry = "#{name}: #{value.to_s.gsub(/\s+/, " ")}"

      entry = if simple_entry.length > 79
        simple_entry.wrap(77).indent(2)
      else
        simple_entry
      end

      io.puts entry
    end
  end
end
|
105
|
-
|
106
95
|
end
|
107
|
-
|
108
96
|
end
|
data/lib/bagit/manifest.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
|
2
|
-
require 'digest/sha1'
|
3
|
-
require 'digest/md5'
|
1
|
+
# frozen_string_literal: true
|
4
2
|
|
5
|
-
|
3
|
+
require "pathname"
|
4
|
+
require "digest/sha1"
|
5
|
+
require "digest/md5"
|
6
6
|
|
7
|
+
module BagIt
|
7
8
|
# Requires response to bag_dir, tag_files, bag_files
|
8
9
|
module Manifest
|
9
10
|
# Returns a copy of s with carriage returns and line feeds percent-encoded
# ("%0D" / "%0A") so the path fits on a single manifest line. No other
# character is changed.
def encode_filename(s)
  s.gsub(/[\r\n]/, "\r" => "%0D", "\n" => "%0A")
end
|
14
15
|
|
15
|
-
|
16
16
|
# All tag files that are bag manifest files (manifest-[algorithm].txt).
#
# Only regular files directly inside @bag_dir are considered. The whole file
# name must match, with a literal ".txt" extension.
def manifest_files
  Dir[File.join(@bag_dir, "*")].select do |f|
    File.file?(f) && File.basename(f).match?(/\Amanifest-.*\.txt\z/)
  end
end
|
@@ -27,30 +27,59 @@ module BagIt
|
|
27
27
|
end
|
28
28
|
|
29
29
|
# Generate manifest files for all the bag files.
#
# algo selects the checksum to record: "sha1", "md5", "sha256", "sha512", or
# "default" (passed through to write_checksum). Symbols are accepted too.
#
# Raises ArgumentError for any other value. The check happens before the
# existing manifests are removed, so a bad argument cannot leave the bag
# without manifests.
def manifest!(algo: "default")
  algo = algo.to_s
  supported = %w[default sha1 md5 sha256 sha512]
  unless supported.include?(algo)
    raise ArgumentError, "Unsupported checksum algorithm #{algo.inspect}, expected one of: #{supported.join(", ")}"
  end

  # nuke all the existing manifest files
  manifest_files.each { |f| FileUtils.rm f }

  # manifest each bag file for the chosen algorithm
  root = Pathname.new(bag_dir)
  bag_files.each do |f|
    rel_path = encode_filename(Pathname.new(f).relative_path_from(root).to_s)

    write_checksum(checksum_algo: algo, relative_path: rel_path, file: f)
  end
  tagmanifest!
end
|
49
42
|
|
43
|
+
# Appends the checksum of file to the manifest for checksum_algo.
#
# checksum_algo - "sha1", "md5", "sha256", "sha512", or "default" (writes
#                 both sha1 and md5); a Symbol is accepted as well.
# relative_path - path to record in the manifest line.
# file          - path of the file to digest.
#
# Raises ArgumentError for an unsupported algorithm instead of silently
# writing nothing.
def write_checksum(checksum_algo:, relative_path:, file:)
  case checksum_algo.to_s
  when "sha1"
    write_sha1(file, relative_path)
  when "md5"
    write_md5(file, relative_path)
  when "sha256"
    write_sha256(file, relative_path)
  when "sha512"
    write_sha512(file, relative_path)
  when "default"
    write_sha1(file, relative_path)
    write_md5(file, relative_path)
  else
    raise ArgumentError, "Unsupported checksum algorithm: #{checksum_algo.inspect}"
  end
end
|
58
|
+
|
59
|
+
# Appends the SHA-1 digest of f, followed by rel_path, to the sha1 manifest.
def write_sha1(f, rel_path)
  append_manifest_entry(:sha1, Digest::SHA1, f, rel_path)
end

# Appends the MD5 digest of f, followed by rel_path, to the md5 manifest.
def write_md5(f, rel_path)
  append_manifest_entry(:md5, Digest::MD5, f, rel_path)
end

# Appends the SHA-256 digest of f, followed by rel_path, to the sha256 manifest.
def write_sha256(f, rel_path)
  append_manifest_entry(:sha256, Digest::SHA256, f, rel_path)
end

# Appends the SHA-512 digest of f, followed by rel_path, to the sha512 manifest.
def write_sha512(f, rel_path)
  append_manifest_entry(:sha512, Digest::SHA512, f, rel_path)
end

# Shared implementation of the write_* methods: digests the file at f with
# digest_class and appends "<hexdigest> <rel_path>" to the manifest file
# returned by manifest_file(algorithm).
def append_manifest_entry(algorithm, digest_class, f, rel_path)
  checksum = digest_class.file(f).hexdigest
  File.open(manifest_file(algorithm), "a") { |io| io.puts "#{checksum} #{rel_path}" }
end
|
78
|
+
|
50
79
|
# All tag files that are tag manifest files (tagmanifest-[algorithm].txt).
#
# Only regular files directly inside @bag_dir are considered. The whole file
# name must match, with a literal ".txt" extension.
def tagmanifest_files
  Dir[File.join(@bag_dir, "*")].select do |f|
    File.file?(f) && File.basename(f).match?(/\Atagmanifest-.*\.txt\z/)
  end
end
|
@@ -62,12 +91,11 @@ module BagIt
|
|
62
91
|
|
63
92
|
# Generate manifest files for all the tag files (except the tag
|
64
93
|
# manifest files)
|
65
|
-
def tagmanifest!(tags=nil)
|
66
|
-
|
67
|
-
tags = tag_files if tags == nil
|
94
|
+
def tagmanifest!(tags = nil)
|
95
|
+
tags = tag_files if tags.nil?
|
68
96
|
|
69
97
|
# nuke all the existing tagmanifest files
|
70
|
-
tagmanifest_files.each { |f| FileUtils
|
98
|
+
tagmanifest_files.each { |f| FileUtils.rm f }
|
71
99
|
|
72
100
|
# ensure presence of manifest files
|
73
101
|
manifest_files.each do |manifest|
|
@@ -85,38 +113,37 @@ module BagIt
|
|
85
113
|
tag_files
|
86
114
|
end
|
87
115
|
|
88
|
-
# Adds a tag file at path (relative to the bag directory) and records its
# sha1 and md5 digests in the tag manifests. Returns tag_files.
#
# When the file does not exist yet it is created, either by copying src_path
# or, when src_path is nil, by yielding the opened file to the block. When
# the file already exists, call without src_path to register it as it is.
#
# Raises RuntimeError when the file is already listed in tag_files, or when
# it exists and src_path is given (nothing is overwritten). Raises
# ArgumentError when the file has to be created but neither a block nor
# src_path was supplied; no file is created in that case.
def add_tag_file(path, src_path = nil)
  f = File.join(@bag_dir, path)
  raise "Tag file already in manifest: #{path}" if tag_files.include?(f)

  if File.exist?(f)
    unless src_path.nil?
      raise "Tag file already exists, will not overwrite: #{path}\n Use add_tag_file(path) to add an existing tag file."
    end
  else
    if src_path.nil? && !block_given?
      raise ArgumentError, "add_tag_file needs a block or a src_path to create #{path}"
    end

    FileUtils.mkdir_p File.dirname(f)

    # write file
    if src_path.nil?
      File.open(f, "w") { |io| yield io }
    else
      FileUtils.cp src_path, f
    end
    # this adds the manifest and bag info files on initial creation
    # it must only run when the manifest doesn't already exist or it will
    # infinitely recall add_tag_file. Better way of doing this?
    tagmanifest!
  end

  rel_path = Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s

  # Digest straight from disk: avoids holding the whole file in memory and
  # hashes the raw bytes rather than a text-mode read.
  { sha1: Digest::SHA1, md5: Digest::MD5 }.each do |algorithm, digest_class|
    checksum = digest_class.file(f).hexdigest
    File.open(tagmanifest_file(algorithm), "a") { |io| io.puts "#{checksum} #{rel_path}" }
  end
  tag_files
end
|
122
149
|
|
@@ -131,7 +158,7 @@ module BagIt
|
|
131
158
|
filepath = File.join(@bag_dir, path)
|
132
159
|
raise "Tag file does not exist: #{path}" unless File.exist? filepath
|
133
160
|
remove_tag_file(path) if tag_files.include?(path)
|
134
|
-
FileUtils
|
161
|
+
FileUtils.rm filepath
|
135
162
|
end
|
136
163
|
|
137
164
|
# Returns true if all present manifested files' message digests
|
@@ -141,33 +168,29 @@ module BagIt
|
|
141
168
|
# extract the algorithm
|
142
169
|
mf =~ /manifest-(.+).txt$/
|
143
170
|
|
144
|
-
algo = case
|
171
|
+
algo = case Regexp.last_match(1)
|
145
172
|
when /sha1/i
|
146
173
|
Digest::SHA1
|
147
174
|
when /md5/i
|
148
175
|
Digest::MD5
|
149
176
|
else
|
150
177
|
:unknown
|
151
|
-
|
178
|
+
end
|
152
179
|
|
153
180
|
# check it, an unknown algorithm is always true
|
154
|
-
|
155
|
-
|
181
|
+
if algo == :unknown
|
182
|
+
true
|
183
|
+
else
|
184
|
+
lines = File.open(mf, &:readlines)
|
156
185
|
|
157
186
|
lines.all? do |line|
|
158
|
-
manifested_digest, path = line.chomp.split
|
187
|
+
manifested_digest, path = line.chomp.split(/\s+/, 2)
|
159
188
|
actual_digest = File.open(File.join(@bag_dir, path)) { |io| algo.hexdigest io.read }
|
160
189
|
actual_digest == manifested_digest
|
161
190
|
end
|
162
191
|
|
163
|
-
else
|
164
|
-
true
|
165
192
|
end
|
166
|
-
|
167
193
|
end
|
168
|
-
|
169
194
|
end
|
170
|
-
|
171
195
|
end
|
172
|
-
|
173
196
|
end
|