bagit 0.3.5 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +2 -1
- data/.travis.yml +1 -2
- data/Gemfile +4 -2
- data/README.md +1 -1
- data/Rakefile +11 -8
- data/bagit.gemspec +25 -21
- data/bin/bagit +59 -63
- data/lib/bagit.rb +8 -6
- data/lib/bagit/bag.rb +43 -47
- data/lib/bagit/fetch.rb +23 -27
- data/lib/bagit/file.rb +11 -14
- data/lib/bagit/info.rb +39 -51
- data/lib/bagit/manifest.rb +72 -49
- data/lib/bagit/string.rb +6 -6
- data/lib/bagit/valid.rb +51 -57
- data/lib/bagit/version.rb +3 -1
- data/spec/bagit_spec.rb +59 -54
- data/spec/fetch_spec.rb +33 -38
- data/spec/manifest_spec.rb +107 -111
- data/spec/spec_helper.rb +12 -12
- data/spec/tag_info_spec.rb +101 -108
- data/spec/tag_spec.rb +47 -49
- data/spec/util/bagit_matchers.rb +5 -14
- data/spec/validation_spec.rb +108 -110
- metadata +50 -9
data/lib/bagit/fetch.rb
CHANGED
@@ -1,50 +1,46 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "open-uri"
|
2
4
|
|
3
5
|
module BagIt
|
4
|
-
|
5
6
|
module Fetch
|
6
|
-
|
7
7
|
def fetch_txt_file
|
8
|
-
File.join @bag_dir,
|
8
|
+
File.join @bag_dir, "fetch.txt"
|
9
9
|
end
|
10
10
|
|
11
11
|
def add_remote_file(url, path, size, sha1, md5)
|
12
|
-
open(fetch_txt_file,
|
13
|
-
open(manifest_file(
|
14
|
-
open(manifest_file(
|
12
|
+
File.open(fetch_txt_file, "a") { |io| io.puts "#{url} #{size || "-"} #{path}" }
|
13
|
+
File.open(manifest_file("sha1"), "a") { |io| io.puts "#{sha1} #{File.join "data", path}" }
|
14
|
+
File.open(manifest_file("md5"), "a") { |io| io.puts "#{md5} #{File.join "data", path}" }
|
15
15
|
end
|
16
16
|
|
17
17
|
# fetch all remote files
|
18
18
|
def fetch!
|
19
|
-
|
20
|
-
open(fetch_txt_file) do |io|
|
21
|
-
|
19
|
+
File.open(fetch_txt_file) do |io|
|
22
20
|
io.readlines.each do |line|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
io.write open(url)
|
21
|
+
(url, _length, path) = line.chomp.split(/\s+/, 3)
|
22
|
+
|
23
|
+
add_file(path) do |file_io|
|
24
|
+
file_io.write URI.open(url)
|
28
25
|
end
|
29
|
-
|
30
26
|
end
|
31
|
-
|
32
27
|
end
|
33
28
|
|
34
|
-
|
35
|
-
|
36
|
-
|
29
|
+
rename_old_fetch_txt(fetch_txt_file)
|
30
|
+
move_current_fetch_txt(fetch_txt_file)
|
31
|
+
end
|
32
|
+
|
33
|
+
def rename_old_fetch_txt(fetch_txt_file)
|
34
|
+
Dir["#{fetch_txt_file}.?*"].sort.reverse_each do |f|
|
37
35
|
if f =~ /fetch.txt.(\d+)$/
|
38
|
-
new_f = File.join File.dirname(f), "fetch.txt.#{
|
39
|
-
FileUtils
|
36
|
+
new_f = File.join File.dirname(f), "fetch.txt.#{Regexp.last_match(1).to_i + 1}"
|
37
|
+
FileUtils.mv f, new_f
|
40
38
|
end
|
41
|
-
|
42
39
|
end
|
40
|
+
end
|
43
41
|
|
44
|
-
|
45
|
-
FileUtils
|
42
|
+
def move_current_fetch_txt(fetch_txt_file)
|
43
|
+
FileUtils.mv fetch_txt_file, "#{fetch_txt_file}.0"
|
46
44
|
end
|
47
|
-
|
48
45
|
end
|
49
|
-
|
50
46
|
end
|
data/lib/bagit/file.rb
CHANGED
@@ -1,19 +1,16 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
class File
|
3
4
|
# Clean out all the empty dirs
|
4
|
-
def
|
5
|
+
def self.clean(file_name)
|
6
|
+
return unless File.directory? file_name
|
7
|
+
# clean all subdirs
|
8
|
+
subdirs = Dir.entries(file_name).select { |p| File.directory?(File.join(file_name, p)) }
|
9
|
+
subdirs.reject! { |p| %w[. ..].include? p }
|
10
|
+
subdirs.each { |sd| File.clean File.join(file_name, sd) }
|
5
11
|
|
6
|
-
if
|
7
|
-
|
8
|
-
|
9
|
-
subdirs.reject! { |p| %w{. ..}.include? p }
|
10
|
-
subdirs.each { |sd| File.clean File.join(file_name, sd) }
|
11
|
-
|
12
|
-
# if its empty then delete it
|
13
|
-
contents = Dir.entries(file_name).reject { |p| %w{. ..}.include? p }
|
14
|
-
Dir.delete file_name if contents.empty?
|
15
|
-
end
|
16
|
-
|
12
|
+
# if it's empty then delete it
|
13
|
+
contents = Dir.entries(file_name).reject { |p| %w[. ..].include? p }
|
14
|
+
Dir.delete file_name if contents.empty?
|
17
15
|
end
|
18
|
-
|
19
16
|
end
|
data/lib/bagit/info.rb
CHANGED
@@ -1,49 +1,47 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
require "set"
|
4
4
|
|
5
|
+
module BagIt
|
5
6
|
module Info
|
6
|
-
|
7
7
|
@@bag_info_headers = {
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
16
|
-
:
|
17
|
-
:
|
18
|
-
:
|
19
|
-
:
|
20
|
-
:
|
21
|
-
:
|
22
|
-
:
|
8
|
+
agent: "Bag-Software-Agent",
|
9
|
+
org: "Source-Organization",
|
10
|
+
org_addr: "Organization-Address",
|
11
|
+
contact_name: "Contact-Name",
|
12
|
+
contact_phone: "Contact-Phone",
|
13
|
+
contact_email: "Contact-Email",
|
14
|
+
ext_desc: "External-Description",
|
15
|
+
ext_id: "External-Identifier",
|
16
|
+
size: "Bag-Size",
|
17
|
+
group_id: "Bag-Group-Identifier",
|
18
|
+
group_count: "Bag-Count",
|
19
|
+
sender_id: "Internal-Sender-Identifier",
|
20
|
+
int_desc: "Internal-Sender-Description",
|
21
|
+
date: "Bagging-Date",
|
22
|
+
oxum: "Payload-Oxum"
|
23
23
|
}
|
24
24
|
|
25
25
|
def bag_info_txt_file
|
26
|
-
File.join bag_dir,
|
26
|
+
File.join bag_dir, "bag-info.txt"
|
27
27
|
end
|
28
28
|
|
29
29
|
def bag_info
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
{}
|
34
|
-
end
|
30
|
+
read_info_file bag_info_txt_file
|
31
|
+
rescue
|
32
|
+
{}
|
35
33
|
end
|
36
34
|
|
37
|
-
def write_bag_info(hash={})
|
35
|
+
def write_bag_info(hash = {})
|
38
36
|
hash = bag_info.merge(hash)
|
39
|
-
hash[@@bag_info_headers[:agent]] = "BagIt Ruby Gem (
|
40
|
-
hash[@@bag_info_headers[:date]] = Date.today.strftime(
|
37
|
+
hash[@@bag_info_headers[:agent]] = "BagIt Ruby Gem (https://github.com/tipr/bagit)" if hash[@@bag_info_headers[:agent]].nil?
|
38
|
+
hash[@@bag_info_headers[:date]] = Date.today.strftime("%Y-%m-%d") if hash[@@bag_info_headers[:date]].nil?
|
41
39
|
hash[@@bag_info_headers[:oxum]] = payload_oxum
|
42
40
|
write_info_file bag_info_txt_file, hash
|
43
41
|
end
|
44
42
|
|
45
43
|
def bagit_txt_file
|
46
|
-
File.join bag_dir,
|
44
|
+
File.join bag_dir, "bagit.txt"
|
47
45
|
end
|
48
46
|
|
49
47
|
def bagit
|
@@ -55,54 +53,44 @@ module BagIt
|
|
55
53
|
end
|
56
54
|
|
57
55
|
def update_bag_date
|
58
|
-
hash["Bagging-Date"] = Date.today.strftime(
|
56
|
+
hash["Bagging-Date"] = Date.today.strftime("%Y-%m-%d")
|
59
57
|
write_bag_info(hash)
|
60
58
|
end
|
61
59
|
|
62
60
|
protected
|
63
61
|
|
64
62
|
def read_info_file(file)
|
65
|
-
|
66
63
|
File.open(file) do |io|
|
67
|
-
|
68
|
-
entries = io.read.split /\n(?=[^\s])/
|
64
|
+
entries = io.read.split(/\n(?=[^\s])/)
|
69
65
|
|
70
66
|
entries.inject({}) do |hash, line|
|
71
|
-
name, value = line.chomp.split
|
72
|
-
hash.merge(
|
67
|
+
name, value = line.chomp.split(/\s*:\s*/, 2)
|
68
|
+
hash.merge(name => value)
|
73
69
|
end
|
74
|
-
|
75
70
|
end
|
76
|
-
|
77
71
|
end
|
78
72
|
|
79
73
|
def write_info_file(file, hash)
|
80
|
-
|
81
|
-
dups = hash.keys.inject(Set.new) do |acc, key|
|
74
|
+
dups = hash.keys.inject(Set.new) { |acc, key|
|
82
75
|
a = hash.keys.grep(/#{key}/i)
|
83
76
|
acc + (a.size > 1 ? a : [])
|
84
|
-
|
77
|
+
}
|
85
78
|
|
86
|
-
raise "Multiple labels (#{dups.to_a.join
|
87
|
-
|
88
|
-
File.open(file, 'w') do |io|
|
79
|
+
raise "Multiple labels (#{dups.to_a.join ", "}) in #{file}" unless dups.empty?
|
89
80
|
|
81
|
+
File.open(file, "w") do |io|
|
90
82
|
hash.each do |name, value|
|
91
|
-
simple_entry = "#{name}: #{value.gsub
|
83
|
+
simple_entry = "#{name}: #{value.gsub(/\s+/, " ")}"
|
92
84
|
|
93
85
|
entry = if simple_entry.length > 79
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
86
|
+
simple_entry.wrap(77).indent(2)
|
87
|
+
else
|
88
|
+
simple_entry
|
89
|
+
end
|
98
90
|
|
99
91
|
io.puts entry
|
100
92
|
end
|
101
|
-
|
102
93
|
end
|
103
|
-
|
104
94
|
end
|
105
|
-
|
106
95
|
end
|
107
|
-
|
108
96
|
end
|
data/lib/bagit/manifest.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
|
2
|
-
require 'digest/sha1'
|
3
|
-
require 'digest/md5'
|
1
|
+
# frozen_string_literal: true
|
4
2
|
|
5
|
-
|
3
|
+
require "pathname"
|
4
|
+
require "digest/sha1"
|
5
|
+
require "digest/md5"
|
6
6
|
|
7
|
+
module BagIt
|
7
8
|
# Requires response to bag_dir, tag_files, bag_files
|
8
9
|
module Manifest
|
9
10
|
def encode_filename(s)
|
10
|
-
|
11
|
-
|
12
|
-
|
11
|
+
s = s.gsub(/\r/, "%0D")
|
12
|
+
s = s.gsub(/\n/, "%0A")
|
13
|
+
s
|
13
14
|
end
|
14
15
|
|
15
|
-
|
16
16
|
# All tag files that are bag manifest files (manifest-[algorithm].txt)
|
17
17
|
def manifest_files
|
18
|
-
files = Dir[File.join(@bag_dir,
|
19
|
-
File.file?
|
18
|
+
files = Dir[File.join(@bag_dir, "*")].select { |f|
|
19
|
+
File.file?(f) && File.basename(f) =~ /^manifest-.*.txt$/
|
20
20
|
}
|
21
21
|
files
|
22
22
|
end
|
@@ -27,30 +27,59 @@ module BagIt
|
|
27
27
|
end
|
28
28
|
|
29
29
|
# Generate manifest files for all the bag files
|
30
|
-
def manifest!
|
31
|
-
|
30
|
+
def manifest!(algo: "default")
|
32
31
|
# nuke all the existing manifest files
|
33
|
-
manifest_files.each { |f| FileUtils
|
32
|
+
manifest_files.each { |f| FileUtils.rm f }
|
34
33
|
|
35
34
|
# manifest each tag file for each algorithm
|
36
35
|
bag_files.each do |f|
|
37
36
|
rel_path = encode_filename(Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s)
|
38
|
-
|
39
|
-
|
40
|
-
sha1 = Digest::SHA1.file f
|
41
|
-
File.open(manifest_file(:sha1), 'a') { |io| io.puts "#{sha1} #{rel_path}" }
|
42
|
-
|
43
|
-
# md5
|
44
|
-
md5 = Digest::MD5.file f
|
45
|
-
File.open(manifest_file(:md5), 'a') { |io| io.puts "#{md5} #{rel_path}" }
|
37
|
+
|
38
|
+
write_checksum(checksum_algo: algo, relative_path: rel_path, file: f)
|
46
39
|
end
|
47
40
|
tagmanifest!
|
48
41
|
end
|
49
42
|
|
43
|
+
# Append the manifest entry (or entries) for +file+ using the requested
# checksum algorithm by delegating to the matching write_* helper.
#
# checksum_algo - "sha1", "md5", "sha256", "sha512", or "default"
#                 ("default" writes both a sha1 and an md5 entry).
#                 Any other value matches no branch and writes nothing.
# relative_path - path passed to the helper as the manifest line's path.
# file          - path of the file passed to the helper for digesting.
def write_checksum(checksum_algo:, relative_path:, file:)
  case checksum_algo
  when "sha1"
    write_sha1(file, relative_path)
  when "md5"
    write_md5(file, relative_path)
  when "sha256"
    write_sha256(file, relative_path)
  when "sha512"
    # Must dispatch to the SHA-512 writer; routing this branch to
    # write_sha256 would emit SHA-256 digests into the sha256 manifest.
    write_sha512(file, relative_path)
  when "default"
    write_sha1(file, relative_path)
    write_md5(file, relative_path)
  end
end
|
58
|
+
|
59
|
+
def write_sha1(f, rel_path)
|
60
|
+
sha1 = Digest::SHA1.file f
|
61
|
+
File.open(manifest_file(:sha1), "a") { |io| io.puts "#{sha1} #{rel_path}" }
|
62
|
+
end
|
63
|
+
|
64
|
+
def write_md5(f, rel_path)
|
65
|
+
md5 = Digest::MD5.file f
|
66
|
+
File.open(manifest_file(:md5), "a") { |io| io.puts "#{md5} #{rel_path}" }
|
67
|
+
end
|
68
|
+
|
69
|
+
def write_sha256(f, rel_path)
|
70
|
+
sha256 = Digest::SHA256.file f
|
71
|
+
File.open(manifest_file(:sha256), "a") { |io| io.puts "#{sha256} #{rel_path}" }
|
72
|
+
end
|
73
|
+
|
74
|
+
def write_sha512(f, rel_path)
|
75
|
+
sha512 = Digest::SHA512.file f
|
76
|
+
File.open(manifest_file(:sha512), "a") { |io| io.puts "#{sha512} #{rel_path}" }
|
77
|
+
end
|
78
|
+
|
50
79
|
# All tag files that are bag manifest files (tagmanifest-[algorithm].txt)
|
51
80
|
def tagmanifest_files
|
52
|
-
files = Dir[File.join(@bag_dir,
|
53
|
-
File.file?
|
81
|
+
files = Dir[File.join(@bag_dir, "*")].select { |f|
|
82
|
+
File.file?(f) && File.basename(f) =~ /^tagmanifest-.*.txt$/
|
54
83
|
}
|
55
84
|
files
|
56
85
|
end
|
@@ -62,12 +91,11 @@ module BagIt
|
|
62
91
|
|
63
92
|
# Generate manifest files for all the tag files (except the tag
|
64
93
|
# manifest files)
|
65
|
-
def tagmanifest!(tags=nil)
|
66
|
-
|
67
|
-
tags = tag_files if tags == nil
|
94
|
+
def tagmanifest!(tags = nil)
|
95
|
+
tags = tag_files if tags.nil?
|
68
96
|
|
69
97
|
# nuke all the existing tagmanifest files
|
70
|
-
tagmanifest_files.each { |f| FileUtils
|
98
|
+
tagmanifest_files.each { |f| FileUtils.rm f }
|
71
99
|
|
72
100
|
# ensure presence of manifest files
|
73
101
|
manifest_files.each do |manifest|
|
@@ -85,38 +113,37 @@ module BagIt
|
|
85
113
|
tag_files
|
86
114
|
end
|
87
115
|
|
88
|
-
def add_tag_file(path, src_path=nil)
|
89
|
-
|
116
|
+
def add_tag_file(path, src_path = nil)
|
90
117
|
f = File.join(@bag_dir, path)
|
91
118
|
raise "Tag file already in manifest: #{path}" if tag_files.include?(f)
|
92
119
|
|
93
|
-
if
|
94
|
-
FileUtils
|
120
|
+
if !File.exist? f
|
121
|
+
FileUtils.mkdir_p File.dirname(f)
|
95
122
|
|
96
123
|
# write file
|
97
124
|
if src_path.nil?
|
98
|
-
File.open(f,
|
125
|
+
File.open(f, "w") { |io| yield io }
|
99
126
|
else
|
100
|
-
FileUtils
|
127
|
+
FileUtils.cp src_path, f
|
101
128
|
end
|
102
129
|
# this adds the manifest and bag info files on initial creation
|
103
130
|
# it must only run when the manifest doesn't already exist or it will
|
104
131
|
# infinitely recall add_tag_file. Better way of doing this?
|
105
132
|
tagmanifest!
|
106
|
-
elsif
|
133
|
+
elsif !src_path.nil?
|
107
134
|
raise "Tag file already exists, will not overwrite: #{path}\n Use add_tag_file(path) to add an existing tag file."
|
108
135
|
end
|
109
136
|
|
110
|
-
data = File.open(f
|
137
|
+
data = File.open(f, &:read)
|
111
138
|
rel_path = Pathname.new(f).relative_path_from(Pathname.new(bag_dir)).to_s
|
112
139
|
|
113
140
|
# sha1
|
114
141
|
sha1 = Digest::SHA1.hexdigest data
|
115
|
-
File.open(tagmanifest_file(:sha1),
|
142
|
+
File.open(tagmanifest_file(:sha1), "a") { |io| io.puts "#{sha1} #{rel_path}" }
|
116
143
|
|
117
144
|
# md5
|
118
145
|
md5 = Digest::MD5.hexdigest data
|
119
|
-
File.open(tagmanifest_file(:md5),
|
146
|
+
File.open(tagmanifest_file(:md5), "a") { |io| io.puts "#{md5} #{rel_path}" }
|
120
147
|
tag_files
|
121
148
|
end
|
122
149
|
|
@@ -131,7 +158,7 @@ module BagIt
|
|
131
158
|
filepath = File.join(@bag_dir, path)
|
132
159
|
raise "Tag file does not exist: #{path}" unless File.exist? filepath
|
133
160
|
remove_tag_file(path) if tag_files.include?(path)
|
134
|
-
FileUtils
|
161
|
+
FileUtils.rm filepath
|
135
162
|
end
|
136
163
|
|
137
164
|
# Returns true if all present manifested files' message digests
|
@@ -141,33 +168,29 @@ module BagIt
|
|
141
168
|
# extract the algorithm
|
142
169
|
mf =~ /manifest-(.+).txt$/
|
143
170
|
|
144
|
-
algo = case
|
171
|
+
algo = case Regexp.last_match(1)
|
145
172
|
when /sha1/i
|
146
173
|
Digest::SHA1
|
147
174
|
when /md5/i
|
148
175
|
Digest::MD5
|
149
176
|
else
|
150
177
|
:unknown
|
151
|
-
|
178
|
+
end
|
152
179
|
|
153
180
|
# check it, an unknown algorithm is always true
|
154
|
-
|
155
|
-
|
181
|
+
if algo == :unknown
|
182
|
+
true
|
183
|
+
else
|
184
|
+
lines = File.open(mf, &:readlines)
|
156
185
|
|
157
186
|
lines.all? do |line|
|
158
|
-
manifested_digest, path = line.chomp.split
|
187
|
+
manifested_digest, path = line.chomp.split(/\s+/, 2)
|
159
188
|
actual_digest = File.open(File.join(@bag_dir, path)) { |io| algo.hexdigest io.read }
|
160
189
|
actual_digest == manifested_digest
|
161
190
|
end
|
162
191
|
|
163
|
-
else
|
164
|
-
true
|
165
192
|
end
|
166
|
-
|
167
193
|
end
|
168
|
-
|
169
194
|
end
|
170
|
-
|
171
195
|
end
|
172
|
-
|
173
196
|
end
|