bagit 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +2 -1
- data/Gemfile +3 -1
- data/Rakefile +10 -13
- data/bagit.gemspec +24 -23
- data/bin/bagit +22 -32
- data/lib/bagit.rb +8 -6
- data/lib/bagit/bag.rb +21 -19
- data/lib/bagit/fetch.rb +10 -8
- data/lib/bagit/file.rb +2 -0
- data/lib/bagit/info.rb +45 -43
- data/lib/bagit/manifest.rb +25 -23
- data/lib/bagit/string.rb +4 -2
- data/lib/bagit/valid.rb +45 -43
- data/lib/bagit/version.rb +3 -1
- data/spec/bagit_spec.rb +30 -29
- data/spec/fetch_spec.rb +13 -12
- data/spec/manifest_spec.rb +14 -9
- data/spec/spec_helper.rb +10 -8
- data/spec/tag_info_spec.rb +31 -30
- data/spec/tag_spec.rb +11 -10
- data/spec/util/bagit_matchers.rb +2 -0
- data/spec/validation_spec.rb +23 -22
- metadata +18 -20
- data/.rubocop.yml +0 -22
data/lib/bagit/manifest.rb
CHANGED
|
@@ -1,21 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
require
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "digest/sha1"
|
|
5
|
+
require "digest/md5"
|
|
4
6
|
|
|
5
7
|
module BagIt
|
|
6
8
|
# Requires response to bag_dir, tag_files, bag_files
|
|
7
9
|
module Manifest
|
|
8
10
|
def encode_filename(s)
|
|
9
|
-
s = s.gsub(/\r/,
|
|
10
|
-
s = s.gsub(/\n/,
|
|
11
|
+
s = s.gsub(/\r/, "%0D")
|
|
12
|
+
s = s.gsub(/\n/, "%0A")
|
|
11
13
|
s
|
|
12
14
|
end
|
|
13
15
|
|
|
14
16
|
# All tag files that are bag manifest files (manifest-[algorithm].txt)
|
|
15
17
|
def manifest_files
|
|
16
|
-
files = Dir[File.join(@bag_dir,
|
|
18
|
+
files = Dir[File.join(@bag_dir, "*")].select { |f|
|
|
17
19
|
File.file?(f) && File.basename(f) =~ /^manifest-.*.txt$/
|
|
18
|
-
|
|
20
|
+
}
|
|
19
21
|
files
|
|
20
22
|
end
|
|
21
23
|
|
|
@@ -25,7 +27,7 @@ module BagIt
|
|
|
25
27
|
end
|
|
26
28
|
|
|
27
29
|
# Generate manifest files for all the bag files
|
|
28
|
-
def manifest!(algo:
|
|
30
|
+
def manifest!(algo: "default")
|
|
29
31
|
# nuke all the existing manifest files
|
|
30
32
|
manifest_files.each { |f| FileUtils.rm f }
|
|
31
33
|
|
|
@@ -40,15 +42,15 @@ module BagIt
|
|
|
40
42
|
|
|
41
43
|
def write_checksum(checksum_algo:, relative_path:, file:)
|
|
42
44
|
case checksum_algo
|
|
43
|
-
when
|
|
45
|
+
when "sha1"
|
|
44
46
|
write_sha1(file, relative_path)
|
|
45
|
-
when
|
|
47
|
+
when "md5"
|
|
46
48
|
write_md5(file, relative_path)
|
|
47
|
-
when
|
|
49
|
+
when "sha256"
|
|
48
50
|
write_sha256(file, relative_path)
|
|
49
|
-
when
|
|
51
|
+
when "sha512"
|
|
50
52
|
write_sha256(file, relative_path)
|
|
51
|
-
when
|
|
53
|
+
when "default"
|
|
52
54
|
write_sha1(file, relative_path)
|
|
53
55
|
write_md5(file, relative_path)
|
|
54
56
|
end
|
|
@@ -56,29 +58,29 @@ module BagIt
|
|
|
56
58
|
|
|
57
59
|
def write_sha1(f, rel_path)
|
|
58
60
|
sha1 = Digest::SHA1.file f
|
|
59
|
-
File.open(manifest_file(:sha1),
|
|
61
|
+
File.open(manifest_file(:sha1), "a") { |io| io.puts "#{sha1} #{rel_path}" }
|
|
60
62
|
end
|
|
61
63
|
|
|
62
64
|
def write_md5(f, rel_path)
|
|
63
65
|
md5 = Digest::MD5.file f
|
|
64
|
-
File.open(manifest_file(:md5),
|
|
66
|
+
File.open(manifest_file(:md5), "a") { |io| io.puts "#{md5} #{rel_path}" }
|
|
65
67
|
end
|
|
66
68
|
|
|
67
69
|
def write_sha256(f, rel_path)
|
|
68
70
|
sha256 = Digest::SHA256.file f
|
|
69
|
-
File.open(manifest_file(:sha256),
|
|
71
|
+
File.open(manifest_file(:sha256), "a") { |io| io.puts "#{sha256} #{rel_path}" }
|
|
70
72
|
end
|
|
71
73
|
|
|
72
74
|
def write_sha512(f, rel_path)
|
|
73
75
|
sha512 = Digest::SHA512.file f
|
|
74
|
-
File.open(manifest_file(:sha512),
|
|
76
|
+
File.open(manifest_file(:sha512), "a") { |io| io.puts "#{sha512} #{rel_path}" }
|
|
75
77
|
end
|
|
76
78
|
|
|
77
79
|
# All tag files that are bag manifest files (tagmanifest-[algorithm].txt)
|
|
78
80
|
def tagmanifest_files
|
|
79
|
-
files = Dir[File.join(@bag_dir,
|
|
81
|
+
files = Dir[File.join(@bag_dir, "*")].select { |f|
|
|
80
82
|
File.file?(f) && File.basename(f) =~ /^tagmanifest-.*.txt$/
|
|
81
|
-
|
|
83
|
+
}
|
|
82
84
|
files
|
|
83
85
|
end
|
|
84
86
|
|
|
@@ -120,7 +122,7 @@ module BagIt
|
|
|
120
122
|
|
|
121
123
|
# write file
|
|
122
124
|
if src_path.nil?
|
|
123
|
-
File.open(f,
|
|
125
|
+
File.open(f, "w") { |io| yield io }
|
|
124
126
|
else
|
|
125
127
|
FileUtils.cp src_path, f
|
|
126
128
|
end
|
|
@@ -137,11 +139,11 @@ module BagIt
|
|
|
137
139
|
|
|
138
140
|
# sha1
|
|
139
141
|
sha1 = Digest::SHA1.hexdigest data
|
|
140
|
-
File.open(tagmanifest_file(:sha1),
|
|
142
|
+
File.open(tagmanifest_file(:sha1), "a") { |io| io.puts "#{sha1} #{rel_path}" }
|
|
141
143
|
|
|
142
144
|
# md5
|
|
143
145
|
md5 = Digest::MD5.hexdigest data
|
|
144
|
-
File.open(tagmanifest_file(:md5),
|
|
146
|
+
File.open(tagmanifest_file(:md5), "a") { |io| io.puts "#{md5} #{rel_path}" }
|
|
145
147
|
tag_files
|
|
146
148
|
end
|
|
147
149
|
|
|
@@ -173,7 +175,7 @@ module BagIt
|
|
|
173
175
|
Digest::MD5
|
|
174
176
|
else
|
|
175
177
|
:unknown
|
|
176
|
-
|
|
178
|
+
end
|
|
177
179
|
|
|
178
180
|
# check it, an unknown algorithm is always true
|
|
179
181
|
if algo == :unknown
|
data/lib/bagit/string.rb
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
# Some mixed in functionality for String
|
|
2
4
|
class String
|
|
3
5
|
# Wrap a string to lines of a specified width. All existing newlines
|
|
4
6
|
# are not guaranteed to be preserved
|
|
5
7
|
def wrap(width)
|
|
6
|
-
s = gsub(/\s+/,
|
|
8
|
+
s = gsub(/\s+/, " ").strip
|
|
7
9
|
|
|
8
10
|
if s.length > width
|
|
9
11
|
s[0...width] + '\n' + s[width..-1].wrap(width)
|
|
@@ -14,7 +16,7 @@ class String
|
|
|
14
16
|
|
|
15
17
|
# Indent each line of a string by n spaces
|
|
16
18
|
def indent(n)
|
|
17
|
-
indent =
|
|
19
|
+
indent = " " * n
|
|
18
20
|
gsub '\n', "\n#{indent}"
|
|
19
21
|
end
|
|
20
22
|
|
data/lib/bagit/valid.rb
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
require
|
|
4
|
-
require
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "validatable"
|
|
4
|
+
require "open-uri"
|
|
5
|
+
require "cgi"
|
|
6
|
+
require "logger"
|
|
5
7
|
|
|
6
8
|
module BagIt
|
|
7
9
|
class Bag
|
|
@@ -12,8 +14,8 @@ module BagIt
|
|
|
12
14
|
|
|
13
15
|
module Validity
|
|
14
16
|
def decode_filename(s)
|
|
15
|
-
s = s.gsub(
|
|
16
|
-
s = s.gsub(
|
|
17
|
+
s = s.gsub("%0D", "\r")
|
|
18
|
+
s = s.gsub("%0A", "\n")
|
|
17
19
|
s
|
|
18
20
|
end
|
|
19
21
|
|
|
@@ -88,52 +90,52 @@ module BagIt
|
|
|
88
90
|
|
|
89
91
|
protected
|
|
90
92
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
93
|
+
# Returns all files in the instance that are not manifested
|
|
94
|
+
def unmanifested_files
|
|
95
|
+
mfs = manifested_files.map { |f| File.join bag_dir, f }
|
|
96
|
+
bag_files.reject { |f| mfs.member? f }
|
|
97
|
+
end
|
|
96
98
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
99
|
+
# Returns a list of manifested files that are not present
|
|
100
|
+
def empty_manifests
|
|
101
|
+
bfs = bag_files
|
|
102
|
+
manifested_files.reject { |f| bfs.member? File.join(bag_dir, f) }
|
|
103
|
+
end
|
|
102
104
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
end
|
|
109
|
-
empty
|
|
105
|
+
# Returns a list of tag manifested files that are not present
|
|
106
|
+
def tag_empty_manifests
|
|
107
|
+
empty = []
|
|
108
|
+
tag_manifested_files.each do |f|
|
|
109
|
+
empty.push f unless File.exist?(File.join(bag_dir, f))
|
|
110
110
|
end
|
|
111
|
+
empty
|
|
112
|
+
end
|
|
111
113
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
end
|
|
114
|
+
# Returns a list of all files present in the manifest files
|
|
115
|
+
def manifested_files
|
|
116
|
+
manifest_files.inject([]) do |acc, mf|
|
|
117
|
+
files = File.open(mf) { |io|
|
|
118
|
+
io.readlines.map do |line|
|
|
119
|
+
_digest, path = line.chomp.split(/\s+/, 2)
|
|
120
|
+
decode_filename(path)
|
|
120
121
|
end
|
|
122
|
+
}
|
|
121
123
|
|
|
122
|
-
|
|
123
|
-
end
|
|
124
|
+
(acc + files).uniq
|
|
124
125
|
end
|
|
126
|
+
end
|
|
125
127
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
end
|
|
128
|
+
# Returns a list of all files in the tag manifest files
|
|
129
|
+
def tag_manifested_files
|
|
130
|
+
tagmanifest_files.inject([]) do |acc, mf|
|
|
131
|
+
files = File.open(mf) { |io|
|
|
132
|
+
io.readlines.map do |line|
|
|
133
|
+
_digest, path = line.chomp.split(/\s+/, 2)
|
|
134
|
+
path
|
|
134
135
|
end
|
|
135
|
-
|
|
136
|
-
|
|
136
|
+
}
|
|
137
|
+
(acc + files).uniq
|
|
137
138
|
end
|
|
139
|
+
end
|
|
138
140
|
end
|
|
139
141
|
end
|
data/lib/bagit/version.rb
CHANGED
data/spec/bagit_spec.rb
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
3
4
|
|
|
4
5
|
# based on v0.96 http://www.cdlib.org/inside/diglib/bagit/bagitspec.html
|
|
5
6
|
RSpec.describe BagIt::Bag do
|
|
6
|
-
describe
|
|
7
|
+
describe "empty bag" do
|
|
7
8
|
before do
|
|
8
9
|
@sandbox = Sandbox.new
|
|
9
10
|
# make the bag
|
|
10
|
-
@bag_path = File.join @sandbox.to_s,
|
|
11
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
|
11
12
|
@bag = described_class.new @bag_path
|
|
12
13
|
end
|
|
13
14
|
|
|
@@ -20,16 +21,16 @@ RSpec.describe BagIt::Bag do
|
|
|
20
21
|
end
|
|
21
22
|
end
|
|
22
23
|
|
|
23
|
-
describe
|
|
24
|
+
describe "bag with files" do
|
|
24
25
|
before do
|
|
25
26
|
@sandbox = Sandbox.new
|
|
26
27
|
|
|
27
28
|
# make the bag
|
|
28
|
-
@bag_path = File.join @sandbox.to_s,
|
|
29
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
|
29
30
|
@bag = described_class.new @bag_path
|
|
30
31
|
|
|
31
32
|
# add some files
|
|
32
|
-
File.open(
|
|
33
|
+
File.open("/dev/urandom") do |rio|
|
|
33
34
|
10.times do |n|
|
|
34
35
|
@bag.add_file("file-#{n}-💩
|
|
35
36
|
") { |io| io.write rio.read(16) }
|
|
@@ -50,39 +51,39 @@ RSpec.describe BagIt::Bag do
|
|
|
50
51
|
end
|
|
51
52
|
|
|
52
53
|
it "has a sub-directory called data" do
|
|
53
|
-
data_path = File.join @bag_path,
|
|
54
|
+
data_path = File.join @bag_path, "data"
|
|
54
55
|
expect(File.directory?(data_path)).to be true
|
|
55
56
|
end
|
|
56
57
|
|
|
57
58
|
describe "#add_file" do
|
|
58
59
|
it "allows addition of files via io" do
|
|
59
|
-
@bag.add_file("foo") { |io| io.puts
|
|
60
|
+
@bag.add_file("foo") { |io| io.puts "all alone" }
|
|
60
61
|
expect(File.join(@bag_path, "data", "foo")).to exist_on_fs
|
|
61
62
|
end
|
|
62
63
|
|
|
63
64
|
it "allows addition of files via copy" do
|
|
64
|
-
src_path = File.join @sandbox.to_s,
|
|
65
|
-
File.open(src_path,
|
|
66
|
-
@bag.add_file("foo", src_path) { |io| io.puts
|
|
65
|
+
src_path = File.join @sandbox.to_s, "somefile"
|
|
66
|
+
File.open(src_path, "w") { |io| io.puts "something" }
|
|
67
|
+
@bag.add_file("foo", src_path) { |io| io.puts "all alone" }
|
|
67
68
|
expect(File.join(@bag_path, "data", "foo")).to exist_on_fs
|
|
68
69
|
end
|
|
69
70
|
|
|
70
71
|
it "allows addition of files with deep paths" do
|
|
71
|
-
@bag.add_file("deep/dir/structure/file") { |io| io.puts
|
|
72
|
+
@bag.add_file("deep/dir/structure/file") { |io| io.puts "all alone" }
|
|
72
73
|
expect(File.join(@bag_path, "data", "deep/dir/structure/file")).to exist_on_fs
|
|
73
74
|
end
|
|
74
75
|
|
|
75
76
|
it "does not allow overwriting of files" do
|
|
76
|
-
expect
|
|
77
|
+
expect {
|
|
77
78
|
@bag.add_file("file-0-💩
|
|
78
|
-
") { |io| io.puts
|
|
79
|
-
|
|
79
|
+
") { |io| io.puts "overwrite!" }
|
|
80
|
+
}.to raise_error(RuntimeError)
|
|
80
81
|
end
|
|
81
82
|
|
|
82
83
|
it "updates the payload oxum" do
|
|
83
|
-
oxum_count = @bag.bag_info["Payload-Oxum"].split(
|
|
84
|
-
@bag.add_file("foo") { |io| io.puts
|
|
85
|
-
expect(@bag.bag_info["Payload-Oxum"].split(
|
|
84
|
+
oxum_count = @bag.bag_info["Payload-Oxum"].split(".")[1].to_i
|
|
85
|
+
@bag.add_file("foo") { |io| io.puts "all alone" }
|
|
86
|
+
expect(@bag.bag_info["Payload-Oxum"].split(".")[1].to_i).to eq(oxum_count + 1)
|
|
86
87
|
end
|
|
87
88
|
end
|
|
88
89
|
|
|
@@ -95,14 +96,14 @@ RSpec.describe BagIt::Bag do
|
|
|
95
96
|
describe "#get" do
|
|
96
97
|
describe "file not in bag" do
|
|
97
98
|
it "returns nil" do
|
|
98
|
-
expect(@bag.get(
|
|
99
|
+
expect(@bag.get("foobar")).to be_nil
|
|
99
100
|
end
|
|
100
101
|
end
|
|
101
102
|
|
|
102
103
|
describe "file in bag" do
|
|
103
104
|
before do
|
|
104
|
-
@contents =
|
|
105
|
-
@bag.add_file("foo") { |io| io <<
|
|
105
|
+
@contents = "all alone"
|
|
106
|
+
@bag.add_file("foo") { |io| io << "all alone" }
|
|
106
107
|
@file = @bag.get("foo")
|
|
107
108
|
end
|
|
108
109
|
|
|
@@ -135,10 +136,10 @@ RSpec.describe BagIt::Bag do
|
|
|
135
136
|
end
|
|
136
137
|
|
|
137
138
|
it "returns relative paths to all files in the data directory" do
|
|
138
|
-
expect(@paths).to match_array((0..9).collect
|
|
139
|
+
expect(@paths).to match_array((0..9).collect { |x|
|
|
139
140
|
"file-#{x}-💩
|
|
140
141
|
"
|
|
141
|
-
|
|
142
|
+
})
|
|
142
143
|
end
|
|
143
144
|
end
|
|
144
145
|
|
|
@@ -148,19 +149,19 @@ RSpec.describe BagIt::Bag do
|
|
|
148
149
|
end
|
|
149
150
|
|
|
150
151
|
it "accurately specifys the number of payload files" do
|
|
151
|
-
@bag.add_tag_file(
|
|
152
|
-
@bag.payload_oxum.split(
|
|
152
|
+
@bag.add_tag_file("non-payload") { |f| f.puts "I shouldn't count in the oxum" }
|
|
153
|
+
@bag.payload_oxum.split(".")[1] == @bag.bag_files.count
|
|
153
154
|
end
|
|
154
155
|
end
|
|
155
156
|
|
|
156
157
|
describe "#gc!" do
|
|
157
158
|
it "cleans up empty directories" do
|
|
158
159
|
f = File.join "1", "2", "3", "file"
|
|
159
|
-
@bag.add_file(f) { |io| io.puts
|
|
160
|
+
@bag.add_file(f) { |io| io.puts "all alone" }
|
|
160
161
|
@bag.remove_file f
|
|
161
|
-
expect(File.exist?(File.dirname(File.join(@bag_path,
|
|
162
|
+
expect(File.exist?(File.dirname(File.join(@bag_path, "data", f)))).to be true
|
|
162
163
|
@bag.gc!
|
|
163
|
-
expect(File.exist?(File.dirname(File.join(@bag_path,
|
|
164
|
+
expect(File.exist?(File.dirname(File.join(@bag_path, "data", f)))).to be false
|
|
164
165
|
end
|
|
165
166
|
end
|
|
166
167
|
end
|
data/spec/fetch_spec.rb
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
3
4
|
|
|
4
5
|
describe BagIt::Bag do
|
|
5
6
|
before do
|
|
6
7
|
@sandbox = Sandbox.new
|
|
7
8
|
# make the bag
|
|
8
|
-
@bag_path = File.join @sandbox.to_s,
|
|
9
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
|
9
10
|
@bag = described_class.new(@bag_path)
|
|
10
11
|
|
|
11
12
|
# add some files
|
|
12
|
-
File.open(
|
|
13
|
+
File.open("/dev/urandom") do |rio|
|
|
13
14
|
10.times do |n|
|
|
14
15
|
@bag.add_file("file-#{n}-💩
|
|
15
16
|
end
|
|
@@ -21,11 +22,11 @@ describe BagIt::Bag do
|
|
|
21
22
|
end
|
|
22
23
|
|
|
23
24
|
before do
|
|
24
|
-
@bag.add_remote_file(
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
@bag.add_remote_file("http://www.gnu.org/graphics/heckert_gnu.small.png", "gnu.png", 6322,
|
|
26
|
+
"390c0a30976f899cbdf951eab5cce60fe9743ac9",
|
|
27
|
+
"a3bd7ab2442028bb91b51d9f6722ec98")
|
|
27
28
|
|
|
28
|
-
path = File.join @bag_path,
|
|
29
|
+
path = File.join @bag_path, "fetch.txt"
|
|
29
30
|
@lines = File.open(path, &:readlines)
|
|
30
31
|
end
|
|
31
32
|
|
|
@@ -34,18 +35,18 @@ describe BagIt::Bag do
|
|
|
34
35
|
end
|
|
35
36
|
|
|
36
37
|
it "only contains lines of the format URL LENGTH FILENAME" do
|
|
37
|
-
@lines.each { |line| expect(line.chomp).to match(/^[^\s]+\s+(\d
|
|
38
|
+
@lines.each { |line| expect(line.chomp).to match(/^[^\s]+\s+(\d+|-)\s+[^\s]+$/) }
|
|
38
39
|
end
|
|
39
40
|
|
|
40
41
|
it "contains manifested files" do
|
|
41
|
-
path = File.join @bag_path,
|
|
42
|
+
path = File.join @bag_path, "manifest-sha1.txt"
|
|
42
43
|
data = File.open(path, &:read)
|
|
43
|
-
expect(data).to include(
|
|
44
|
+
expect(data).to include("gnu.png")
|
|
44
45
|
end
|
|
45
46
|
|
|
46
47
|
it "is gone when fetch is complete" do
|
|
47
48
|
@bag.fetch!
|
|
48
|
-
expect(File.exist?(File.join(@bag_path,
|
|
49
|
+
expect(File.exist?(File.join(@bag_path, "fetch.txt"))).not_to be true
|
|
49
50
|
end
|
|
50
51
|
end
|
|
51
52
|
end
|