bagit 0.4.3 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +2 -1
- data/Gemfile +3 -1
- data/Rakefile +10 -13
- data/bagit.gemspec +24 -23
- data/bin/bagit +22 -32
- data/lib/bagit.rb +8 -6
- data/lib/bagit/bag.rb +21 -19
- data/lib/bagit/fetch.rb +10 -8
- data/lib/bagit/file.rb +2 -0
- data/lib/bagit/info.rb +45 -43
- data/lib/bagit/manifest.rb +25 -23
- data/lib/bagit/string.rb +4 -2
- data/lib/bagit/valid.rb +45 -43
- data/lib/bagit/version.rb +3 -1
- data/spec/bagit_spec.rb +30 -29
- data/spec/fetch_spec.rb +13 -12
- data/spec/manifest_spec.rb +14 -9
- data/spec/spec_helper.rb +10 -8
- data/spec/tag_info_spec.rb +31 -30
- data/spec/tag_spec.rb +11 -10
- data/spec/util/bagit_matchers.rb +2 -0
- data/spec/validation_spec.rb +23 -22
- metadata +18 -20
- data/.rubocop.yml +0 -22
data/lib/bagit/manifest.rb
CHANGED
@@ -1,21 +1,23 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "pathname"
|
4
|
+
require "digest/sha1"
|
5
|
+
require "digest/md5"
|
4
6
|
|
5
7
|
module BagIt
|
6
8
|
# Requires response to bag_dir, tag_files, bag_files
|
7
9
|
module Manifest
|
8
10
|
def encode_filename(s)
|
9
|
-
s = s.gsub(/\r/,
|
10
|
-
s = s.gsub(/\n/,
|
11
|
+
s = s.gsub(/\r/, "%0D")
|
12
|
+
s = s.gsub(/\n/, "%0A")
|
11
13
|
s
|
12
14
|
end
|
13
15
|
|
14
16
|
# All tag files that are bag manifest files (manifest-[algorithm].txt)
|
15
17
|
def manifest_files
|
16
|
-
files = Dir[File.join(@bag_dir,
|
18
|
+
files = Dir[File.join(@bag_dir, "*")].select { |f|
|
17
19
|
File.file?(f) && File.basename(f) =~ /^manifest-.*.txt$/
|
18
|
-
|
20
|
+
}
|
19
21
|
files
|
20
22
|
end
|
21
23
|
|
@@ -25,7 +27,7 @@ module BagIt
|
|
25
27
|
end
|
26
28
|
|
27
29
|
# Generate manifest files for all the bag files
|
28
|
-
def manifest!(algo:
|
30
|
+
def manifest!(algo: "default")
|
29
31
|
# nuke all the existing manifest files
|
30
32
|
manifest_files.each { |f| FileUtils.rm f }
|
31
33
|
|
@@ -40,15 +42,15 @@ module BagIt
|
|
40
42
|
|
41
43
|
def write_checksum(checksum_algo:, relative_path:, file:)
|
42
44
|
case checksum_algo
|
43
|
-
when
|
45
|
+
when "sha1"
|
44
46
|
write_sha1(file, relative_path)
|
45
|
-
when
|
47
|
+
when "md5"
|
46
48
|
write_md5(file, relative_path)
|
47
|
-
when
|
49
|
+
when "sha256"
|
48
50
|
write_sha256(file, relative_path)
|
49
|
-
when
|
51
|
+
when "sha512"
|
50
52
|
write_sha256(file, relative_path)
|
51
|
-
when
|
53
|
+
when "default"
|
52
54
|
write_sha1(file, relative_path)
|
53
55
|
write_md5(file, relative_path)
|
54
56
|
end
|
@@ -56,29 +58,29 @@ module BagIt
|
|
56
58
|
|
57
59
|
def write_sha1(f, rel_path)
|
58
60
|
sha1 = Digest::SHA1.file f
|
59
|
-
File.open(manifest_file(:sha1),
|
61
|
+
File.open(manifest_file(:sha1), "a") { |io| io.puts "#{sha1} #{rel_path}" }
|
60
62
|
end
|
61
63
|
|
62
64
|
def write_md5(f, rel_path)
|
63
65
|
md5 = Digest::MD5.file f
|
64
|
-
File.open(manifest_file(:md5),
|
66
|
+
File.open(manifest_file(:md5), "a") { |io| io.puts "#{md5} #{rel_path}" }
|
65
67
|
end
|
66
68
|
|
67
69
|
def write_sha256(f, rel_path)
|
68
70
|
sha256 = Digest::SHA256.file f
|
69
|
-
File.open(manifest_file(:sha256),
|
71
|
+
File.open(manifest_file(:sha256), "a") { |io| io.puts "#{sha256} #{rel_path}" }
|
70
72
|
end
|
71
73
|
|
72
74
|
def write_sha512(f, rel_path)
|
73
75
|
sha512 = Digest::SHA512.file f
|
74
|
-
File.open(manifest_file(:sha512),
|
76
|
+
File.open(manifest_file(:sha512), "a") { |io| io.puts "#{sha512} #{rel_path}" }
|
75
77
|
end
|
76
78
|
|
77
79
|
# All tag files that are bag manifest files (tagmanifest-[algorithm].txt)
|
78
80
|
def tagmanifest_files
|
79
|
-
files = Dir[File.join(@bag_dir,
|
81
|
+
files = Dir[File.join(@bag_dir, "*")].select { |f|
|
80
82
|
File.file?(f) && File.basename(f) =~ /^tagmanifest-.*.txt$/
|
81
|
-
|
83
|
+
}
|
82
84
|
files
|
83
85
|
end
|
84
86
|
|
@@ -120,7 +122,7 @@ module BagIt
|
|
120
122
|
|
121
123
|
# write file
|
122
124
|
if src_path.nil?
|
123
|
-
File.open(f,
|
125
|
+
File.open(f, "w") { |io| yield io }
|
124
126
|
else
|
125
127
|
FileUtils.cp src_path, f
|
126
128
|
end
|
@@ -137,11 +139,11 @@ module BagIt
|
|
137
139
|
|
138
140
|
# sha1
|
139
141
|
sha1 = Digest::SHA1.hexdigest data
|
140
|
-
File.open(tagmanifest_file(:sha1),
|
142
|
+
File.open(tagmanifest_file(:sha1), "a") { |io| io.puts "#{sha1} #{rel_path}" }
|
141
143
|
|
142
144
|
# md5
|
143
145
|
md5 = Digest::MD5.hexdigest data
|
144
|
-
File.open(tagmanifest_file(:md5),
|
146
|
+
File.open(tagmanifest_file(:md5), "a") { |io| io.puts "#{md5} #{rel_path}" }
|
145
147
|
tag_files
|
146
148
|
end
|
147
149
|
|
@@ -173,7 +175,7 @@ module BagIt
|
|
173
175
|
Digest::MD5
|
174
176
|
else
|
175
177
|
:unknown
|
176
|
-
|
178
|
+
end
|
177
179
|
|
178
180
|
# check it, an unknown algorithm is always true
|
179
181
|
if algo == :unknown
|
data/lib/bagit/string.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Some mixed in functionality for String
|
2
4
|
class String
|
3
5
|
# Wrap a string to lines of a specified width. All existing newlines
|
4
6
|
# are not guaranteed to be preserved
|
5
7
|
def wrap(width)
|
6
|
-
s = gsub(/\s+/,
|
8
|
+
s = gsub(/\s+/, " ").strip
|
7
9
|
|
8
10
|
if s.length > width
|
9
11
|
s[0...width] + '\n' + s[width..-1].wrap(width)
|
@@ -14,7 +16,7 @@ class String
|
|
14
16
|
|
15
17
|
# Indent each line of a string by n spaces
|
16
18
|
def indent(n)
|
17
|
-
indent =
|
19
|
+
indent = " " * n
|
18
20
|
gsub '\n', "\n#{indent}"
|
19
21
|
end
|
20
22
|
|
data/lib/bagit/valid.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require
|
4
|
-
require
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "validatable"
|
4
|
+
require "open-uri"
|
5
|
+
require "cgi"
|
6
|
+
require "logger"
|
5
7
|
|
6
8
|
module BagIt
|
7
9
|
class Bag
|
@@ -12,8 +14,8 @@ module BagIt
|
|
12
14
|
|
13
15
|
module Validity
|
14
16
|
def decode_filename(s)
|
15
|
-
s = s.gsub(
|
16
|
-
s = s.gsub(
|
17
|
+
s = s.gsub("%0D", "\r")
|
18
|
+
s = s.gsub("%0A", "\n")
|
17
19
|
s
|
18
20
|
end
|
19
21
|
|
@@ -88,52 +90,52 @@ module BagIt
|
|
88
90
|
|
89
91
|
protected
|
90
92
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
93
|
+
# Returns all files in the instance that are not manifested
|
94
|
+
def unmanifested_files
|
95
|
+
mfs = manifested_files.map { |f| File.join bag_dir, f }
|
96
|
+
bag_files.reject { |f| mfs.member? f }
|
97
|
+
end
|
96
98
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
99
|
+
# Returns a list of manifested files that are not present
|
100
|
+
def empty_manifests
|
101
|
+
bfs = bag_files
|
102
|
+
manifested_files.reject { |f| bfs.member? File.join(bag_dir, f) }
|
103
|
+
end
|
102
104
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
end
|
109
|
-
empty
|
105
|
+
# Returns a list of tag manifested files that are not present
|
106
|
+
def tag_empty_manifests
|
107
|
+
empty = []
|
108
|
+
tag_manifested_files.each do |f|
|
109
|
+
empty.push f unless File.exist?(File.join(bag_dir, f))
|
110
110
|
end
|
111
|
+
empty
|
112
|
+
end
|
111
113
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
end
|
114
|
+
# Returns a list of all files present in the manifest files
|
115
|
+
def manifested_files
|
116
|
+
manifest_files.inject([]) do |acc, mf|
|
117
|
+
files = File.open(mf) { |io|
|
118
|
+
io.readlines.map do |line|
|
119
|
+
_digest, path = line.chomp.split(/\s+/, 2)
|
120
|
+
decode_filename(path)
|
120
121
|
end
|
122
|
+
}
|
121
123
|
|
122
|
-
|
123
|
-
end
|
124
|
+
(acc + files).uniq
|
124
125
|
end
|
126
|
+
end
|
125
127
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
end
|
128
|
+
# Returns a list of all files in the tag manifest files
|
129
|
+
def tag_manifested_files
|
130
|
+
tagmanifest_files.inject([]) do |acc, mf|
|
131
|
+
files = File.open(mf) { |io|
|
132
|
+
io.readlines.map do |line|
|
133
|
+
_digest, path = line.chomp.split(/\s+/, 2)
|
134
|
+
path
|
134
135
|
end
|
135
|
-
|
136
|
-
|
136
|
+
}
|
137
|
+
(acc + files).uniq
|
137
138
|
end
|
139
|
+
end
|
138
140
|
end
|
139
141
|
end
|
data/lib/bagit/version.rb
CHANGED
data/spec/bagit_spec.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
3
4
|
|
4
5
|
# based on v0.96 http://www.cdlib.org/inside/diglib/bagit/bagitspec.html
|
5
6
|
RSpec.describe BagIt::Bag do
|
6
|
-
describe
|
7
|
+
describe "empty bag" do
|
7
8
|
before do
|
8
9
|
@sandbox = Sandbox.new
|
9
10
|
# make the bag
|
10
|
-
@bag_path = File.join @sandbox.to_s,
|
11
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
11
12
|
@bag = described_class.new @bag_path
|
12
13
|
end
|
13
14
|
|
@@ -20,16 +21,16 @@ RSpec.describe BagIt::Bag do
|
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
23
|
-
describe
|
24
|
+
describe "bag with files" do
|
24
25
|
before do
|
25
26
|
@sandbox = Sandbox.new
|
26
27
|
|
27
28
|
# make the bag
|
28
|
-
@bag_path = File.join @sandbox.to_s,
|
29
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
29
30
|
@bag = described_class.new @bag_path
|
30
31
|
|
31
32
|
# add some files
|
32
|
-
File.open(
|
33
|
+
File.open("/dev/urandom") do |rio|
|
33
34
|
10.times do |n|
|
34
35
|
@bag.add_file("file-#{n}-💩
|
35
36
|
") { |io| io.write rio.read(16) }
|
@@ -50,39 +51,39 @@ RSpec.describe BagIt::Bag do
|
|
50
51
|
end
|
51
52
|
|
52
53
|
it "has a sub-directory called data" do
|
53
|
-
data_path = File.join @bag_path,
|
54
|
+
data_path = File.join @bag_path, "data"
|
54
55
|
expect(File.directory?(data_path)).to be true
|
55
56
|
end
|
56
57
|
|
57
58
|
describe "#add_file" do
|
58
59
|
it "allows addition of files via io" do
|
59
|
-
@bag.add_file("foo") { |io| io.puts
|
60
|
+
@bag.add_file("foo") { |io| io.puts "all alone" }
|
60
61
|
expect(File.join(@bag_path, "data", "foo")).to exist_on_fs
|
61
62
|
end
|
62
63
|
|
63
64
|
it "allows addition of files via copy" do
|
64
|
-
src_path = File.join @sandbox.to_s,
|
65
|
-
File.open(src_path,
|
66
|
-
@bag.add_file("foo", src_path) { |io| io.puts
|
65
|
+
src_path = File.join @sandbox.to_s, "somefile"
|
66
|
+
File.open(src_path, "w") { |io| io.puts "something" }
|
67
|
+
@bag.add_file("foo", src_path) { |io| io.puts "all alone" }
|
67
68
|
expect(File.join(@bag_path, "data", "foo")).to exist_on_fs
|
68
69
|
end
|
69
70
|
|
70
71
|
it "allows addition of files with deep paths" do
|
71
|
-
@bag.add_file("deep/dir/structure/file") { |io| io.puts
|
72
|
+
@bag.add_file("deep/dir/structure/file") { |io| io.puts "all alone" }
|
72
73
|
expect(File.join(@bag_path, "data", "deep/dir/structure/file")).to exist_on_fs
|
73
74
|
end
|
74
75
|
|
75
76
|
it "does not allow overwriting of files" do
|
76
|
-
expect
|
77
|
+
expect {
|
77
78
|
@bag.add_file("file-0-💩
|
78
|
-
") { |io| io.puts
|
79
|
-
|
79
|
+
") { |io| io.puts "overwrite!" }
|
80
|
+
}.to raise_error(RuntimeError)
|
80
81
|
end
|
81
82
|
|
82
83
|
it "updates the payload oxum" do
|
83
|
-
oxum_count = @bag.bag_info["Payload-Oxum"].split(
|
84
|
-
@bag.add_file("foo") { |io| io.puts
|
85
|
-
expect(@bag.bag_info["Payload-Oxum"].split(
|
84
|
+
oxum_count = @bag.bag_info["Payload-Oxum"].split(".")[1].to_i
|
85
|
+
@bag.add_file("foo") { |io| io.puts "all alone" }
|
86
|
+
expect(@bag.bag_info["Payload-Oxum"].split(".")[1].to_i).to eq(oxum_count + 1)
|
86
87
|
end
|
87
88
|
end
|
88
89
|
|
@@ -95,14 +96,14 @@ RSpec.describe BagIt::Bag do
|
|
95
96
|
describe "#get" do
|
96
97
|
describe "file not in bag" do
|
97
98
|
it "returns nil" do
|
98
|
-
expect(@bag.get(
|
99
|
+
expect(@bag.get("foobar")).to be_nil
|
99
100
|
end
|
100
101
|
end
|
101
102
|
|
102
103
|
describe "file in bag" do
|
103
104
|
before do
|
104
|
-
@contents =
|
105
|
-
@bag.add_file("foo") { |io| io <<
|
105
|
+
@contents = "all alone"
|
106
|
+
@bag.add_file("foo") { |io| io << "all alone" }
|
106
107
|
@file = @bag.get("foo")
|
107
108
|
end
|
108
109
|
|
@@ -135,10 +136,10 @@ RSpec.describe BagIt::Bag do
|
|
135
136
|
end
|
136
137
|
|
137
138
|
it "returns relative paths to all files in the data directory" do
|
138
|
-
expect(@paths).to match_array((0..9).collect
|
139
|
+
expect(@paths).to match_array((0..9).collect { |x|
|
139
140
|
"file-#{x}-💩
|
140
141
|
"
|
141
|
-
|
142
|
+
})
|
142
143
|
end
|
143
144
|
end
|
144
145
|
|
@@ -148,19 +149,19 @@ RSpec.describe BagIt::Bag do
|
|
148
149
|
end
|
149
150
|
|
150
151
|
it "accurately specifys the number of payload files" do
|
151
|
-
@bag.add_tag_file(
|
152
|
-
@bag.payload_oxum.split(
|
152
|
+
@bag.add_tag_file("non-payload") { |f| f.puts "I shouldn't count in the oxum" }
|
153
|
+
@bag.payload_oxum.split(".")[1] == @bag.bag_files.count
|
153
154
|
end
|
154
155
|
end
|
155
156
|
|
156
157
|
describe "#gc!" do
|
157
158
|
it "cleans up empty directories" do
|
158
159
|
f = File.join "1", "2", "3", "file"
|
159
|
-
@bag.add_file(f) { |io| io.puts
|
160
|
+
@bag.add_file(f) { |io| io.puts "all alone" }
|
160
161
|
@bag.remove_file f
|
161
|
-
expect(File.exist?(File.dirname(File.join(@bag_path,
|
162
|
+
expect(File.exist?(File.dirname(File.join(@bag_path, "data", f)))).to be true
|
162
163
|
@bag.gc!
|
163
|
-
expect(File.exist?(File.dirname(File.join(@bag_path,
|
164
|
+
expect(File.exist?(File.dirname(File.join(@bag_path, "data", f)))).to be false
|
164
165
|
end
|
165
166
|
end
|
166
167
|
end
|
data/spec/fetch_spec.rb
CHANGED
@@ -1,15 +1,16 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
3
4
|
|
4
5
|
describe BagIt::Bag do
|
5
6
|
before do
|
6
7
|
@sandbox = Sandbox.new
|
7
8
|
# make the bag
|
8
|
-
@bag_path = File.join @sandbox.to_s,
|
9
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
9
10
|
@bag = described_class.new(@bag_path)
|
10
11
|
|
11
12
|
# add some files
|
12
|
-
File.open(
|
13
|
+
File.open("/dev/urandom") do |rio|
|
13
14
|
10.times do |n|
|
14
15
|
@bag.add_file("file-#{n}-💩
|
15
16
|
end
|
@@ -21,11 +22,11 @@ describe BagIt::Bag do
|
|
21
22
|
end
|
22
23
|
|
23
24
|
before do
|
24
|
-
@bag.add_remote_file(
|
25
|
-
|
26
|
-
|
25
|
+
@bag.add_remote_file("http://www.gnu.org/graphics/heckert_gnu.small.png", "gnu.png", 6322,
|
26
|
+
"390c0a30976f899cbdf951eab5cce60fe9743ac9",
|
27
|
+
"a3bd7ab2442028bb91b51d9f6722ec98")
|
27
28
|
|
28
|
-
path = File.join @bag_path,
|
29
|
+
path = File.join @bag_path, "fetch.txt"
|
29
30
|
@lines = File.open(path, &:readlines)
|
30
31
|
end
|
31
32
|
|
@@ -34,18 +35,18 @@ describe BagIt::Bag do
|
|
34
35
|
end
|
35
36
|
|
36
37
|
it "only contains lines of the format URL LENGTH FILENAME" do
|
37
|
-
@lines.each { |line| expect(line.chomp).to match(/^[^\s]+\s+(\d
|
38
|
+
@lines.each { |line| expect(line.chomp).to match(/^[^\s]+\s+(\d+|-)\s+[^\s]+$/) }
|
38
39
|
end
|
39
40
|
|
40
41
|
it "contains manifested files" do
|
41
|
-
path = File.join @bag_path,
|
42
|
+
path = File.join @bag_path, "manifest-sha1.txt"
|
42
43
|
data = File.open(path, &:read)
|
43
|
-
expect(data).to include(
|
44
|
+
expect(data).to include("gnu.png")
|
44
45
|
end
|
45
46
|
|
46
47
|
it "is gone when fetch is complete" do
|
47
48
|
@bag.fetch!
|
48
|
-
expect(File.exist?(File.join(@bag_path,
|
49
|
+
expect(File.exist?(File.join(@bag_path, "fetch.txt"))).not_to be true
|
49
50
|
end
|
50
51
|
end
|
51
52
|
end
|