bagit 0.3.5 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +2 -1
- data/.travis.yml +1 -2
- data/Gemfile +4 -2
- data/README.md +1 -1
- data/Rakefile +11 -8
- data/bagit.gemspec +25 -21
- data/bin/bagit +59 -63
- data/lib/bagit.rb +8 -6
- data/lib/bagit/bag.rb +43 -47
- data/lib/bagit/fetch.rb +23 -27
- data/lib/bagit/file.rb +11 -14
- data/lib/bagit/info.rb +39 -51
- data/lib/bagit/manifest.rb +72 -49
- data/lib/bagit/string.rb +6 -6
- data/lib/bagit/valid.rb +51 -57
- data/lib/bagit/version.rb +3 -1
- data/spec/bagit_spec.rb +59 -54
- data/spec/fetch_spec.rb +33 -38
- data/spec/manifest_spec.rb +107 -111
- data/spec/spec_helper.rb +12 -12
- data/spec/tag_info_spec.rb +101 -108
- data/spec/tag_spec.rb +47 -49
- data/spec/util/bagit_matchers.rb +5 -14
- data/spec/validation_spec.rb +108 -110
- metadata +50 -9
data/lib/bagit/string.rb
CHANGED
@@ -1,27 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Some mixed in functionality for String
|
2
4
|
class String
|
3
|
-
|
4
5
|
# Wrap a string to lines of a specified width. All existing newlines
|
5
6
|
# are not guaranteed to be preserved
|
6
7
|
def wrap(width)
|
7
|
-
s = gsub(/\s+/,
|
8
|
+
s = gsub(/\s+/, " ").strip
|
8
9
|
|
9
10
|
if s.length > width
|
10
11
|
s[0...width] + '\n' + s[width..-1].wrap(width)
|
11
12
|
else
|
12
13
|
s
|
13
14
|
end
|
14
|
-
|
15
15
|
end
|
16
16
|
|
17
17
|
# Indent each line of a string by n spaces
|
18
18
|
def indent(n)
|
19
|
-
indent =
|
19
|
+
indent = " " * n
|
20
20
|
gsub '\n', "\n#{indent}"
|
21
21
|
end
|
22
22
|
|
23
|
-
# Colorize logs
|
24
|
-
|
23
|
+
# Colorize logs
|
24
|
+
def color(color_code)
|
25
25
|
"\e[#{color_code}m#{self}\e[0m"
|
26
26
|
end
|
27
27
|
|
data/lib/bagit/valid.rb
CHANGED
@@ -1,32 +1,31 @@
|
|
1
|
-
|
2
|
-
require 'open-uri'
|
3
|
-
require 'cgi'
|
4
|
-
require 'logger'
|
1
|
+
# frozen_string_literal: true
|
5
2
|
|
6
|
-
|
3
|
+
require "validatable"
|
4
|
+
require "open-uri"
|
5
|
+
require "cgi"
|
6
|
+
require "logger"
|
7
7
|
|
8
|
+
module BagIt
|
8
9
|
class Bag
|
9
10
|
include Validatable
|
10
|
-
validates_true_for :consistency, :
|
11
|
-
validates_true_for :completeness, :
|
11
|
+
validates_true_for :consistency, logic: proc { consistent? }
|
12
|
+
validates_true_for :completeness, logic: proc { complete? }
|
12
13
|
end
|
13
14
|
|
14
15
|
module Validity
|
15
16
|
def decode_filename(s)
|
16
|
-
s = s.gsub(
|
17
|
-
s = s.gsub(
|
18
|
-
|
17
|
+
s = s.gsub("%0D", "\r")
|
18
|
+
s = s.gsub("%0A", "\n")
|
19
|
+
s
|
19
20
|
end
|
20
|
-
|
21
|
+
|
21
22
|
# Return true if the manifest cover all files and all files are
|
22
23
|
# covered.
|
23
24
|
def complete?
|
24
25
|
logger = Logger.new(STDOUT)
|
25
26
|
|
26
|
-
if manifest_files == []
|
27
|
-
|
28
|
-
end
|
29
|
-
|
27
|
+
errors.add :completeness, "there are no manifest files" if manifest_files == []
|
28
|
+
|
30
29
|
unmanifested_files.each do |file|
|
31
30
|
logger.error("#{file} is present but not manifested".red)
|
32
31
|
errors.add :completeness, "#{file} is present but not manifested"
|
@@ -44,38 +43,39 @@ module BagIt
|
|
44
43
|
errors.on(:completeness).nil?
|
45
44
|
end
|
46
45
|
|
46
|
+
def manifest_type(type)
|
47
|
+
case type
|
48
|
+
when /sha1/i
|
49
|
+
Digest::SHA1
|
50
|
+
when /md5/i
|
51
|
+
Digest::MD5
|
52
|
+
when /sha256/i
|
53
|
+
Digest::SHA256
|
54
|
+
when /sha384/i
|
55
|
+
Digest::SHA384
|
56
|
+
when /sha512/i
|
57
|
+
Digest::SHA512
|
58
|
+
else
|
59
|
+
raise ArgumentError, "Algorithm #{manifest_type} is not supported."
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
47
63
|
# Return true if all manifested files message digests match.
|
48
64
|
def consistent?
|
49
|
-
(manifest_files|tagmanifest_files).each do |mf|
|
65
|
+
(manifest_files | tagmanifest_files).each do |mf|
|
50
66
|
# get the algorithm implementation
|
51
67
|
File.basename(mf) =~ /manifest-(.+).txt$/
|
52
|
-
|
53
|
-
algo =
|
54
|
-
when /sha1/i
|
55
|
-
Digest::SHA1
|
56
|
-
when /md5/i
|
57
|
-
Digest::MD5
|
58
|
-
when /sha256/i
|
59
|
-
Digest::SHA256
|
60
|
-
when /sha384/i
|
61
|
-
Digest::SHA384
|
62
|
-
when /sha512/i
|
63
|
-
Digest::SHA512
|
64
|
-
else
|
65
|
-
raise ArgumentError.new("Algorithm #{manifest_type} is not supported.")
|
66
|
-
end
|
68
|
+
type = Regexp.last_match(1)
|
69
|
+
algo = manifest_type(type)
|
67
70
|
# Check every file in the manifest
|
68
71
|
File.open(mf) do |io|
|
69
72
|
io.each_line do |line|
|
70
|
-
expected, path = line.chomp.split
|
73
|
+
expected, path = line.chomp.split(/\s+/, 2)
|
71
74
|
file = File.join(bag_dir, decode_filename(path))
|
72
75
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
errors.add :consistency, "expected #{file} to have #{algo}: #{expected}, actual is #{actual}"
|
77
|
-
end
|
78
|
-
end
|
76
|
+
next unless File.exist? file
|
77
|
+
actual = algo.file(file).hexdigest
|
78
|
+
errors.add :consistency, "expected #{file} to have #{algo}: #{expected}, actual is #{actual}" if expected.downcase != actual
|
79
79
|
end
|
80
80
|
end
|
81
81
|
end
|
@@ -89,7 +89,7 @@ module BagIt
|
|
89
89
|
end
|
90
90
|
|
91
91
|
protected
|
92
|
-
|
92
|
+
|
93
93
|
# Returns all files in the instance that are not manifested
|
94
94
|
def unmanifested_files
|
95
95
|
mfs = manifested_files.map { |f| File.join bag_dir, f }
|
@@ -101,47 +101,41 @@ module BagIt
|
|
101
101
|
bfs = bag_files
|
102
102
|
manifested_files.reject { |f| bfs.member? File.join(bag_dir, f) }
|
103
103
|
end
|
104
|
+
|
104
105
|
# Returns a list of tag manifested files that are not present
|
105
106
|
def tag_empty_manifests
|
106
107
|
empty = []
|
107
108
|
tag_manifested_files.each do |f|
|
108
|
-
|
109
|
-
empty.push f
|
110
|
-
end
|
109
|
+
empty.push f unless File.exist?(File.join(bag_dir, f))
|
111
110
|
end
|
112
|
-
|
111
|
+
empty
|
113
112
|
end
|
113
|
+
|
114
114
|
# Returns a list of all files present in the manifest files
|
115
115
|
def manifested_files
|
116
|
-
|
117
116
|
manifest_files.inject([]) do |acc, mf|
|
118
|
-
|
119
|
-
files = File.open(mf) do |io|
|
120
|
-
|
117
|
+
files = File.open(mf) { |io|
|
121
118
|
io.readlines.map do |line|
|
122
|
-
|
119
|
+
_digest, path = line.chomp.split(/\s+/, 2)
|
123
120
|
decode_filename(path)
|
124
121
|
end
|
125
|
-
|
126
|
-
end
|
122
|
+
}
|
127
123
|
|
128
124
|
(acc + files).uniq
|
129
125
|
end
|
130
|
-
|
131
126
|
end
|
127
|
+
|
132
128
|
# Returns a list of all files in the tag manifest files
|
133
129
|
def tag_manifested_files
|
134
130
|
tagmanifest_files.inject([]) do |acc, mf|
|
135
|
-
files = File.open(mf)
|
131
|
+
files = File.open(mf) { |io|
|
136
132
|
io.readlines.map do |line|
|
137
|
-
|
133
|
+
_digest, path = line.chomp.split(/\s+/, 2)
|
138
134
|
path
|
139
135
|
end
|
140
|
-
|
141
|
-
(acc+files).uniq
|
136
|
+
}
|
137
|
+
(acc + files).uniq
|
142
138
|
end
|
143
139
|
end
|
144
|
-
|
145
140
|
end
|
146
|
-
|
147
141
|
end
|
data/lib/bagit/version.rb
CHANGED
data/spec/bagit_spec.rb
CHANGED
@@ -1,35 +1,36 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
3
4
|
|
4
5
|
# based on v0.96 http://www.cdlib.org/inside/diglib/bagit/bagitspec.html
|
5
|
-
describe BagIt::Bag do
|
6
|
-
describe
|
7
|
-
before
|
6
|
+
RSpec.describe BagIt::Bag do
|
7
|
+
describe "empty bag" do
|
8
|
+
before do
|
8
9
|
@sandbox = Sandbox.new
|
9
10
|
# make the bag
|
10
|
-
@bag_path = File.join @sandbox.to_s,
|
11
|
-
@bag =
|
11
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
12
|
+
@bag = described_class.new @bag_path
|
12
13
|
end
|
13
14
|
|
14
|
-
after
|
15
|
+
after do
|
15
16
|
@sandbox.cleanup!
|
16
17
|
end
|
17
18
|
|
18
|
-
it "
|
19
|
+
it "is empty" do
|
19
20
|
expect(@bag).to be_empty
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
23
|
-
describe
|
24
|
-
before
|
24
|
+
describe "bag with files" do
|
25
|
+
before do
|
25
26
|
@sandbox = Sandbox.new
|
26
27
|
|
27
28
|
# make the bag
|
28
|
-
@bag_path = File.join @sandbox.to_s,
|
29
|
-
@bag =
|
29
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
30
|
+
@bag = described_class.new @bag_path
|
30
31
|
|
31
32
|
# add some files
|
32
|
-
File.open(
|
33
|
+
File.open("/dev/urandom") do |rio|
|
33
34
|
10.times do |n|
|
34
35
|
@bag.add_file("file-#{n}-💩
|
35
36
|
") { |io| io.write rio.read(16) }
|
@@ -37,82 +38,84 @@ describe BagIt::Bag do
|
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
40
|
-
after
|
41
|
+
after do
|
41
42
|
@sandbox.cleanup!
|
42
43
|
end
|
43
44
|
|
44
|
-
it "
|
45
|
+
it "is a directory" do
|
45
46
|
expect(File.directory?(@bag_path)).to be true
|
46
47
|
end
|
47
48
|
|
48
|
-
it "
|
49
|
+
it "is not be empty" do
|
49
50
|
expect(@bag).not_to be_empty
|
50
51
|
end
|
51
52
|
|
52
|
-
it "
|
53
|
-
data_path = File.join @bag_path,
|
53
|
+
it "has a sub-directory called data" do
|
54
|
+
data_path = File.join @bag_path, "data"
|
54
55
|
expect(File.directory?(data_path)).to be true
|
55
56
|
end
|
56
57
|
|
57
58
|
describe "#add_file" do
|
58
|
-
it "
|
59
|
-
@bag.add_file("foo") { |io| io.puts
|
59
|
+
it "allows addition of files via io" do
|
60
|
+
@bag.add_file("foo") { |io| io.puts "all alone" }
|
60
61
|
expect(File.join(@bag_path, "data", "foo")).to exist_on_fs
|
61
62
|
end
|
62
63
|
|
63
|
-
it "
|
64
|
-
src_path = File.join @sandbox.to_s,
|
65
|
-
File.open(src_path,
|
66
|
-
@bag.add_file("foo", src_path) { |io| io.puts
|
64
|
+
it "allows addition of files via copy" do
|
65
|
+
src_path = File.join @sandbox.to_s, "somefile"
|
66
|
+
File.open(src_path, "w") { |io| io.puts "something" }
|
67
|
+
@bag.add_file("foo", src_path) { |io| io.puts "all alone" }
|
67
68
|
expect(File.join(@bag_path, "data", "foo")).to exist_on_fs
|
68
69
|
end
|
69
70
|
|
70
|
-
it "
|
71
|
-
@bag.add_file("deep/dir/structure/file") { |io| io.puts
|
71
|
+
it "allows addition of files with deep paths" do
|
72
|
+
@bag.add_file("deep/dir/structure/file") { |io| io.puts "all alone" }
|
72
73
|
expect(File.join(@bag_path, "data", "deep/dir/structure/file")).to exist_on_fs
|
73
74
|
end
|
74
75
|
|
75
|
-
it "
|
76
|
-
expect {
|
77
|
-
|
76
|
+
it "does not allow overwriting of files" do
|
77
|
+
expect {
|
78
|
+
@bag.add_file("file-0-💩
|
79
|
+
") { |io| io.puts "overwrite!" }
|
80
|
+
}.to raise_error(RuntimeError)
|
78
81
|
end
|
79
82
|
|
80
|
-
it "
|
81
|
-
oxum_count = @bag.bag_info["Payload-Oxum"].split(
|
82
|
-
@bag.add_file("foo") { |io| io.puts
|
83
|
-
expect(@bag.bag_info["Payload-Oxum"].split(
|
83
|
+
it "updates the payload oxum" do
|
84
|
+
oxum_count = @bag.bag_info["Payload-Oxum"].split(".")[1].to_i
|
85
|
+
@bag.add_file("foo") { |io| io.puts "all alone" }
|
86
|
+
expect(@bag.bag_info["Payload-Oxum"].split(".")[1].to_i).to eq(oxum_count + 1)
|
84
87
|
end
|
85
88
|
end
|
86
89
|
|
87
90
|
describe "#remove_file" do
|
88
|
-
it "
|
91
|
+
it "raises an error when deleing non existant files" do
|
89
92
|
expect { @bag.remove_file("file-x") }.to raise_error(RuntimeError)
|
90
93
|
end
|
91
94
|
end
|
92
95
|
|
93
96
|
describe "#get" do
|
94
97
|
describe "file not in bag" do
|
95
|
-
it "
|
96
|
-
expect(@bag.get(
|
98
|
+
it "returns nil" do
|
99
|
+
expect(@bag.get("foobar")).to be_nil
|
97
100
|
end
|
98
101
|
end
|
99
102
|
|
100
103
|
describe "file in bag" do
|
101
104
|
before do
|
102
|
-
@contents =
|
103
|
-
@bag.add_file("foo") { |io| io <<
|
105
|
+
@contents = "all alone"
|
106
|
+
@bag.add_file("foo") { |io| io << "all alone" }
|
104
107
|
@file = @bag.get("foo")
|
105
108
|
end
|
106
109
|
|
107
|
-
it "
|
110
|
+
it "returns an IO object for the given path" do
|
108
111
|
expect(@file).to be_a_kind_of(IO)
|
109
112
|
end
|
110
113
|
|
111
|
-
it "
|
114
|
+
it "has the same content as the file added" do
|
112
115
|
expect(@file.read).to eq(@contents)
|
113
116
|
end
|
114
117
|
|
115
|
-
it "
|
118
|
+
it "accepts an optional leading slash or ./" do
|
116
119
|
expect(@bag.get("/foo").read).to eq(@contents)
|
117
120
|
expect(@bag.get("./foo").read).to eq(@contents)
|
118
121
|
end
|
@@ -124,7 +127,7 @@ describe BagIt::Bag do
|
|
124
127
|
@paths = @bag.paths
|
125
128
|
end
|
126
129
|
|
127
|
-
it "
|
130
|
+
it "returns a non-empty Array of Strings" do
|
128
131
|
expect(@paths).to be_a_kind_of(Array)
|
129
132
|
expect(@paths).not_to be_empty
|
130
133
|
@paths.each do |p|
|
@@ -132,31 +135,33 @@ describe BagIt::Bag do
|
|
132
135
|
end
|
133
136
|
end
|
134
137
|
|
135
|
-
it "
|
136
|
-
expect(@paths).to match_array((0..9).collect { |x|
|
137
|
-
"
|
138
|
+
it "returns relative paths to all files in the data directory" do
|
139
|
+
expect(@paths).to match_array((0..9).collect { |x|
|
140
|
+
"file-#{x}-💩
|
141
|
+
"
|
142
|
+
})
|
138
143
|
end
|
139
144
|
end
|
140
145
|
|
141
146
|
describe "#payload-oxum" do
|
142
|
-
it "
|
147
|
+
it "returns a valid oxum" do
|
143
148
|
expect(@bag.payload_oxum).to match(/^[0-9]+\.[0-9]+$/)
|
144
149
|
end
|
145
150
|
|
146
|
-
it "
|
147
|
-
@bag.add_tag_file(
|
148
|
-
@bag.payload_oxum.split(
|
151
|
+
it "accurately specifys the number of payload files" do
|
152
|
+
@bag.add_tag_file("non-payload") { |f| f.puts "I shouldn't count in the oxum" }
|
153
|
+
@bag.payload_oxum.split(".")[1] == @bag.bag_files.count
|
149
154
|
end
|
150
155
|
end
|
151
156
|
|
152
157
|
describe "#gc!" do
|
153
|
-
it "
|
158
|
+
it "cleans up empty directories" do
|
154
159
|
f = File.join "1", "2", "3", "file"
|
155
|
-
@bag.add_file(f) { |io| io.puts
|
160
|
+
@bag.add_file(f) { |io| io.puts "all alone" }
|
156
161
|
@bag.remove_file f
|
157
|
-
expect(File.exist?(File.dirname(File.join(@bag_path,
|
162
|
+
expect(File.exist?(File.dirname(File.join(@bag_path, "data", f)))).to be true
|
158
163
|
@bag.gc!
|
159
|
-
expect(File.exist?(File.dirname(File.join(@bag_path,
|
164
|
+
expect(File.exist?(File.dirname(File.join(@bag_path, "data", f)))).to be false
|
160
165
|
end
|
161
166
|
end
|
162
167
|
end
|
data/spec/fetch_spec.rb
CHANGED
@@ -1,57 +1,52 @@
|
|
1
|
-
#
|
2
|
-
require 'spec_helper'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
before(:each) do
|
3
|
+
require "spec_helper"
|
7
4
|
|
5
|
+
describe BagIt::Bag do
|
6
|
+
before do
|
8
7
|
@sandbox = Sandbox.new
|
9
|
-
|
10
8
|
# make the bag
|
11
|
-
@bag_path = File.join @sandbox.to_s,
|
12
|
-
@bag =
|
9
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
10
|
+
@bag = described_class.new(@bag_path)
|
13
11
|
|
14
12
|
# add some files
|
15
|
-
File.open(
|
16
|
-
|
13
|
+
File.open("/dev/urandom") do |rio|
|
17
14
|
10.times do |n|
|
18
15
|
@bag.add_file("file-#{n}-💩
|
19
16
|
end
|
20
|
-
|
21
17
|
end
|
22
|
-
|
23
|
-
end
|
24
|
-
|
25
|
-
after(:each) do
|
26
|
-
@sandbox.cleanup!
|
27
18
|
end
|
19
|
+
describe "fetch.txt" do
|
20
|
+
after do
|
21
|
+
@sandbox.cleanup!
|
22
|
+
end
|
28
23
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
24
|
+
before do
|
25
|
+
@bag.add_remote_file("http://www.gnu.org/graphics/heckert_gnu.small.png", "gnu.png", 6322,
|
26
|
+
"390c0a30976f899cbdf951eab5cce60fe9743ac9",
|
27
|
+
"a3bd7ab2442028bb91b51d9f6722ec98")
|
33
28
|
|
34
|
-
|
35
|
-
|
36
|
-
|
29
|
+
path = File.join @bag_path, "fetch.txt"
|
30
|
+
@lines = File.open(path, &:readlines)
|
31
|
+
end
|
37
32
|
|
38
|
-
|
39
|
-
|
40
|
-
|
33
|
+
it "isn't empty" do
|
34
|
+
expect(@lines).not_to be_empty
|
35
|
+
end
|
41
36
|
|
42
|
-
|
43
|
-
|
44
|
-
|
37
|
+
it "only contains lines of the format URL LENGTH FILENAME" do
|
38
|
+
@lines.each { |line| expect(line.chomp).to match(/^[^\s]+\s+(\d+|-)\s+[^\s]+$/) }
|
39
|
+
end
|
45
40
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
41
|
+
it "contains manifested files" do
|
42
|
+
path = File.join @bag_path, "manifest-sha1.txt"
|
43
|
+
data = File.open(path, &:read)
|
44
|
+
expect(data).to include("gnu.png")
|
45
|
+
end
|
51
46
|
|
52
|
-
|
53
|
-
|
54
|
-
|
47
|
+
it "is gone when fetch is complete" do
|
48
|
+
@bag.fetch!
|
49
|
+
expect(File.exist?(File.join(@bag_path, "fetch.txt"))).not_to be true
|
50
|
+
end
|
55
51
|
end
|
56
|
-
|
57
52
|
end
|