bagit 0.3.5 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +2 -1
- data/.travis.yml +1 -2
- data/Gemfile +4 -2
- data/README.md +1 -1
- data/Rakefile +11 -8
- data/bagit.gemspec +25 -21
- data/bin/bagit +59 -63
- data/lib/bagit.rb +8 -6
- data/lib/bagit/bag.rb +43 -47
- data/lib/bagit/fetch.rb +23 -27
- data/lib/bagit/file.rb +11 -14
- data/lib/bagit/info.rb +39 -51
- data/lib/bagit/manifest.rb +72 -49
- data/lib/bagit/string.rb +6 -6
- data/lib/bagit/valid.rb +51 -57
- data/lib/bagit/version.rb +3 -1
- data/spec/bagit_spec.rb +59 -54
- data/spec/fetch_spec.rb +33 -38
- data/spec/manifest_spec.rb +107 -111
- data/spec/spec_helper.rb +12 -12
- data/spec/tag_info_spec.rb +101 -108
- data/spec/tag_spec.rb +47 -49
- data/spec/util/bagit_matchers.rb +5 -14
- data/spec/validation_spec.rb +108 -110
- metadata +50 -9
data/lib/bagit/string.rb
CHANGED
@@ -1,27 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Some mixed in functionality for String
|
2
4
|
class String
|
3
|
-
|
4
5
|
# Wrap a string to lines of a specified width. All existing newlines
|
5
6
|
# are not guaranteed to be preserved
|
6
7
|
def wrap(width)
|
7
|
-
s = gsub(/\s+/,
|
8
|
+
s = gsub(/\s+/, " ").strip
|
8
9
|
|
9
10
|
if s.length > width
|
10
11
|
s[0...width] + '\n' + s[width..-1].wrap(width)
|
11
12
|
else
|
12
13
|
s
|
13
14
|
end
|
14
|
-
|
15
15
|
end
|
16
16
|
|
17
17
|
# Indent each line of a string by n spaces
|
18
18
|
def indent(n)
|
19
|
-
indent =
|
19
|
+
indent = " " * n
|
20
20
|
gsub '\n', "\n#{indent}"
|
21
21
|
end
|
22
22
|
|
23
|
-
# Colorize logs
|
24
|
-
|
23
|
+
# Colorize logs
|
24
|
+
def color(color_code)
|
25
25
|
"\e[#{color_code}m#{self}\e[0m"
|
26
26
|
end
|
27
27
|
|
data/lib/bagit/valid.rb
CHANGED
@@ -1,32 +1,31 @@
|
|
1
|
-
|
2
|
-
require 'open-uri'
|
3
|
-
require 'cgi'
|
4
|
-
require 'logger'
|
1
|
+
# frozen_string_literal: true
|
5
2
|
|
6
|
-
|
3
|
+
require "validatable"
|
4
|
+
require "open-uri"
|
5
|
+
require "cgi"
|
6
|
+
require "logger"
|
7
7
|
|
8
|
+
module BagIt
|
8
9
|
class Bag
|
9
10
|
include Validatable
|
10
|
-
validates_true_for :consistency, :
|
11
|
-
validates_true_for :completeness, :
|
11
|
+
validates_true_for :consistency, logic: proc { consistent? }
|
12
|
+
validates_true_for :completeness, logic: proc { complete? }
|
12
13
|
end
|
13
14
|
|
14
15
|
module Validity
|
15
16
|
def decode_filename(s)
|
16
|
-
s = s.gsub(
|
17
|
-
s = s.gsub(
|
18
|
-
|
17
|
+
s = s.gsub("%0D", "\r")
|
18
|
+
s = s.gsub("%0A", "\n")
|
19
|
+
s
|
19
20
|
end
|
20
|
-
|
21
|
+
|
21
22
|
# Return true if the manifest cover all files and all files are
|
22
23
|
# covered.
|
23
24
|
def complete?
|
24
25
|
logger = Logger.new(STDOUT)
|
25
26
|
|
26
|
-
if manifest_files == []
|
27
|
-
|
28
|
-
end
|
29
|
-
|
27
|
+
errors.add :completeness, "there are no manifest files" if manifest_files == []
|
28
|
+
|
30
29
|
unmanifested_files.each do |file|
|
31
30
|
logger.error("#{file} is present but not manifested".red)
|
32
31
|
errors.add :completeness, "#{file} is present but not manifested"
|
@@ -44,38 +43,39 @@ module BagIt
|
|
44
43
|
errors.on(:completeness).nil?
|
45
44
|
end
|
46
45
|
|
46
|
+
def manifest_type(type)
|
47
|
+
case type
|
48
|
+
when /sha1/i
|
49
|
+
Digest::SHA1
|
50
|
+
when /md5/i
|
51
|
+
Digest::MD5
|
52
|
+
when /sha256/i
|
53
|
+
Digest::SHA256
|
54
|
+
when /sha384/i
|
55
|
+
Digest::SHA384
|
56
|
+
when /sha512/i
|
57
|
+
Digest::SHA512
|
58
|
+
else
|
59
|
+
raise ArgumentError, "Algorithm #{manifest_type} is not supported."
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
47
63
|
# Return true if all manifested files message digests match.
|
48
64
|
def consistent?
|
49
|
-
(manifest_files|tagmanifest_files).each do |mf|
|
65
|
+
(manifest_files | tagmanifest_files).each do |mf|
|
50
66
|
# get the algorithm implementation
|
51
67
|
File.basename(mf) =~ /manifest-(.+).txt$/
|
52
|
-
|
53
|
-
algo =
|
54
|
-
when /sha1/i
|
55
|
-
Digest::SHA1
|
56
|
-
when /md5/i
|
57
|
-
Digest::MD5
|
58
|
-
when /sha256/i
|
59
|
-
Digest::SHA256
|
60
|
-
when /sha384/i
|
61
|
-
Digest::SHA384
|
62
|
-
when /sha512/i
|
63
|
-
Digest::SHA512
|
64
|
-
else
|
65
|
-
raise ArgumentError.new("Algorithm #{manifest_type} is not supported.")
|
66
|
-
end
|
68
|
+
type = Regexp.last_match(1)
|
69
|
+
algo = manifest_type(type)
|
67
70
|
# Check every file in the manifest
|
68
71
|
File.open(mf) do |io|
|
69
72
|
io.each_line do |line|
|
70
|
-
expected, path = line.chomp.split
|
73
|
+
expected, path = line.chomp.split(/\s+/, 2)
|
71
74
|
file = File.join(bag_dir, decode_filename(path))
|
72
75
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
errors.add :consistency, "expected #{file} to have #{algo}: #{expected}, actual is #{actual}"
|
77
|
-
end
|
78
|
-
end
|
76
|
+
next unless File.exist? file
|
77
|
+
actual = algo.file(file).hexdigest
|
78
|
+
errors.add :consistency, "expected #{file} to have #{algo}: #{expected}, actual is #{actual}" if expected.downcase != actual
|
79
79
|
end
|
80
80
|
end
|
81
81
|
end
|
@@ -89,7 +89,7 @@ module BagIt
|
|
89
89
|
end
|
90
90
|
|
91
91
|
protected
|
92
|
-
|
92
|
+
|
93
93
|
# Returns all files in the instance that are not manifested
|
94
94
|
def unmanifested_files
|
95
95
|
mfs = manifested_files.map { |f| File.join bag_dir, f }
|
@@ -101,47 +101,41 @@ module BagIt
|
|
101
101
|
bfs = bag_files
|
102
102
|
manifested_files.reject { |f| bfs.member? File.join(bag_dir, f) }
|
103
103
|
end
|
104
|
+
|
104
105
|
# Returns a list of tag manifested files that are not present
|
105
106
|
def tag_empty_manifests
|
106
107
|
empty = []
|
107
108
|
tag_manifested_files.each do |f|
|
108
|
-
|
109
|
-
empty.push f
|
110
|
-
end
|
109
|
+
empty.push f unless File.exist?(File.join(bag_dir, f))
|
111
110
|
end
|
112
|
-
|
111
|
+
empty
|
113
112
|
end
|
113
|
+
|
114
114
|
# Returns a list of all files present in the manifest files
|
115
115
|
def manifested_files
|
116
|
-
|
117
116
|
manifest_files.inject([]) do |acc, mf|
|
118
|
-
|
119
|
-
files = File.open(mf) do |io|
|
120
|
-
|
117
|
+
files = File.open(mf) { |io|
|
121
118
|
io.readlines.map do |line|
|
122
|
-
|
119
|
+
_digest, path = line.chomp.split(/\s+/, 2)
|
123
120
|
decode_filename(path)
|
124
121
|
end
|
125
|
-
|
126
|
-
end
|
122
|
+
}
|
127
123
|
|
128
124
|
(acc + files).uniq
|
129
125
|
end
|
130
|
-
|
131
126
|
end
|
127
|
+
|
132
128
|
# Returns a list of all files in the tag manifest files
|
133
129
|
def tag_manifested_files
|
134
130
|
tagmanifest_files.inject([]) do |acc, mf|
|
135
|
-
files = File.open(mf)
|
131
|
+
files = File.open(mf) { |io|
|
136
132
|
io.readlines.map do |line|
|
137
|
-
|
133
|
+
_digest, path = line.chomp.split(/\s+/, 2)
|
138
134
|
path
|
139
135
|
end
|
140
|
-
|
141
|
-
(acc+files).uniq
|
136
|
+
}
|
137
|
+
(acc + files).uniq
|
142
138
|
end
|
143
139
|
end
|
144
|
-
|
145
140
|
end
|
146
|
-
|
147
141
|
end
|
data/lib/bagit/version.rb
CHANGED
data/spec/bagit_spec.rb
CHANGED
@@ -1,35 +1,36 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
3
4
|
|
4
5
|
# based on v0.96 http://www.cdlib.org/inside/diglib/bagit/bagitspec.html
|
5
|
-
describe BagIt::Bag do
|
6
|
-
describe
|
7
|
-
before
|
6
|
+
RSpec.describe BagIt::Bag do
|
7
|
+
describe "empty bag" do
|
8
|
+
before do
|
8
9
|
@sandbox = Sandbox.new
|
9
10
|
# make the bag
|
10
|
-
@bag_path = File.join @sandbox.to_s,
|
11
|
-
@bag =
|
11
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
12
|
+
@bag = described_class.new @bag_path
|
12
13
|
end
|
13
14
|
|
14
|
-
after
|
15
|
+
after do
|
15
16
|
@sandbox.cleanup!
|
16
17
|
end
|
17
18
|
|
18
|
-
it "
|
19
|
+
it "is empty" do
|
19
20
|
expect(@bag).to be_empty
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
23
|
-
describe
|
24
|
-
before
|
24
|
+
describe "bag with files" do
|
25
|
+
before do
|
25
26
|
@sandbox = Sandbox.new
|
26
27
|
|
27
28
|
# make the bag
|
28
|
-
@bag_path = File.join @sandbox.to_s,
|
29
|
-
@bag =
|
29
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
30
|
+
@bag = described_class.new @bag_path
|
30
31
|
|
31
32
|
# add some files
|
32
|
-
File.open(
|
33
|
+
File.open("/dev/urandom") do |rio|
|
33
34
|
10.times do |n|
|
34
35
|
@bag.add_file("file-#{n}-💩
|
35
36
|
") { |io| io.write rio.read(16) }
|
@@ -37,82 +38,84 @@ describe BagIt::Bag do
|
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
40
|
-
after
|
41
|
+
after do
|
41
42
|
@sandbox.cleanup!
|
42
43
|
end
|
43
44
|
|
44
|
-
it "
|
45
|
+
it "is a directory" do
|
45
46
|
expect(File.directory?(@bag_path)).to be true
|
46
47
|
end
|
47
48
|
|
48
|
-
it "
|
49
|
+
it "is not be empty" do
|
49
50
|
expect(@bag).not_to be_empty
|
50
51
|
end
|
51
52
|
|
52
|
-
it "
|
53
|
-
data_path = File.join @bag_path,
|
53
|
+
it "has a sub-directory called data" do
|
54
|
+
data_path = File.join @bag_path, "data"
|
54
55
|
expect(File.directory?(data_path)).to be true
|
55
56
|
end
|
56
57
|
|
57
58
|
describe "#add_file" do
|
58
|
-
it "
|
59
|
-
@bag.add_file("foo") { |io| io.puts
|
59
|
+
it "allows addition of files via io" do
|
60
|
+
@bag.add_file("foo") { |io| io.puts "all alone" }
|
60
61
|
expect(File.join(@bag_path, "data", "foo")).to exist_on_fs
|
61
62
|
end
|
62
63
|
|
63
|
-
it "
|
64
|
-
src_path = File.join @sandbox.to_s,
|
65
|
-
File.open(src_path,
|
66
|
-
@bag.add_file("foo", src_path) { |io| io.puts
|
64
|
+
it "allows addition of files via copy" do
|
65
|
+
src_path = File.join @sandbox.to_s, "somefile"
|
66
|
+
File.open(src_path, "w") { |io| io.puts "something" }
|
67
|
+
@bag.add_file("foo", src_path) { |io| io.puts "all alone" }
|
67
68
|
expect(File.join(@bag_path, "data", "foo")).to exist_on_fs
|
68
69
|
end
|
69
70
|
|
70
|
-
it "
|
71
|
-
@bag.add_file("deep/dir/structure/file") { |io| io.puts
|
71
|
+
it "allows addition of files with deep paths" do
|
72
|
+
@bag.add_file("deep/dir/structure/file") { |io| io.puts "all alone" }
|
72
73
|
expect(File.join(@bag_path, "data", "deep/dir/structure/file")).to exist_on_fs
|
73
74
|
end
|
74
75
|
|
75
|
-
it "
|
76
|
-
expect {
|
77
|
-
|
76
|
+
it "does not allow overwriting of files" do
|
77
|
+
expect {
|
78
|
+
@bag.add_file("file-0-💩
|
79
|
+
") { |io| io.puts "overwrite!" }
|
80
|
+
}.to raise_error(RuntimeError)
|
78
81
|
end
|
79
82
|
|
80
|
-
it "
|
81
|
-
oxum_count = @bag.bag_info["Payload-Oxum"].split(
|
82
|
-
@bag.add_file("foo") { |io| io.puts
|
83
|
-
expect(@bag.bag_info["Payload-Oxum"].split(
|
83
|
+
it "updates the payload oxum" do
|
84
|
+
oxum_count = @bag.bag_info["Payload-Oxum"].split(".")[1].to_i
|
85
|
+
@bag.add_file("foo") { |io| io.puts "all alone" }
|
86
|
+
expect(@bag.bag_info["Payload-Oxum"].split(".")[1].to_i).to eq(oxum_count + 1)
|
84
87
|
end
|
85
88
|
end
|
86
89
|
|
87
90
|
describe "#remove_file" do
|
88
|
-
it "
|
91
|
+
it "raises an error when deleing non existant files" do
|
89
92
|
expect { @bag.remove_file("file-x") }.to raise_error(RuntimeError)
|
90
93
|
end
|
91
94
|
end
|
92
95
|
|
93
96
|
describe "#get" do
|
94
97
|
describe "file not in bag" do
|
95
|
-
it "
|
96
|
-
expect(@bag.get(
|
98
|
+
it "returns nil" do
|
99
|
+
expect(@bag.get("foobar")).to be_nil
|
97
100
|
end
|
98
101
|
end
|
99
102
|
|
100
103
|
describe "file in bag" do
|
101
104
|
before do
|
102
|
-
@contents =
|
103
|
-
@bag.add_file("foo") { |io| io <<
|
105
|
+
@contents = "all alone"
|
106
|
+
@bag.add_file("foo") { |io| io << "all alone" }
|
104
107
|
@file = @bag.get("foo")
|
105
108
|
end
|
106
109
|
|
107
|
-
it "
|
110
|
+
it "returns an IO object for the given path" do
|
108
111
|
expect(@file).to be_a_kind_of(IO)
|
109
112
|
end
|
110
113
|
|
111
|
-
it "
|
114
|
+
it "has the same content as the file added" do
|
112
115
|
expect(@file.read).to eq(@contents)
|
113
116
|
end
|
114
117
|
|
115
|
-
it "
|
118
|
+
it "accepts an optional leading slash or ./" do
|
116
119
|
expect(@bag.get("/foo").read).to eq(@contents)
|
117
120
|
expect(@bag.get("./foo").read).to eq(@contents)
|
118
121
|
end
|
@@ -124,7 +127,7 @@ describe BagIt::Bag do
|
|
124
127
|
@paths = @bag.paths
|
125
128
|
end
|
126
129
|
|
127
|
-
it "
|
130
|
+
it "returns a non-empty Array of Strings" do
|
128
131
|
expect(@paths).to be_a_kind_of(Array)
|
129
132
|
expect(@paths).not_to be_empty
|
130
133
|
@paths.each do |p|
|
@@ -132,31 +135,33 @@ describe BagIt::Bag do
|
|
132
135
|
end
|
133
136
|
end
|
134
137
|
|
135
|
-
it "
|
136
|
-
expect(@paths).to match_array((0..9).collect { |x|
|
137
|
-
"
|
138
|
+
it "returns relative paths to all files in the data directory" do
|
139
|
+
expect(@paths).to match_array((0..9).collect { |x|
|
140
|
+
"file-#{x}-💩
|
141
|
+
"
|
142
|
+
})
|
138
143
|
end
|
139
144
|
end
|
140
145
|
|
141
146
|
describe "#payload-oxum" do
|
142
|
-
it "
|
147
|
+
it "returns a valid oxum" do
|
143
148
|
expect(@bag.payload_oxum).to match(/^[0-9]+\.[0-9]+$/)
|
144
149
|
end
|
145
150
|
|
146
|
-
it "
|
147
|
-
@bag.add_tag_file(
|
148
|
-
@bag.payload_oxum.split(
|
151
|
+
it "accurately specifys the number of payload files" do
|
152
|
+
@bag.add_tag_file("non-payload") { |f| f.puts "I shouldn't count in the oxum" }
|
153
|
+
@bag.payload_oxum.split(".")[1] == @bag.bag_files.count
|
149
154
|
end
|
150
155
|
end
|
151
156
|
|
152
157
|
describe "#gc!" do
|
153
|
-
it "
|
158
|
+
it "cleans up empty directories" do
|
154
159
|
f = File.join "1", "2", "3", "file"
|
155
|
-
@bag.add_file(f) { |io| io.puts
|
160
|
+
@bag.add_file(f) { |io| io.puts "all alone" }
|
156
161
|
@bag.remove_file f
|
157
|
-
expect(File.exist?(File.dirname(File.join(@bag_path,
|
162
|
+
expect(File.exist?(File.dirname(File.join(@bag_path, "data", f)))).to be true
|
158
163
|
@bag.gc!
|
159
|
-
expect(File.exist?(File.dirname(File.join(@bag_path,
|
164
|
+
expect(File.exist?(File.dirname(File.join(@bag_path, "data", f)))).to be false
|
160
165
|
end
|
161
166
|
end
|
162
167
|
end
|
data/spec/fetch_spec.rb
CHANGED
@@ -1,57 +1,52 @@
|
|
1
|
-
#
|
2
|
-
require 'spec_helper'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
before(:each) do
|
3
|
+
require "spec_helper"
|
7
4
|
|
5
|
+
describe BagIt::Bag do
|
6
|
+
before do
|
8
7
|
@sandbox = Sandbox.new
|
9
|
-
|
10
8
|
# make the bag
|
11
|
-
@bag_path = File.join @sandbox.to_s,
|
12
|
-
@bag =
|
9
|
+
@bag_path = File.join @sandbox.to_s, "the_bag"
|
10
|
+
@bag = described_class.new(@bag_path)
|
13
11
|
|
14
12
|
# add some files
|
15
|
-
File.open(
|
16
|
-
|
13
|
+
File.open("/dev/urandom") do |rio|
|
17
14
|
10.times do |n|
|
18
15
|
@bag.add_file("file-#{n}-💩
|
19
16
|
end
|
20
|
-
|
21
17
|
end
|
22
|
-
|
23
|
-
end
|
24
|
-
|
25
|
-
after(:each) do
|
26
|
-
@sandbox.cleanup!
|
27
18
|
end
|
19
|
+
describe "fetch.txt" do
|
20
|
+
after do
|
21
|
+
@sandbox.cleanup!
|
22
|
+
end
|
28
23
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
24
|
+
before do
|
25
|
+
@bag.add_remote_file("http://www.gnu.org/graphics/heckert_gnu.small.png", "gnu.png", 6322,
|
26
|
+
"390c0a30976f899cbdf951eab5cce60fe9743ac9",
|
27
|
+
"a3bd7ab2442028bb91b51d9f6722ec98")
|
33
28
|
|
34
|
-
|
35
|
-
|
36
|
-
|
29
|
+
path = File.join @bag_path, "fetch.txt"
|
30
|
+
@lines = File.open(path, &:readlines)
|
31
|
+
end
|
37
32
|
|
38
|
-
|
39
|
-
|
40
|
-
|
33
|
+
it "isn't empty" do
|
34
|
+
expect(@lines).not_to be_empty
|
35
|
+
end
|
41
36
|
|
42
|
-
|
43
|
-
|
44
|
-
|
37
|
+
it "only contains lines of the format URL LENGTH FILENAME" do
|
38
|
+
@lines.each { |line| expect(line.chomp).to match(/^[^\s]+\s+(\d+|-)\s+[^\s]+$/) }
|
39
|
+
end
|
45
40
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
41
|
+
it "contains manifested files" do
|
42
|
+
path = File.join @bag_path, "manifest-sha1.txt"
|
43
|
+
data = File.open(path, &:read)
|
44
|
+
expect(data).to include("gnu.png")
|
45
|
+
end
|
51
46
|
|
52
|
-
|
53
|
-
|
54
|
-
|
47
|
+
it "is gone when fetch is complete" do
|
48
|
+
@bag.fetch!
|
49
|
+
expect(File.exist?(File.join(@bag_path, "fetch.txt"))).not_to be true
|
50
|
+
end
|
55
51
|
end
|
56
|
-
|
57
52
|
end
|