bagit 0.4.2 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +22 -0
- data/.travis.yml +1 -2
- data/Gemfile +1 -1
- data/README.md +1 -1
- data/Rakefile +8 -2
- data/bagit.gemspec +4 -3
- data/bin/bagit +30 -33
- data/lib/bagit.rb +1 -1
- data/lib/bagit/bag.rb +20 -27
- data/lib/bagit/fetch.rb +14 -20
- data/lib/bagit/file.rb +10 -15
- data/lib/bagit/info.rb +43 -57
- data/lib/bagit/manifest.rb +43 -48
- data/lib/bagit/string.rb +2 -4
- data/lib/bagit/valid.rb +67 -75
- data/lib/bagit/version.rb +1 -1
- data/spec/bagit_spec.rb +34 -30
- data/spec/fetch_spec.rb +29 -35
- data/spec/manifest_spec.rb +99 -108
- data/spec/spec_helper.rb +3 -5
- data/spec/tag_info_spec.rb +91 -99
- data/spec/tag_spec.rb +47 -50
- data/spec/util/bagit_matchers.rb +3 -14
- data/spec/validation_spec.rb +107 -110
- metadata +35 -20
data/spec/fetch_spec.rb
CHANGED
@@ -1,57 +1,51 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
|
-
describe
|
5
|
-
|
6
|
-
before(:each) do
|
7
|
-
|
4
|
+
describe BagIt::Bag do
|
5
|
+
before do
|
8
6
|
@sandbox = Sandbox.new
|
9
|
-
|
10
7
|
# make the bag
|
11
8
|
@bag_path = File.join @sandbox.to_s, 'the_bag'
|
12
|
-
@bag =
|
9
|
+
@bag = described_class.new(@bag_path)
|
13
10
|
|
14
11
|
# add some files
|
15
12
|
File.open('/dev/urandom') do |rio|
|
16
|
-
|
17
13
|
10.times do |n|
|
18
14
|
@bag.add_file("file-#{n}-💩
|
19
15
|
end
|
20
|
-
|
21
16
|
end
|
22
|
-
|
23
|
-
end
|
24
|
-
|
25
|
-
after(:each) do
|
26
|
-
@sandbox.cleanup!
|
27
17
|
end
|
18
|
+
describe "fetch.txt" do
|
19
|
+
after do
|
20
|
+
@sandbox.cleanup!
|
21
|
+
end
|
28
22
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
23
|
+
before do
|
24
|
+
@bag.add_remote_file('http://www.gnu.org/graphics/heckert_gnu.small.png', 'gnu.png', 6322,
|
25
|
+
'390c0a30976f899cbdf951eab5cce60fe9743ac9',
|
26
|
+
'a3bd7ab2442028bb91b51d9f6722ec98')
|
33
27
|
|
34
|
-
|
35
|
-
|
36
|
-
|
28
|
+
path = File.join @bag_path, 'fetch.txt'
|
29
|
+
@lines = File.open(path, &:readlines)
|
30
|
+
end
|
37
31
|
|
38
|
-
|
39
|
-
|
40
|
-
|
32
|
+
it "isn't empty" do
|
33
|
+
expect(@lines).not_to be_empty
|
34
|
+
end
|
41
35
|
|
42
|
-
|
43
|
-
|
44
|
-
|
36
|
+
it "only contains lines of the format URL LENGTH FILENAME" do
|
37
|
+
@lines.each { |line| expect(line.chomp).to match(/^[^\s]+\s+(\d+|\-)\s+[^\s]+$/) }
|
38
|
+
end
|
45
39
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
40
|
+
it "contains manifested files" do
|
41
|
+
path = File.join @bag_path, 'manifest-sha1.txt'
|
42
|
+
data = File.open(path, &:read)
|
43
|
+
expect(data).to include('gnu.png')
|
44
|
+
end
|
51
45
|
|
52
|
-
|
53
|
-
|
54
|
-
|
46
|
+
it "is gone when fetch is complete" do
|
47
|
+
@bag.fetch!
|
48
|
+
expect(File.exist?(File.join(@bag_path, 'fetch.txt'))).not_to be true
|
49
|
+
end
|
55
50
|
end
|
56
|
-
|
57
51
|
end
|
data/spec/manifest_spec.rb
CHANGED
@@ -1,148 +1,139 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
|
-
describe
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
@sandbox = Sandbox.new
|
9
|
-
|
10
|
-
# make the bag
|
11
|
-
@bag_path = File.join @sandbox.to_s, 'the_bag'
|
12
|
-
@bag = BagIt::Bag.new @bag_path
|
4
|
+
describe BagIt::Bag do
|
5
|
+
describe "BagIt Manifests" do
|
6
|
+
before do
|
7
|
+
@sandbox = Sandbox.new
|
13
8
|
|
14
|
-
|
15
|
-
|
9
|
+
# make the bag
|
10
|
+
@bag_path = File.join @sandbox.to_s, 'the_bag'
|
11
|
+
@bag = described_class.new @bag_path
|
16
12
|
|
17
|
-
|
18
|
-
|
19
|
-
|
13
|
+
# add some files
|
14
|
+
File.open('/dev/urandom') do |rio|
|
15
|
+
10.times do |n|
|
16
|
+
@bag.add_file("file-#{n}-💩
|
17
|
+
@bag.add_tag_file("tag-#{n}") { |io| io.write rio.read(16) }
|
18
|
+
end
|
20
19
|
end
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
20
|
end
|
26
21
|
|
27
|
-
|
28
|
-
|
29
|
-
after(:each) do
|
30
|
-
@sandbox.cleanup!
|
31
|
-
end
|
32
|
-
|
33
|
-
shared_examples_for "a manifest file" do
|
34
|
-
|
35
|
-
before do
|
36
|
-
pattern = File.join @bag_path, '*manifest-*.txt'
|
37
|
-
@manifest_files = Dir.glob pattern
|
22
|
+
after do
|
23
|
+
@sandbox.cleanup!
|
38
24
|
end
|
39
25
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
26
|
+
shared_examples_for "a manifest file" do
|
27
|
+
before do
|
28
|
+
pattern = File.join @bag_path, '*manifest-*.txt'
|
29
|
+
@manifest_files = Dir.glob pattern
|
30
|
+
end
|
44
31
|
|
45
|
-
|
46
|
-
|
47
|
-
|
32
|
+
it "has a valid algorithm in the name (at least md5 or sha1)" do
|
33
|
+
algorithms = @manifest_files.map { |mf| mf =~ /manifest-(.*).txt$/; Regexp.last_match(1) }
|
34
|
+
algorithms.each { |a| expect(a).to be_in('md5', 'sha1') }
|
35
|
+
end
|
48
36
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
37
|
+
it "is not an empty file" do
|
38
|
+
@manifest_files.each { |mf| expect(File.size(mf)).not_to eq(0) }
|
39
|
+
end
|
40
|
+
|
41
|
+
it "only contains lines of the format CHECKSUM FILENAME" do
|
42
|
+
@manifest_files.each do |file|
|
43
|
+
File.open(file) do |io|
|
44
|
+
io.each_line { |line| expect(line).to match(/^[a-fA-F0-9]+\s+[^\s].+$/) }
|
45
|
+
end
|
53
46
|
end
|
54
47
|
end
|
55
|
-
end
|
56
48
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
49
|
+
it "validates after adding a file and remanifesting" do
|
50
|
+
@bag.add_file('newfile.txt') { |io| io.puts("new file to remanifest") }
|
51
|
+
@bag.manifest!
|
52
|
+
expect(@bag).to be_valid
|
53
|
+
end
|
61
54
|
end
|
62
55
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
before do
|
68
|
-
@bag.manifest!
|
69
|
-
end
|
56
|
+
describe "bag manifest files" do
|
57
|
+
before do
|
58
|
+
@bag.manifest!
|
59
|
+
end
|
70
60
|
|
71
|
-
|
61
|
+
it_behaves_like "a manifest file"
|
72
62
|
|
73
|
-
|
74
|
-
|
75
|
-
|
63
|
+
it "has a manifest file" do
|
64
|
+
expect(@bag.manifest_files).not_to be_empty
|
65
|
+
end
|
76
66
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
67
|
+
it "only contains bag files" do
|
68
|
+
@bag.manifest_files.each do |mf|
|
69
|
+
File.open(mf) do |io|
|
70
|
+
io.each_line do |line|
|
71
|
+
expect(line.chomp).to match(/^[a-f0-9]+\s+data\/[^\s].+$/)
|
72
|
+
end
|
82
73
|
end
|
83
74
|
end
|
84
75
|
end
|
85
76
|
end
|
86
77
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
before do
|
92
|
-
@bag.add_tag_file("test-tag") { |f| f.puts "all alone" }
|
93
|
-
end
|
78
|
+
describe "tag manifest files" do
|
79
|
+
before do
|
80
|
+
@bag.add_tag_file("test-tag") { |f| f.puts "all alone" }
|
81
|
+
end
|
94
82
|
|
95
|
-
|
83
|
+
it_should_behave_like "a manifest file"
|
96
84
|
|
97
|
-
|
98
|
-
|
99
|
-
end
|
100
|
-
it "should only contain tag files" do
|
101
|
-
@bag.tagmanifest_files.each do |mf|
|
102
|
-
File.open(mf) do |io|
|
103
|
-
io.each_line do |line|
|
104
|
-
expect(line.chomp).to match(/^[a-fA-F0-9]+\s+(?!data\/)[^\s].+$/)
|
105
|
-
end
|
106
|
-
end
|
85
|
+
it "has a tag manifest file" do
|
86
|
+
expect(@bag.tagmanifest_files).not_to be_empty
|
107
87
|
end
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
88
|
+
it "only contains tag files" do
|
89
|
+
@bag.tagmanifest_files.each do |mf|
|
90
|
+
File.open(mf) do |io|
|
91
|
+
io.each_line do |line|
|
92
|
+
expect(line.chomp).to match(/^[a-fA-F0-9]+\s+(?!data\/)[^\s].+$/)
|
93
|
+
end
|
94
|
+
end
|
115
95
|
end
|
116
96
|
end
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
97
|
+
it "contains manifest and bag info files" do
|
98
|
+
@bag.tagmanifest_files.each do |mf|
|
99
|
+
expect(File.open(mf).read).to include(File.basename(@bag.bag_info_txt_file))
|
100
|
+
expect(File.open(mf).read).to include(File.basename(@bag.bagit_txt_file))
|
101
|
+
@bag.manifest_files.each do |man|
|
102
|
+
expect(File.open(mf).read).to include(man)
|
103
|
+
end
|
122
104
|
end
|
123
105
|
end
|
124
|
-
|
125
|
-
describe "removing tracked files" do
|
126
|
-
before(:each) do
|
127
|
-
@bag.remove_tag_file "tag-1"
|
128
|
-
@bag.delete_tag_file "tag-2"
|
129
|
-
end
|
130
|
-
it "should still have the untracked tag file on the file system" do
|
131
|
-
expect(File.join(@bag_path, "tag-1")).to exist_on_fs
|
132
|
-
end
|
133
|
-
it "should not have the deleted tag file on the file system" do
|
134
|
-
expect(File.join(@bag_path, "tag-2")).not_to exist_on_fs
|
135
|
-
end
|
136
|
-
it "should not have the removed or deleted tag files in the manifest" do
|
106
|
+
it "does not contain the untracked tag file" do
|
137
107
|
@bag.tagmanifest_files.each do |mf|
|
138
108
|
File.open(mf) do |io|
|
139
|
-
expect(io.read).not_to include "tag-
|
140
|
-
expect(io.read).not_to include "tag-2"
|
109
|
+
expect(io.read).not_to include "tag-notrack"
|
141
110
|
end
|
142
111
|
end
|
143
112
|
end
|
144
|
-
end
|
145
|
-
end
|
146
113
|
|
114
|
+
describe "removing tracked files" do
|
115
|
+
before do
|
116
|
+
@bag.remove_tag_file "tag-1"
|
117
|
+
@bag.delete_tag_file "tag-2"
|
118
|
+
end
|
119
|
+
|
120
|
+
it "still has the untracked tag file on the file system" do
|
121
|
+
expect(File.join(@bag_path, "tag-1")).to exist_on_fs
|
122
|
+
end
|
147
123
|
|
124
|
+
it "doesn't have the deleted tag file on the file system" do
|
125
|
+
expect(File.join(@bag_path, "tag-2")).not_to exist_on_fs
|
126
|
+
end
|
127
|
+
|
128
|
+
it "doesn't have the removed or deleted tag files in the manifest" do
|
129
|
+
@bag.tagmanifest_files.each do |mf|
|
130
|
+
File.open(mf) do |io|
|
131
|
+
expect(io.read).not_to include "tag-1"
|
132
|
+
expect(io.read).not_to include "tag-2"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
148
139
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -12,26 +12,24 @@ RSpec.configure do |config|
|
|
12
12
|
config.include(BagitMatchers)
|
13
13
|
end
|
14
14
|
|
15
|
-
|
15
|
+
$LOAD_PATH.unshift File.expand_path('../lib', File.dirname(__FILE__))
|
16
16
|
require 'bagit'
|
17
17
|
|
18
18
|
require 'tempfile'
|
19
19
|
|
20
20
|
class Sandbox
|
21
|
-
|
22
21
|
def initialize
|
23
22
|
tf = Tempfile.open 'sandbox'
|
24
23
|
@path = tf.path
|
25
24
|
tf.close!
|
26
|
-
FileUtils
|
25
|
+
FileUtils.mkdir @path
|
27
26
|
end
|
28
27
|
|
29
28
|
def cleanup!
|
30
|
-
FileUtils
|
29
|
+
FileUtils.rm_rf @path
|
31
30
|
end
|
32
31
|
|
33
32
|
def to_s
|
34
33
|
@path
|
35
34
|
end
|
36
|
-
|
37
35
|
end
|
data/spec/tag_info_spec.rb
CHANGED
@@ -1,78 +1,72 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
|
-
describe
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
@sandbox = Sandbox.new
|
4
|
+
describe BagIt::Bag do
|
5
|
+
describe "Tag Info Files" do
|
6
|
+
before do
|
7
|
+
@sandbox = Sandbox.new
|
9
8
|
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
# make the bag
|
10
|
+
@bag_path = File.join @sandbox.to_s, 'the_bag'
|
11
|
+
@bag = described_class.new @bag_path
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
13
|
+
# add some files
|
14
|
+
File.open('/dev/urandom') do |rio|
|
15
|
+
10.times do |n|
|
16
|
+
@bag.add_file("file-#{n}-💩
|
17
|
+
end
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
after(:each) do
|
24
|
-
@sandbox.cleanup!
|
25
|
-
end
|
26
|
-
|
27
|
-
describe "bagit.txt" do
|
28
|
-
|
29
|
-
before do
|
30
|
-
path = File.join @bag_path, 'bagit.txt'
|
31
|
-
@lines = File.open(path) { |io| io.readlines }
|
32
|
-
end
|
33
|
-
|
34
|
-
it "should create a file bagit.txt on bag initialization" do
|
35
|
-
expect(File.join(@bag_path, 'bagit.txt')).to exist_on_fs
|
21
|
+
after do
|
22
|
+
@sandbox.cleanup!
|
36
23
|
end
|
37
24
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
a = @lines.select { |line| line.chomp =~ /BagIt-Version:\s*\d+\.\d+/ }
|
44
|
-
expect(a).not_to be_empty
|
45
|
-
end
|
25
|
+
describe "bagit.txt" do
|
26
|
+
before do
|
27
|
+
path = File.join @bag_path, 'bagit.txt'
|
28
|
+
@lines = File.open(path, &:readlines)
|
29
|
+
end
|
46
30
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
end
|
31
|
+
it "creates a file bagit.txt on bag initialization" do
|
32
|
+
expect(File.join(@bag_path, 'bagit.txt')).to exist_on_fs
|
33
|
+
end
|
51
34
|
|
52
|
-
|
35
|
+
it "has exactly two lines" do
|
36
|
+
expect(@lines.size).to eq(2)
|
37
|
+
end
|
53
38
|
|
54
|
-
|
39
|
+
it "has a bagit version" do
|
40
|
+
a = @lines.select { |line| line.chomp =~ /BagIt-Version:\s*\d+\.\d+/ }
|
41
|
+
expect(a).not_to be_empty
|
42
|
+
end
|
55
43
|
|
56
|
-
|
57
|
-
|
58
|
-
|
44
|
+
it "has a tag file encoding" do
|
45
|
+
a = @lines.select { |line| line.chomp =~ /Tag-File-Character-Encoding:\s*.+/ }
|
46
|
+
expect(a).not_to be_empty
|
47
|
+
end
|
59
48
|
end
|
60
49
|
|
61
|
-
|
62
|
-
|
63
|
-
|
50
|
+
describe "bag-info.txt" do
|
51
|
+
before do
|
52
|
+
path = File.join @bag_path, 'bag-info.txt'
|
53
|
+
@lines = File.open(path, &:readlines)
|
54
|
+
end
|
64
55
|
|
65
|
-
|
66
|
-
|
67
|
-
|
56
|
+
it "isn't empty" do
|
57
|
+
expect(@lines).not_to be_empty
|
58
|
+
end
|
68
59
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
end
|
60
|
+
it "contains lines of the format LABEL: VALUE (like an email header)" do
|
61
|
+
@lines.each { |line| expect(line.chomp).to match(/^[^\s]+\s*:\s+.*$/) }
|
62
|
+
end
|
73
63
|
|
74
|
-
|
75
|
-
|
64
|
+
it "is case insensitive with respect to LABELs" do
|
65
|
+
expect { @bag.write_bag_info 'foo' => 'lowercase', 'Foo' => 'capital' }.to raise_error(/Multiple labels/)
|
66
|
+
end
|
67
|
+
|
68
|
+
it "folds long VALUEs" do
|
69
|
+
longline = <<LOREM
|
76
70
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do
|
77
71
|
eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enimad
|
78
72
|
minim veniam, quis nostrud exercitation ullamco laboris nisi ut
|
@@ -81,53 +75,51 @@ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do
|
|
81
75
|
pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
|
82
76
|
culpa qui officia deserunt mollit anim id est laborum.
|
83
77
|
LOREM
|
84
|
-
|
85
|
-
|
86
|
-
|
78
|
+
@bag.write_bag_info 'Lorem' => longline
|
79
|
+
expect(@bag.bag_info.keys.length).to eq(4) # this isn't a great test. Changed it from 1 to 4 because unrelated changes caused failure.
|
80
|
+
end
|
87
81
|
|
88
|
-
|
89
|
-
|
90
|
-
|
82
|
+
it "specifys a bag software agent" do
|
83
|
+
expect(@bag.bag_info.keys).to include("Bag-Software-Agent")
|
84
|
+
end
|
91
85
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
86
|
+
it "contains a valid bagging date" do
|
87
|
+
expect(@bag.bag_info.keys).to include("Bagging-Date")
|
88
|
+
@bag.bag_info["Bagging-Date"] =~ /^^[0-9]{4}-[0-9]{2}-[0-9]{2}$/
|
89
|
+
end
|
96
90
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
91
|
+
it "contains a payload oxum" do
|
92
|
+
expect(@bag.bag_info.keys).to include("Payload-Oxum")
|
93
|
+
end
|
94
|
+
it "does not override any previous values" do
|
95
|
+
path = File.join @bag_path, 'bag-info.txt'
|
96
|
+
@bag.write_bag_info 'Bag-Software-Agent' => 'Some Other Agent'
|
97
|
+
@bag.write_bag_info 'Source-Organization' => 'Awesome Inc.'
|
98
|
+
@bag.write_bag_info 'Bagging-Date' => '1901-01-01'
|
99
|
+
@bag.write_bag_info
|
100
|
+
contents = File.open(path).read
|
101
|
+
expect(contents).to include "Some Other Agent"
|
102
|
+
expect(contents).to include "Awesome Inc."
|
103
|
+
expect(contents).to include "1901-01-01"
|
104
|
+
end
|
105
|
+
it "overrides previous tags when they collide with new ones" do
|
106
|
+
path = File.join @bag_path, 'bag-info.txt'
|
107
|
+
@bag.write_bag_info 'Source-Organization' => 'Awesome Inc.'
|
108
|
+
@bag.write_bag_info 'Source-Organization' => 'Awesome LLC.'
|
109
|
+
contents = File.open(path).read
|
110
|
+
expect(contents).to include "Awesome LLC."
|
111
|
+
expect(contents).not_to include "Awesome Inc."
|
112
|
+
end
|
113
|
+
it "contains values passed to bag" do
|
114
|
+
hash = { "Bag-Software-Agent" => "rspec",
|
115
|
+
"Bagging-Date" => "2012-11-21",
|
116
|
+
"Contact-Name" => "Willis Corto",
|
117
|
+
"Some-Tag" => "Some Value" }
|
118
|
+
bag_with_info = described_class.new(@bag_path + '2', hash)
|
119
|
+
hash.each do |key, value|
|
120
|
+
expect(bag_with_info.bag_info[key]).to eq(value)
|
121
|
+
end
|
128
122
|
end
|
129
123
|
end
|
130
|
-
|
131
124
|
end
|
132
|
-
|
133
125
|
end
|