bagit 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +22 -0
- data/.travis.yml +1 -2
- data/Gemfile +1 -1
- data/README.md +1 -1
- data/Rakefile +8 -2
- data/bagit.gemspec +4 -3
- data/bin/bagit +30 -33
- data/lib/bagit.rb +1 -1
- data/lib/bagit/bag.rb +20 -27
- data/lib/bagit/fetch.rb +14 -20
- data/lib/bagit/file.rb +10 -15
- data/lib/bagit/info.rb +43 -57
- data/lib/bagit/manifest.rb +43 -48
- data/lib/bagit/string.rb +2 -4
- data/lib/bagit/valid.rb +67 -75
- data/lib/bagit/version.rb +1 -1
- data/spec/bagit_spec.rb +34 -30
- data/spec/fetch_spec.rb +29 -35
- data/spec/manifest_spec.rb +99 -108
- data/spec/spec_helper.rb +3 -5
- data/spec/tag_info_spec.rb +91 -99
- data/spec/tag_spec.rb +47 -50
- data/spec/util/bagit_matchers.rb +3 -14
- data/spec/validation_spec.rb +107 -110
- metadata +35 -20
data/spec/fetch_spec.rb
CHANGED
@@ -1,57 +1,51 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
|
-
describe
|
5
|
-
|
6
|
-
before(:each) do
|
7
|
-
|
4
|
+
describe BagIt::Bag do
|
5
|
+
before do
|
8
6
|
@sandbox = Sandbox.new
|
9
|
-
|
10
7
|
# make the bag
|
11
8
|
@bag_path = File.join @sandbox.to_s, 'the_bag'
|
12
|
-
@bag =
|
9
|
+
@bag = described_class.new(@bag_path)
|
13
10
|
|
14
11
|
# add some files
|
15
12
|
File.open('/dev/urandom') do |rio|
|
16
|
-
|
17
13
|
10.times do |n|
|
18
14
|
@bag.add_file("file-#{n}-💩") { |io| io.write rio.read(16) }
|
19
15
|
end
|
20
|
-
|
21
16
|
end
|
22
|
-
|
23
|
-
end
|
24
|
-
|
25
|
-
after(:each) do
|
26
|
-
@sandbox.cleanup!
|
27
17
|
end
|
18
|
+
describe "fetch.txt" do
|
19
|
+
after do
|
20
|
+
@sandbox.cleanup!
|
21
|
+
end
|
28
22
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
23
|
+
before do
|
24
|
+
@bag.add_remote_file('http://www.gnu.org/graphics/heckert_gnu.small.png', 'gnu.png', 6322,
|
25
|
+
'390c0a30976f899cbdf951eab5cce60fe9743ac9',
|
26
|
+
'a3bd7ab2442028bb91b51d9f6722ec98')
|
33
27
|
|
34
|
-
|
35
|
-
|
36
|
-
|
28
|
+
path = File.join @bag_path, 'fetch.txt'
|
29
|
+
@lines = File.open(path, &:readlines)
|
30
|
+
end
|
37
31
|
|
38
|
-
|
39
|
-
|
40
|
-
|
32
|
+
it "isn't empty" do
|
33
|
+
expect(@lines).not_to be_empty
|
34
|
+
end
|
41
35
|
|
42
|
-
|
43
|
-
|
44
|
-
|
36
|
+
it "only contains lines of the format URL LENGTH FILENAME" do
|
37
|
+
@lines.each { |line| expect(line.chomp).to match(/^[^\s]+\s+(\d+|\-)\s+[^\s]+$/) }
|
38
|
+
end
|
45
39
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
40
|
+
it "contains manifested files" do
|
41
|
+
path = File.join @bag_path, 'manifest-sha1.txt'
|
42
|
+
data = File.open(path, &:read)
|
43
|
+
expect(data).to include('gnu.png')
|
44
|
+
end
|
51
45
|
|
52
|
-
|
53
|
-
|
54
|
-
|
46
|
+
it "is gone when fetch is complete" do
|
47
|
+
@bag.fetch!
|
48
|
+
expect(File.exist?(File.join(@bag_path, 'fetch.txt'))).not_to be true
|
49
|
+
end
|
55
50
|
end
|
56
|
-
|
57
51
|
end
|
data/spec/manifest_spec.rb
CHANGED
@@ -1,148 +1,139 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
|
-
describe
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
@sandbox = Sandbox.new
|
9
|
-
|
10
|
-
# make the bag
|
11
|
-
@bag_path = File.join @sandbox.to_s, 'the_bag'
|
12
|
-
@bag = BagIt::Bag.new @bag_path
|
4
|
+
describe BagIt::Bag do
|
5
|
+
describe "BagIt Manifests" do
|
6
|
+
before do
|
7
|
+
@sandbox = Sandbox.new
|
13
8
|
|
14
|
-
|
15
|
-
|
9
|
+
# make the bag
|
10
|
+
@bag_path = File.join @sandbox.to_s, 'the_bag'
|
11
|
+
@bag = described_class.new @bag_path
|
16
12
|
|
17
|
-
|
18
|
-
|
19
|
-
|
13
|
+
# add some files
|
14
|
+
File.open('/dev/urandom') do |rio|
|
15
|
+
10.times do |n|
|
16
|
+
@bag.add_file("file-#{n}-💩") { |io| io.write rio.read(16) }
|
17
|
+
@bag.add_tag_file("tag-#{n}") { |io| io.write rio.read(16) }
|
18
|
+
end
|
20
19
|
end
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
20
|
end
|
26
21
|
|
27
|
-
|
28
|
-
|
29
|
-
after(:each) do
|
30
|
-
@sandbox.cleanup!
|
31
|
-
end
|
32
|
-
|
33
|
-
shared_examples_for "a manifest file" do
|
34
|
-
|
35
|
-
before do
|
36
|
-
pattern = File.join @bag_path, '*manifest-*.txt'
|
37
|
-
@manifest_files = Dir.glob pattern
|
22
|
+
after do
|
23
|
+
@sandbox.cleanup!
|
38
24
|
end
|
39
25
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
26
|
+
shared_examples_for "a manifest file" do
|
27
|
+
before do
|
28
|
+
pattern = File.join @bag_path, '*manifest-*.txt'
|
29
|
+
@manifest_files = Dir.glob pattern
|
30
|
+
end
|
44
31
|
|
45
|
-
|
46
|
-
|
47
|
-
|
32
|
+
it "has a valid algorithm in the name (at least md5 or sha1)" do
|
33
|
+
algorithms = @manifest_files.map { |mf| mf =~ /manifest-(.*).txt$/; Regexp.last_match(1) }
|
34
|
+
algorithms.each { |a| expect(a).to be_in('md5', 'sha1') }
|
35
|
+
end
|
48
36
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
37
|
+
it "is not an empty file" do
|
38
|
+
@manifest_files.each { |mf| expect(File.size(mf)).not_to eq(0) }
|
39
|
+
end
|
40
|
+
|
41
|
+
it "only contains lines of the format CHECKSUM FILENAME" do
|
42
|
+
@manifest_files.each do |file|
|
43
|
+
File.open(file) do |io|
|
44
|
+
io.each_line { |line| expect(line).to match(/^[a-fA-F0-9]+\s+[^\s].+$/) }
|
45
|
+
end
|
53
46
|
end
|
54
47
|
end
|
55
|
-
end
|
56
48
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
49
|
+
it "validates after adding a file and remanifesting" do
|
50
|
+
@bag.add_file('newfile.txt') { |io| io.puts("new file to remanifest") }
|
51
|
+
@bag.manifest!
|
52
|
+
expect(@bag).to be_valid
|
53
|
+
end
|
61
54
|
end
|
62
55
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
before do
|
68
|
-
@bag.manifest!
|
69
|
-
end
|
56
|
+
describe "bag manifest files" do
|
57
|
+
before do
|
58
|
+
@bag.manifest!
|
59
|
+
end
|
70
60
|
|
71
|
-
|
61
|
+
it_behaves_like "a manifest file"
|
72
62
|
|
73
|
-
|
74
|
-
|
75
|
-
|
63
|
+
it "has a manifest file" do
|
64
|
+
expect(@bag.manifest_files).not_to be_empty
|
65
|
+
end
|
76
66
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
67
|
+
it "only contains bag files" do
|
68
|
+
@bag.manifest_files.each do |mf|
|
69
|
+
File.open(mf) do |io|
|
70
|
+
io.each_line do |line|
|
71
|
+
expect(line.chomp).to match(/^[a-f0-9]+\s+data\/[^\s].+$/)
|
72
|
+
end
|
82
73
|
end
|
83
74
|
end
|
84
75
|
end
|
85
76
|
end
|
86
77
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
before do
|
92
|
-
@bag.add_tag_file("test-tag") { |f| f.puts "all alone" }
|
93
|
-
end
|
78
|
+
describe "tag manifest files" do
|
79
|
+
before do
|
80
|
+
@bag.add_tag_file("test-tag") { |f| f.puts "all alone" }
|
81
|
+
end
|
94
82
|
|
95
|
-
|
83
|
+
it_should_behave_like "a manifest file"
|
96
84
|
|
97
|
-
|
98
|
-
|
99
|
-
end
|
100
|
-
it "should only contain tag files" do
|
101
|
-
@bag.tagmanifest_files.each do |mf|
|
102
|
-
File.open(mf) do |io|
|
103
|
-
io.each_line do |line|
|
104
|
-
expect(line.chomp).to match(/^[a-fA-F0-9]+\s+(?!data\/)[^\s].+$/)
|
105
|
-
end
|
106
|
-
end
|
85
|
+
it "has a tag manifest file" do
|
86
|
+
expect(@bag.tagmanifest_files).not_to be_empty
|
107
87
|
end
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
88
|
+
it "only contains tag files" do
|
89
|
+
@bag.tagmanifest_files.each do |mf|
|
90
|
+
File.open(mf) do |io|
|
91
|
+
io.each_line do |line|
|
92
|
+
expect(line.chomp).to match(/^[a-fA-F0-9]+\s+(?!data\/)[^\s].+$/)
|
93
|
+
end
|
94
|
+
end
|
115
95
|
end
|
116
96
|
end
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
97
|
+
it "contains manifest and bag info files" do
|
98
|
+
@bag.tagmanifest_files.each do |mf|
|
99
|
+
expect(File.open(mf).read).to include(File.basename(@bag.bag_info_txt_file))
|
100
|
+
expect(File.open(mf).read).to include(File.basename(@bag.bagit_txt_file))
|
101
|
+
@bag.manifest_files.each do |man|
|
102
|
+
expect(File.open(mf).read).to include(man)
|
103
|
+
end
|
122
104
|
end
|
123
105
|
end
|
124
|
-
|
125
|
-
describe "removing tracked files" do
|
126
|
-
before(:each) do
|
127
|
-
@bag.remove_tag_file "tag-1"
|
128
|
-
@bag.delete_tag_file "tag-2"
|
129
|
-
end
|
130
|
-
it "should still have the untracked tag file on the file system" do
|
131
|
-
expect(File.join(@bag_path, "tag-1")).to exist_on_fs
|
132
|
-
end
|
133
|
-
it "should not have the deleted tag file on the file system" do
|
134
|
-
expect(File.join(@bag_path, "tag-2")).not_to exist_on_fs
|
135
|
-
end
|
136
|
-
it "should not have the removed or deleted tag files in the manifest" do
|
106
|
+
it "does not contain the untracked tag file" do
|
137
107
|
@bag.tagmanifest_files.each do |mf|
|
138
108
|
File.open(mf) do |io|
|
139
|
-
expect(io.read).not_to include "tag-1"
|
140
|
-
expect(io.read).not_to include "tag-2"
|
109
|
+
expect(io.read).not_to include "tag-notrack"
|
141
110
|
end
|
142
111
|
end
|
143
112
|
end
|
144
|
-
end
|
145
|
-
end
|
146
113
|
|
114
|
+
describe "removing tracked files" do
|
115
|
+
before do
|
116
|
+
@bag.remove_tag_file "tag-1"
|
117
|
+
@bag.delete_tag_file "tag-2"
|
118
|
+
end
|
119
|
+
|
120
|
+
it "still has the untracked tag file on the file system" do
|
121
|
+
expect(File.join(@bag_path, "tag-1")).to exist_on_fs
|
122
|
+
end
|
147
123
|
|
124
|
+
it "doesn't have the deleted tag file on the file system" do
|
125
|
+
expect(File.join(@bag_path, "tag-2")).not_to exist_on_fs
|
126
|
+
end
|
127
|
+
|
128
|
+
it "doesn't have the removed or deleted tag files in the manifest" do
|
129
|
+
@bag.tagmanifest_files.each do |mf|
|
130
|
+
File.open(mf) do |io|
|
131
|
+
expect(io.read).not_to include "tag-1"
|
132
|
+
expect(io.read).not_to include "tag-2"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
148
139
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -12,26 +12,24 @@ RSpec.configure do |config|
|
|
12
12
|
config.include(BagitMatchers)
|
13
13
|
end
|
14
14
|
|
15
|
-
|
15
|
+
$LOAD_PATH.unshift File.expand_path('../lib', File.dirname(__FILE__))
|
16
16
|
require 'bagit'
|
17
17
|
|
18
18
|
require 'tempfile'
|
19
19
|
|
20
20
|
class Sandbox
|
21
|
-
|
22
21
|
def initialize
|
23
22
|
tf = Tempfile.open 'sandbox'
|
24
23
|
@path = tf.path
|
25
24
|
tf.close!
|
26
|
-
FileUtils
|
25
|
+
FileUtils.mkdir @path
|
27
26
|
end
|
28
27
|
|
29
28
|
def cleanup!
|
30
|
-
FileUtils
|
29
|
+
FileUtils.rm_rf @path
|
31
30
|
end
|
32
31
|
|
33
32
|
def to_s
|
34
33
|
@path
|
35
34
|
end
|
36
|
-
|
37
35
|
end
|
data/spec/tag_info_spec.rb
CHANGED
@@ -1,78 +1,72 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
|
-
describe
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
@sandbox = Sandbox.new
|
4
|
+
describe BagIt::Bag do
|
5
|
+
describe "Tag Info Files" do
|
6
|
+
before do
|
7
|
+
@sandbox = Sandbox.new
|
9
8
|
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
# make the bag
|
10
|
+
@bag_path = File.join @sandbox.to_s, 'the_bag'
|
11
|
+
@bag = described_class.new @bag_path
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
13
|
+
# add some files
|
14
|
+
File.open('/dev/urandom') do |rio|
|
15
|
+
10.times do |n|
|
16
|
+
@bag.add_file("file-#{n}-💩") { |io| io.write rio.read(16) }
|
17
|
+
end
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
after(:each) do
|
24
|
-
@sandbox.cleanup!
|
25
|
-
end
|
26
|
-
|
27
|
-
describe "bagit.txt" do
|
28
|
-
|
29
|
-
before do
|
30
|
-
path = File.join @bag_path, 'bagit.txt'
|
31
|
-
@lines = File.open(path) { |io| io.readlines }
|
32
|
-
end
|
33
|
-
|
34
|
-
it "should create a file bagit.txt on bag initialization" do
|
35
|
-
expect(File.join(@bag_path, 'bagit.txt')).to exist_on_fs
|
21
|
+
after do
|
22
|
+
@sandbox.cleanup!
|
36
23
|
end
|
37
24
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
a = @lines.select { |line| line.chomp =~ /BagIt-Version:\s*\d+\.\d+/ }
|
44
|
-
expect(a).not_to be_empty
|
45
|
-
end
|
25
|
+
describe "bagit.txt" do
|
26
|
+
before do
|
27
|
+
path = File.join @bag_path, 'bagit.txt'
|
28
|
+
@lines = File.open(path, &:readlines)
|
29
|
+
end
|
46
30
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
end
|
31
|
+
it "creates a file bagit.txt on bag initialization" do
|
32
|
+
expect(File.join(@bag_path, 'bagit.txt')).to exist_on_fs
|
33
|
+
end
|
51
34
|
|
52
|
-
|
35
|
+
it "has exactly two lines" do
|
36
|
+
expect(@lines.size).to eq(2)
|
37
|
+
end
|
53
38
|
|
54
|
-
|
39
|
+
it "has a bagit version" do
|
40
|
+
a = @lines.select { |line| line.chomp =~ /BagIt-Version:\s*\d+\.\d+/ }
|
41
|
+
expect(a).not_to be_empty
|
42
|
+
end
|
55
43
|
|
56
|
-
|
57
|
-
|
58
|
-
|
44
|
+
it "has a tag file encoding" do
|
45
|
+
a = @lines.select { |line| line.chomp =~ /Tag-File-Character-Encoding:\s*.+/ }
|
46
|
+
expect(a).not_to be_empty
|
47
|
+
end
|
59
48
|
end
|
60
49
|
|
61
|
-
|
62
|
-
|
63
|
-
|
50
|
+
describe "bag-info.txt" do
|
51
|
+
before do
|
52
|
+
path = File.join @bag_path, 'bag-info.txt'
|
53
|
+
@lines = File.open(path, &:readlines)
|
54
|
+
end
|
64
55
|
|
65
|
-
|
66
|
-
|
67
|
-
|
56
|
+
it "isn't empty" do
|
57
|
+
expect(@lines).not_to be_empty
|
58
|
+
end
|
68
59
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
end
|
60
|
+
it "contains lines of the format LABEL: VALUE (like an email header)" do
|
61
|
+
@lines.each { |line| expect(line.chomp).to match(/^[^\s]+\s*:\s+.*$/) }
|
62
|
+
end
|
73
63
|
|
74
|
-
|
75
|
-
|
64
|
+
it "is case insensitive with respect to LABELs" do
|
65
|
+
expect { @bag.write_bag_info 'foo' => 'lowercase', 'Foo' => 'capital' }.to raise_error(/Multiple labels/)
|
66
|
+
end
|
67
|
+
|
68
|
+
it "folds long VALUEs" do
|
69
|
+
longline = <<LOREM
|
76
70
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do
|
77
71
|
eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enimad
|
78
72
|
minim veniam, quis nostrud exercitation ullamco laboris nisi ut
|
@@ -81,53 +75,51 @@ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do
|
|
81
75
|
pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
|
82
76
|
culpa qui officia deserunt mollit anim id est laborum.
|
83
77
|
LOREM
|
84
|
-
|
85
|
-
|
86
|
-
|
78
|
+
@bag.write_bag_info 'Lorem' => longline
|
79
|
+
expect(@bag.bag_info.keys.length).to eq(4) # this isn't a great test. Changed it from 1 to 4 because unrelated changes caused failure.
|
80
|
+
end
|
87
81
|
|
88
|
-
|
89
|
-
|
90
|
-
|
82
|
+
it "specifys a bag software agent" do
|
83
|
+
expect(@bag.bag_info.keys).to include("Bag-Software-Agent")
|
84
|
+
end
|
91
85
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
86
|
+
it "contains a valid bagging date" do
|
87
|
+
expect(@bag.bag_info.keys).to include("Bagging-Date")
|
88
|
+
@bag.bag_info["Bagging-Date"] =~ /^^[0-9]{4}-[0-9]{2}-[0-9]{2}$/
|
89
|
+
end
|
96
90
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
91
|
+
it "contains a payload oxum" do
|
92
|
+
expect(@bag.bag_info.keys).to include("Payload-Oxum")
|
93
|
+
end
|
94
|
+
it "does not override any previous values" do
|
95
|
+
path = File.join @bag_path, 'bag-info.txt'
|
96
|
+
@bag.write_bag_info 'Bag-Software-Agent' => 'Some Other Agent'
|
97
|
+
@bag.write_bag_info 'Source-Organization' => 'Awesome Inc.'
|
98
|
+
@bag.write_bag_info 'Bagging-Date' => '1901-01-01'
|
99
|
+
@bag.write_bag_info
|
100
|
+
contents = File.open(path).read
|
101
|
+
expect(contents).to include "Some Other Agent"
|
102
|
+
expect(contents).to include "Awesome Inc."
|
103
|
+
expect(contents).to include "1901-01-01"
|
104
|
+
end
|
105
|
+
it "overrides previous tags when they collide with new ones" do
|
106
|
+
path = File.join @bag_path, 'bag-info.txt'
|
107
|
+
@bag.write_bag_info 'Source-Organization' => 'Awesome Inc.'
|
108
|
+
@bag.write_bag_info 'Source-Organization' => 'Awesome LLC.'
|
109
|
+
contents = File.open(path).read
|
110
|
+
expect(contents).to include "Awesome LLC."
|
111
|
+
expect(contents).not_to include "Awesome Inc."
|
112
|
+
end
|
113
|
+
it "contains values passed to bag" do
|
114
|
+
hash = { "Bag-Software-Agent" => "rspec",
|
115
|
+
"Bagging-Date" => "2012-11-21",
|
116
|
+
"Contact-Name" => "Willis Corto",
|
117
|
+
"Some-Tag" => "Some Value" }
|
118
|
+
bag_with_info = described_class.new(@bag_path + '2', hash)
|
119
|
+
hash.each do |key, value|
|
120
|
+
expect(bag_with_info.bag_info[key]).to eq(value)
|
121
|
+
end
|
128
122
|
end
|
129
123
|
end
|
130
|
-
|
131
124
|
end
|
132
|
-
|
133
125
|
end
|