bagit 0.4.3 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +2 -1
- data/Gemfile +3 -1
- data/Rakefile +10 -13
- data/bagit.gemspec +24 -23
- data/bin/bagit +22 -32
- data/lib/bagit.rb +8 -6
- data/lib/bagit/bag.rb +21 -19
- data/lib/bagit/fetch.rb +10 -8
- data/lib/bagit/file.rb +2 -0
- data/lib/bagit/info.rb +45 -43
- data/lib/bagit/manifest.rb +25 -23
- data/lib/bagit/string.rb +4 -2
- data/lib/bagit/valid.rb +45 -43
- data/lib/bagit/version.rb +3 -1
- data/spec/bagit_spec.rb +30 -29
- data/spec/fetch_spec.rb +13 -12
- data/spec/manifest_spec.rb +14 -9
- data/spec/spec_helper.rb +10 -8
- data/spec/tag_info_spec.rb +31 -30
- data/spec/tag_spec.rb +11 -10
- data/spec/util/bagit_matchers.rb +2 -0
- data/spec/validation_spec.rb +23 -22
- metadata +18 -20
- data/.rubocop.yml +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4b4fd9e3eb682521a82c7cd25ad0320088fba8fa907876a48c39320e2b245478
|
4
|
+
data.tar.gz: ec3971461e6304cc457e50323b369ce0d61a6b3df5f3ac445601c9f34d456f67
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 78dff8b4378080349c13e9794302792e981f36db37f67e206658c554ecf0e025ba3c4332a5cd9f53f48a53935c5cbce9fb4435f28badccd0dacdf556b9ef30e7
|
7
|
+
data.tar.gz: 98828d498d5fdb72551587d4a4b766a579470e911d0056debe8098f9a59cd89c04cd2e1e204f02332b3a0813e2b3967a2ac7f6bae91895463bb7144590e54fb4
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -1,22 +1,19 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "bundler/setup"
|
3
5
|
Bundler.setup(:default, :development, :test)
|
4
6
|
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
7
|
+
require "rake"
|
8
|
+
require "rdoc/task"
|
9
|
+
require "rspec/core/rake_task"
|
10
|
+
require "standard/rake"
|
9
11
|
|
10
12
|
Bundler::GemHelper.install_tasks
|
11
13
|
|
12
|
-
desc 'Run rubocop'
|
13
|
-
task :rubocop do
|
14
|
-
RuboCop::RakeTask.new
|
15
|
-
end
|
16
|
-
|
17
14
|
RSpec::Core::RakeTask.new do |t|
|
18
|
-
t.pattern =
|
15
|
+
t.pattern = "spec/**/*_spec.rb"
|
19
16
|
t.rspec_opts = %w[--format documentation --color]
|
20
17
|
end
|
21
18
|
|
22
|
-
task default: [:
|
19
|
+
task default: [:standard, :spec]
|
data/bagit.gemspec
CHANGED
@@ -1,33 +1,34 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path("../lib", __FILE__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
5
|
+
require "bagit/version"
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name
|
8
|
-
spec.version
|
9
|
-
spec.summary
|
8
|
+
spec.name = "bagit"
|
9
|
+
spec.version = BagIt::VERSION
|
10
|
+
spec.summary = "BagIt package generation and validation"
|
10
11
|
spec.description = "Ruby Library and Command Line tools for bagit"
|
11
|
-
spec.email
|
12
|
-
spec.homepage
|
13
|
-
spec.authors
|
14
|
-
spec.license
|
12
|
+
spec.email = "jamie@jamielittle.org"
|
13
|
+
spec.homepage = "http://github.com/tipr/bagit"
|
14
|
+
spec.authors = ["Tom Johnson, Francesco Lazzarino, Jamie Little"]
|
15
|
+
spec.license = "MIT"
|
15
16
|
|
16
|
-
spec.required_ruby_version =
|
17
|
+
spec.required_ruby_version = "~> 2.0"
|
17
18
|
|
18
|
-
spec.add_dependency
|
19
|
-
spec.add_dependency
|
19
|
+
spec.add_dependency "validatable", "~> 1.6"
|
20
|
+
spec.add_dependency "docopt", "~> 0.5.0"
|
20
21
|
|
21
|
-
spec.add_development_dependency
|
22
|
-
spec.add_development_dependency
|
23
|
-
spec.add_development_dependency
|
24
|
-
spec.add_development_dependency
|
25
|
-
spec.add_development_dependency
|
26
|
-
spec.add_development_dependency
|
27
|
-
spec.add_development_dependency
|
22
|
+
spec.add_development_dependency "bundler"
|
23
|
+
spec.add_development_dependency "coveralls"
|
24
|
+
spec.add_development_dependency "pry"
|
25
|
+
spec.add_development_dependency "pry-byebug"
|
26
|
+
spec.add_development_dependency "rake", ">= 12.3.3"
|
27
|
+
spec.add_development_dependency "rspec", "~> 3"
|
28
|
+
spec.add_development_dependency "standard"
|
28
29
|
|
29
|
-
spec.files
|
30
|
-
spec.executables
|
31
|
-
spec.test_files
|
30
|
+
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
31
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
32
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
32
33
|
spec.require_paths = ["lib"]
|
33
34
|
end
|
data/bin/bagit
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
require 'bagit'
|
4
5
|
require 'docopt'
|
5
6
|
require 'logger'
|
6
|
-
require 'pry'
|
7
7
|
logger = Logger.new(STDOUT)
|
8
8
|
|
9
9
|
doc = <<DOCOPT
|
@@ -84,43 +84,33 @@ begin
|
|
84
84
|
# TODO: implement delete for data and tag files; remove for tag files.
|
85
85
|
|
86
86
|
# handle add/delete bag data files
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
bag.add_file(File.basename(datafile), datafile)
|
93
|
-
elsif opts['delete']
|
94
|
-
bag.remove_file(File.basename(datafile))
|
95
|
-
end
|
96
|
-
rescue StandardError => e
|
97
|
-
logger.error("Failed operation on bag file: #{e.message}")
|
98
|
-
end
|
87
|
+
opts['-f']&.each do |datafile|
|
88
|
+
if opts['add'] || opts['new']
|
89
|
+
bag.add_file(File.basename(datafile), datafile)
|
90
|
+
elsif opts['delete']
|
91
|
+
bag.remove_file(File.basename(datafile))
|
99
92
|
end
|
93
|
+
rescue StandardError => e
|
94
|
+
logger.error("Failed operation on bag file: #{e.message}")
|
100
95
|
end
|
101
96
|
|
102
97
|
# handle adding tag files
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
# otherwise, add it
|
112
|
-
else
|
113
|
-
bag.add_tag_file(File.basename(tagfile), tagfile)
|
114
|
-
end
|
115
|
-
elsif opts['delete']
|
116
|
-
bag.delete_tag_file(File.basename(tagfile))
|
117
|
-
elsif opts['remove']
|
118
|
-
bag.remove_tag_file(File.basename(tagfile))
|
119
|
-
end
|
120
|
-
rescue StandardError => e
|
121
|
-
logger.error("Failed operation on tag file: #{e.message}".red)
|
98
|
+
opts['-t']&.each do |tagfile|
|
99
|
+
if opts['add'] || opts['new']
|
100
|
+
# if it does, try to manifest it
|
101
|
+
if File.exist?(File.join(bag.bag_dir, File.basename(tagfile)))
|
102
|
+
bag.add_tag_file(tagfile)
|
103
|
+
# otherwise, add it
|
104
|
+
else
|
105
|
+
bag.add_tag_file(File.basename(tagfile), tagfile)
|
122
106
|
end
|
107
|
+
elsif opts['delete']
|
108
|
+
bag.delete_tag_file(File.basename(tagfile))
|
109
|
+
elsif opts['remove']
|
110
|
+
bag.remove_tag_file(File.basename(tagfile))
|
123
111
|
end
|
112
|
+
rescue StandardError => e
|
113
|
+
logger.error("Failed operation on tag file: #{e.message}".red)
|
124
114
|
end
|
125
115
|
|
126
116
|
# if we haven't quit yet, we need to re-manifest
|
data/lib/bagit.rb
CHANGED
@@ -1,14 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# == About bagit.rb
|
2
4
|
# Author:: Francesco Lazzarino (mailto:flazzarino@gmail.com)
|
3
5
|
# Functionality conforms to the BagIt Spec v0.96:
|
4
6
|
# http://www.cdlib.org/inside/diglib/bagit/bagitspec.html
|
5
7
|
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
8
|
+
require "bagit/bag"
|
9
|
+
require "bagit/version"
|
10
|
+
require "fileutils"
|
11
|
+
require "date"
|
12
|
+
require "logger"
|
11
13
|
module BagIt
|
12
14
|
# The version of the BagIt specification the code is conforming to.
|
13
|
-
SPEC_VERSION =
|
15
|
+
SPEC_VERSION = "0.97"
|
14
16
|
end
|
data/lib/bagit/bag.rb
CHANGED
@@ -1,19 +1,21 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bagit/fetch"
|
4
|
+
require "bagit/file"
|
5
|
+
require "bagit/info"
|
6
|
+
require "bagit/manifest"
|
7
|
+
require "bagit/string"
|
8
|
+
require "bagit/valid"
|
7
9
|
|
8
10
|
module BagIt
|
9
11
|
# Represents the state of a bag on a filesystem
|
10
12
|
class Bag
|
11
13
|
attr_reader :bag_dir
|
12
14
|
|
13
|
-
include Validity
|
14
|
-
include Info
|
15
|
-
include Manifest
|
16
|
-
include Fetch
|
15
|
+
include Validity # Validity functionality
|
16
|
+
include Info # bagit & bag info functionality
|
17
|
+
include Manifest # manifest related functionality
|
18
|
+
include Fetch # fetch related functionality
|
17
19
|
|
18
20
|
# Make a new Bag based at path
|
19
21
|
def initialize(path, info = {}, _create = false)
|
@@ -30,12 +32,12 @@ module BagIt
|
|
30
32
|
|
31
33
|
# Return the path to the data directory
|
32
34
|
def data_dir
|
33
|
-
File.join @bag_dir,
|
35
|
+
File.join @bag_dir, "data"
|
34
36
|
end
|
35
37
|
|
36
38
|
# Return the paths to each bag file relative to bag_dir
|
37
39
|
def bag_files
|
38
|
-
Dir[File.join(data_dir,
|
40
|
+
Dir[File.join(data_dir, "**", "*")].select { |f| File.file? f }
|
39
41
|
end
|
40
42
|
|
41
43
|
# Return the paths to each tag file relative to bag_dir
|
@@ -43,7 +45,7 @@ module BagIt
|
|
43
45
|
files = []
|
44
46
|
if tagmanifest_files != []
|
45
47
|
File.open(tagmanifest_files.first) do |f|
|
46
|
-
f.each_line { |line| files << File.join(@bag_dir, line.split(
|
48
|
+
f.each_line { |line| files << File.join(@bag_dir, line.split(" ")[1]) }
|
47
49
|
end
|
48
50
|
end
|
49
51
|
files
|
@@ -56,10 +58,10 @@ module BagIt
|
|
56
58
|
FileUtils.mkdir_p File.dirname(path)
|
57
59
|
|
58
60
|
f = if src_path.nil?
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
61
|
+
File.open(path, "w") { |io| yield io }
|
62
|
+
else
|
63
|
+
FileUtils.cp src_path, path
|
64
|
+
end
|
63
65
|
write_bag_info
|
64
66
|
f
|
65
67
|
end
|
@@ -86,7 +88,7 @@ module BagIt
|
|
86
88
|
|
87
89
|
# Get all bag file paths relative to the data dir
|
88
90
|
def paths
|
89
|
-
bag_files.collect { |f| f.sub(data_dir +
|
91
|
+
bag_files.collect { |f| f.sub(data_dir + "/", "") }
|
90
92
|
end
|
91
93
|
|
92
94
|
# Get the Oxum for the payload files
|
@@ -96,7 +98,7 @@ module BagIt
|
|
96
98
|
# TODO: filesystem quirks? Are we getting the stream size or the size on disk?
|
97
99
|
bytes += File.size(f)
|
98
100
|
end
|
99
|
-
bytes.to_s +
|
101
|
+
bytes.to_s + "." + bag_files.count.to_s
|
100
102
|
end
|
101
103
|
|
102
104
|
# Remove all empty directory trees from the bag
|
data/lib/bagit/fetch.rb
CHANGED
@@ -1,25 +1,27 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "open-uri"
|
2
4
|
|
3
5
|
module BagIt
|
4
6
|
module Fetch
|
5
7
|
def fetch_txt_file
|
6
|
-
File.join @bag_dir,
|
8
|
+
File.join @bag_dir, "fetch.txt"
|
7
9
|
end
|
8
10
|
|
9
11
|
def add_remote_file(url, path, size, sha1, md5)
|
10
|
-
open(fetch_txt_file,
|
11
|
-
open(manifest_file(
|
12
|
-
open(manifest_file(
|
12
|
+
File.open(fetch_txt_file, "a") { |io| io.puts "#{url} #{size || "-"} #{path}" }
|
13
|
+
File.open(manifest_file("sha1"), "a") { |io| io.puts "#{sha1} #{File.join "data", path}" }
|
14
|
+
File.open(manifest_file("md5"), "a") { |io| io.puts "#{md5} #{File.join "data", path}" }
|
13
15
|
end
|
14
16
|
|
15
17
|
# feth all remote files
|
16
18
|
def fetch!
|
17
|
-
open(fetch_txt_file) do |io|
|
19
|
+
File.open(fetch_txt_file) do |io|
|
18
20
|
io.readlines.each do |line|
|
19
21
|
(url, _length, path) = line.chomp.split(/\s+/, 3)
|
20
22
|
|
21
23
|
add_file(path) do |file_io|
|
22
|
-
file_io.write open(url)
|
24
|
+
file_io.write URI.open(url)
|
23
25
|
end
|
24
26
|
end
|
25
27
|
end
|
@@ -29,7 +31,7 @@ module BagIt
|
|
29
31
|
end
|
30
32
|
|
31
33
|
def rename_old_fetch_txt(fetch_txt_file)
|
32
|
-
Dir["#{fetch_txt_file}.?*"].sort.
|
34
|
+
Dir["#{fetch_txt_file}.?*"].sort.reverse_each do |f|
|
33
35
|
if f =~ /fetch.txt.(\d+)$/
|
34
36
|
new_f = File.join File.dirname(f), "fetch.txt.#{Regexp.last_match(1).to_i + 1}"
|
35
37
|
FileUtils.mv f, new_f
|
data/lib/bagit/file.rb
CHANGED
data/lib/bagit/info.rb
CHANGED
@@ -1,27 +1,29 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
2
4
|
|
3
5
|
module BagIt
|
4
6
|
module Info
|
5
7
|
@@bag_info_headers = {
|
6
|
-
agent:
|
7
|
-
org:
|
8
|
-
org_addr:
|
9
|
-
contact_name:
|
10
|
-
contact_phone:
|
11
|
-
contact_email:
|
12
|
-
ext_desc:
|
13
|
-
ext_id:
|
14
|
-
size:
|
15
|
-
group_id:
|
16
|
-
group_count:
|
17
|
-
sender_id:
|
18
|
-
int_desc:
|
19
|
-
date:
|
20
|
-
oxum:
|
8
|
+
agent: "Bag-Software-Agent",
|
9
|
+
org: "Source-Organization",
|
10
|
+
org_addr: "Organization-Address",
|
11
|
+
contact_name: "Contact-Name",
|
12
|
+
contact_phone: "Contact-Phone",
|
13
|
+
contact_email: "Contact-Email",
|
14
|
+
ext_desc: "External-Description",
|
15
|
+
ext_id: "External-Identifier",
|
16
|
+
size: "Bag-Size",
|
17
|
+
group_id: "Bag-Group-Identifier",
|
18
|
+
group_count: "Bag-Count",
|
19
|
+
sender_id: "Internal-Sender-Identifier",
|
20
|
+
int_desc: "Internal-Sender-Description",
|
21
|
+
date: "Bagging-Date",
|
22
|
+
oxum: "Payload-Oxum"
|
21
23
|
}
|
22
24
|
|
23
25
|
def bag_info_txt_file
|
24
|
-
File.join bag_dir,
|
26
|
+
File.join bag_dir, "bag-info.txt"
|
25
27
|
end
|
26
28
|
|
27
29
|
def bag_info
|
@@ -33,13 +35,13 @@ module BagIt
|
|
33
35
|
def write_bag_info(hash = {})
|
34
36
|
hash = bag_info.merge(hash)
|
35
37
|
hash[@@bag_info_headers[:agent]] = "BagIt Ruby Gem (https://github.com/tipr/bagit)" if hash[@@bag_info_headers[:agent]].nil?
|
36
|
-
hash[@@bag_info_headers[:date]] = Date.today.strftime(
|
38
|
+
hash[@@bag_info_headers[:date]] = Date.today.strftime("%Y-%m-%d") if hash[@@bag_info_headers[:date]].nil?
|
37
39
|
hash[@@bag_info_headers[:oxum]] = payload_oxum
|
38
40
|
write_info_file bag_info_txt_file, hash
|
39
41
|
end
|
40
42
|
|
41
43
|
def bagit_txt_file
|
42
|
-
File.join bag_dir,
|
44
|
+
File.join bag_dir, "bagit.txt"
|
43
45
|
end
|
44
46
|
|
45
47
|
def bagit
|
@@ -51,44 +53,44 @@ module BagIt
|
|
51
53
|
end
|
52
54
|
|
53
55
|
def update_bag_date
|
54
|
-
hash["Bagging-Date"] = Date.today.strftime(
|
56
|
+
hash["Bagging-Date"] = Date.today.strftime("%Y-%m-%d")
|
55
57
|
write_bag_info(hash)
|
56
58
|
end
|
57
59
|
|
58
60
|
protected
|
59
61
|
|
60
|
-
|
61
|
-
|
62
|
-
|
62
|
+
def read_info_file(file)
|
63
|
+
File.open(file) do |io|
|
64
|
+
entries = io.read.split(/\n(?=[^\s])/)
|
63
65
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
end
|
66
|
+
entries.inject({}) do |hash, line|
|
67
|
+
name, value = line.chomp.split(/\s*:\s*/, 2)
|
68
|
+
hash.merge(name => value)
|
68
69
|
end
|
69
70
|
end
|
71
|
+
end
|
70
72
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
raise "Multiple labels (#{dups.to_a.join ', '}) in #{file}" unless dups.empty?
|
73
|
+
def write_info_file(file, hash)
|
74
|
+
dups = hash.keys.inject(Set.new) { |acc, key|
|
75
|
+
a = hash.keys.grep(/#{key}/i)
|
76
|
+
acc + (a.size > 1 ? a : [])
|
77
|
+
}
|
78
78
|
|
79
|
-
|
80
|
-
hash.each do |name, value|
|
81
|
-
simple_entry = "#{name}: #{value.gsub(/\s+/, ' ')}"
|
79
|
+
raise "Multiple labels (#{dups.to_a.join ", "}) in #{file}" unless dups.empty?
|
82
80
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
simple_entry
|
87
|
-
end
|
81
|
+
File.open(file, "w") do |io|
|
82
|
+
hash.each do |name, value|
|
83
|
+
simple_entry = "#{name}: #{value.gsub(/\s+/, " ")}"
|
88
84
|
|
89
|
-
|
85
|
+
entry = if simple_entry.length > 79
|
86
|
+
simple_entry.wrap(77).indent(2)
|
87
|
+
else
|
88
|
+
simple_entry
|
90
89
|
end
|
90
|
+
|
91
|
+
io.puts entry
|
91
92
|
end
|
92
93
|
end
|
94
|
+
end
|
93
95
|
end
|
94
96
|
end
|