moab-versioning 4.1.0 → 4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/moab.rb +2 -0
- data/lib/moab/bagger.rb +53 -61
- data/lib/moab/deposit_bag_validator.rb +325 -0
- data/lib/moab/file_group.rb +9 -10
- data/lib/moab/file_group_difference.rb +21 -23
- data/lib/moab/file_inventory.rb +20 -22
- data/lib/moab/file_inventory_difference.rb +2 -2
- data/lib/moab/file_manifestation.rb +1 -1
- data/lib/moab/storage_object_validator.rb +1 -1
- metadata +3 -59
- data/lib/tasks/yard.rake +0 -34
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c13dd09bef589a3c4a3314737c2d3b6dfb4589f
|
4
|
+
data.tar.gz: 629913d22d1ba0b40f62671aabc7cca01767c276
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe0076d5a2e868613c4e5f081abdee5fd93e76effccfdf4219d7849d8f9944cc11ff728b407ffac22a0d02b9a71c30a75e2077c3f11698bb815e336b951f33b4
|
7
|
+
data.tar.gz: 6c3e42f3cf596cd16e2d62858da725e93fcf37ab835bfcc4f9a3951eb4bbd569845210f9dfa40f39048bab235162fe2d169494a5469a31229924ad29dbea01db
|
data/lib/moab.rb
CHANGED
@@ -29,6 +29,7 @@
|
|
29
29
|
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
30
30
|
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
31
31
|
module Moab
|
32
|
+
DEFAULT_CHECKSUM_TYPES = [:md5, :sha1, :sha256].freeze
|
32
33
|
end
|
33
34
|
|
34
35
|
require 'serializer'
|
@@ -57,3 +58,4 @@ require 'moab/storage_services'
|
|
57
58
|
require 'moab/exceptions'
|
58
59
|
require 'moab/verification_result'
|
59
60
|
require 'moab/storage_object_validator'
|
61
|
+
require 'moab/deposit_bag_validator'
|
data/lib/moab/bagger.rb
CHANGED
@@ -1,6 +1,3 @@
|
|
1
|
-
require 'moab'
|
2
|
-
require 'systemu'
|
3
|
-
|
4
1
|
module Moab
|
5
2
|
|
6
3
|
# A class used to create a BagIt package from a version inventory and a set of source files.
|
@@ -60,8 +57,8 @@ module Moab
|
|
60
57
|
# @api internal
|
61
58
|
# @return [void] Generate the bagit.txt tag file
|
62
59
|
def create_bagit_txt()
|
63
|
-
|
64
|
-
|
60
|
+
bag_pathname.mkpath
|
61
|
+
bag_pathname.join("bagit.txt").open('w') do |f|
|
65
62
|
f.puts "Tag-File-Character-Encoding: UTF-8"
|
66
63
|
f.puts "BagIt-Version: 0.97"
|
67
64
|
end
|
@@ -70,12 +67,12 @@ module Moab
|
|
70
67
|
# @return [NilClass] Delete the bagit files
|
71
68
|
def delete_bag()
|
72
69
|
# make sure this looks like a bag before deleting
|
73
|
-
if
|
74
|
-
if
|
75
|
-
|
70
|
+
if bag_pathname.join('bagit.txt').exist?
|
71
|
+
if bag_pathname.join('data').exist?
|
72
|
+
bag_pathname.rmtree
|
76
73
|
else
|
77
|
-
|
78
|
-
|
74
|
+
bag_pathname.children.each {|file| file.delete}
|
75
|
+
bag_pathname.rmdir
|
79
76
|
end
|
80
77
|
end
|
81
78
|
nil
|
@@ -83,8 +80,8 @@ module Moab
|
|
83
80
|
|
84
81
|
# @param tar_pathname [Pathname] The location of the tar file (default is based on bag location)
|
85
82
|
def delete_tarfile()
|
86
|
-
bag_name =
|
87
|
-
bag_parent =
|
83
|
+
bag_name = bag_pathname.basename
|
84
|
+
bag_parent = bag_pathname.parent
|
88
85
|
tar_pathname = bag_parent.join("#{bag_name}.tar")
|
89
86
|
tar_pathname.delete if tar_pathname.exist?
|
90
87
|
end
|
@@ -109,17 +106,17 @@ module Moab
|
|
109
106
|
# @return [FileInventory] Create, write, and return the inventory of the files that will become the payload
|
110
107
|
def create_bag_inventory(package_mode)
|
111
108
|
@package_mode = package_mode
|
112
|
-
|
109
|
+
bag_pathname.mkpath
|
113
110
|
case package_mode
|
114
111
|
when :depositor
|
115
|
-
|
116
|
-
@bag_inventory =
|
117
|
-
|
112
|
+
version_inventory.write_xml_file(bag_pathname, 'version')
|
113
|
+
@bag_inventory = signature_catalog.version_additions(version_inventory)
|
114
|
+
bag_inventory.write_xml_file(bag_pathname, 'additions')
|
118
115
|
when :reconstructor
|
119
|
-
@bag_inventory =
|
120
|
-
|
116
|
+
@bag_inventory = version_inventory
|
117
|
+
bag_inventory.write_xml_file(bag_pathname, 'version')
|
121
118
|
end
|
122
|
-
|
119
|
+
bag_inventory
|
123
120
|
end
|
124
121
|
|
125
122
|
# @api internal
|
@@ -128,9 +125,9 @@ module Moab
|
|
128
125
|
# This method uses Unix hard links in order to greatly speed up the process.
|
129
126
|
# Hard links, however, require that the target bag must be created within the same filesystem as the source files
|
130
127
|
def fill_payload(source_base_pathname)
|
131
|
-
|
128
|
+
bag_inventory.groups.each do |group|
|
132
129
|
group_id = group.group_id
|
133
|
-
case
|
130
|
+
case package_mode
|
134
131
|
when :depositor
|
135
132
|
deposit_group(group_id, source_base_pathname.join(group_id))
|
136
133
|
when :reconstructor
|
@@ -144,9 +141,9 @@ module Moab
|
|
144
141
|
# @return [Boolean] Copy all the files listed in the group inventory to the bag.
|
145
142
|
# Return true if successful or nil if the group was not found in the inventory
|
146
143
|
def deposit_group(group_id, source_dir)
|
147
|
-
group =
|
144
|
+
group = bag_inventory.group(group_id)
|
148
145
|
return nil? if group.nil? or group.files.empty?
|
149
|
-
target_dir =
|
146
|
+
target_dir = bag_pathname.join('data',group_id)
|
150
147
|
group.path_list.each do |relative_path|
|
151
148
|
source = source_dir.join(relative_path)
|
152
149
|
target = target_dir.join(relative_path)
|
@@ -161,11 +158,11 @@ module Moab
|
|
161
158
|
# @return [Boolean] Copy all the files listed in the group inventory to the bag.
|
162
159
|
# Return true if successful or nil if the group was not found in the inventory
|
163
160
|
def reconstuct_group(group_id, storage_object_dir)
|
164
|
-
group =
|
161
|
+
group = bag_inventory.group(group_id)
|
165
162
|
return nil? if group.nil? or group.files.empty?
|
166
|
-
target_dir =
|
163
|
+
target_dir = bag_pathname.join('data',group_id)
|
167
164
|
group.files.each do |file|
|
168
|
-
catalog_entry =
|
165
|
+
catalog_entry = signature_catalog.signature_hash[file.signature]
|
169
166
|
source = storage_object_dir.join(catalog_entry.storage_path)
|
170
167
|
file.instances.each do |instance|
|
171
168
|
target = target_dir.join(instance.path)
|
@@ -190,24 +187,23 @@ module Moab
|
|
190
187
|
def create_payload_manifests
|
191
188
|
manifest_pathname = Hash.new
|
192
189
|
manifest_file = Hash.new
|
193
|
-
|
194
|
-
|
195
|
-
manifest_pathname[type] = @bag_pathname.join("manifest-#{type}.txt")
|
190
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
191
|
+
manifest_pathname[type] = bag_pathname.join("manifest-#{type}.txt")
|
196
192
|
manifest_file[type] = manifest_pathname[type].open('w')
|
197
193
|
end
|
198
|
-
|
194
|
+
bag_inventory.groups.each do |group|
|
199
195
|
group.files.each do |file|
|
200
196
|
fixity = file.signature.fixity
|
201
197
|
file.instances.each do |instance|
|
202
198
|
data_path = File.join('data', group.group_id, instance.path)
|
203
|
-
|
199
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
204
200
|
manifest_file[type].puts("#{fixity[type]} #{data_path}") if fixity[type]
|
205
201
|
end
|
206
202
|
end
|
207
203
|
end
|
208
204
|
end
|
209
205
|
ensure
|
210
|
-
|
206
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
211
207
|
if manifest_file[type]
|
212
208
|
manifest_file[type].close
|
213
209
|
manifest_pathname[type].delete if
|
@@ -219,10 +215,10 @@ module Moab
|
|
219
215
|
# @api internal
|
220
216
|
# @return [void] Generate the bag-info.txt tag file
|
221
217
|
def create_bag_info_txt
|
222
|
-
|
223
|
-
f.puts "External-Identifier: #{
|
224
|
-
f.puts "Payload-Oxum: #{
|
225
|
-
f.puts "Bag-Size: #{
|
218
|
+
bag_pathname.join("bag-info.txt").open('w') do |f|
|
219
|
+
f.puts "External-Identifier: #{bag_inventory.package_id}"
|
220
|
+
f.puts "Payload-Oxum: #{bag_inventory.byte_count}.#{bag_inventory.file_count}"
|
221
|
+
f.puts "Bag-Size: #{bag_inventory.human_size}"
|
226
222
|
end
|
227
223
|
end
|
228
224
|
|
@@ -231,22 +227,21 @@ module Moab
|
|
231
227
|
def create_tagfile_manifests()
|
232
228
|
manifest_pathname = Hash.new
|
233
229
|
manifest_file = Hash.new
|
234
|
-
|
235
|
-
|
236
|
-
manifest_pathname[type] = @bag_pathname.join("tagmanifest-#{type}.txt")
|
230
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
231
|
+
manifest_pathname[type] = bag_pathname.join("tagmanifest-#{type}.txt")
|
237
232
|
manifest_file[type] = manifest_pathname[type].open('w')
|
238
233
|
end
|
239
|
-
|
234
|
+
bag_pathname.children.each do |file|
|
240
235
|
unless file.directory? || file.basename.to_s[0, 11] == 'tagmanifest'
|
241
236
|
signature = FileSignature.new.signature_from_file(file)
|
242
237
|
fixity = signature.fixity
|
243
|
-
|
238
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
244
239
|
manifest_file[type].puts("#{fixity[type]} #{file.basename}") if fixity[type]
|
245
240
|
end
|
246
241
|
end
|
247
242
|
end
|
248
243
|
ensure
|
249
|
-
|
244
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
250
245
|
if manifest_file[type]
|
251
246
|
manifest_file[type].close
|
252
247
|
manifest_pathname[type].delete if
|
@@ -257,8 +252,8 @@ module Moab
|
|
257
252
|
|
258
253
|
# @return [Boolean] Create a tar file containing the bag
|
259
254
|
def create_tarfile(tar_pathname=nil)
|
260
|
-
bag_name =
|
261
|
-
bag_parent =
|
255
|
+
bag_name = bag_pathname.basename
|
256
|
+
bag_parent = bag_pathname.parent
|
262
257
|
tar_pathname ||= bag_parent.join("#{bag_name}.tar")
|
263
258
|
tar_cmd="cd '#{bag_parent}'; tar --dereference --force-local -cf '#{tar_pathname}' '#{bag_name}'"
|
264
259
|
begin
|
@@ -268,27 +263,24 @@ module Moab
|
|
268
263
|
end
|
269
264
|
raise "Unable to create tarfile #{tar_pathname}" unless tar_pathname.exist?
|
270
265
|
return true
|
271
|
-
|
272
266
|
end
|
273
267
|
|
274
|
-
# Executes a system command in a subprocess
|
275
|
-
#
|
276
|
-
#
|
277
|
-
# The exception's message will contain the explanation of the failure.
|
278
|
-
# @param [String] command the command to be executed
|
279
|
-
# @return [String] stdout from the command if execution was successful
|
268
|
+
# Executes a system command in a subprocess
|
269
|
+
# if the command isn't successful, captures stdout and stderr and puts them in the Ruby exception message
|
270
|
+
# @return stdout if execution was successful
|
280
271
|
def shell_execute(command)
|
281
|
-
|
282
|
-
|
283
|
-
|
272
|
+
require 'open3'
|
273
|
+
stdout, stderr, status = Open3.capture3(command.chomp)
|
274
|
+
if status.success? && status.exitstatus.zero?
|
275
|
+
stdout
|
276
|
+
else
|
277
|
+
msg = "Shell command failed: [#{command}] caused by <STDERR = #{stderr}>"
|
278
|
+
msg << " STDOUT = #{stdout}" if stdout && stdout.length.positive?
|
279
|
+
raise(StandardError, msg)
|
284
280
|
end
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
msg << " STDOUT = #{stdout.split($/).join('; ')}" if (stdout && (stdout.length > 0))
|
289
|
-
raise msg
|
281
|
+
rescue SystemCallError => e
|
282
|
+
msg = "Shell command failed: [#{command}] caused by #{e.inspect}"
|
283
|
+
raise(StandardError, msg)
|
290
284
|
end
|
291
|
-
|
292
285
|
end
|
293
|
-
|
294
286
|
end
|
@@ -0,0 +1,325 @@
|
|
1
|
+
module Moab
|
2
|
+
|
3
|
+
# Given a deposit bag, ensures the contents are valid for becoming a StorageObjectVersion
|
4
|
+
# this is a Shameless Green implementation, combining code from:
|
5
|
+
# - sdr-preservation-core/lib/sdr_ingest/validate_bag <-- old preservation robots
|
6
|
+
# - archive-utils/lib/bagit_bag <-- gem only used by sdr-preservation-robots
|
7
|
+
# - archive-utils/lib/file_fixity
|
8
|
+
# - archive-utils/lib/fixity
|
9
|
+
# this code adds duplication to this gem (see github issue #119);
|
10
|
+
# for example, computing checksums is done in both:
|
11
|
+
# - deposit_bag_validator
|
12
|
+
# - file_signature
|
13
|
+
class DepositBagValidator
|
14
|
+
|
15
|
+
BAG_DIR_NOT_FOUND = :bag_dir_not_found
|
16
|
+
CHECKSUM_MISMATCH = :checksum_mismatch
|
17
|
+
CHECKSUM_TYPE_UNRECOGNIZED = :checksum_type_unrecognized
|
18
|
+
INVALID_VERSION_XXX_XML = :invalid_versionXxx_xml
|
19
|
+
PAYLOAD_SIZE_MISMATCH = :payload_size_mismatch
|
20
|
+
REQUIRED_FILE_NOT_FOUND = :required_file_not_found
|
21
|
+
VERSION_MISMATCH_TO_MOAB = :version_mismatch_to_moab
|
22
|
+
VERSION_MISSING_FROM_FILE = :version_missing_from_file
|
23
|
+
|
24
|
+
ERROR_CODE_TO_MESSAGES = {
|
25
|
+
BAG_DIR_NOT_FOUND => "Deposit bag directory %{bag_dir} does not exist",
|
26
|
+
CHECKSUM_MISMATCH => "Failed %{manifest_type} verification. Differences: \n%{diffs}",
|
27
|
+
CHECKSUM_TYPE_UNRECOGNIZED => "Checksum type unrecognized: %{checksum_type}; file: %{filename}",
|
28
|
+
INVALID_VERSION_XXX_XML => "Unable to parse %{file_pathname}: %{err_info}",
|
29
|
+
PAYLOAD_SIZE_MISMATCH => "Failed payload size verification. Expected: %{bag_info_sizes}; found: %{generated_sizes}",
|
30
|
+
REQUIRED_FILE_NOT_FOUND => "Deposit bag required file %{file_pathname} not found",
|
31
|
+
VERSION_MISMATCH_TO_MOAB => "Version mismatch in %{file_pathname}: Moab expected %{new_version}; found %{file_version}",
|
32
|
+
VERSION_MISSING_FROM_FILE => "Version xml file %{version_file} missing data at %{xpath} containing version id"
|
33
|
+
}.freeze
|
34
|
+
|
35
|
+
REQUIRED_MANIFEST_CHECKSUM_TYPE = 'sha256'.freeze
|
36
|
+
RECOGNIZED_CHECKSUM_ALGORITHMS = [:md5, :sha1, :sha256, :sha384, :sha512].freeze
|
37
|
+
|
38
|
+
TAGMANIFEST = 'tagmanifest'.freeze
|
39
|
+
MANIFEST = 'manifest'.freeze
|
40
|
+
DATA_DIR_BASENAME = 'data'.freeze
|
41
|
+
BAG_INFO_TXT_BASENAME = 'bag-info.txt'.freeze
|
42
|
+
VERSION_ADDITIONS_BASENAME = 'versionAdditions.xml'.freeze
|
43
|
+
VERSION_INVENTORY_BASENAME = 'versionInventory.xml'.freeze
|
44
|
+
VERSION_METADATA_PATH = "#{DATA_DIR_BASENAME}/metadata/versionMetadata.xml".freeze
|
45
|
+
|
46
|
+
REQUIRED_BAG_FILES = [
|
47
|
+
DATA_DIR_BASENAME,
|
48
|
+
'bagit.txt'.freeze,
|
49
|
+
BAG_INFO_TXT_BASENAME,
|
50
|
+
"#{MANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt".freeze,
|
51
|
+
"#{TAGMANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt".freeze,
|
52
|
+
VERSION_ADDITIONS_BASENAME,
|
53
|
+
VERSION_INVENTORY_BASENAME,
|
54
|
+
VERSION_METADATA_PATH
|
55
|
+
].freeze
|
56
|
+
|
57
|
+
attr_reader :deposit_bag_pathname, :expected_new_version, :result_array
|
58
|
+
|
59
|
+
def initialize(storage_object)
|
60
|
+
@deposit_bag_pathname = storage_object.deposit_bag_pathname
|
61
|
+
@expected_new_version = storage_object.current_version_id + 1
|
62
|
+
@result_array = []
|
63
|
+
end
|
64
|
+
|
65
|
+
# returns Array of tiny error hashes, allowing multiple occurrences of a single error code
|
66
|
+
def validation_errors
|
67
|
+
return [single_error_hash(BAG_DIR_NOT_FOUND, bag_dir: deposit_bag_pathname)] unless deposit_bag_pathname.exist?
|
68
|
+
return result_array unless required_bag_files_exist?
|
69
|
+
verify_version
|
70
|
+
verify_tagmanifests
|
71
|
+
verify_payload_size
|
72
|
+
verify_payload_manifests
|
73
|
+
result_array # attr that accumulates any errors encountered along the way
|
74
|
+
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
def bag_dir_exists?
|
79
|
+
deposit_bag_pathname.exist?
|
80
|
+
end
|
81
|
+
|
82
|
+
# assumes this is called when result_array is empty, as subsequent checks will use these required files
|
83
|
+
def required_bag_files_exist?
|
84
|
+
REQUIRED_BAG_FILES.each do |filename|
|
85
|
+
pathname = deposit_bag_pathname.join(filename)
|
86
|
+
result_array << single_error_hash(REQUIRED_FILE_NOT_FOUND, file_pathname: pathname) unless pathname.exist?
|
87
|
+
end
|
88
|
+
result_array.empty? ? true : false
|
89
|
+
end
|
90
|
+
|
91
|
+
def verify_version
|
92
|
+
version_md_pathname = deposit_bag_pathname.join(VERSION_METADATA_PATH)
|
93
|
+
version_from_file = last_version_id_from_version_md_xml(version_md_pathname)
|
94
|
+
verify_version_from_xml_file(version_md_pathname, version_from_file) if version_from_file
|
95
|
+
|
96
|
+
version_additions_pathname = deposit_bag_pathname.join(VERSION_ADDITIONS_BASENAME)
|
97
|
+
version_from_file = version_id_from_version_manifest_xml(version_additions_pathname)
|
98
|
+
verify_version_from_xml_file(version_additions_pathname, version_from_file) if version_from_file
|
99
|
+
|
100
|
+
version_inventory_pathname = deposit_bag_pathname.join(VERSION_INVENTORY_BASENAME)
|
101
|
+
version_from_file = version_id_from_version_manifest_xml(version_inventory_pathname)
|
102
|
+
verify_version_from_xml_file(version_inventory_pathname, version_from_file) if version_from_file
|
103
|
+
end
|
104
|
+
|
105
|
+
def last_version_id_from_version_md_xml(version_md_pathname)
|
106
|
+
last_version_id_from_xml(version_md_pathname, '/versionMetadata/version/@versionId')
|
107
|
+
end
|
108
|
+
|
109
|
+
def version_id_from_version_manifest_xml(version_manifest_xml_pathname)
|
110
|
+
last_version_id_from_xml(version_manifest_xml_pathname, '/fileInventory/@versionId')
|
111
|
+
end
|
112
|
+
|
113
|
+
def last_version_id_from_xml(pathname, xpath)
|
114
|
+
doc = Nokogiri::XML(File.open(pathname.to_s), &:strict)
|
115
|
+
version_id = doc.xpath(xpath).last.text unless doc.xpath(xpath).empty?
|
116
|
+
return version_id.to_i if version_id
|
117
|
+
err_data = {
|
118
|
+
version_file: pathname,
|
119
|
+
xpath: xpath
|
120
|
+
}
|
121
|
+
result_array << single_error_hash(VERSION_MISSING_FROM_FILE, err_data) unless version_id
|
122
|
+
nil
|
123
|
+
rescue StandardError => e
|
124
|
+
err_data = {
|
125
|
+
file_pathname: pathname,
|
126
|
+
err_info: "#{e}\n#{e.backtrace}"
|
127
|
+
}
|
128
|
+
result_array << single_error_hash(INVALID_VERSION_XXX_XML, err_data)
|
129
|
+
nil
|
130
|
+
end
|
131
|
+
|
132
|
+
def verify_version_from_xml_file(file_pathname, found)
|
133
|
+
return if found == expected_new_version
|
134
|
+
err_data = {
|
135
|
+
file_pathname: file_pathname,
|
136
|
+
new_version: expected_new_version,
|
137
|
+
file_version: found
|
138
|
+
}
|
139
|
+
result_array << single_error_hash(VERSION_MISMATCH_TO_MOAB, err_data)
|
140
|
+
end
|
141
|
+
|
142
|
+
# adds to result_array if tagmanifest checksums don't match generated checksums
|
143
|
+
def verify_tagmanifests
|
144
|
+
tagmanifests_checksums_hash = checksums_hash_from_manifest_files(TAGMANIFEST)
|
145
|
+
types_to_generate = checksum_types_from_manifest_checksums_hash(tagmanifests_checksums_hash)
|
146
|
+
generated_checksums_hash = generate_tagmanifest_checksums_hash(types_to_generate)
|
147
|
+
verify_manifest_checksums(TAGMANIFEST, tagmanifests_checksums_hash, generated_checksums_hash)
|
148
|
+
end
|
149
|
+
|
150
|
+
# adds to result_array if manifest checksums don't match generated checksums
|
151
|
+
def verify_payload_manifests
|
152
|
+
manifests_checksums_hash = checksums_hash_from_manifest_files(MANIFEST)
|
153
|
+
types_to_generate = checksum_types_from_manifest_checksums_hash(manifests_checksums_hash)
|
154
|
+
generated_checksums_hash = generate_payload_checksums(types_to_generate)
|
155
|
+
verify_manifest_checksums(MANIFEST, manifests_checksums_hash, generated_checksums_hash)
|
156
|
+
end
|
157
|
+
|
158
|
+
# construct hash based on manifest_type-alg.txt files in bag home dir
|
159
|
+
# key: file_name, relative to base_path, value: hash of checksum alg => checksum value
|
160
|
+
def checksums_hash_from_manifest_files(manifest_type)
|
161
|
+
checksums_hash = {}
|
162
|
+
deposit_bag_pathname.children.each do |child_pathname|
|
163
|
+
if child_pathname.file?
|
164
|
+
child_fname = child_pathname.basename.to_s
|
165
|
+
match_result = child_fname.match("^#{manifest_type}-(.*).txt")
|
166
|
+
if match_result
|
167
|
+
checksum_type = match_result.captures.first.to_sym
|
168
|
+
if RECOGNIZED_CHECKSUM_ALGORITHMS.include?(checksum_type)
|
169
|
+
child_pathname.readlines.each do |line|
|
170
|
+
line.chomp!.strip!
|
171
|
+
checksum, file_name = line.split(/[\s*]+/, 2)
|
172
|
+
file_checksums = checksums_hash[file_name] || {}
|
173
|
+
file_checksums[checksum_type] = checksum
|
174
|
+
checksums_hash[file_name] = file_checksums
|
175
|
+
end
|
176
|
+
else
|
177
|
+
result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: child_pathname)
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
checksums_hash
|
183
|
+
end
|
184
|
+
|
185
|
+
# generate hash of checksums by file name for bag home dir files
|
186
|
+
def generate_tagmanifest_checksums_hash(types_to_generate)
|
187
|
+
# all names in the bag home dir except those starting with 'tagmanifest'
|
188
|
+
home_dir_pathnames = deposit_bag_pathname.children.reject { |file| file.basename.to_s.start_with?(TAGMANIFEST) }
|
189
|
+
hash_with_full_pathnames = generate_checksums_hash(home_dir_pathnames, types_to_generate)
|
190
|
+
# return hash keys as basenames only
|
191
|
+
hash_with_full_pathnames.map { |k, v| [Pathname.new(k).basename.to_s, v] }.to_h
|
192
|
+
end
|
193
|
+
|
194
|
+
# generate hash of checksums by file name for bag data dir files
|
195
|
+
def generate_payload_checksums(types_to_generate)
|
196
|
+
data_pathnames = deposit_bag_pathname.join(DATA_DIR_BASENAME).find
|
197
|
+
hash_with_full_pathnames = generate_checksums_hash(data_pathnames, types_to_generate)
|
198
|
+
# return hash keys beginning with 'data/'
|
199
|
+
hash_with_full_pathnames.map { |k, v| [Pathname.new(k).relative_path_from(deposit_bag_pathname).to_s, v] }.to_h
|
200
|
+
end
|
201
|
+
|
202
|
+
def generate_checksums_hash(pathnames, types_to_generate)
|
203
|
+
file_checksums_hash = {}
|
204
|
+
pathnames.each do |pathname|
|
205
|
+
file_checksums_hash[pathname.to_s] = generated_checksums(pathname, types_to_generate) if pathname.file?
|
206
|
+
end
|
207
|
+
file_checksums_hash
|
208
|
+
end
|
209
|
+
|
210
|
+
def generated_checksums(pathname, types_to_generate)
|
211
|
+
my_digester_hash = digester_hash(types_to_generate)
|
212
|
+
pathname.open('r') do |stream|
|
213
|
+
while (buffer = stream.read(8192))
|
214
|
+
my_digester_hash.each_value { |digest| digest.update(buffer) }
|
215
|
+
end
|
216
|
+
end
|
217
|
+
file_checksums = {}
|
218
|
+
my_digester_hash.each do |checksum_type, digest|
|
219
|
+
file_checksums[checksum_type] = digest.hexdigest
|
220
|
+
end
|
221
|
+
file_checksums
|
222
|
+
end
|
223
|
+
|
224
|
+
def digester_hash(types_to_generate=DEFAULT_CHECKSUM_TYPES)
|
225
|
+
types_to_generate.each_with_object({}) do |checksum_type, digester_hash|
|
226
|
+
case checksum_type
|
227
|
+
when :md5
|
228
|
+
digester_hash[checksum_type] = Digest::MD5.new
|
229
|
+
when :sha1
|
230
|
+
digester_hash[checksum_type] = Digest::SHA1.new
|
231
|
+
when :sha256
|
232
|
+
digester_hash[checksum_type] = Digest::SHA2.new(256)
|
233
|
+
when :sha384
|
234
|
+
digesters[checksum_type] = Digest::SHA2.new(384)
|
235
|
+
when :sha512
|
236
|
+
digesters[checksum_type] = Digest::SHA2.new(512)
|
237
|
+
else
|
238
|
+
result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: nil)
|
239
|
+
end
|
240
|
+
digester_hash
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
def verify_manifest_checksums(manifest_type, manifests_checksum_hash, generated_checksum_hash)
|
245
|
+
diff_hash = {}
|
246
|
+
# NOTE: this is intentionally | instead of ||
|
247
|
+
(manifests_checksum_hash.keys | generated_checksum_hash.keys).each do |file_name|
|
248
|
+
manifest_checksums = manifests_checksum_hash[file_name] || {}
|
249
|
+
generated_checksums = generated_checksum_hash[file_name] || {}
|
250
|
+
if manifest_checksums != generated_checksums
|
251
|
+
cdh = checksums_diff_hash(manifest_checksums, generated_checksums, manifest_type, 'generated')
|
252
|
+
diff_hash[file_name] = cdh if cdh
|
253
|
+
end
|
254
|
+
end
|
255
|
+
return if diff_hash.empty?
|
256
|
+
err_data = {
|
257
|
+
manifest_type: manifest_type,
|
258
|
+
diffs: diff_hash
|
259
|
+
}
|
260
|
+
result_array << single_error_hash(CHECKSUM_MISMATCH, err_data)
|
261
|
+
end
|
262
|
+
|
263
|
+
def checksums_diff_hash(left_checksums, right_checksums, left_label, right_label)
|
264
|
+
diff_hash = {}
|
265
|
+
# NOTE: these are intentionally & and | instead of && and ||
|
266
|
+
checksum_types_to_compare = (left_checksums.keys & right_checksums.keys)
|
267
|
+
checksum_types_to_compare = (left_checksums.keys | right_checksums.keys) if checksum_types_to_compare.empty?
|
268
|
+
checksum_types_to_compare.each do |type|
|
269
|
+
left_checksum = left_checksums[type]
|
270
|
+
right_checksum = right_checksums[type]
|
271
|
+
if left_checksum != right_checksum
|
272
|
+
diff_hash[type] = { left_label => left_checksum, right_label => right_checksum }
|
273
|
+
end
|
274
|
+
end
|
275
|
+
diff_hash.empty? ? nil : diff_hash
|
276
|
+
end
|
277
|
+
|
278
|
+
def verify_payload_size
|
279
|
+
sizes_from_bag_info_file = bag_info_payload_size
|
280
|
+
generated_sizes = generated_payload_size
|
281
|
+
return if sizes_from_bag_info_file == generated_sizes
|
282
|
+
err_data = {
|
283
|
+
bag_info_sizes: sizes_from_bag_info_file,
|
284
|
+
generated_sizes: generated_sizes
|
285
|
+
}
|
286
|
+
result_array << single_error_hash(PAYLOAD_SIZE_MISMATCH, err_data)
|
287
|
+
end
|
288
|
+
|
289
|
+
def bag_info_payload_size
|
290
|
+
bag_info_txt_pathname = deposit_bag_pathname.join(BAG_INFO_TXT_BASENAME)
|
291
|
+
bag_info_txt_pathname.readlines.each do |line|
|
292
|
+
line.chomp!.strip!
|
293
|
+
key, value = line.split(':', 2)
|
294
|
+
if key.strip == 'Payload-Oxum'
|
295
|
+
num_bytes, num_files = value.strip.split('.') if value
|
296
|
+
return { bytes: num_bytes.to_i, files: num_files.to_i }
|
297
|
+
end
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
def generated_payload_size
|
302
|
+
payload_pathname = deposit_bag_pathname.join(DATA_DIR_BASENAME)
|
303
|
+
payload_pathname.find.select(&:file?).each_with_object(bytes: 0, files: 0) do |file, hash|
|
304
|
+
hash[:bytes] += file.size
|
305
|
+
hash[:files] += 1
|
306
|
+
hash
|
307
|
+
end
|
308
|
+
end
|
309
|
+
|
310
|
+
# checksums_hash: { fname => {:md5=>"xxx", :sha1=>"yyy"}, fname => ... }
|
311
|
+
def checksum_types_from_manifest_checksums_hash(checksums_hash)
|
312
|
+
types = []
|
313
|
+
checksums_hash.each_value { |v| v.each_key { |k| types << k unless types.include?(k) } }
|
314
|
+
types
|
315
|
+
end
|
316
|
+
|
317
|
+
def single_error_hash(error_code, err_data_hash)
|
318
|
+
{ error_code => error_code_msg(error_code, err_data_hash) }
|
319
|
+
end
|
320
|
+
|
321
|
+
def error_code_msg(error_code, err_data_hash)
|
322
|
+
ERROR_CODE_TO_MESSAGES[error_code] % err_data_hash
|
323
|
+
end
|
324
|
+
end
|
325
|
+
end
|
data/lib/moab/file_group.rb
CHANGED
@@ -66,13 +66,12 @@ module Moab
|
|
66
66
|
%w{group_id file_count byte_count block_count}
|
67
67
|
end
|
68
68
|
|
69
|
-
|
70
69
|
# @attribute
|
71
70
|
# @return [Array<FileManifestation>] The set of files comprising the group
|
72
71
|
has_many :files, FileManifestation, :tag => 'file'
|
73
72
|
|
74
73
|
def files
|
75
|
-
|
74
|
+
signature_hash.values
|
76
75
|
end
|
77
76
|
|
78
77
|
# @return [Hash<FileSignature, FileManifestation>] The actual in-memory store for the collection
|
@@ -84,7 +83,7 @@ module Moab
|
|
84
83
|
# used to test for existence of a filename in this file group
|
85
84
|
def path_hash
|
86
85
|
path_hash = Hash.new
|
87
|
-
|
86
|
+
signature_hash.each do |signature,manifestation|
|
88
87
|
manifestation.instances.each do |instance|
|
89
88
|
path_hash[instance.path] = signature
|
90
89
|
end
|
@@ -103,7 +102,7 @@ module Moab
|
|
103
102
|
def path_hash_subset(signature_subset)
|
104
103
|
path_hash = Hash.new
|
105
104
|
signature_subset.each do |signature|
|
106
|
-
manifestation =
|
105
|
+
manifestation = signature_hash[signature]
|
107
106
|
manifestation.instances.each do |instance|
|
108
107
|
path_hash[instance.path] = signature
|
109
108
|
end
|
@@ -135,12 +134,12 @@ module Moab
|
|
135
134
|
# @return [void] Add a single {FileSignature},{FileInstance} key/value pair to this group.
|
136
135
|
# Data is actually stored in the {#signature_hash}
|
137
136
|
def add_file_instance(signature,instance)
|
138
|
-
if
|
139
|
-
manifestation =
|
137
|
+
if signature_hash.has_key?(signature)
|
138
|
+
manifestation = signature_hash[signature]
|
140
139
|
else
|
141
140
|
manifestation = FileManifestation.new
|
142
141
|
manifestation.signature = signature
|
143
|
-
|
142
|
+
signature_hash[signature] = manifestation
|
144
143
|
end
|
145
144
|
manifestation.instances << instance
|
146
145
|
end
|
@@ -150,7 +149,7 @@ module Moab
|
|
150
149
|
# for example, the manifest inventory does not contain a file entry for itself
|
151
150
|
def remove_file_having_path(path)
|
152
151
|
signature = self.path_hash[path]
|
153
|
-
|
152
|
+
signature_hash.delete(signature)
|
154
153
|
end
|
155
154
|
|
156
155
|
# @return [Pathname] The full path used as the basis of the relative paths reported
|
@@ -203,13 +202,13 @@ module Moab
|
|
203
202
|
# @see http://stackoverflow.com/questions/3974087/how-to-make-rubys-find-find-follow-symlinks
|
204
203
|
# @see http://stackoverflow.com/questions/357754/can-i-traverse-symlinked-directories-in-ruby-with-a-glob
|
205
204
|
def harvest_directory(path, recursive, validated=nil)
|
206
|
-
pathname=Pathname.new(path).expand_path
|
205
|
+
pathname = Pathname.new(path).expand_path
|
207
206
|
validated ||= is_descendent_of_base?(pathname)
|
208
207
|
pathname.children.sort.each do |child|
|
209
208
|
if child.basename.to_s == ".DS_Store"
|
210
209
|
next
|
211
210
|
elsif child.directory?
|
212
|
-
harvest_directory(child,recursive, validated) if recursive
|
211
|
+
harvest_directory(child, recursive, validated) if recursive
|
213
212
|
else
|
214
213
|
add_physical_file(child, validated)
|
215
214
|
end
|
@@ -49,7 +49,7 @@ module Moab
|
|
49
49
|
# @param change [String] the change type to search for
|
50
50
|
# @return [FileGroupDifferenceSubset] Find a specified subset of changes
|
51
51
|
def subset(change)
|
52
|
-
|
52
|
+
subset_hash[change.to_sym]
|
53
53
|
end
|
54
54
|
|
55
55
|
# (see Serializable#initialize)
|
@@ -69,7 +69,7 @@ module Moab
|
|
69
69
|
|
70
70
|
def difference_count
|
71
71
|
count = 0
|
72
|
-
|
72
|
+
subset_hash.each do |type, subset|
|
73
73
|
count += subset.count if type != :identical
|
74
74
|
end
|
75
75
|
count
|
@@ -79,49 +79,49 @@ module Moab
|
|
79
79
|
# @return [Integer] How many files were unchanged
|
80
80
|
attribute :identical, Integer, :on_save => Proc.new { |n| n.to_s }
|
81
81
|
def identical
|
82
|
-
|
82
|
+
subset_hash[:identical].count
|
83
83
|
end
|
84
84
|
|
85
85
|
# @attribute
|
86
86
|
# @return [Integer] How many duplicate copies of files were added
|
87
87
|
attribute :copyadded, Integer, :on_save => Proc.new { |n| n.to_s }
|
88
88
|
def copyadded
|
89
|
-
|
89
|
+
subset_hash[:copyadded].count
|
90
90
|
end
|
91
91
|
|
92
92
|
# @attribute
|
93
93
|
# @return [Integer] How many duplicate copies of files were deleted
|
94
94
|
attribute :copydeleted, Integer, :on_save => Proc.new { |n| n.to_s }
|
95
95
|
def copydeleted
|
96
|
-
|
96
|
+
subset_hash[:copydeleted].count
|
97
97
|
end
|
98
98
|
|
99
99
|
# @attribute
|
100
100
|
# @return [Integer] How many files were renamed
|
101
101
|
attribute :renamed, Integer, :on_save => Proc.new { |n| n.to_s }
|
102
102
|
def renamed
|
103
|
-
|
103
|
+
subset_hash[:renamed].count
|
104
104
|
end
|
105
105
|
|
106
106
|
# @attribute
|
107
107
|
# @return [Integer] How many files were modified
|
108
108
|
attribute :modified, Integer, :on_save => Proc.new { |n| n.to_s }
|
109
109
|
def modified
|
110
|
-
|
110
|
+
subset_hash[:modified].count
|
111
111
|
end
|
112
112
|
|
113
113
|
# @attribute
|
114
114
|
# @return [Integer] How many files were added
|
115
115
|
attribute :added, Integer, :on_save => Proc.new { |n| n.to_s }
|
116
116
|
def added
|
117
|
-
|
117
|
+
subset_hash[:added].count
|
118
118
|
end
|
119
119
|
|
120
120
|
# @attribute
|
121
121
|
# @return [Integer] How many files were deleted
|
122
122
|
attribute :deleted, Integer, :on_save => Proc.new { |n| n.to_s }
|
123
123
|
def deleted
|
124
|
-
|
124
|
+
subset_hash[:deleted].count
|
125
125
|
end
|
126
126
|
|
127
127
|
# @attribute
|
@@ -130,12 +130,12 @@ module Moab
|
|
130
130
|
has_many :subsets, FileGroupDifferenceSubset, :tag => 'subset'
|
131
131
|
|
132
132
|
def subsets
|
133
|
-
|
133
|
+
subset_hash.values
|
134
134
|
end
|
135
135
|
|
136
136
|
def subsets=(array)
|
137
137
|
if array
|
138
|
-
array.each{|subset|
|
138
|
+
array.each{|subset| subset_hash[subset.change.to_sym] = subset}
|
139
139
|
end
|
140
140
|
end
|
141
141
|
|
@@ -237,13 +237,12 @@ module Moab
|
|
237
237
|
fid.basis_path = path
|
238
238
|
fid.other_path = "same"
|
239
239
|
fid.signatures << signature
|
240
|
-
|
240
|
+
subset_hash[:identical].files << fid
|
241
241
|
end
|
242
242
|
end
|
243
243
|
self
|
244
244
|
end
|
245
245
|
|
246
|
-
|
247
246
|
# @api internal
|
248
247
|
# @param matching_signatures [Array<FileSignature>] The file signature of the file manifestations being compared
|
249
248
|
# @param basis_signature_hash [Hash<FileSignature, FileManifestation>]
|
@@ -272,13 +271,12 @@ module Moab
|
|
272
271
|
else
|
273
272
|
fid.change = 'renamed'
|
274
273
|
end
|
275
|
-
|
274
|
+
subset_hash[fid.change.to_sym].files << fid
|
276
275
|
end
|
277
276
|
end
|
278
277
|
self
|
279
278
|
end
|
280
279
|
|
281
|
-
|
282
280
|
# @api internal
|
283
281
|
# @param basis_path_hash [Hash<String,FileSignature>]
|
284
282
|
# The file paths and associated signatures for manifestations appearing only in the basis group
|
@@ -293,7 +291,7 @@ module Moab
|
|
293
291
|
fid.other_path = "same"
|
294
292
|
fid.signatures << basis_path_hash[path]
|
295
293
|
fid.signatures << other_path_hash[path]
|
296
|
-
|
294
|
+
subset_hash[:modified].files << fid
|
297
295
|
end
|
298
296
|
self
|
299
297
|
end
|
@@ -311,7 +309,7 @@ module Moab
|
|
311
309
|
fid.basis_path = ""
|
312
310
|
fid.other_path = path
|
313
311
|
fid.signatures << other_path_hash[path]
|
314
|
-
|
312
|
+
subset_hash[:added].files << fid
|
315
313
|
end
|
316
314
|
self
|
317
315
|
end
|
@@ -329,7 +327,7 @@ module Moab
|
|
329
327
|
fid.basis_path = path
|
330
328
|
fid.other_path = ""
|
331
329
|
fid.signatures << basis_path_hash[path]
|
332
|
-
|
330
|
+
subset_hash[:deleted].files << fid
|
333
331
|
end
|
334
332
|
self
|
335
333
|
end
|
@@ -340,15 +338,15 @@ module Moab
|
|
340
338
|
deltas = Hash.new {|hash, key| hash[key] = []}
|
341
339
|
# case where other_path is empty or 'same'. (create array of strings)
|
342
340
|
[:identical, :modified, :deleted, :copydeleted].each do |change|
|
343
|
-
deltas[change].concat(
|
341
|
+
deltas[change].concat(subset_hash[change].files.collect{ |file| file.basis_path })
|
344
342
|
end
|
345
343
|
# case where basis_path and other_path are both present. (create array of arrays)
|
346
344
|
[:copyadded, :renamed].each do |change|
|
347
|
-
deltas[change].concat(
|
345
|
+
deltas[change].concat(subset_hash[change].files.collect { |file| [file.basis_path, file.other_path] })
|
348
346
|
end
|
349
347
|
# case where basis_path is empty. (create array of strings)
|
350
348
|
[:added].each do |change|
|
351
|
-
deltas[change].concat(
|
349
|
+
deltas[change].concat(subset_hash[change].files.collect { |file| file.other_path })
|
352
350
|
end
|
353
351
|
deltas
|
354
352
|
end
|
@@ -362,7 +360,7 @@ module Moab
|
|
362
360
|
# Split the filepairs into two arrays
|
363
361
|
oldnames = []
|
364
362
|
newnames = []
|
365
|
-
filepairs.each do |old,new|
|
363
|
+
filepairs.each do |old, new|
|
366
364
|
oldnames << old
|
367
365
|
newnames << new
|
368
366
|
end
|
@@ -374,7 +372,7 @@ module Moab
|
|
374
372
|
# @param [Array<Array<String>>] filepairs The set of oldname, newname pairs for all files being renamed
|
375
373
|
# @return [Array<Array<String>>] a set of file triples containing oldname, tempname, newname
|
376
374
|
def rename_tempfile_triplets(filepairs)
|
377
|
-
filepairs.collect{|old,new| [old, new, "#{new}-#{Time.now.strftime('%Y%m%d%H%H%S')}-tmp"]}
|
375
|
+
filepairs.collect{ |old, new| [old, new, "#{new}-#{Time.now.strftime('%Y%m%d%H%H%S')}-tmp"] }
|
378
376
|
end
|
379
377
|
|
380
378
|
end
|
data/lib/moab/file_inventory.rb
CHANGED
@@ -52,11 +52,11 @@ module Moab
|
|
52
52
|
|
53
53
|
# @attribute
|
54
54
|
# @return [Integer] The ordinal version number
|
55
|
-
attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new {|n| n.to_s}
|
55
|
+
attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new { |n| n.to_s }
|
56
56
|
|
57
57
|
# @return [String] The unique identifier concatenating digital object id with version id
|
58
58
|
def composite_key
|
59
|
-
|
59
|
+
digital_object_id + '-' + StorageObject.version_dirname(version_id)
|
60
60
|
end
|
61
61
|
|
62
62
|
# @attribute
|
@@ -64,7 +64,7 @@ module Moab
|
|
64
64
|
attribute :inventory_datetime, String, :tag => 'inventoryDatetime'
|
65
65
|
|
66
66
|
def inventory_datetime=(datetime)
|
67
|
-
@inventory_datetime=Moab::UtcTime.input(datetime)
|
67
|
+
@inventory_datetime = Moab::UtcTime.input(datetime)
|
68
68
|
end
|
69
69
|
|
70
70
|
def inventory_datetime
|
@@ -73,7 +73,7 @@ module Moab
|
|
73
73
|
|
74
74
|
# @attribute
|
75
75
|
# @return [Integer] The total number of data files in the inventory (dynamically calculated)
|
76
|
-
attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|t| t.to_s}
|
76
|
+
attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new { |t| t.to_s }
|
77
77
|
|
78
78
|
def file_count
|
79
79
|
groups.inject(0) { |sum, group| sum + group.file_count }
|
@@ -81,7 +81,7 @@ module Moab
|
|
81
81
|
|
82
82
|
# @attribute
|
83
83
|
# @return [Integer] The total size (in bytes) of all files in the inventory (dynamically calculated)
|
84
|
-
attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|t| t.to_s}
|
84
|
+
attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new { |t| t.to_s }
|
85
85
|
|
86
86
|
def byte_count
|
87
87
|
groups.inject(0) { |sum, group| sum + group.byte_count }
|
@@ -89,7 +89,7 @@ module Moab
|
|
89
89
|
|
90
90
|
# @attribute
|
91
91
|
# @return [Integer] The total disk usage (in 1 kB blocks) of all data files (estimating du -k result) (dynamically calculated)
|
92
|
-
attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|t| t.to_s}
|
92
|
+
attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new { |t| t.to_s }
|
93
93
|
|
94
94
|
def block_count
|
95
95
|
groups.inject(0) { |sum, group| sum + group.block_count }
|
@@ -101,20 +101,20 @@ module Moab
|
|
101
101
|
|
102
102
|
# @return [Array<FileGroup>] The set of data groups that contain files
|
103
103
|
def non_empty_groups
|
104
|
-
|
104
|
+
groups.select { |group| !group.files.empty? }
|
105
105
|
end
|
106
106
|
|
107
107
|
# @param non_empty [Boolean] if true, return group_id's only for groups having files
|
108
108
|
# @return [Array<String>] group identifiers contained in this file inventory
|
109
109
|
def group_ids(non_empty=nil)
|
110
|
-
|
111
|
-
|
110
|
+
my_groups = non_empty ? self.non_empty_groups : groups
|
111
|
+
my_groups.map { |g| g.group_id }
|
112
112
|
end
|
113
113
|
|
114
114
|
# @param [String] group_id The identifier of the group to be selected
|
115
115
|
# @return [FileGroup] The file group in this inventory for the specified group_id
|
116
116
|
def group(group_id)
|
117
|
-
|
117
|
+
groups.find { |group| group.group_id == group_id}
|
118
118
|
end
|
119
119
|
|
120
120
|
# @param group_id [String] File group identifier (e.g. data, metadata, manifests)
|
@@ -134,10 +134,10 @@ module Moab
|
|
134
134
|
# @return [FileSignature] The signature of the specified file
|
135
135
|
def file_signature(group_id, file_id)
|
136
136
|
file_group = group(group_id)
|
137
|
-
errmsg = "group #{group_id} not found for #{
|
137
|
+
errmsg = "group #{group_id} not found for #{digital_object_id} - #{version_id}"
|
138
138
|
raise FileNotFoundException, errmsg if file_group.nil?
|
139
139
|
file_signature = file_group.path_hash[file_id]
|
140
|
-
errmsg = "#{group_id} file #{file_id} not found for #{
|
140
|
+
errmsg = "#{group_id} file #{file_id} not found for #{digital_object_id} - #{version_id}"
|
141
141
|
raise FileNotFoundException, errmsg if file_signature.nil?
|
142
142
|
file_signature
|
143
143
|
end
|
@@ -154,7 +154,7 @@ module Moab
|
|
154
154
|
# @api internal
|
155
155
|
# @return [String] Concatenation of the objectId and versionId values
|
156
156
|
def package_id
|
157
|
-
"#{
|
157
|
+
"#{digital_object_id}-v#{version_id}"
|
158
158
|
end
|
159
159
|
|
160
160
|
# @api internal
|
@@ -174,7 +174,6 @@ module Moab
|
|
174
174
|
else
|
175
175
|
data_source
|
176
176
|
end
|
177
|
-
|
178
177
|
end
|
179
178
|
end
|
180
179
|
|
@@ -186,10 +185,10 @@ module Moab
|
|
186
185
|
# @example {include:file:spec/features/inventory/harvest_inventory_spec.rb}
|
187
186
|
def inventory_from_directory(data_dir, group_id=nil)
|
188
187
|
if group_id
|
189
|
-
|
188
|
+
groups << FileGroup.new(group_id: group_id).group_from_directory(data_dir)
|
190
189
|
else
|
191
190
|
['content', 'metadata'].each do |gid|
|
192
|
-
|
191
|
+
groups << FileGroup.new(group_id: gid).group_from_directory(Pathname(data_dir).join(gid))
|
193
192
|
end
|
194
193
|
end
|
195
194
|
self
|
@@ -203,7 +202,7 @@ module Moab
|
|
203
202
|
signatures_from_bag = signatures_from_bagit_manifests(bag_pathname)
|
204
203
|
bag_data_subdirs = bag_pathname.join('data').children
|
205
204
|
bag_data_subdirs.each do |subdir|
|
206
|
-
|
205
|
+
groups << FileGroup.new(:group_id=>subdir.basename.to_s).group_from_bagit_subdir(subdir, signatures_from_bag)
|
207
206
|
end
|
208
207
|
self
|
209
208
|
end
|
@@ -212,12 +211,11 @@ module Moab
|
|
212
211
|
# @return [Hash<Pathname,FileSignature>] The fixity data present in the bag's manifest files
|
213
212
|
def signatures_from_bagit_manifests(bag_pathname)
|
214
213
|
manifest_pathname = Hash.new
|
215
|
-
|
216
|
-
checksum_types.each do |type|
|
214
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
217
215
|
manifest_pathname[type] = bag_pathname.join("manifest-#{type}.txt")
|
218
216
|
end
|
219
|
-
signatures = Hash.new { |hash,path| hash[path] = FileSignature.new }
|
220
|
-
|
217
|
+
signatures = Hash.new { |hash, path| hash[path] = FileSignature.new }
|
218
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
221
219
|
if manifest_pathname[type].exist?
|
222
220
|
manifest_pathname[type].each_line do |line|
|
223
221
|
line.chomp!
|
@@ -230,7 +228,7 @@ module Moab
|
|
230
228
|
end
|
231
229
|
end
|
232
230
|
end
|
233
|
-
signatures.each {|file_pathname,signature| signature.size = file_pathname.size}
|
231
|
+
signatures.each { |file_pathname, signature| signature.size = file_pathname.size }
|
234
232
|
signatures
|
235
233
|
end
|
236
234
|
|
@@ -55,7 +55,7 @@ module Moab
|
|
55
55
|
attribute :report_datetime, String, :tag => 'reportDatetime'
|
56
56
|
|
57
57
|
def report_datetime=(datetime)
|
58
|
-
@report_datetime=Moab::UtcTime.input(datetime)
|
58
|
+
@report_datetime = Moab::UtcTime.input(datetime)
|
59
59
|
end
|
60
60
|
|
61
61
|
def report_datetime
|
@@ -115,7 +115,7 @@ module Moab
|
|
115
115
|
inv_diff = self.to_hash
|
116
116
|
inv_diff["group_differences"].each_value do |group_diff|
|
117
117
|
delete_subsets = []
|
118
|
-
group_diff["subsets"].each do |change_type,subset|
|
118
|
+
group_diff["subsets"].each do |change_type, subset|
|
119
119
|
delete_subsets << change_type if change_type == "identical" or subset["count"] == 0
|
120
120
|
end
|
121
121
|
delete_subsets.each do |change_type|
|
@@ -68,7 +68,7 @@ module Moab
|
|
68
68
|
# @return [Integer] The total disk usage (in 1 kB blocks) of all files that share this manifestation's signature
|
69
69
|
# (estimating du -k result)
|
70
70
|
def block_count
|
71
|
-
block_size=1024
|
71
|
+
block_size = 1024
|
72
72
|
instance_blocks = (signature.size.to_i + block_size - 1)/block_size
|
73
73
|
file_count * instance_blocks
|
74
74
|
end
|
@@ -9,7 +9,7 @@ module Moab
|
|
9
9
|
METADATA_DIR = "metadata".freeze
|
10
10
|
CONTENT_DIR = "content".freeze
|
11
11
|
EXPECTED_DATA_SUB_DIRS = [CONTENT_DIR, METADATA_DIR].freeze
|
12
|
-
IMPLICIT_DIRS = ['.', '..'
|
12
|
+
IMPLICIT_DIRS = ['.', '..'].freeze # unlike Find.find, Dir.entries returns the current/parent dirs
|
13
13
|
DATA_DIR = "data".freeze
|
14
14
|
MANIFESTS_DIR = 'manifests'.freeze
|
15
15
|
EXPECTED_VERSION_SUB_DIRS = [DATA_DIR, MANIFESTS_DIR].freeze
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: moab-versioning
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Darren Weber
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2018-03-20 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: confstruct
|
@@ -69,20 +69,6 @@ dependencies:
|
|
69
69
|
- - ">="
|
70
70
|
- !ruby/object:Gem::Version
|
71
71
|
version: '0'
|
72
|
-
- !ruby/object:Gem::Dependency
|
73
|
-
name: systemu
|
74
|
-
requirement: !ruby/object:Gem::Requirement
|
75
|
-
requirements:
|
76
|
-
- - ">="
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
version: '0'
|
79
|
-
type: :runtime
|
80
|
-
prerelease: false
|
81
|
-
version_requirements: !ruby/object:Gem::Requirement
|
82
|
-
requirements:
|
83
|
-
- - ">="
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: '0'
|
86
72
|
- !ruby/object:Gem::Dependency
|
87
73
|
name: druid-tools
|
88
74
|
requirement: !ruby/object:Gem::Requirement
|
@@ -97,20 +83,6 @@ dependencies:
|
|
97
83
|
- - ">="
|
98
84
|
- !ruby/object:Gem::Version
|
99
85
|
version: 1.0.0
|
100
|
-
- !ruby/object:Gem::Dependency
|
101
|
-
name: awesome_print
|
102
|
-
requirement: !ruby/object:Gem::Requirement
|
103
|
-
requirements:
|
104
|
-
- - ">="
|
105
|
-
- !ruby/object:Gem::Version
|
106
|
-
version: '0'
|
107
|
-
type: :development
|
108
|
-
prerelease: false
|
109
|
-
version_requirements: !ruby/object:Gem::Requirement
|
110
|
-
requirements:
|
111
|
-
- - ">="
|
112
|
-
- !ruby/object:Gem::Version
|
113
|
-
version: '0'
|
114
86
|
- !ruby/object:Gem::Dependency
|
115
87
|
name: equivalent-xml
|
116
88
|
requirement: !ruby/object:Gem::Requirement
|
@@ -139,20 +111,6 @@ dependencies:
|
|
139
111
|
- - ">="
|
140
112
|
- !ruby/object:Gem::Version
|
141
113
|
version: '0'
|
142
|
-
- !ruby/object:Gem::Dependency
|
143
|
-
name: rdoc
|
144
|
-
requirement: !ruby/object:Gem::Requirement
|
145
|
-
requirements:
|
146
|
-
- - ">="
|
147
|
-
- !ruby/object:Gem::Version
|
148
|
-
version: '0'
|
149
|
-
type: :development
|
150
|
-
prerelease: false
|
151
|
-
version_requirements: !ruby/object:Gem::Requirement
|
152
|
-
requirements:
|
153
|
-
- - ">="
|
154
|
-
- !ruby/object:Gem::Version
|
155
|
-
version: '0'
|
156
114
|
- !ruby/object:Gem::Dependency
|
157
115
|
name: rspec
|
158
116
|
requirement: !ruby/object:Gem::Requirement
|
@@ -181,20 +139,6 @@ dependencies:
|
|
181
139
|
- - ">="
|
182
140
|
- !ruby/object:Gem::Version
|
183
141
|
version: '0'
|
184
|
-
- !ruby/object:Gem::Dependency
|
185
|
-
name: yard
|
186
|
-
requirement: !ruby/object:Gem::Requirement
|
187
|
-
requirements:
|
188
|
-
- - ">="
|
189
|
-
- !ruby/object:Gem::Version
|
190
|
-
version: '0'
|
191
|
-
type: :development
|
192
|
-
prerelease: false
|
193
|
-
version_requirements: !ruby/object:Gem::Requirement
|
194
|
-
requirements:
|
195
|
-
- - ">="
|
196
|
-
- !ruby/object:Gem::Version
|
197
|
-
version: '0'
|
198
142
|
- !ruby/object:Gem::Dependency
|
199
143
|
name: pry-byebug
|
200
144
|
requirement: !ruby/object:Gem::Requirement
|
@@ -247,6 +191,7 @@ files:
|
|
247
191
|
- lib/moab.rb
|
248
192
|
- lib/moab/bagger.rb
|
249
193
|
- lib/moab/config.rb
|
194
|
+
- lib/moab/deposit_bag_validator.rb
|
250
195
|
- lib/moab/exceptions.rb
|
251
196
|
- lib/moab/file_group.rb
|
252
197
|
- lib/moab/file_group_difference.rb
|
@@ -280,7 +225,6 @@ files:
|
|
280
225
|
- lib/stanford/storage_object_validator.rb
|
281
226
|
- lib/stanford/storage_repository.rb
|
282
227
|
- lib/stanford/storage_services.rb
|
283
|
-
- lib/tasks/yard.rake
|
284
228
|
homepage: https://github.com/sul-dlss/moab-versioning
|
285
229
|
licenses:
|
286
230
|
- Apache-2.0
|
data/lib/tasks/yard.rake
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
desc "Generate RDoc"
|
2
|
-
task :doc => ['doc:generate']
|
3
|
-
|
4
|
-
namespace :doc do
|
5
|
-
project_root = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
|
6
|
-
doc_destination = File.join(project_root, 'doc')
|
7
|
-
|
8
|
-
begin
|
9
|
-
require 'yard'
|
10
|
-
require 'yard/rake/yardoc_task'
|
11
|
-
|
12
|
-
YARD::Rake::YardocTask.new(:generate) do |yt|
|
13
|
-
yt.files = Dir.glob(File.join(project_root, 'lib', '*.rb')) +
|
14
|
-
Dir.glob(File.join(project_root, 'lib', 'serializer', '*.rb')) +
|
15
|
-
Dir.glob(File.join(project_root, 'lib', 'moab', '*.rb')) +
|
16
|
-
Dir.glob(File.join(project_root, 'lib', 'stanford', '*.rb')) +
|
17
|
-
['-'] +
|
18
|
-
[ File.join(project_root, 'LICENSE.rdoc') ]
|
19
|
-
|
20
|
-
yt.options = ['--output-dir', doc_destination, '--hide-void-return']
|
21
|
-
end
|
22
|
-
rescue LoadError
|
23
|
-
desc "Generate YARD Documentation"
|
24
|
-
task :generate do
|
25
|
-
abort "Please install the YARD gem to generate rdoc."
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
desc "Remove generated documentation"
|
30
|
-
task :clean do
|
31
|
-
rm_r doc_destination if File.exists?(doc_destination)
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|