moab-versioning 4.1.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/moab.rb +2 -0
- data/lib/moab/bagger.rb +53 -61
- data/lib/moab/deposit_bag_validator.rb +325 -0
- data/lib/moab/file_group.rb +9 -10
- data/lib/moab/file_group_difference.rb +21 -23
- data/lib/moab/file_inventory.rb +20 -22
- data/lib/moab/file_inventory_difference.rb +2 -2
- data/lib/moab/file_manifestation.rb +1 -1
- data/lib/moab/storage_object_validator.rb +1 -1
- metadata +3 -59
- data/lib/tasks/yard.rake +0 -34
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c13dd09bef589a3c4a3314737c2d3b6dfb4589f
|
4
|
+
data.tar.gz: 629913d22d1ba0b40f62671aabc7cca01767c276
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe0076d5a2e868613c4e5f081abdee5fd93e76effccfdf4219d7849d8f9944cc11ff728b407ffac22a0d02b9a71c30a75e2077c3f11698bb815e336b951f33b4
|
7
|
+
data.tar.gz: 6c3e42f3cf596cd16e2d62858da725e93fcf37ab835bfcc4f9a3951eb4bbd569845210f9dfa40f39048bab235162fe2d169494a5469a31229924ad29dbea01db
|
data/lib/moab.rb
CHANGED
@@ -29,6 +29,7 @@
|
|
29
29
|
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
30
30
|
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
31
31
|
module Moab
|
32
|
+
DEFAULT_CHECKSUM_TYPES = [:md5, :sha1, :sha256].freeze
|
32
33
|
end
|
33
34
|
|
34
35
|
require 'serializer'
|
@@ -57,3 +58,4 @@ require 'moab/storage_services'
|
|
57
58
|
require 'moab/exceptions'
|
58
59
|
require 'moab/verification_result'
|
59
60
|
require 'moab/storage_object_validator'
|
61
|
+
require 'moab/deposit_bag_validator'
|
data/lib/moab/bagger.rb
CHANGED
@@ -1,6 +1,3 @@
|
|
1
|
-
require 'moab'
|
2
|
-
require 'systemu'
|
3
|
-
|
4
1
|
module Moab
|
5
2
|
|
6
3
|
# A class used to create a BagIt package from a version inventory and a set of source files.
|
@@ -60,8 +57,8 @@ module Moab
|
|
60
57
|
# @api internal
|
61
58
|
# @return [void] Generate the bagit.txt tag file
|
62
59
|
def create_bagit_txt()
|
63
|
-
|
64
|
-
|
60
|
+
bag_pathname.mkpath
|
61
|
+
bag_pathname.join("bagit.txt").open('w') do |f|
|
65
62
|
f.puts "Tag-File-Character-Encoding: UTF-8"
|
66
63
|
f.puts "BagIt-Version: 0.97"
|
67
64
|
end
|
@@ -70,12 +67,12 @@ module Moab
|
|
70
67
|
# @return [NilClass] Delete the bagit files
|
71
68
|
def delete_bag()
|
72
69
|
# make sure this looks like a bag before deleting
|
73
|
-
if
|
74
|
-
if
|
75
|
-
|
70
|
+
if bag_pathname.join('bagit.txt').exist?
|
71
|
+
if bag_pathname.join('data').exist?
|
72
|
+
bag_pathname.rmtree
|
76
73
|
else
|
77
|
-
|
78
|
-
|
74
|
+
bag_pathname.children.each {|file| file.delete}
|
75
|
+
bag_pathname.rmdir
|
79
76
|
end
|
80
77
|
end
|
81
78
|
nil
|
@@ -83,8 +80,8 @@ module Moab
|
|
83
80
|
|
84
81
|
# @param tar_pathname [Pathname] The location of the tar file (default is based on bag location)
|
85
82
|
def delete_tarfile()
|
86
|
-
bag_name =
|
87
|
-
bag_parent =
|
83
|
+
bag_name = bag_pathname.basename
|
84
|
+
bag_parent = bag_pathname.parent
|
88
85
|
tar_pathname = bag_parent.join("#{bag_name}.tar")
|
89
86
|
tar_pathname.delete if tar_pathname.exist?
|
90
87
|
end
|
@@ -109,17 +106,17 @@ module Moab
|
|
109
106
|
# @return [FileInventory] Create, write, and return the inventory of the files that will become the payload
|
110
107
|
def create_bag_inventory(package_mode)
|
111
108
|
@package_mode = package_mode
|
112
|
-
|
109
|
+
bag_pathname.mkpath
|
113
110
|
case package_mode
|
114
111
|
when :depositor
|
115
|
-
|
116
|
-
@bag_inventory =
|
117
|
-
|
112
|
+
version_inventory.write_xml_file(bag_pathname, 'version')
|
113
|
+
@bag_inventory = signature_catalog.version_additions(version_inventory)
|
114
|
+
bag_inventory.write_xml_file(bag_pathname, 'additions')
|
118
115
|
when :reconstructor
|
119
|
-
@bag_inventory =
|
120
|
-
|
116
|
+
@bag_inventory = version_inventory
|
117
|
+
bag_inventory.write_xml_file(bag_pathname, 'version')
|
121
118
|
end
|
122
|
-
|
119
|
+
bag_inventory
|
123
120
|
end
|
124
121
|
|
125
122
|
# @api internal
|
@@ -128,9 +125,9 @@ module Moab
|
|
128
125
|
# This method uses Unix hard links in order to greatly speed up the process.
|
129
126
|
# Hard links, however, require that the target bag must be created within the same filesystem as the source files
|
130
127
|
def fill_payload(source_base_pathname)
|
131
|
-
|
128
|
+
bag_inventory.groups.each do |group|
|
132
129
|
group_id = group.group_id
|
133
|
-
case
|
130
|
+
case package_mode
|
134
131
|
when :depositor
|
135
132
|
deposit_group(group_id, source_base_pathname.join(group_id))
|
136
133
|
when :reconstructor
|
@@ -144,9 +141,9 @@ module Moab
|
|
144
141
|
# @return [Boolean] Copy all the files listed in the group inventory to the bag.
|
145
142
|
# Return true if successful or nil if the group was not found in the inventory
|
146
143
|
def deposit_group(group_id, source_dir)
|
147
|
-
group =
|
144
|
+
group = bag_inventory.group(group_id)
|
148
145
|
return nil? if group.nil? or group.files.empty?
|
149
|
-
target_dir =
|
146
|
+
target_dir = bag_pathname.join('data',group_id)
|
150
147
|
group.path_list.each do |relative_path|
|
151
148
|
source = source_dir.join(relative_path)
|
152
149
|
target = target_dir.join(relative_path)
|
@@ -161,11 +158,11 @@ module Moab
|
|
161
158
|
# @return [Boolean] Copy all the files listed in the group inventory to the bag.
|
162
159
|
# Return true if successful or nil if the group was not found in the inventory
|
163
160
|
def reconstuct_group(group_id, storage_object_dir)
|
164
|
-
group =
|
161
|
+
group = bag_inventory.group(group_id)
|
165
162
|
return nil? if group.nil? or group.files.empty?
|
166
|
-
target_dir =
|
163
|
+
target_dir = bag_pathname.join('data',group_id)
|
167
164
|
group.files.each do |file|
|
168
|
-
catalog_entry =
|
165
|
+
catalog_entry = signature_catalog.signature_hash[file.signature]
|
169
166
|
source = storage_object_dir.join(catalog_entry.storage_path)
|
170
167
|
file.instances.each do |instance|
|
171
168
|
target = target_dir.join(instance.path)
|
@@ -190,24 +187,23 @@ module Moab
|
|
190
187
|
def create_payload_manifests
|
191
188
|
manifest_pathname = Hash.new
|
192
189
|
manifest_file = Hash.new
|
193
|
-
|
194
|
-
|
195
|
-
manifest_pathname[type] = @bag_pathname.join("manifest-#{type}.txt")
|
190
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
191
|
+
manifest_pathname[type] = bag_pathname.join("manifest-#{type}.txt")
|
196
192
|
manifest_file[type] = manifest_pathname[type].open('w')
|
197
193
|
end
|
198
|
-
|
194
|
+
bag_inventory.groups.each do |group|
|
199
195
|
group.files.each do |file|
|
200
196
|
fixity = file.signature.fixity
|
201
197
|
file.instances.each do |instance|
|
202
198
|
data_path = File.join('data', group.group_id, instance.path)
|
203
|
-
|
199
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
204
200
|
manifest_file[type].puts("#{fixity[type]} #{data_path}") if fixity[type]
|
205
201
|
end
|
206
202
|
end
|
207
203
|
end
|
208
204
|
end
|
209
205
|
ensure
|
210
|
-
|
206
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
211
207
|
if manifest_file[type]
|
212
208
|
manifest_file[type].close
|
213
209
|
manifest_pathname[type].delete if
|
@@ -219,10 +215,10 @@ module Moab
|
|
219
215
|
# @api internal
|
220
216
|
# @return [void] Generate the bag-info.txt tag file
|
221
217
|
def create_bag_info_txt
|
222
|
-
|
223
|
-
f.puts "External-Identifier: #{
|
224
|
-
f.puts "Payload-Oxum: #{
|
225
|
-
f.puts "Bag-Size: #{
|
218
|
+
bag_pathname.join("bag-info.txt").open('w') do |f|
|
219
|
+
f.puts "External-Identifier: #{bag_inventory.package_id}"
|
220
|
+
f.puts "Payload-Oxum: #{bag_inventory.byte_count}.#{bag_inventory.file_count}"
|
221
|
+
f.puts "Bag-Size: #{bag_inventory.human_size}"
|
226
222
|
end
|
227
223
|
end
|
228
224
|
|
@@ -231,22 +227,21 @@ module Moab
|
|
231
227
|
def create_tagfile_manifests()
|
232
228
|
manifest_pathname = Hash.new
|
233
229
|
manifest_file = Hash.new
|
234
|
-
|
235
|
-
|
236
|
-
manifest_pathname[type] = @bag_pathname.join("tagmanifest-#{type}.txt")
|
230
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
231
|
+
manifest_pathname[type] = bag_pathname.join("tagmanifest-#{type}.txt")
|
237
232
|
manifest_file[type] = manifest_pathname[type].open('w')
|
238
233
|
end
|
239
|
-
|
234
|
+
bag_pathname.children.each do |file|
|
240
235
|
unless file.directory? || file.basename.to_s[0, 11] == 'tagmanifest'
|
241
236
|
signature = FileSignature.new.signature_from_file(file)
|
242
237
|
fixity = signature.fixity
|
243
|
-
|
238
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
244
239
|
manifest_file[type].puts("#{fixity[type]} #{file.basename}") if fixity[type]
|
245
240
|
end
|
246
241
|
end
|
247
242
|
end
|
248
243
|
ensure
|
249
|
-
|
244
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
250
245
|
if manifest_file[type]
|
251
246
|
manifest_file[type].close
|
252
247
|
manifest_pathname[type].delete if
|
@@ -257,8 +252,8 @@ module Moab
|
|
257
252
|
|
258
253
|
# @return [Boolean] Create a tar file containing the bag
|
259
254
|
def create_tarfile(tar_pathname=nil)
|
260
|
-
bag_name =
|
261
|
-
bag_parent =
|
255
|
+
bag_name = bag_pathname.basename
|
256
|
+
bag_parent = bag_pathname.parent
|
262
257
|
tar_pathname ||= bag_parent.join("#{bag_name}.tar")
|
263
258
|
tar_cmd="cd '#{bag_parent}'; tar --dereference --force-local -cf '#{tar_pathname}' '#{bag_name}'"
|
264
259
|
begin
|
@@ -268,27 +263,24 @@ module Moab
|
|
268
263
|
end
|
269
264
|
raise "Unable to create tarfile #{tar_pathname}" unless tar_pathname.exist?
|
270
265
|
return true
|
271
|
-
|
272
266
|
end
|
273
267
|
|
274
|
-
# Executes a system command in a subprocess
|
275
|
-
#
|
276
|
-
#
|
277
|
-
# The exception's message will contain the explaination of the failure.
|
278
|
-
# @param [String] command the command to be executed
|
279
|
-
# @return [String] stdout from the command if execution was successful
|
268
|
+
# Executes a system command in a subprocess
|
269
|
+
# if command isn't successful, grabs stdout and stderr and puts them in ruby exception message
|
270
|
+
# @return stdout if execution was successful
|
280
271
|
def shell_execute(command)
|
281
|
-
|
282
|
-
|
283
|
-
|
272
|
+
require 'open3'
|
273
|
+
stdout, stderr, status = Open3.capture3(command.chomp)
|
274
|
+
if status.success? && status.exitstatus.zero?
|
275
|
+
stdout
|
276
|
+
else
|
277
|
+
msg = "Shell command failed: [#{command}] caused by <STDERR = #{stderr}>"
|
278
|
+
msg << " STDOUT = #{stdout}" if stdout && stdout.length.positive?
|
279
|
+
raise(StandardError, msg)
|
284
280
|
end
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
msg << " STDOUT = #{stdout.split($/).join('; ')}" if (stdout && (stdout.length > 0))
|
289
|
-
raise msg
|
281
|
+
rescue SystemCallError => e
|
282
|
+
msg = "Shell command failed: [#{command}] caused by #{e.inspect}"
|
283
|
+
raise(StandardError, msg)
|
290
284
|
end
|
291
|
-
|
292
285
|
end
|
293
|
-
|
294
286
|
end
|
@@ -0,0 +1,325 @@
|
|
1
|
+
module Moab
  # Given a deposit bag, ensures the contents are valid for becoming a StorageObjectVersion.
  # This is a Shameless Green implementation, combining code from:
  #  - sdr-preservation-core/lib/sdr_ingest/validate_bag  <-- old preservation robots
  #  - archive-utils/lib/bagit_bag  <-- gem only used by sdr-preservation-robots
  #  - archive-utils/lib/file_fixity
  #  - archive-utils/lib/fixity
  # This code adds duplication to this gem (see github issue #119);
  # for example, computing checksums is done in both
  #  - deposit_bag_validator
  #  - file_signature
  class DepositBagValidator
    # Error codes; each is a key of ERROR_CODE_TO_MESSAGES.
    BAG_DIR_NOT_FOUND = :bag_dir_not_found
    CHECKSUM_MISMATCH = :checksum_mismatch
    CHECKSUM_TYPE_UNRECOGNIZED = :checksum_type_unrecognized
    INVALID_VERSION_XXX_XML = :invalid_versionXxx_xml
    PAYLOAD_SIZE_MISMATCH = :payload_size_mismatch
    REQUIRED_FILE_NOT_FOUND = :required_file_not_found
    VERSION_MISMATCH_TO_MOAB = :version_mismatch_to_moab
    VERSION_MISSING_FROM_FILE = :version_missing_from_file

    # Format-string templates; the %{name} references are filled from the err_data hash
    # passed to #single_error_hash.
    ERROR_CODE_TO_MESSAGES = {
      BAG_DIR_NOT_FOUND => "Deposit bag directory %{bag_dir} does not exist",
      CHECKSUM_MISMATCH => "Failed %{manifest_type} verification. Differences: \n%{diffs}",
      CHECKSUM_TYPE_UNRECOGNIZED => "Checksum type unrecognized: %{checksum_type}; file: %{filename}",
      INVALID_VERSION_XXX_XML => "Unable to parse %{file_pathname}: %{err_info}",
      PAYLOAD_SIZE_MISMATCH => "Failed payload size verification. Expected: %{bag_info_sizes}; found: %{generated_sizes}",
      REQUIRED_FILE_NOT_FOUND => "Deposit bag required file %{file_pathname} not found",
      VERSION_MISMATCH_TO_MOAB => "Version mismatch in %{file_pathname}: Moab expected %{new_version}; found %{file_version}",
      VERSION_MISSING_FROM_FILE => "Version xml file %{version_file} missing data at %{xpath} containing version id"
    }.freeze

    REQUIRED_MANIFEST_CHECKSUM_TYPE = 'sha256'.freeze
    RECOGNIZED_CHECKSUM_ALGORITHMS = [:md5, :sha1, :sha256, :sha384, :sha512].freeze

    TAGMANIFEST = 'tagmanifest'.freeze
    MANIFEST = 'manifest'.freeze
    DATA_DIR_BASENAME = 'data'.freeze
    BAG_INFO_TXT_BASENAME = 'bag-info.txt'.freeze
    VERSION_ADDITIONS_BASENAME = 'versionAdditions.xml'.freeze
    VERSION_INVENTORY_BASENAME = 'versionInventory.xml'.freeze
    VERSION_METADATA_PATH = "#{DATA_DIR_BASENAME}/metadata/versionMetadata.xml".freeze

    # Paths (relative to the bag directory) that must exist before any other check is run.
    REQUIRED_BAG_FILES = [
      DATA_DIR_BASENAME,
      'bagit.txt'.freeze,
      BAG_INFO_TXT_BASENAME,
      "#{MANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt".freeze,
      "#{TAGMANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt".freeze,
      VERSION_ADDITIONS_BASENAME,
      VERSION_INVENTORY_BASENAME,
      VERSION_METADATA_PATH
    ].freeze

    attr_reader :deposit_bag_pathname, :expected_new_version, :result_array

    # @param storage_object an object responding to #deposit_bag_pathname and #current_version_id;
    #   the expected version of the deposit is current_version_id + 1
    def initialize(storage_object)
      @deposit_bag_pathname = storage_object.deposit_bag_pathname
      @expected_new_version = storage_object.current_version_id + 1
      @result_array = []
    end

    # Runs the checks in order: bag directory exists, required files exist, version ids,
    # tagmanifests, payload size, payload manifests.  The first two short-circuit on failure
    # because the later checks read the required files.
    # @return [Array<Hash>] tiny error hashes ({ error_code => message }), allowing multiple
    #   occurrences of a single error code; empty when no problem was found
    def validation_errors
      return [single_error_hash(BAG_DIR_NOT_FOUND, bag_dir: deposit_bag_pathname)] unless bag_dir_exists?
      return result_array unless required_bag_files_exist?
      verify_version
      verify_tagmanifests
      verify_payload_size
      verify_payload_manifests
      result_array # attr that accumulates any errors encountered along the way
    end

    private

    def bag_dir_exists?
      deposit_bag_pathname.exist?
    end

    # Records a REQUIRED_FILE_NOT_FOUND error for each missing entry of REQUIRED_BAG_FILES.
    # Assumes this is called when result_array is empty, as subsequent checks will use these required files.
    # @return [Boolean] true when result_array is (still) empty
    def required_bag_files_exist?
      REQUIRED_BAG_FILES.each do |filename|
        pathname = deposit_bag_pathname.join(filename)
        result_array << single_error_hash(REQUIRED_FILE_NOT_FOUND, file_pathname: pathname) unless pathname.exist?
      end
      result_array.empty?
    end

    # Compares the version id found in versionMetadata.xml, versionAdditions.xml and
    # versionInventory.xml (in that order) against expected_new_version.
    def verify_version
      version_md_pathname = deposit_bag_pathname.join(VERSION_METADATA_PATH)
      version_from_file = last_version_id_from_version_md_xml(version_md_pathname)
      verify_version_from_xml_file(version_md_pathname, version_from_file) if version_from_file

      [VERSION_ADDITIONS_BASENAME, VERSION_INVENTORY_BASENAME].each do |basename|
        manifest_pathname = deposit_bag_pathname.join(basename)
        version_from_file = version_id_from_version_manifest_xml(manifest_pathname)
        verify_version_from_xml_file(manifest_pathname, version_from_file) if version_from_file
      end
    end

    def last_version_id_from_version_md_xml(version_md_pathname)
      last_version_id_from_xml(version_md_pathname, '/versionMetadata/version/@versionId')
    end

    def version_id_from_version_manifest_xml(version_manifest_xml_pathname)
      last_version_id_from_xml(version_manifest_xml_pathname, '/fileInventory/@versionId')
    end

    # @param pathname [Pathname] xml file to parse (strict mode)
    # @param xpath [String] expression selecting the version id node(s); the last match is used
    # @return [Integer, nil] the version id, or nil after recording VERSION_MISSING_FROM_FILE
    #   (no node matched) or INVALID_VERSION_XXX_XML (any StandardError while opening/parsing)
    def last_version_id_from_xml(pathname, xpath)
      # block form so the file handle is closed once the document has been parsed
      doc = File.open(pathname.to_s) { |file| Nokogiri::XML(file, &:strict) }
      version_node = doc.xpath(xpath).last
      return version_node.text.to_i if version_node
      err_data = {
        version_file: pathname,
        xpath: xpath
      }
      result_array << single_error_hash(VERSION_MISSING_FROM_FILE, err_data)
      nil
    rescue StandardError => e
      err_data = {
        file_pathname: pathname,
        err_info: "#{e}\n#{e.backtrace}"
      }
      result_array << single_error_hash(INVALID_VERSION_XXX_XML, err_data)
      nil
    end

    # Records VERSION_MISMATCH_TO_MOAB unless found equals expected_new_version.
    def verify_version_from_xml_file(file_pathname, found)
      return if found == expected_new_version
      err_data = {
        file_pathname: file_pathname,
        new_version: expected_new_version,
        file_version: found
      }
      result_array << single_error_hash(VERSION_MISMATCH_TO_MOAB, err_data)
    end

    # adds to result_array if tagmanifest checksums don't match generated checksums
    def verify_tagmanifests
      tagmanifests_checksums_hash = checksums_hash_from_manifest_files(TAGMANIFEST)
      types_to_generate = checksum_types_from_manifest_checksums_hash(tagmanifests_checksums_hash)
      generated_checksums_hash = generate_tagmanifest_checksums_hash(types_to_generate)
      verify_manifest_checksums(TAGMANIFEST, tagmanifests_checksums_hash, generated_checksums_hash)
    end

    # adds to result_array if manifest checksums don't match generated checksums
    def verify_payload_manifests
      manifests_checksums_hash = checksums_hash_from_manifest_files(MANIFEST)
      types_to_generate = checksum_types_from_manifest_checksums_hash(manifests_checksums_hash)
      generated_checksums_hash = generate_payload_checksums(types_to_generate)
      verify_manifest_checksums(MANIFEST, manifests_checksums_hash, generated_checksums_hash)
    end

    # construct hash based on manifest_type-alg.txt files in bag home dir
    # key: file_name, relative to base_path, value: hash of checksum alg => checksum value
    # A file named manifest_type-alg.txt whose alg is not in RECOGNIZED_CHECKSUM_ALGORITHMS
    # is reported as CHECKSUM_TYPE_UNRECOGNIZED and not read.
    def checksums_hash_from_manifest_files(manifest_type)
      # anchored on both ends with a literal ".txt", so e.g. "manifest-sha256.txt.bak" is not a manifest
      manifest_name_regex = /\A#{Regexp.escape(manifest_type)}-(.*)\.txt\z/
      checksums_hash = {}
      deposit_bag_pathname.children.each do |child_pathname|
        next unless child_pathname.file?
        match_result = manifest_name_regex.match(child_pathname.basename.to_s)
        next unless match_result
        checksum_type = match_result.captures.first.to_sym
        unless RECOGNIZED_CHECKSUM_ALGORITHMS.include?(checksum_type)
          result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: child_pathname)
          next
        end
        child_pathname.readlines.each do |raw_line|
          # non-bang strip: chomp!/strip! return nil when nothing changes (e.g. last line lacking a newline)
          line = raw_line.strip
          next if line.empty? # blank lines carry no checksum entry
          checksum, file_name = line.split(/[\s*]+/, 2)
          (checksums_hash[file_name] ||= {})[checksum_type] = checksum
        end
      end
      checksums_hash
    end

    # generate hash of checksums by file name for bag home dir files
    def generate_tagmanifest_checksums_hash(types_to_generate)
      # all names in the bag home dir except those starting with 'tagmanifest'
      home_dir_pathnames = deposit_bag_pathname.children.reject { |file| file.basename.to_s.start_with?(TAGMANIFEST) }
      hash_with_full_pathnames = generate_checksums_hash(home_dir_pathnames, types_to_generate)
      # return hash keys as basenames only
      hash_with_full_pathnames.map { |k, v| [Pathname.new(k).basename.to_s, v] }.to_h
    end

    # generate hash of checksums by file name for bag data dir files
    def generate_payload_checksums(types_to_generate)
      data_pathnames = deposit_bag_pathname.join(DATA_DIR_BASENAME).find
      hash_with_full_pathnames = generate_checksums_hash(data_pathnames, types_to_generate)
      # return hash keys beginning with 'data/'
      hash_with_full_pathnames.map { |k, v| [Pathname.new(k).relative_path_from(deposit_bag_pathname).to_s, v] }.to_h
    end

    # @param pathnames [Enumerable<Pathname>] candidates; entries that are not regular files are skipped
    # @return [Hash<String, Hash>] full path string => { checksum type => hex digest }
    def generate_checksums_hash(pathnames, types_to_generate)
      file_checksums_hash = {}
      pathnames.each do |pathname|
        file_checksums_hash[pathname.to_s] = generated_checksums(pathname, types_to_generate) if pathname.file?
      end
      file_checksums_hash
    end

    # Reads the file once, in 8192 byte chunks, feeding every requested digester.
    # @return [Hash<Symbol, String>] checksum type => hex digest
    def generated_checksums(pathname, types_to_generate)
      my_digester_hash = digester_hash(types_to_generate)
      pathname.open('r') do |stream|
        while (buffer = stream.read(8192))
          my_digester_hash.each_value { |digest| digest.update(buffer) }
        end
      end
      my_digester_hash.map { |checksum_type, digest| [checksum_type, digest.hexdigest] }.to_h
    end

    # @param types_to_generate [Array<Symbol>] checksum types wanted
    # @return [Hash<Symbol, Digest>] a fresh digester per recognized type; an unrecognized type
    #   is recorded as CHECKSUM_TYPE_UNRECOGNIZED and left out of the result
    def digester_hash(types_to_generate=DEFAULT_CHECKSUM_TYPES)
      types_to_generate.each_with_object({}) do |checksum_type, digesters|
        case checksum_type
        when :md5
          digesters[checksum_type] = Digest::MD5.new
        when :sha1
          digesters[checksum_type] = Digest::SHA1.new
        when :sha256
          digesters[checksum_type] = Digest::SHA2.new(256)
        when :sha384
          digesters[checksum_type] = Digest::SHA2.new(384)
        when :sha512
          digesters[checksum_type] = Digest::SHA2.new(512)
        else
          result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: nil)
        end
      end
    end

    # Records one CHECKSUM_MISMATCH error listing every file whose manifest checksums
    # differ from the generated ones (including files present on only one side).
    def verify_manifest_checksums(manifest_type, manifests_checksum_hash, generated_checksum_hash)
      diff_hash = {}
      # NOTE: this is intentionally | instead of ||
      (manifests_checksum_hash.keys | generated_checksum_hash.keys).each do |file_name|
        from_manifest = manifests_checksum_hash[file_name] || {}
        from_files = generated_checksum_hash[file_name] || {}
        next if from_manifest == from_files
        cdh = checksums_diff_hash(from_manifest, from_files, manifest_type, 'generated')
        diff_hash[file_name] = cdh if cdh
      end
      return if diff_hash.empty?
      err_data = {
        manifest_type: manifest_type,
        diffs: diff_hash
      }
      result_array << single_error_hash(CHECKSUM_MISMATCH, err_data)
    end

    # Compares the checksum types both sides have; if they share none, compares all types of both.
    # @return [Hash, nil] type => { left_label => checksum, right_label => checksum } for each
    #   differing type, or nil when nothing differs
    def checksums_diff_hash(left_checksums, right_checksums, left_label, right_label)
      diff_hash = {}
      # NOTE: these are intentionally & and | instead of && and ||
      checksum_types_to_compare = (left_checksums.keys & right_checksums.keys)
      checksum_types_to_compare = (left_checksums.keys | right_checksums.keys) if checksum_types_to_compare.empty?
      checksum_types_to_compare.each do |type|
        left_checksum = left_checksums[type]
        right_checksum = right_checksums[type]
        next if left_checksum == right_checksum
        diff_hash[type] = { left_label => left_checksum, right_label => right_checksum }
      end
      diff_hash.empty? ? nil : diff_hash
    end

    # Records PAYLOAD_SIZE_MISMATCH unless the Payload-Oxum of bag-info.txt equals
    # the byte and file counts found under the data directory.
    def verify_payload_size
      sizes_from_bag_info_file = bag_info_payload_size
      generated_sizes = generated_payload_size
      return if sizes_from_bag_info_file == generated_sizes
      err_data = {
        bag_info_sizes: sizes_from_bag_info_file,
        generated_sizes: generated_sizes
      }
      result_array << single_error_hash(PAYLOAD_SIZE_MISMATCH, err_data)
    end

    # @return [Hash, nil] { bytes: Integer, files: Integer } from the first Payload-Oxum line
    #   of bag-info.txt, or nil when the file has no such line
    def bag_info_payload_size
      bag_info_txt_pathname = deposit_bag_pathname.join(BAG_INFO_TXT_BASENAME)
      bag_info_txt_pathname.readlines.each do |line|
        key, value = line.strip.split(':', 2)
        # key is nil for a blank line
        next unless key && key.strip == 'Payload-Oxum'
        num_bytes, num_files = value.strip.split('.') if value
        return { bytes: num_bytes.to_i, files: num_files.to_i }
      end
      nil
    end

    # @return [Hash] { bytes: Integer, files: Integer } totalled over every regular file under the data dir
    def generated_payload_size
      payload_pathname = deposit_bag_pathname.join(DATA_DIR_BASENAME)
      payload_pathname.find.select(&:file?).each_with_object({ bytes: 0, files: 0 }) do |file, totals|
        totals[:bytes] += file.size
        totals[:files] += 1
      end
    end

    # checksums_hash: { fname => {:md5=>"xxx", :sha1=>"yyy"}, fname => ... }
    # @return [Array<Symbol>] the distinct checksum types used, in order of first appearance
    def checksum_types_from_manifest_checksums_hash(checksums_hash)
      checksums_hash.values.flat_map(&:keys).uniq
    end

    def single_error_hash(error_code, err_data_hash)
      { error_code => error_code_msg(error_code, err_data_hash) }
    end

    def error_code_msg(error_code, err_data_hash)
      ERROR_CODE_TO_MESSAGES[error_code] % err_data_hash
    end
  end
end
|
data/lib/moab/file_group.rb
CHANGED
@@ -66,13 +66,12 @@ module Moab
|
|
66
66
|
%w{group_id file_count byte_count block_count}
|
67
67
|
end
|
68
68
|
|
69
|
-
|
70
69
|
# @attribute
|
71
70
|
# @return [Array<FileManifestation>] The set of files comprising the group
|
72
71
|
has_many :files, FileManifestation, :tag => 'file'
|
73
72
|
|
74
73
|
def files
|
75
|
-
|
74
|
+
signature_hash.values
|
76
75
|
end
|
77
76
|
|
78
77
|
# @return [Hash<FileSignature, FileManifestation>] The actual in-memory store for the collection
|
@@ -84,7 +83,7 @@ module Moab
|
|
84
83
|
# used to test for existence of a filename in this file group
|
85
84
|
def path_hash
|
86
85
|
path_hash = Hash.new
|
87
|
-
|
86
|
+
signature_hash.each do |signature,manifestation|
|
88
87
|
manifestation.instances.each do |instance|
|
89
88
|
path_hash[instance.path] = signature
|
90
89
|
end
|
@@ -103,7 +102,7 @@ module Moab
|
|
103
102
|
def path_hash_subset(signature_subset)
|
104
103
|
path_hash = Hash.new
|
105
104
|
signature_subset.each do |signature|
|
106
|
-
manifestation =
|
105
|
+
manifestation = signature_hash[signature]
|
107
106
|
manifestation.instances.each do |instance|
|
108
107
|
path_hash[instance.path] = signature
|
109
108
|
end
|
@@ -135,12 +134,12 @@ module Moab
|
|
135
134
|
# @return [void] Add a single {FileSignature},{FileInstance} key/value pair to this group.
|
136
135
|
# Data is actually stored in the {#signature_hash}
|
137
136
|
def add_file_instance(signature,instance)
|
138
|
-
if
|
139
|
-
manifestation =
|
137
|
+
if signature_hash.has_key?(signature)
|
138
|
+
manifestation = signature_hash[signature]
|
140
139
|
else
|
141
140
|
manifestation = FileManifestation.new
|
142
141
|
manifestation.signature = signature
|
143
|
-
|
142
|
+
signature_hash[signature] = manifestation
|
144
143
|
end
|
145
144
|
manifestation.instances << instance
|
146
145
|
end
|
@@ -150,7 +149,7 @@ module Moab
|
|
150
149
|
# for example, the manifest inventory does not contain a file entry for itself
|
151
150
|
def remove_file_having_path(path)
|
152
151
|
signature = self.path_hash[path]
|
153
|
-
|
152
|
+
signature_hash.delete(signature)
|
154
153
|
end
|
155
154
|
|
156
155
|
# @return [Pathname] The full path used as the basis of the relative paths reported
|
@@ -203,13 +202,13 @@ module Moab
|
|
203
202
|
# @see http://stackoverflow.com/questions/3974087/how-to-make-rubys-find-find-follow-symlinks
|
204
203
|
# @see http://stackoverflow.com/questions/357754/can-i-traverse-symlinked-directories-in-ruby-with-a-glob
|
205
204
|
def harvest_directory(path, recursive, validated=nil)
|
206
|
-
pathname=Pathname.new(path).expand_path
|
205
|
+
pathname = Pathname.new(path).expand_path
|
207
206
|
validated ||= is_descendent_of_base?(pathname)
|
208
207
|
pathname.children.sort.each do |child|
|
209
208
|
if child.basename.to_s == ".DS_Store"
|
210
209
|
next
|
211
210
|
elsif child.directory?
|
212
|
-
harvest_directory(child,recursive, validated) if recursive
|
211
|
+
harvest_directory(child, recursive, validated) if recursive
|
213
212
|
else
|
214
213
|
add_physical_file(child, validated)
|
215
214
|
end
|
@@ -49,7 +49,7 @@ module Moab
|
|
49
49
|
# @param change [String] the change type to search for
|
50
50
|
# @return [FileGroupDifferenceSubset] Find a specified subset of changes
|
51
51
|
def subset(change)
|
52
|
-
|
52
|
+
subset_hash[change.to_sym]
|
53
53
|
end
|
54
54
|
|
55
55
|
# (see Serializable#initialize)
|
@@ -69,7 +69,7 @@ module Moab
|
|
69
69
|
|
70
70
|
def difference_count
|
71
71
|
count = 0
|
72
|
-
|
72
|
+
subset_hash.each do |type, subset|
|
73
73
|
count += subset.count if type != :identical
|
74
74
|
end
|
75
75
|
count
|
@@ -79,49 +79,49 @@ module Moab
|
|
79
79
|
# @return [Integer] How many files were unchanged
|
80
80
|
attribute :identical, Integer, :on_save => Proc.new { |n| n.to_s }
|
81
81
|
def identical
|
82
|
-
|
82
|
+
subset_hash[:identical].count
|
83
83
|
end
|
84
84
|
|
85
85
|
# @attribute
|
86
86
|
# @return [Integer] How many duplicate copies of files were added
|
87
87
|
attribute :copyadded, Integer, :on_save => Proc.new { |n| n.to_s }
|
88
88
|
def copyadded
|
89
|
-
|
89
|
+
subset_hash[:copyadded].count
|
90
90
|
end
|
91
91
|
|
92
92
|
# @attribute
|
93
93
|
# @return [Integer] How many duplicate copies of files were deleted
|
94
94
|
attribute :copydeleted, Integer, :on_save => Proc.new { |n| n.to_s }
|
95
95
|
def copydeleted
|
96
|
-
|
96
|
+
subset_hash[:copydeleted].count
|
97
97
|
end
|
98
98
|
|
99
99
|
# @attribute
|
100
100
|
# @return [Integer] How many files were renamed
|
101
101
|
attribute :renamed, Integer, :on_save => Proc.new { |n| n.to_s }
|
102
102
|
def renamed
|
103
|
-
|
103
|
+
subset_hash[:renamed].count
|
104
104
|
end
|
105
105
|
|
106
106
|
# @attribute
|
107
107
|
# @return [Integer] How many files were modified
|
108
108
|
attribute :modified, Integer, :on_save => Proc.new { |n| n.to_s }
|
109
109
|
def modified
|
110
|
-
|
110
|
+
subset_hash[:modified].count
|
111
111
|
end
|
112
112
|
|
113
113
|
# @attribute
|
114
114
|
# @return [Integer] How many files were added
|
115
115
|
attribute :added, Integer, :on_save => Proc.new { |n| n.to_s }
|
116
116
|
def added
|
117
|
-
|
117
|
+
subset_hash[:added].count
|
118
118
|
end
|
119
119
|
|
120
120
|
# @attribute
|
121
121
|
# @return [Integer] How many files were deleted
|
122
122
|
attribute :deleted, Integer, :on_save => Proc.new { |n| n.to_s }
|
123
123
|
def deleted
|
124
|
-
|
124
|
+
subset_hash[:deleted].count
|
125
125
|
end
|
126
126
|
|
127
127
|
# @attribute
|
@@ -130,12 +130,12 @@ module Moab
|
|
130
130
|
has_many :subsets, FileGroupDifferenceSubset, :tag => 'subset'
|
131
131
|
|
132
132
|
def subsets
|
133
|
-
|
133
|
+
subset_hash.values
|
134
134
|
end
|
135
135
|
|
136
136
|
def subsets=(array)
|
137
137
|
if array
|
138
|
-
array.each{|subset|
|
138
|
+
array.each{|subset| subset_hash[subset.change.to_sym] = subset}
|
139
139
|
end
|
140
140
|
end
|
141
141
|
|
@@ -237,13 +237,12 @@ module Moab
|
|
237
237
|
fid.basis_path = path
|
238
238
|
fid.other_path = "same"
|
239
239
|
fid.signatures << signature
|
240
|
-
|
240
|
+
subset_hash[:identical].files << fid
|
241
241
|
end
|
242
242
|
end
|
243
243
|
self
|
244
244
|
end
|
245
245
|
|
246
|
-
|
247
246
|
# @api internal
|
248
247
|
# @param matching_signatures [Array<FileSignature>] The file signature of the file manifestations being compared
|
249
248
|
# @param basis_signature_hash [Hash<FileSignature, FileManifestation>]
|
@@ -272,13 +271,12 @@ module Moab
|
|
272
271
|
else
|
273
272
|
fid.change = 'renamed'
|
274
273
|
end
|
275
|
-
|
274
|
+
subset_hash[fid.change.to_sym].files << fid
|
276
275
|
end
|
277
276
|
end
|
278
277
|
self
|
279
278
|
end
|
280
279
|
|
281
|
-
|
282
280
|
# @api internal
|
283
281
|
# @param basis_path_hash [Hash<String,FileSignature>]
|
284
282
|
# The file paths and associated signatures for manifestations appearing only in the basis group
|
@@ -293,7 +291,7 @@ module Moab
|
|
293
291
|
fid.other_path = "same"
|
294
292
|
fid.signatures << basis_path_hash[path]
|
295
293
|
fid.signatures << other_path_hash[path]
|
296
|
-
|
294
|
+
subset_hash[:modified].files << fid
|
297
295
|
end
|
298
296
|
self
|
299
297
|
end
|
@@ -311,7 +309,7 @@ module Moab
|
|
311
309
|
fid.basis_path = ""
|
312
310
|
fid.other_path = path
|
313
311
|
fid.signatures << other_path_hash[path]
|
314
|
-
|
312
|
+
subset_hash[:added].files << fid
|
315
313
|
end
|
316
314
|
self
|
317
315
|
end
|
@@ -329,7 +327,7 @@ module Moab
|
|
329
327
|
fid.basis_path = path
|
330
328
|
fid.other_path = ""
|
331
329
|
fid.signatures << basis_path_hash[path]
|
332
|
-
|
330
|
+
subset_hash[:deleted].files << fid
|
333
331
|
end
|
334
332
|
self
|
335
333
|
end
|
@@ -340,15 +338,15 @@ module Moab
|
|
340
338
|
deltas = Hash.new {|hash, key| hash[key] = []}
|
341
339
|
# case where other_path is empty or 'same'. (create array of strings)
|
342
340
|
[:identical, :modified, :deleted, :copydeleted].each do |change|
|
343
|
-
deltas[change].concat(
|
341
|
+
deltas[change].concat(subset_hash[change].files.collect{ |file| file.basis_path })
|
344
342
|
end
|
345
343
|
# case where basis_path and other_path are both present. (create array of arrays)
|
346
344
|
[:copyadded, :renamed].each do |change|
|
347
|
-
deltas[change].concat(
|
345
|
+
deltas[change].concat(subset_hash[change].files.collect { |file| [file.basis_path, file.other_path] })
|
348
346
|
end
|
349
347
|
# case where basis_path is empty. (create array of strings)
|
350
348
|
[:added].each do |change|
|
351
|
-
deltas[change].concat(
|
349
|
+
deltas[change].concat(subset_hash[change].files.collect { |file| file.other_path })
|
352
350
|
end
|
353
351
|
deltas
|
354
352
|
end
|
@@ -362,7 +360,7 @@ module Moab
|
|
362
360
|
# Split the filepairs into two arrays
|
363
361
|
oldnames = []
|
364
362
|
newnames = []
|
365
|
-
filepairs.each do |old,new|
|
363
|
+
filepairs.each do |old, new|
|
366
364
|
oldnames << old
|
367
365
|
newnames << new
|
368
366
|
end
|
@@ -374,7 +372,7 @@ module Moab
|
|
374
372
|
# @param [Array<Array<String>>] filepairs The set of oldname, newname pairs for all files being renamed
|
375
373
|
# @return [Array<Array<String>>] a set of file triples containing oldname, newname, tempname
|
376
374
|
def rename_tempfile_triplets(filepairs)
|
377
|
-
filepairs.collect{|old,new| [old, new, "#{new}-#{Time.now.strftime('%Y%m%d%H%H%S')}-tmp"]}
|
375
|
+
filepairs.collect{ |old, new| [old, new, "#{new}-#{Time.now.strftime('%Y%m%d%H%H%S')}-tmp"] }
|
378
376
|
end
|
379
377
|
|
380
378
|
end
|
data/lib/moab/file_inventory.rb
CHANGED
@@ -52,11 +52,11 @@ module Moab
|
|
52
52
|
|
53
53
|
# @attribute
|
54
54
|
# @return [Integer] The ordinal version number
|
55
|
-
attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new {|n| n.to_s}
|
55
|
+
attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new { |n| n.to_s }
|
56
56
|
|
57
57
|
# @return [String] The unique identifier concatenating digital object id with version id
|
58
58
|
def composite_key
|
59
|
-
|
59
|
+
digital_object_id + '-' + StorageObject.version_dirname(version_id)
|
60
60
|
end
|
61
61
|
|
62
62
|
# @attribute
|
@@ -64,7 +64,7 @@ module Moab
|
|
64
64
|
attribute :inventory_datetime, String, :tag => 'inventoryDatetime'
|
65
65
|
|
66
66
|
def inventory_datetime=(datetime)
|
67
|
-
@inventory_datetime=Moab::UtcTime.input(datetime)
|
67
|
+
@inventory_datetime = Moab::UtcTime.input(datetime)
|
68
68
|
end
|
69
69
|
|
70
70
|
def inventory_datetime
|
@@ -73,7 +73,7 @@ module Moab
|
|
73
73
|
|
74
74
|
# @attribute
|
75
75
|
# @return [Integer] The total number of data files in the inventory (dynamically calculated)
|
76
|
-
attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|t| t.to_s}
|
76
|
+
attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new { |t| t.to_s }
|
77
77
|
|
78
78
|
def file_count
|
79
79
|
groups.inject(0) { |sum, group| sum + group.file_count }
|
@@ -81,7 +81,7 @@ module Moab
|
|
81
81
|
|
82
82
|
# @attribute
|
83
83
|
# @return [Integer] The total size (in bytes) of all files in the inventory (dynamically calculated)
|
84
|
-
attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|t| t.to_s}
|
84
|
+
attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new { |t| t.to_s }
|
85
85
|
|
86
86
|
def byte_count
|
87
87
|
groups.inject(0) { |sum, group| sum + group.byte_count }
|
@@ -89,7 +89,7 @@ module Moab
|
|
89
89
|
|
90
90
|
# @attribute
|
91
91
|
# @return [Integer] The total disk usage (in 1 kB blocks) of all data files (estimating du -k result) (dynamically calculated)
|
92
|
-
attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|t| t.to_s}
|
92
|
+
attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new { |t| t.to_s }
|
93
93
|
|
94
94
|
def block_count
|
95
95
|
groups.inject(0) { |sum, group| sum + group.block_count }
|
@@ -101,20 +101,20 @@ module Moab
|
|
101
101
|
|
102
102
|
# @return [Array<FileGroup>] The set of data groups that contain files
|
103
103
|
def non_empty_groups
|
104
|
-
|
104
|
+
groups.select { |group| !group.files.empty? }
|
105
105
|
end
|
106
106
|
|
107
107
|
# @param non_empty [Boolean] if true, return group_id's only for groups having files
|
108
108
|
# @return [Array<String>] group identifiers contained in this file inventory
|
109
109
|
def group_ids(non_empty=nil)
|
110
|
-
|
111
|
-
|
110
|
+
my_groups = non_empty ? self.non_empty_groups : groups
|
111
|
+
my_groups.map { |g| g.group_id }
|
112
112
|
end
|
113
113
|
|
114
114
|
# @param [String] group_id The identifier of the group to be selected
|
115
115
|
# @return [FileGroup] The file group in this inventory for the specified group_id
|
116
116
|
def group(group_id)
|
117
|
-
|
117
|
+
groups.find { |group| group.group_id == group_id}
|
118
118
|
end
|
119
119
|
|
120
120
|
# @param group_id [String] File group identifier (e.g. data, metadata, manifests)
|
@@ -134,10 +134,10 @@ module Moab
|
|
134
134
|
# @return [FileSignature] The signature of the specified file
|
135
135
|
def file_signature(group_id, file_id)
|
136
136
|
file_group = group(group_id)
|
137
|
-
errmsg = "group #{group_id} not found for #{
|
137
|
+
errmsg = "group #{group_id} not found for #{digital_object_id} - #{version_id}"
|
138
138
|
raise FileNotFoundException, errmsg if file_group.nil?
|
139
139
|
file_signature = file_group.path_hash[file_id]
|
140
|
-
errmsg = "#{group_id} file #{file_id} not found for #{
|
140
|
+
errmsg = "#{group_id} file #{file_id} not found for #{digital_object_id} - #{version_id}"
|
141
141
|
raise FileNotFoundException, errmsg if file_signature.nil?
|
142
142
|
file_signature
|
143
143
|
end
|
@@ -154,7 +154,7 @@ module Moab
|
|
154
154
|
# @api internal
|
155
155
|
# @return [String] Concatenation of the objectId and versionId values
|
156
156
|
def package_id
|
157
|
-
"#{
|
157
|
+
"#{digital_object_id}-v#{version_id}"
|
158
158
|
end
|
159
159
|
|
160
160
|
# @api internal
|
@@ -174,7 +174,6 @@ module Moab
|
|
174
174
|
else
|
175
175
|
data_source
|
176
176
|
end
|
177
|
-
|
178
177
|
end
|
179
178
|
end
|
180
179
|
|
@@ -186,10 +185,10 @@ module Moab
|
|
186
185
|
# @example {include:file:spec/features/inventory/harvest_inventory_spec.rb}
|
187
186
|
def inventory_from_directory(data_dir, group_id=nil)
|
188
187
|
if group_id
|
189
|
-
|
188
|
+
groups << FileGroup.new(group_id: group_id).group_from_directory(data_dir)
|
190
189
|
else
|
191
190
|
['content', 'metadata'].each do |gid|
|
192
|
-
|
191
|
+
groups << FileGroup.new(group_id: gid).group_from_directory(Pathname(data_dir).join(gid))
|
193
192
|
end
|
194
193
|
end
|
195
194
|
self
|
@@ -203,7 +202,7 @@ module Moab
|
|
203
202
|
signatures_from_bag = signatures_from_bagit_manifests(bag_pathname)
|
204
203
|
bag_data_subdirs = bag_pathname.join('data').children
|
205
204
|
bag_data_subdirs.each do |subdir|
|
206
|
-
|
205
|
+
groups << FileGroup.new(:group_id=>subdir.basename.to_s).group_from_bagit_subdir(subdir, signatures_from_bag)
|
207
206
|
end
|
208
207
|
self
|
209
208
|
end
|
@@ -212,12 +211,11 @@ module Moab
|
|
212
211
|
# @return [Hash<Pathname,FileSignature>] The fixity data present in the bag's manifest files
|
213
212
|
def signatures_from_bagit_manifests(bag_pathname)
|
214
213
|
manifest_pathname = Hash.new
|
215
|
-
|
216
|
-
checksum_types.each do |type|
|
214
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
217
215
|
manifest_pathname[type] = bag_pathname.join("manifest-#{type}.txt")
|
218
216
|
end
|
219
|
-
signatures = Hash.new { |hash,path| hash[path] = FileSignature.new }
|
220
|
-
|
217
|
+
signatures = Hash.new { |hash, path| hash[path] = FileSignature.new }
|
218
|
+
DEFAULT_CHECKSUM_TYPES.each do |type|
|
221
219
|
if manifest_pathname[type].exist?
|
222
220
|
manifest_pathname[type].each_line do |line|
|
223
221
|
line.chomp!
|
@@ -230,7 +228,7 @@ module Moab
|
|
230
228
|
end
|
231
229
|
end
|
232
230
|
end
|
233
|
-
signatures.each {|file_pathname,signature| signature.size = file_pathname.size}
|
231
|
+
signatures.each { |file_pathname, signature| signature.size = file_pathname.size }
|
234
232
|
signatures
|
235
233
|
end
|
236
234
|
|
@@ -55,7 +55,7 @@ module Moab
|
|
55
55
|
attribute :report_datetime, String, :tag => 'reportDatetime'
|
56
56
|
|
57
57
|
def report_datetime=(datetime)
|
58
|
-
@report_datetime=Moab::UtcTime.input(datetime)
|
58
|
+
@report_datetime = Moab::UtcTime.input(datetime)
|
59
59
|
end
|
60
60
|
|
61
61
|
def report_datetime
|
@@ -115,7 +115,7 @@ module Moab
|
|
115
115
|
inv_diff = self.to_hash
|
116
116
|
inv_diff["group_differences"].each_value do |group_diff|
|
117
117
|
delete_subsets = []
|
118
|
-
group_diff["subsets"].each do |change_type,subset|
|
118
|
+
group_diff["subsets"].each do |change_type, subset|
|
119
119
|
delete_subsets << change_type if change_type == "identical" or subset["count"] == 0
|
120
120
|
end
|
121
121
|
delete_subsets.each do |change_type|
|
@@ -68,7 +68,7 @@ module Moab
|
|
68
68
|
# @return [Integer] The total disk usage (in 1 kB blocks) of all files that share this manifestation's signature
|
69
69
|
# (estimating du -k result)
|
70
70
|
def block_count
|
71
|
-
block_size=1024
|
71
|
+
block_size = 1024
|
72
72
|
instance_blocks = (signature.size.to_i + block_size - 1)/block_size
|
73
73
|
file_count * instance_blocks
|
74
74
|
end
|
@@ -9,7 +9,7 @@ module Moab
|
|
9
9
|
METADATA_DIR = "metadata".freeze
|
10
10
|
CONTENT_DIR = "content".freeze
|
11
11
|
EXPECTED_DATA_SUB_DIRS = [CONTENT_DIR, METADATA_DIR].freeze
|
12
|
-
IMPLICIT_DIRS = ['.', '..'
|
12
|
+
IMPLICIT_DIRS = ['.', '..'].freeze # unlike Find.find, Dir.entries returns the current/parent dirs
|
13
13
|
DATA_DIR = "data".freeze
|
14
14
|
MANIFESTS_DIR = 'manifests'.freeze
|
15
15
|
EXPECTED_VERSION_SUB_DIRS = [DATA_DIR, MANIFESTS_DIR].freeze
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: moab-versioning
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Darren Weber
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2018-03-20 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: confstruct
|
@@ -69,20 +69,6 @@ dependencies:
|
|
69
69
|
- - ">="
|
70
70
|
- !ruby/object:Gem::Version
|
71
71
|
version: '0'
|
72
|
-
- !ruby/object:Gem::Dependency
|
73
|
-
name: systemu
|
74
|
-
requirement: !ruby/object:Gem::Requirement
|
75
|
-
requirements:
|
76
|
-
- - ">="
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
version: '0'
|
79
|
-
type: :runtime
|
80
|
-
prerelease: false
|
81
|
-
version_requirements: !ruby/object:Gem::Requirement
|
82
|
-
requirements:
|
83
|
-
- - ">="
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: '0'
|
86
72
|
- !ruby/object:Gem::Dependency
|
87
73
|
name: druid-tools
|
88
74
|
requirement: !ruby/object:Gem::Requirement
|
@@ -97,20 +83,6 @@ dependencies:
|
|
97
83
|
- - ">="
|
98
84
|
- !ruby/object:Gem::Version
|
99
85
|
version: 1.0.0
|
100
|
-
- !ruby/object:Gem::Dependency
|
101
|
-
name: awesome_print
|
102
|
-
requirement: !ruby/object:Gem::Requirement
|
103
|
-
requirements:
|
104
|
-
- - ">="
|
105
|
-
- !ruby/object:Gem::Version
|
106
|
-
version: '0'
|
107
|
-
type: :development
|
108
|
-
prerelease: false
|
109
|
-
version_requirements: !ruby/object:Gem::Requirement
|
110
|
-
requirements:
|
111
|
-
- - ">="
|
112
|
-
- !ruby/object:Gem::Version
|
113
|
-
version: '0'
|
114
86
|
- !ruby/object:Gem::Dependency
|
115
87
|
name: equivalent-xml
|
116
88
|
requirement: !ruby/object:Gem::Requirement
|
@@ -139,20 +111,6 @@ dependencies:
|
|
139
111
|
- - ">="
|
140
112
|
- !ruby/object:Gem::Version
|
141
113
|
version: '0'
|
142
|
-
- !ruby/object:Gem::Dependency
|
143
|
-
name: rdoc
|
144
|
-
requirement: !ruby/object:Gem::Requirement
|
145
|
-
requirements:
|
146
|
-
- - ">="
|
147
|
-
- !ruby/object:Gem::Version
|
148
|
-
version: '0'
|
149
|
-
type: :development
|
150
|
-
prerelease: false
|
151
|
-
version_requirements: !ruby/object:Gem::Requirement
|
152
|
-
requirements:
|
153
|
-
- - ">="
|
154
|
-
- !ruby/object:Gem::Version
|
155
|
-
version: '0'
|
156
114
|
- !ruby/object:Gem::Dependency
|
157
115
|
name: rspec
|
158
116
|
requirement: !ruby/object:Gem::Requirement
|
@@ -181,20 +139,6 @@ dependencies:
|
|
181
139
|
- - ">="
|
182
140
|
- !ruby/object:Gem::Version
|
183
141
|
version: '0'
|
184
|
-
- !ruby/object:Gem::Dependency
|
185
|
-
name: yard
|
186
|
-
requirement: !ruby/object:Gem::Requirement
|
187
|
-
requirements:
|
188
|
-
- - ">="
|
189
|
-
- !ruby/object:Gem::Version
|
190
|
-
version: '0'
|
191
|
-
type: :development
|
192
|
-
prerelease: false
|
193
|
-
version_requirements: !ruby/object:Gem::Requirement
|
194
|
-
requirements:
|
195
|
-
- - ">="
|
196
|
-
- !ruby/object:Gem::Version
|
197
|
-
version: '0'
|
198
142
|
- !ruby/object:Gem::Dependency
|
199
143
|
name: pry-byebug
|
200
144
|
requirement: !ruby/object:Gem::Requirement
|
@@ -247,6 +191,7 @@ files:
|
|
247
191
|
- lib/moab.rb
|
248
192
|
- lib/moab/bagger.rb
|
249
193
|
- lib/moab/config.rb
|
194
|
+
- lib/moab/deposit_bag_validator.rb
|
250
195
|
- lib/moab/exceptions.rb
|
251
196
|
- lib/moab/file_group.rb
|
252
197
|
- lib/moab/file_group_difference.rb
|
@@ -280,7 +225,6 @@ files:
|
|
280
225
|
- lib/stanford/storage_object_validator.rb
|
281
226
|
- lib/stanford/storage_repository.rb
|
282
227
|
- lib/stanford/storage_services.rb
|
283
|
-
- lib/tasks/yard.rake
|
284
228
|
homepage: https://github.com/sul-dlss/moab-versioning
|
285
229
|
licenses:
|
286
230
|
- Apache-2.0
|
data/lib/tasks/yard.rake
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
desc "Generate RDoc"
|
2
|
-
task :doc => ['doc:generate']
|
3
|
-
|
4
|
-
namespace :doc do
|
5
|
-
project_root = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
|
6
|
-
doc_destination = File.join(project_root, 'doc')
|
7
|
-
|
8
|
-
begin
|
9
|
-
require 'yard'
|
10
|
-
require 'yard/rake/yardoc_task'
|
11
|
-
|
12
|
-
YARD::Rake::YardocTask.new(:generate) do |yt|
|
13
|
-
yt.files = Dir.glob(File.join(project_root, 'lib', '*.rb')) +
|
14
|
-
Dir.glob(File.join(project_root, 'lib', 'serializer', '*.rb')) +
|
15
|
-
Dir.glob(File.join(project_root, 'lib', 'moab', '*.rb')) +
|
16
|
-
Dir.glob(File.join(project_root, 'lib', 'stanford', '*.rb')) +
|
17
|
-
['-'] +
|
18
|
-
[ File.join(project_root, 'LICENSE.rdoc') ]
|
19
|
-
|
20
|
-
yt.options = ['--output-dir', doc_destination, '--hide-void-return']
|
21
|
-
end
|
22
|
-
rescue LoadError
|
23
|
-
desc "Generate YARD Documentation"
|
24
|
-
task :generate do
|
25
|
-
abort "Please install the YARD gem to generate rdoc."
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
desc "Remove generated documentation"
|
30
|
-
task :clean do
|
31
|
-
rm_r doc_destination if File.exists?(doc_destination)
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|