moab-versioning 4.4.2 → 5.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/moab/stanford.rb +0 -10
- data/lib/moab/storage_services.rb +0 -6
- data/lib/moab.rb +0 -8
- metadata +7 -13
- data/lib/moab/deposit_bag_validator.rb +0 -328
- data/lib/moab/version_metadata.rb +0 -34
- data/lib/moab/version_metadata_event.rb +0 -42
- data/lib/stanford/active_fedora_object.rb +0 -30
- data/lib/stanford/dor_metadata.rb +0 -44
- data/lib/stanford/moab_storage_directory.rb +0 -38
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1d109f89af3228b70fe8e8b8cf394830b9397f4815a8c2324bb7e09e849432d6
|
|
4
|
+
data.tar.gz: 9695c1dfb048663ff1b56084597b8ece2973966704a07d610dd7e537da8be61b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 85be7b541e0056fd50296092448b01c4d83195529c00bd4aaa244cce6596336b0ae7b41ba855f54b618666bf265a9986844eaf6a0f7a78eb811b6ccdaf1329ae
|
|
7
|
+
data.tar.gz: 72dc3ada7f22d891e883ce08fe55c7c783689f73b8868e6c29bb5040c2bcc45197c044a3b5823a94f3e8ade7de6b16ccc291902ee142ab0e68036824623c6695
|
data/lib/moab/stanford.rb
CHANGED
|
@@ -2,20 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
require 'moab'
|
|
4
4
|
require 'stanford/content_inventory'
|
|
5
|
-
require 'stanford/dor_metadata'
|
|
6
5
|
require 'stanford/storage_repository'
|
|
7
6
|
require 'stanford/storage_services'
|
|
8
|
-
require 'stanford/active_fedora_object'
|
|
9
|
-
require 'stanford/moab_storage_directory'
|
|
10
7
|
require 'stanford/storage_object_validator'
|
|
11
8
|
|
|
12
9
|
# Stanford is a module that isolates classes specific to the Stanford Digital Repository
|
|
13
|
-
#
|
|
14
|
-
# ====Data Model
|
|
15
|
-
# * <b>{DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)</b>
|
|
16
|
-
# * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
|
|
17
|
-
#
|
|
18
|
-
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
|
19
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
|
20
10
|
module Stanford
|
|
21
11
|
end
|
|
@@ -83,12 +83,6 @@ module Moab
|
|
|
83
83
|
@@repository.storage_object(object_id).current_version_id
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
-
# @param [String] object_id The digital object identifier of the object
|
|
87
|
-
# @return [Pathname] Pathname object containing the full path for the specified file
|
|
88
|
-
def self.version_metadata(object_id)
|
|
89
|
-
retrieve_file('metadata', 'versionMetadata.xml', object_id)
|
|
90
|
-
end
|
|
91
|
-
|
|
92
86
|
# @param [String] object_id The digital object identifier of the object
|
|
93
87
|
# @param [Integer] version_id The ID of the version, if nil use latest version
|
|
94
88
|
# @return [FileInventory] the file inventory for the specified object version
|
data/lib/moab.rb
CHANGED
|
@@ -20,10 +20,6 @@
|
|
|
20
20
|
# * {FileInstanceDifference} [1..*] = contains difference information at the file level
|
|
21
21
|
# * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
|
|
22
22
|
#
|
|
23
|
-
# * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
|
|
24
|
-
# * {VersionMetadataEntry} [1..*] = attributes of a digital object version
|
|
25
|
-
# * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
|
|
26
|
-
#
|
|
27
23
|
# * <b>{StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods</b>
|
|
28
24
|
# * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
|
|
29
25
|
# * {Bagger} [1] = utility for creating bagit packages for ingest or dissemination
|
|
@@ -48,9 +44,6 @@ require 'moab/file_instance_difference'
|
|
|
48
44
|
require 'moab/file_group_difference_subset'
|
|
49
45
|
require 'moab/file_group_difference'
|
|
50
46
|
require 'moab/file_inventory_difference'
|
|
51
|
-
require 'moab/version_metadata_event'
|
|
52
|
-
require 'moab/version_metadata_entry'
|
|
53
|
-
require 'moab/version_metadata'
|
|
54
47
|
require 'moab/bagger'
|
|
55
48
|
require 'moab/storage_object'
|
|
56
49
|
require 'moab/storage_object_version'
|
|
@@ -59,4 +52,3 @@ require 'moab/storage_services'
|
|
|
59
52
|
require 'moab/exceptions'
|
|
60
53
|
require 'moab/verification_result'
|
|
61
54
|
require 'moab/storage_object_validator'
|
|
62
|
-
require 'moab/deposit_bag_validator'
|
metadata
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: moab-versioning
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 5.0.0.beta1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Darren Weber
|
|
8
8
|
- Richard Anderson
|
|
9
9
|
- Lynn McRae
|
|
10
10
|
- Hannah Frost
|
|
11
|
-
autorequire:
|
|
11
|
+
autorequire:
|
|
12
12
|
bindir: bin
|
|
13
13
|
cert_chain: []
|
|
14
|
-
date: 2021-
|
|
14
|
+
date: 2021-10-19 00:00:00.000000000 Z
|
|
15
15
|
dependencies:
|
|
16
16
|
- !ruby/object:Gem::Dependency
|
|
17
17
|
name: druid-tools
|
|
@@ -177,7 +177,6 @@ files:
|
|
|
177
177
|
- lib/moab.rb
|
|
178
178
|
- lib/moab/bagger.rb
|
|
179
179
|
- lib/moab/config.rb
|
|
180
|
-
- lib/moab/deposit_bag_validator.rb
|
|
181
180
|
- lib/moab/exceptions.rb
|
|
182
181
|
- lib/moab/file_group.rb
|
|
183
182
|
- lib/moab/file_group_difference.rb
|
|
@@ -198,16 +197,11 @@ files:
|
|
|
198
197
|
- lib/moab/storage_services.rb
|
|
199
198
|
- lib/moab/utc_time.rb
|
|
200
199
|
- lib/moab/verification_result.rb
|
|
201
|
-
- lib/moab/version_metadata.rb
|
|
202
200
|
- lib/moab/version_metadata_entry.rb
|
|
203
|
-
- lib/moab/version_metadata_event.rb
|
|
204
201
|
- lib/serializer.rb
|
|
205
202
|
- lib/serializer/manifest.rb
|
|
206
203
|
- lib/serializer/serializable.rb
|
|
207
|
-
- lib/stanford/active_fedora_object.rb
|
|
208
204
|
- lib/stanford/content_inventory.rb
|
|
209
|
-
- lib/stanford/dor_metadata.rb
|
|
210
|
-
- lib/stanford/moab_storage_directory.rb
|
|
211
205
|
- lib/stanford/storage_object_validator.rb
|
|
212
206
|
- lib/stanford/storage_repository.rb
|
|
213
207
|
- lib/stanford/storage_services.rb
|
|
@@ -215,7 +209,7 @@ homepage: https://github.com/sul-dlss/moab-versioning
|
|
|
215
209
|
licenses:
|
|
216
210
|
- Apache-2.0
|
|
217
211
|
metadata: {}
|
|
218
|
-
post_install_message:
|
|
212
|
+
post_install_message:
|
|
219
213
|
rdoc_options: []
|
|
220
214
|
require_paths:
|
|
221
215
|
- lib
|
|
@@ -226,12 +220,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
226
220
|
version: '2.6'
|
|
227
221
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
228
222
|
requirements:
|
|
229
|
-
- - "
|
|
223
|
+
- - ">"
|
|
230
224
|
- !ruby/object:Gem::Version
|
|
231
|
-
version:
|
|
225
|
+
version: 1.3.1
|
|
232
226
|
requirements: []
|
|
233
227
|
rubygems_version: 3.1.4
|
|
234
|
-
signing_key:
|
|
228
|
+
signing_key:
|
|
235
229
|
specification_version: 4
|
|
236
230
|
summary: Ruby implementation of digital object versioning toolkit used by the SULAIR
|
|
237
231
|
Digital Library
|
|
@@ -1,328 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Moab
|
|
4
|
-
# Given a deposit bag, ensures the contents valid for becoming a StorageObjectVersion
|
|
5
|
-
# this is a Shameless Green implementation, combining code from:
|
|
6
|
-
# - sdr-preservation-core/lib/sdr_ingest/validate_bag <-- old preservation robots
|
|
7
|
-
# - archive-utils/lib/bagit_bag <-- gem only used by sdr-preservation-robots
|
|
8
|
-
# - archive-utils/lib/file_fixity
|
|
9
|
-
# - archive-utils/lib/fixity
|
|
10
|
-
# this code adds duplication to this gem (see github issue #119);
|
|
11
|
-
# for example, computing checksums is done
|
|
12
|
-
# - deposit_bag_validator
|
|
13
|
-
# - file_signature
|
|
14
|
-
class DepositBagValidator
|
|
15
|
-
BAG_DIR_NOT_FOUND = :bag_dir_not_found
|
|
16
|
-
CHECKSUM_MISMATCH = :checksum_mismatch
|
|
17
|
-
CHECKSUM_TYPE_UNRECOGNIZED = :checksum_type_unrecognized
|
|
18
|
-
INVALID_VERSION_XXX_XML = :invalid_versionXxx_xml
|
|
19
|
-
PAYLOAD_SIZE_MISMATCH = :payload_size_mismatch
|
|
20
|
-
REQUIRED_FILE_NOT_FOUND = :required_file_not_found
|
|
21
|
-
VERSION_MISMATCH_TO_MOAB = :version_mismatch_to_moab
|
|
22
|
-
VERSION_MISSING_FROM_FILE = :version_missing_from_file
|
|
23
|
-
|
|
24
|
-
ERROR_CODE_TO_MESSAGES = {
|
|
25
|
-
BAG_DIR_NOT_FOUND => "Deposit bag directory %{bag_dir} does not exist",
|
|
26
|
-
CHECKSUM_MISMATCH => "Failed %{manifest_type} verification. Differences: \n%{diffs}",
|
|
27
|
-
CHECKSUM_TYPE_UNRECOGNIZED => "Checksum type unrecognized: %{checksum_type}; file: %{filename}",
|
|
28
|
-
INVALID_VERSION_XXX_XML => "Unable to parse %{file_pathname}: %{err_info}",
|
|
29
|
-
PAYLOAD_SIZE_MISMATCH => "Failed payload size verification. Expected: %{bag_info_sizes}; found: %{generated_sizes}",
|
|
30
|
-
REQUIRED_FILE_NOT_FOUND => "Deposit bag required file %{file_pathname} not found",
|
|
31
|
-
VERSION_MISMATCH_TO_MOAB => "Version mismatch in %{file_pathname}: Moab expected %{new_version}; found %{file_version}",
|
|
32
|
-
VERSION_MISSING_FROM_FILE => "Version xml file %{version_file} missing data at %{xpath} containing version id"
|
|
33
|
-
}.freeze
|
|
34
|
-
|
|
35
|
-
REQUIRED_MANIFEST_CHECKSUM_TYPE = 'sha256'
|
|
36
|
-
RECOGNIZED_CHECKSUM_ALGORITHMS = %i[md5 sha1 sha256 sha384 sha512].freeze
|
|
37
|
-
|
|
38
|
-
TAGMANIFEST = 'tagmanifest'
|
|
39
|
-
MANIFEST = 'manifest'
|
|
40
|
-
DATA_DIR_BASENAME = 'data'
|
|
41
|
-
BAG_INFO_TXT_BASENAME = 'bag-info.txt'
|
|
42
|
-
VERSION_ADDITIONS_BASENAME = 'versionAdditions.xml'
|
|
43
|
-
VERSION_INVENTORY_BASENAME = 'versionInventory.xml'
|
|
44
|
-
VERSION_METADATA_PATH = "#{DATA_DIR_BASENAME}/metadata/versionMetadata.xml"
|
|
45
|
-
|
|
46
|
-
REQUIRED_BAG_FILES = [
|
|
47
|
-
DATA_DIR_BASENAME,
|
|
48
|
-
'bagit.txt',
|
|
49
|
-
BAG_INFO_TXT_BASENAME,
|
|
50
|
-
"#{MANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt",
|
|
51
|
-
"#{TAGMANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt",
|
|
52
|
-
VERSION_ADDITIONS_BASENAME,
|
|
53
|
-
VERSION_INVENTORY_BASENAME,
|
|
54
|
-
VERSION_METADATA_PATH
|
|
55
|
-
].freeze
|
|
56
|
-
|
|
57
|
-
attr_reader :deposit_bag_pathname, :expected_new_version, :result_array
|
|
58
|
-
|
|
59
|
-
def initialize(storage_object)
|
|
60
|
-
@deposit_bag_pathname = storage_object.deposit_bag_pathname
|
|
61
|
-
@expected_new_version = storage_object.current_version_id + 1
|
|
62
|
-
@result_array = []
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
# returns Array of tiny error hashes, allowing multiple occurrences of a single error code
|
|
66
|
-
def validation_errors
|
|
67
|
-
return [single_error_hash(BAG_DIR_NOT_FOUND, bag_dir: deposit_bag_pathname)] unless deposit_bag_pathname.exist?
|
|
68
|
-
return result_array unless required_bag_files_exist?
|
|
69
|
-
|
|
70
|
-
verify_version
|
|
71
|
-
verify_tagmanifests
|
|
72
|
-
verify_payload_size
|
|
73
|
-
verify_payload_manifests
|
|
74
|
-
result_array # attr that accumulates any errors encountered along the way
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
private
|
|
78
|
-
|
|
79
|
-
def bag_dir_exists?
|
|
80
|
-
deposit_bag_pathname.exist?
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
# assumes this is called when result_array is empty, as subsequent checks will use these required files
|
|
84
|
-
def required_bag_files_exist?
|
|
85
|
-
REQUIRED_BAG_FILES.each do |filename|
|
|
86
|
-
pathname = deposit_bag_pathname.join(filename)
|
|
87
|
-
result_array << single_error_hash(REQUIRED_FILE_NOT_FOUND, file_pathname: pathname) unless pathname.exist?
|
|
88
|
-
end
|
|
89
|
-
result_array.empty?
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
def verify_version
|
|
93
|
-
version_md_pathname = deposit_bag_pathname.join(VERSION_METADATA_PATH)
|
|
94
|
-
version_from_file = last_version_id_from_version_md_xml(version_md_pathname)
|
|
95
|
-
verify_version_from_xml_file(version_md_pathname, version_from_file) if version_from_file
|
|
96
|
-
|
|
97
|
-
version_additions_pathname = deposit_bag_pathname.join(VERSION_ADDITIONS_BASENAME)
|
|
98
|
-
version_from_file = version_id_from_version_manifest_xml(version_additions_pathname)
|
|
99
|
-
verify_version_from_xml_file(version_additions_pathname, version_from_file) if version_from_file
|
|
100
|
-
|
|
101
|
-
version_inventory_pathname = deposit_bag_pathname.join(VERSION_INVENTORY_BASENAME)
|
|
102
|
-
version_from_file = version_id_from_version_manifest_xml(version_inventory_pathname)
|
|
103
|
-
verify_version_from_xml_file(version_inventory_pathname, version_from_file) if version_from_file
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
def last_version_id_from_version_md_xml(version_md_pathname)
|
|
107
|
-
last_version_id_from_xml(version_md_pathname, '/versionMetadata/version/@versionId')
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
def version_id_from_version_manifest_xml(version_manifest_xml_pathname)
|
|
111
|
-
last_version_id_from_xml(version_manifest_xml_pathname, '/fileInventory/@versionId')
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
def last_version_id_from_xml(pathname, xpath)
|
|
115
|
-
doc = Nokogiri::XML(File.open(pathname.to_s), &:strict)
|
|
116
|
-
version_id = doc.xpath(xpath).last.text unless doc.xpath(xpath).empty?
|
|
117
|
-
return version_id.to_i if version_id
|
|
118
|
-
|
|
119
|
-
err_data = {
|
|
120
|
-
version_file: pathname,
|
|
121
|
-
xpath: xpath
|
|
122
|
-
}
|
|
123
|
-
result_array << single_error_hash(VERSION_MISSING_FROM_FILE, err_data) unless version_id
|
|
124
|
-
nil
|
|
125
|
-
rescue StandardError => e
|
|
126
|
-
err_data = {
|
|
127
|
-
file_pathname: pathname,
|
|
128
|
-
err_info: "#{e}\n#{e.backtrace}"
|
|
129
|
-
}
|
|
130
|
-
result_array << single_error_hash(INVALID_VERSION_XXX_XML, err_data)
|
|
131
|
-
nil
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
def verify_version_from_xml_file(file_pathname, found)
|
|
135
|
-
return if found == expected_new_version
|
|
136
|
-
|
|
137
|
-
err_data = {
|
|
138
|
-
file_pathname: file_pathname,
|
|
139
|
-
new_version: expected_new_version,
|
|
140
|
-
file_version: found
|
|
141
|
-
}
|
|
142
|
-
result_array << single_error_hash(VERSION_MISMATCH_TO_MOAB, err_data)
|
|
143
|
-
end
|
|
144
|
-
|
|
145
|
-
# adds to result_array if tagmanifest checksums don't match generated checksums
|
|
146
|
-
def verify_tagmanifests
|
|
147
|
-
tagmanifests_checksums_hash = checksums_hash_from_manifest_files(TAGMANIFEST)
|
|
148
|
-
types_to_generate = checksum_types_from_manifest_checksums_hash(tagmanifests_checksums_hash)
|
|
149
|
-
generated_checksums_hash = generate_tagmanifest_checksums_hash(types_to_generate)
|
|
150
|
-
verify_manifest_checksums(TAGMANIFEST, tagmanifests_checksums_hash, generated_checksums_hash)
|
|
151
|
-
end
|
|
152
|
-
|
|
153
|
-
# adds to result_array if manifest checksums don't match generated checksums
|
|
154
|
-
def verify_payload_manifests
|
|
155
|
-
manifests_checksums_hash = checksums_hash_from_manifest_files(MANIFEST)
|
|
156
|
-
types_to_generate = checksum_types_from_manifest_checksums_hash(manifests_checksums_hash)
|
|
157
|
-
generated_checksums_hash = generate_payload_checksums(types_to_generate)
|
|
158
|
-
verify_manifest_checksums(MANIFEST, manifests_checksums_hash, generated_checksums_hash)
|
|
159
|
-
end
|
|
160
|
-
|
|
161
|
-
# construct hash based on manifest_type-alg.txt files in bag home dir
|
|
162
|
-
# key: file_name, relative to base_path, value: hash of checksum alg => checksum value
|
|
163
|
-
def checksums_hash_from_manifest_files(manifest_type)
|
|
164
|
-
checksums_hash = {}
|
|
165
|
-
deposit_bag_pathname.children.each do |child_pathname|
|
|
166
|
-
if child_pathname.file?
|
|
167
|
-
child_fname = child_pathname.basename.to_s
|
|
168
|
-
match_result = child_fname.match("^#{manifest_type}-(.*).txt")
|
|
169
|
-
if match_result
|
|
170
|
-
checksum_type = match_result.captures.first.to_sym
|
|
171
|
-
if RECOGNIZED_CHECKSUM_ALGORITHMS.include?(checksum_type)
|
|
172
|
-
child_pathname.readlines.each do |line|
|
|
173
|
-
line.chomp!.strip!
|
|
174
|
-
checksum, file_name = line.split(/[\s*]+/, 2)
|
|
175
|
-
file_checksums = checksums_hash[file_name] || {}
|
|
176
|
-
file_checksums[checksum_type] = checksum
|
|
177
|
-
checksums_hash[file_name] = file_checksums
|
|
178
|
-
end
|
|
179
|
-
else
|
|
180
|
-
result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: child_pathname)
|
|
181
|
-
end
|
|
182
|
-
end
|
|
183
|
-
end
|
|
184
|
-
end
|
|
185
|
-
checksums_hash
|
|
186
|
-
end
|
|
187
|
-
|
|
188
|
-
# generate hash of checksums by file name for bag home dir files
|
|
189
|
-
def generate_tagmanifest_checksums_hash(types_to_generate)
|
|
190
|
-
# all names in the bag home dir except those starting with 'tagmanifest'
|
|
191
|
-
home_dir_pathnames = deposit_bag_pathname.children.reject { |file| file.basename.to_s.start_with?(TAGMANIFEST) }
|
|
192
|
-
hash_with_full_pathnames = generate_checksums_hash(home_dir_pathnames, types_to_generate)
|
|
193
|
-
# return hash keys as basenames only
|
|
194
|
-
hash_with_full_pathnames.transform_keys { |k| Pathname.new(k).basename.to_s }
|
|
195
|
-
end
|
|
196
|
-
|
|
197
|
-
# generate hash of checksums by file name for bag data dir files
|
|
198
|
-
def generate_payload_checksums(types_to_generate)
|
|
199
|
-
data_pathnames = deposit_bag_pathname.join(DATA_DIR_BASENAME).find
|
|
200
|
-
hash_with_full_pathnames = generate_checksums_hash(data_pathnames, types_to_generate)
|
|
201
|
-
# return hash keys beginning with 'data/'
|
|
202
|
-
hash_with_full_pathnames.transform_keys { |k| Pathname.new(k).relative_path_from(deposit_bag_pathname).to_s }
|
|
203
|
-
end
|
|
204
|
-
|
|
205
|
-
def generate_checksums_hash(pathnames, types_to_generate)
|
|
206
|
-
file_checksums_hash = {}
|
|
207
|
-
pathnames.each do |pathname|
|
|
208
|
-
file_checksums_hash[pathname.to_s] = generated_checksums(pathname, types_to_generate) if pathname.file?
|
|
209
|
-
end
|
|
210
|
-
file_checksums_hash
|
|
211
|
-
end
|
|
212
|
-
|
|
213
|
-
def generated_checksums(pathname, types_to_generate)
|
|
214
|
-
my_digester_hash = digester_hash(types_to_generate)
|
|
215
|
-
pathname.open('r') do |stream|
|
|
216
|
-
while (buffer = stream.read(8192))
|
|
217
|
-
my_digester_hash.each_value { |digest| digest.update(buffer) }
|
|
218
|
-
end
|
|
219
|
-
end
|
|
220
|
-
file_checksums = {}
|
|
221
|
-
my_digester_hash.each do |checksum_type, digest|
|
|
222
|
-
file_checksums[checksum_type] = digest.hexdigest
|
|
223
|
-
end
|
|
224
|
-
file_checksums
|
|
225
|
-
end
|
|
226
|
-
|
|
227
|
-
def digester_hash(types_to_generate = DEFAULT_CHECKSUM_TYPES)
|
|
228
|
-
types_to_generate.each_with_object({}) do |checksum_type, digester_hash|
|
|
229
|
-
case checksum_type
|
|
230
|
-
when :md5
|
|
231
|
-
digester_hash[checksum_type] = Digest::MD5.new
|
|
232
|
-
when :sha1
|
|
233
|
-
digester_hash[checksum_type] = Digest::SHA1.new
|
|
234
|
-
when :sha256
|
|
235
|
-
digester_hash[checksum_type] = Digest::SHA2.new(256)
|
|
236
|
-
when :sha384
|
|
237
|
-
digesters[checksum_type] = Digest::SHA2.new(384)
|
|
238
|
-
when :sha512
|
|
239
|
-
digesters[checksum_type] = Digest::SHA2.new(512)
|
|
240
|
-
else
|
|
241
|
-
result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: nil)
|
|
242
|
-
end
|
|
243
|
-
digester_hash
|
|
244
|
-
end
|
|
245
|
-
end
|
|
246
|
-
|
|
247
|
-
def verify_manifest_checksums(manifest_type, manifests_checksum_hash, generated_checksum_hash)
|
|
248
|
-
diff_hash = {}
|
|
249
|
-
# NOTE: this is intentionally | instead of ||
|
|
250
|
-
(manifests_checksum_hash.keys | generated_checksum_hash.keys).each do |file_name|
|
|
251
|
-
manifest_checksums = manifests_checksum_hash[file_name] || {}
|
|
252
|
-
generated_checksums = generated_checksum_hash[file_name] || {}
|
|
253
|
-
if manifest_checksums != generated_checksums
|
|
254
|
-
cdh = checksums_diff_hash(manifest_checksums, generated_checksums, manifest_type, 'generated')
|
|
255
|
-
diff_hash[file_name] = cdh if cdh
|
|
256
|
-
end
|
|
257
|
-
end
|
|
258
|
-
return if diff_hash.empty?
|
|
259
|
-
|
|
260
|
-
err_data = {
|
|
261
|
-
manifest_type: manifest_type,
|
|
262
|
-
diffs: diff_hash
|
|
263
|
-
}
|
|
264
|
-
result_array << single_error_hash(CHECKSUM_MISMATCH, err_data)
|
|
265
|
-
end
|
|
266
|
-
|
|
267
|
-
def checksums_diff_hash(left_checksums, right_checksums, left_label, right_label)
|
|
268
|
-
diff_hash = {}
|
|
269
|
-
# NOTE: these are intentionally & and | instead of && and ||
|
|
270
|
-
checksum_types_to_compare = (left_checksums.keys & right_checksums.keys)
|
|
271
|
-
checksum_types_to_compare = (left_checksums.keys | right_checksums.keys) if checksum_types_to_compare.empty?
|
|
272
|
-
checksum_types_to_compare.each do |type|
|
|
273
|
-
left_checksum = left_checksums[type]
|
|
274
|
-
right_checksum = right_checksums[type]
|
|
275
|
-
diff_hash[type] = { left_label => left_checksum, right_label => right_checksum } if left_checksum != right_checksum
|
|
276
|
-
end
|
|
277
|
-
diff_hash.empty? ? nil : diff_hash
|
|
278
|
-
end
|
|
279
|
-
|
|
280
|
-
def verify_payload_size
|
|
281
|
-
sizes_from_bag_info_file = bag_info_payload_size
|
|
282
|
-
generated_sizes = generated_payload_size
|
|
283
|
-
return if sizes_from_bag_info_file == generated_sizes
|
|
284
|
-
|
|
285
|
-
err_data = {
|
|
286
|
-
bag_info_sizes: sizes_from_bag_info_file,
|
|
287
|
-
generated_sizes: generated_sizes
|
|
288
|
-
}
|
|
289
|
-
result_array << single_error_hash(PAYLOAD_SIZE_MISMATCH, err_data)
|
|
290
|
-
end
|
|
291
|
-
|
|
292
|
-
def bag_info_payload_size
|
|
293
|
-
bag_info_txt_pathname = deposit_bag_pathname.join(BAG_INFO_TXT_BASENAME)
|
|
294
|
-
bag_info_txt_pathname.readlines.each do |line|
|
|
295
|
-
line.chomp!.strip!
|
|
296
|
-
key, value = line.split(':', 2)
|
|
297
|
-
if key.strip == 'Payload-Oxum'
|
|
298
|
-
num_bytes, num_files = value.strip.split('.') if value
|
|
299
|
-
return { bytes: num_bytes.to_i, files: num_files.to_i }
|
|
300
|
-
end
|
|
301
|
-
end
|
|
302
|
-
end
|
|
303
|
-
|
|
304
|
-
def generated_payload_size
|
|
305
|
-
payload_pathname = deposit_bag_pathname.join(DATA_DIR_BASENAME)
|
|
306
|
-
payload_pathname.find.select(&:file?).each_with_object(bytes: 0, files: 0) do |file, hash|
|
|
307
|
-
hash[:bytes] += file.size
|
|
308
|
-
hash[:files] += 1
|
|
309
|
-
hash
|
|
310
|
-
end
|
|
311
|
-
end
|
|
312
|
-
|
|
313
|
-
# checksums_hash: { fname => {:md5=>"xxx", :sha1=>"yyy"}, fname => ... }
|
|
314
|
-
def checksum_types_from_manifest_checksums_hash(checksums_hash)
|
|
315
|
-
types = []
|
|
316
|
-
checksums_hash.each_value { |v| v.each_key { |k| types << k unless types.include?(k) } }
|
|
317
|
-
types
|
|
318
|
-
end
|
|
319
|
-
|
|
320
|
-
def single_error_hash(error_code, err_data_hash)
|
|
321
|
-
{ error_code => error_code_msg(error_code, err_data_hash) }
|
|
322
|
-
end
|
|
323
|
-
|
|
324
|
-
def error_code_msg(error_code, err_data_hash)
|
|
325
|
-
ERROR_CODE_TO_MESSAGES[error_code] % err_data_hash
|
|
326
|
-
end
|
|
327
|
-
end
|
|
328
|
-
end
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Moab
|
|
4
|
-
# The descriptive information about a digital object's collection of versions
|
|
5
|
-
#
|
|
6
|
-
# ====Data Model
|
|
7
|
-
# * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
|
|
8
|
-
# * {VersionMetadataEntry} [1..*] = attributes of a digital object version
|
|
9
|
-
# * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
|
|
10
|
-
#
|
|
11
|
-
# @example {include:file:spec/fixtures/data/jq937jp0017/v3/metadata/versionMetadata.xml}
|
|
12
|
-
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
|
13
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
|
14
|
-
class VersionMetadata < Serializer::Manifest
|
|
15
|
-
include HappyMapper
|
|
16
|
-
|
|
17
|
-
# The name of the XML element used to serialize this objects data
|
|
18
|
-
tag 'versionMetadata'
|
|
19
|
-
|
|
20
|
-
# (see Serializable#initialize)
|
|
21
|
-
def initialize(opts = {})
|
|
22
|
-
@versions = []
|
|
23
|
-
super(opts)
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
# @attribute
|
|
27
|
-
# @return [String] The digital object identifier
|
|
28
|
-
attribute :digital_object_id, String, tag: 'objectId'
|
|
29
|
-
|
|
30
|
-
# @attribute
|
|
31
|
-
# @return [Array<VersionMetadataEntry>] An array of version metadata entries, one per version
|
|
32
|
-
has_many :versions, VersionMetadataEntry, tag: 'version'
|
|
33
|
-
end
|
|
34
|
-
end
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Moab
|
|
4
|
-
# A container element to record object version lifecycle events with timestamps
|
|
5
|
-
#
|
|
6
|
-
# ====Data Model
|
|
7
|
-
# * {VersionMetadata} = descriptive information about a digital object's versions
|
|
8
|
-
# * {VersionMetadataEntry} [1..*] = attributes of a digital object version
|
|
9
|
-
# * <b>{VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps</b>
|
|
10
|
-
#
|
|
11
|
-
# @see VersionMetadata
|
|
12
|
-
# @see VersionMetadataEntry
|
|
13
|
-
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
|
14
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
|
15
|
-
class VersionMetadataEvent < Serializer::Serializable
|
|
16
|
-
include HappyMapper
|
|
17
|
-
|
|
18
|
-
# The name of the XML element used to serialize this objects data
|
|
19
|
-
tag 'event'
|
|
20
|
-
|
|
21
|
-
# (see Serializable#initialize)
|
|
22
|
-
def initialize(opts = {})
|
|
23
|
-
super(opts)
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
# @attribute
|
|
27
|
-
# @return [String] The type of event
|
|
28
|
-
attribute :type, String
|
|
29
|
-
|
|
30
|
-
# @attribute
|
|
31
|
-
# @return [String] The date and time of an event
|
|
32
|
-
attribute :datetime, String
|
|
33
|
-
|
|
34
|
-
def datetime=(event_datetime)
|
|
35
|
-
@datetime = Moab::UtcTime.input(event_datetime)
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
def datetime
|
|
39
|
-
Moab::UtcTime.output(@datetime)
|
|
40
|
-
end
|
|
41
|
-
end
|
|
42
|
-
end
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Stanford
|
|
4
|
-
# Utility Class for extracting content or other information from a Fedora Instance
|
|
5
|
-
#
|
|
6
|
-
# ====Data Model
|
|
7
|
-
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
|
|
8
|
-
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
|
|
9
|
-
# * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
|
|
10
|
-
#
|
|
11
|
-
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
|
12
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
|
13
|
-
class ActiveFedoraObject
|
|
14
|
-
# @param fedora_object [Object] The Active Fedora representation of the Fedora Object
|
|
15
|
-
# @return [Stanford::ActiveFedoraObject] Create a u
|
|
16
|
-
def initialize(fedora_object)
|
|
17
|
-
@fedora_object = fedora_object
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
# @return [Object] The Active Fedora representation of the Fedora Object
|
|
21
|
-
attr_accessor :fedora_object
|
|
22
|
-
|
|
23
|
-
# @api external
|
|
24
|
-
# @param ds_id [String] The datastream identifier
|
|
25
|
-
# @return [String] The content of the specified datastream
|
|
26
|
-
def get_datastream_content(ds_id)
|
|
27
|
-
@fedora_object.datastreams[ds_id].content
|
|
28
|
-
end
|
|
29
|
-
end
|
|
30
|
-
end
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Stanford
|
|
4
|
-
# Stanford-specific utility methods for interfacing with DOR metadata files
|
|
5
|
-
#
|
|
6
|
-
# ====Data Model
|
|
7
|
-
# * <b>{DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)</b>
|
|
8
|
-
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
|
|
9
|
-
# * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
|
|
10
|
-
#
|
|
11
|
-
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
|
12
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
|
13
|
-
class DorMetadata
|
|
14
|
-
# @return [String] The digital object identifier (druid)
|
|
15
|
-
attr_accessor :digital_object_id
|
|
16
|
-
|
|
17
|
-
# @return [Integer] \@versionId = The ordinal version number
|
|
18
|
-
attr_accessor :version_id
|
|
19
|
-
|
|
20
|
-
# @param digital_object_id [String] The digital object identifier
|
|
21
|
-
# @param version_id [Integer] The ordinal version number
|
|
22
|
-
# @return [Stanford::DorMetadata]
|
|
23
|
-
def initialize(digital_object_id, version_id = nil)
|
|
24
|
-
@digital_object_id = digital_object_id
|
|
25
|
-
@version_id = version_id
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
# @api internal
|
|
29
|
-
# @param directory [String] The location of the directory to be inventoried
|
|
30
|
-
# @param version_id (see #initialize)
|
|
31
|
-
# @return [FileInventory] Inventory of the files under the specified directory
|
|
32
|
-
def inventory_from_directory(directory, version_id = nil)
|
|
33
|
-
version_id ||= @version_id
|
|
34
|
-
version_inventory = Moab::FileInventory.new(type: 'version', digital_object_id: @digital_object_id,
|
|
35
|
-
version_id: version_id)
|
|
36
|
-
content_metadata = IO.read(File.join(directory, 'contentMetadata.xml'))
|
|
37
|
-
content_group = Stanford::ContentInventory.new.group_from_cm(content_metadata, 'preserve')
|
|
38
|
-
version_inventory.groups << content_group
|
|
39
|
-
metadata_group = Moab::FileGroup.new(group_id: 'metadata').group_from_directory(directory)
|
|
40
|
-
version_inventory.groups << metadata_group
|
|
41
|
-
version_inventory
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
end
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'find'
|
|
4
|
-
|
|
5
|
-
module Stanford
|
|
6
|
-
##
|
|
7
|
-
# methods for dealing with a directory which stores Moab objects
|
|
8
|
-
class MoabStorageDirectory
|
|
9
|
-
DRUID_TREE_REGEXP = '[[:lower:]]{2}/\\d{3}/[[:lower:]]{2}/\\d{4}'
|
|
10
|
-
DRUID_REGEXP = '[[:lower:]]{2}\\d{3}[[:lower:]]{2}\\d{4}'
|
|
11
|
-
|
|
12
|
-
def self.find_moab_paths(storage_dir)
|
|
13
|
-
Find.find(storage_dir) do |path|
|
|
14
|
-
Find.prune unless File.directory?(path) # don't bother with a matching on files, we only care about directories
|
|
15
|
-
path_match_data = storage_dir_regexp(storage_dir).match(path)
|
|
16
|
-
if path_match_data
|
|
17
|
-
yield path_match_data[1], path, path_match_data # yield the druid, the full path, and the MatchData object
|
|
18
|
-
Find.prune # we don't care about what's in the moab dir, we just want the paths that look like moabs
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
def self.list_moab_druids(storage_dir)
|
|
24
|
-
druids = []
|
|
25
|
-
find_moab_paths(storage_dir) { |druid, _path, _path_match_data| druids << druid }
|
|
26
|
-
druids
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
private_class_method def self.storage_dir_regexps
|
|
30
|
-
@storage_dir_regexps ||= {}
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
# this regexp caching makes things many times faster (e.g. went from ~2200 s to crawl disk11, down to ~300 s)
|
|
34
|
-
private_class_method def self.storage_dir_regexp(storage_dir)
|
|
35
|
-
storage_dir_regexps[storage_dir] ||= Regexp.new("^#{storage_dir}/#{DRUID_TREE_REGEXP}/(#{DRUID_REGEXP})$")
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
end
|