moab-versioning 4.3.0 → 5.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/moab/bagger.rb +7 -2
- data/lib/moab/config.rb +40 -7
- data/lib/moab/exceptions.rb +6 -0
- data/lib/moab/file_group.rb +12 -9
- data/lib/moab/file_group_difference.rb +26 -23
- data/lib/moab/file_group_difference_subset.rb +5 -3
- data/lib/moab/file_instance.rb +4 -1
- data/lib/moab/file_instance_difference.rb +5 -3
- data/lib/moab/file_inventory.rb +13 -9
- data/lib/moab/file_inventory_difference.rb +8 -6
- data/lib/moab/file_manifestation.rb +5 -2
- data/lib/moab/file_signature.rb +12 -7
- data/lib/moab/signature_catalog.rb +13 -13
- data/lib/moab/signature_catalog_entry.rb +6 -4
- data/lib/moab/stanford.rb +2 -10
- data/lib/moab/storage_object.rb +11 -5
- data/lib/moab/storage_object_validator.rb +24 -10
- data/lib/moab/storage_object_version.rb +19 -12
- data/lib/moab/storage_repository.rb +49 -7
- data/lib/moab/storage_services.rb +12 -9
- data/lib/moab/utc_time.rb +2 -0
- data/lib/moab/verification_result.rb +4 -3
- data/lib/moab/version_metadata_entry.rb +6 -4
- data/lib/moab.rb +2 -9
- data/lib/serializer/manifest.rb +4 -2
- data/lib/serializer/serializable.rb +6 -1
- data/lib/serializer.rb +2 -0
- data/lib/stanford/content_inventory.rb +23 -19
- data/lib/stanford/storage_object_validator.rb +2 -0
- data/lib/stanford/storage_repository.rb +6 -2
- data/lib/stanford/storage_services.rb +2 -0
- metadata +22 -42
- data/lib/moab/deposit_bag_validator.rb +0 -323
- data/lib/moab/version_metadata.rb +0 -32
- data/lib/moab/version_metadata_event.rb +0 -40
- data/lib/stanford/active_fedora_object.rb +0 -28
- data/lib/stanford/dor_metadata.rb +0 -41
- data/lib/stanford/moab_storage_directory.rb +0 -36
@@ -1,323 +0,0 @@
|
|
1
|
-
module Moab
|
2
|
-
# Given a deposit bag, ensures the contents valid for becoming a StorageObjectVersion
|
3
|
-
# this is a Shameless Green implementation, combining code from:
|
4
|
-
# - sdr-preservation-core/lib/sdr_ingest/validate_bag <-- old preservation robots
|
5
|
-
# - archive-utils/lib/bagit_bag <-- gem only used by sdr-preservation-robots
|
6
|
-
# - archive-utils/lib/file_fixity
|
7
|
-
# - archive-utils/lib/fixity
|
8
|
-
# this code adds duplication to this gem (see github issue #119);
|
9
|
-
# for example, computing checksums is done
|
10
|
-
# - deposit_bag_validator
|
11
|
-
# - file_signature
|
12
|
-
class DepositBagValidator
|
13
|
-
BAG_DIR_NOT_FOUND = :bag_dir_not_found
|
14
|
-
CHECKSUM_MISMATCH = :checksum_mismatch
|
15
|
-
CHECKSUM_TYPE_UNRECOGNIZED = :checksum_type_unrecognized
|
16
|
-
INVALID_VERSION_XXX_XML = :invalid_versionXxx_xml
|
17
|
-
PAYLOAD_SIZE_MISMATCH = :payload_size_mismatch
|
18
|
-
REQUIRED_FILE_NOT_FOUND = :required_file_not_found
|
19
|
-
VERSION_MISMATCH_TO_MOAB = :version_mismatch_to_moab
|
20
|
-
VERSION_MISSING_FROM_FILE = :version_missing_from_file
|
21
|
-
|
22
|
-
ERROR_CODE_TO_MESSAGES = {
|
23
|
-
BAG_DIR_NOT_FOUND => "Deposit bag directory %{bag_dir} does not exist",
|
24
|
-
CHECKSUM_MISMATCH => "Failed %{manifest_type} verification. Differences: \n%{diffs}",
|
25
|
-
CHECKSUM_TYPE_UNRECOGNIZED => "Checksum type unrecognized: %{checksum_type}; file: %{filename}",
|
26
|
-
INVALID_VERSION_XXX_XML => "Unable to parse %{file_pathname}: %{err_info}",
|
27
|
-
PAYLOAD_SIZE_MISMATCH => "Failed payload size verification. Expected: %{bag_info_sizes}; found: %{generated_sizes}",
|
28
|
-
REQUIRED_FILE_NOT_FOUND => "Deposit bag required file %{file_pathname} not found",
|
29
|
-
VERSION_MISMATCH_TO_MOAB => "Version mismatch in %{file_pathname}: Moab expected %{new_version}; found %{file_version}",
|
30
|
-
VERSION_MISSING_FROM_FILE => "Version xml file %{version_file} missing data at %{xpath} containing version id"
|
31
|
-
}.freeze
|
32
|
-
|
33
|
-
REQUIRED_MANIFEST_CHECKSUM_TYPE = 'sha256'.freeze
|
34
|
-
RECOGNIZED_CHECKSUM_ALGORITHMS = %i[md5 sha1 sha256 sha384 sha512].freeze
|
35
|
-
|
36
|
-
TAGMANIFEST = 'tagmanifest'.freeze
|
37
|
-
MANIFEST = 'manifest'.freeze
|
38
|
-
DATA_DIR_BASENAME = 'data'.freeze
|
39
|
-
BAG_INFO_TXT_BASENAME = 'bag-info.txt'.freeze
|
40
|
-
VERSION_ADDITIONS_BASENAME = 'versionAdditions.xml'.freeze
|
41
|
-
VERSION_INVENTORY_BASENAME = 'versionInventory.xml'.freeze
|
42
|
-
VERSION_METADATA_PATH = "#{DATA_DIR_BASENAME}/metadata/versionMetadata.xml".freeze
|
43
|
-
|
44
|
-
REQUIRED_BAG_FILES = [
|
45
|
-
DATA_DIR_BASENAME,
|
46
|
-
'bagit.txt'.freeze,
|
47
|
-
BAG_INFO_TXT_BASENAME,
|
48
|
-
"#{MANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt".freeze,
|
49
|
-
"#{TAGMANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt".freeze,
|
50
|
-
VERSION_ADDITIONS_BASENAME,
|
51
|
-
VERSION_INVENTORY_BASENAME,
|
52
|
-
VERSION_METADATA_PATH
|
53
|
-
].freeze
|
54
|
-
|
55
|
-
attr_reader :deposit_bag_pathname, :expected_new_version, :result_array
|
56
|
-
|
57
|
-
def initialize(storage_object)
|
58
|
-
@deposit_bag_pathname = storage_object.deposit_bag_pathname
|
59
|
-
@expected_new_version = storage_object.current_version_id + 1
|
60
|
-
@result_array = []
|
61
|
-
end
|
62
|
-
|
63
|
-
# returns Array of tiny error hashes, allowing multiple occurrences of a single error code
|
64
|
-
def validation_errors
|
65
|
-
return [single_error_hash(BAG_DIR_NOT_FOUND, bag_dir: deposit_bag_pathname)] unless deposit_bag_pathname.exist?
|
66
|
-
return result_array unless required_bag_files_exist?
|
67
|
-
verify_version
|
68
|
-
verify_tagmanifests
|
69
|
-
verify_payload_size
|
70
|
-
verify_payload_manifests
|
71
|
-
result_array # attr that accumulates any errors encountered along the way
|
72
|
-
end
|
73
|
-
|
74
|
-
private
|
75
|
-
|
76
|
-
def bag_dir_exists?
|
77
|
-
deposit_bag_pathname.exist?
|
78
|
-
end
|
79
|
-
|
80
|
-
# assumes this is called when result_array is empty, as subsequent checks will use these required files
|
81
|
-
def required_bag_files_exist?
|
82
|
-
REQUIRED_BAG_FILES.each do |filename|
|
83
|
-
pathname = deposit_bag_pathname.join(filename)
|
84
|
-
result_array << single_error_hash(REQUIRED_FILE_NOT_FOUND, file_pathname: pathname) unless pathname.exist?
|
85
|
-
end
|
86
|
-
result_array.empty?
|
87
|
-
end
|
88
|
-
|
89
|
-
def verify_version
|
90
|
-
version_md_pathname = deposit_bag_pathname.join(VERSION_METADATA_PATH)
|
91
|
-
version_from_file = last_version_id_from_version_md_xml(version_md_pathname)
|
92
|
-
verify_version_from_xml_file(version_md_pathname, version_from_file) if version_from_file
|
93
|
-
|
94
|
-
version_additions_pathname = deposit_bag_pathname.join(VERSION_ADDITIONS_BASENAME)
|
95
|
-
version_from_file = version_id_from_version_manifest_xml(version_additions_pathname)
|
96
|
-
verify_version_from_xml_file(version_additions_pathname, version_from_file) if version_from_file
|
97
|
-
|
98
|
-
version_inventory_pathname = deposit_bag_pathname.join(VERSION_INVENTORY_BASENAME)
|
99
|
-
version_from_file = version_id_from_version_manifest_xml(version_inventory_pathname)
|
100
|
-
verify_version_from_xml_file(version_inventory_pathname, version_from_file) if version_from_file
|
101
|
-
end
|
102
|
-
|
103
|
-
def last_version_id_from_version_md_xml(version_md_pathname)
|
104
|
-
last_version_id_from_xml(version_md_pathname, '/versionMetadata/version/@versionId')
|
105
|
-
end
|
106
|
-
|
107
|
-
def version_id_from_version_manifest_xml(version_manifest_xml_pathname)
|
108
|
-
last_version_id_from_xml(version_manifest_xml_pathname, '/fileInventory/@versionId')
|
109
|
-
end
|
110
|
-
|
111
|
-
def last_version_id_from_xml(pathname, xpath)
|
112
|
-
doc = Nokogiri::XML(File.open(pathname.to_s), &:strict)
|
113
|
-
version_id = doc.xpath(xpath).last.text unless doc.xpath(xpath).empty?
|
114
|
-
return version_id.to_i if version_id
|
115
|
-
err_data = {
|
116
|
-
version_file: pathname,
|
117
|
-
xpath: xpath
|
118
|
-
}
|
119
|
-
result_array << single_error_hash(VERSION_MISSING_FROM_FILE, err_data) unless version_id
|
120
|
-
nil
|
121
|
-
rescue StandardError => e
|
122
|
-
err_data = {
|
123
|
-
file_pathname: pathname,
|
124
|
-
err_info: "#{e}\n#{e.backtrace}"
|
125
|
-
}
|
126
|
-
result_array << single_error_hash(INVALID_VERSION_XXX_XML, err_data)
|
127
|
-
nil
|
128
|
-
end
|
129
|
-
|
130
|
-
def verify_version_from_xml_file(file_pathname, found)
|
131
|
-
return if found == expected_new_version
|
132
|
-
err_data = {
|
133
|
-
file_pathname: file_pathname,
|
134
|
-
new_version: expected_new_version,
|
135
|
-
file_version: found
|
136
|
-
}
|
137
|
-
result_array << single_error_hash(VERSION_MISMATCH_TO_MOAB, err_data)
|
138
|
-
end
|
139
|
-
|
140
|
-
# adds to result_array if tagmanifest checksums don't match generated checksums
|
141
|
-
def verify_tagmanifests
|
142
|
-
tagmanifests_checksums_hash = checksums_hash_from_manifest_files(TAGMANIFEST)
|
143
|
-
types_to_generate = checksum_types_from_manifest_checksums_hash(tagmanifests_checksums_hash)
|
144
|
-
generated_checksums_hash = generate_tagmanifest_checksums_hash(types_to_generate)
|
145
|
-
verify_manifest_checksums(TAGMANIFEST, tagmanifests_checksums_hash, generated_checksums_hash)
|
146
|
-
end
|
147
|
-
|
148
|
-
# adds to result_array if manifest checksums don't match generated checksums
|
149
|
-
def verify_payload_manifests
|
150
|
-
manifests_checksums_hash = checksums_hash_from_manifest_files(MANIFEST)
|
151
|
-
types_to_generate = checksum_types_from_manifest_checksums_hash(manifests_checksums_hash)
|
152
|
-
generated_checksums_hash = generate_payload_checksums(types_to_generate)
|
153
|
-
verify_manifest_checksums(MANIFEST, manifests_checksums_hash, generated_checksums_hash)
|
154
|
-
end
|
155
|
-
|
156
|
-
# construct hash based on manifest_type-alg.txt files in bag home dir
|
157
|
-
# key: file_name, relative to base_path, value: hash of checksum alg => checksum value
|
158
|
-
def checksums_hash_from_manifest_files(manifest_type)
|
159
|
-
checksums_hash = {}
|
160
|
-
deposit_bag_pathname.children.each do |child_pathname|
|
161
|
-
if child_pathname.file?
|
162
|
-
child_fname = child_pathname.basename.to_s
|
163
|
-
match_result = child_fname.match("^#{manifest_type}-(.*).txt")
|
164
|
-
if match_result
|
165
|
-
checksum_type = match_result.captures.first.to_sym
|
166
|
-
if RECOGNIZED_CHECKSUM_ALGORITHMS.include?(checksum_type)
|
167
|
-
child_pathname.readlines.each do |line|
|
168
|
-
line.chomp!.strip!
|
169
|
-
checksum, file_name = line.split(/[\s*]+/, 2)
|
170
|
-
file_checksums = checksums_hash[file_name] || {}
|
171
|
-
file_checksums[checksum_type] = checksum
|
172
|
-
checksums_hash[file_name] = file_checksums
|
173
|
-
end
|
174
|
-
else
|
175
|
-
result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: child_pathname)
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
end
|
180
|
-
checksums_hash
|
181
|
-
end
|
182
|
-
|
183
|
-
# generate hash of checksums by file name for bag home dir files
|
184
|
-
def generate_tagmanifest_checksums_hash(types_to_generate)
|
185
|
-
# all names in the bag home dir except those starting with 'tagmanifest'
|
186
|
-
home_dir_pathnames = deposit_bag_pathname.children.reject { |file| file.basename.to_s.start_with?(TAGMANIFEST) }
|
187
|
-
hash_with_full_pathnames = generate_checksums_hash(home_dir_pathnames, types_to_generate)
|
188
|
-
# return hash keys as basenames only
|
189
|
-
hash_with_full_pathnames.map { |k, v| [Pathname.new(k).basename.to_s, v] }.to_h
|
190
|
-
end
|
191
|
-
|
192
|
-
# generate hash of checksums by file name for bag data dir files
|
193
|
-
def generate_payload_checksums(types_to_generate)
|
194
|
-
data_pathnames = deposit_bag_pathname.join(DATA_DIR_BASENAME).find
|
195
|
-
hash_with_full_pathnames = generate_checksums_hash(data_pathnames, types_to_generate)
|
196
|
-
# return hash keys beginning with 'data/'
|
197
|
-
hash_with_full_pathnames.map { |k, v| [Pathname.new(k).relative_path_from(deposit_bag_pathname).to_s, v] }.to_h
|
198
|
-
end
|
199
|
-
|
200
|
-
def generate_checksums_hash(pathnames, types_to_generate)
|
201
|
-
file_checksums_hash = {}
|
202
|
-
pathnames.each do |pathname|
|
203
|
-
file_checksums_hash[pathname.to_s] = generated_checksums(pathname, types_to_generate) if pathname.file?
|
204
|
-
end
|
205
|
-
file_checksums_hash
|
206
|
-
end
|
207
|
-
|
208
|
-
def generated_checksums(pathname, types_to_generate)
|
209
|
-
my_digester_hash = digester_hash(types_to_generate)
|
210
|
-
pathname.open('r') do |stream|
|
211
|
-
while (buffer = stream.read(8192))
|
212
|
-
my_digester_hash.each_value { |digest| digest.update(buffer) }
|
213
|
-
end
|
214
|
-
end
|
215
|
-
file_checksums = {}
|
216
|
-
my_digester_hash.each do |checksum_type, digest|
|
217
|
-
file_checksums[checksum_type] = digest.hexdigest
|
218
|
-
end
|
219
|
-
file_checksums
|
220
|
-
end
|
221
|
-
|
222
|
-
def digester_hash(types_to_generate = DEFAULT_CHECKSUM_TYPES)
|
223
|
-
types_to_generate.each_with_object({}) do |checksum_type, digester_hash|
|
224
|
-
case checksum_type
|
225
|
-
when :md5
|
226
|
-
digester_hash[checksum_type] = Digest::MD5.new
|
227
|
-
when :sha1
|
228
|
-
digester_hash[checksum_type] = Digest::SHA1.new
|
229
|
-
when :sha256
|
230
|
-
digester_hash[checksum_type] = Digest::SHA2.new(256)
|
231
|
-
when :sha384
|
232
|
-
digesters[checksum_type] = Digest::SHA2.new(384)
|
233
|
-
when :sha512
|
234
|
-
digesters[checksum_type] = Digest::SHA2.new(512)
|
235
|
-
else
|
236
|
-
result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: nil)
|
237
|
-
end
|
238
|
-
digester_hash
|
239
|
-
end
|
240
|
-
end
|
241
|
-
|
242
|
-
def verify_manifest_checksums(manifest_type, manifests_checksum_hash, generated_checksum_hash)
|
243
|
-
diff_hash = {}
|
244
|
-
# NOTE: this is intentionally | instead of ||
|
245
|
-
(manifests_checksum_hash.keys | generated_checksum_hash.keys).each do |file_name|
|
246
|
-
manifest_checksums = manifests_checksum_hash[file_name] || {}
|
247
|
-
generated_checksums = generated_checksum_hash[file_name] || {}
|
248
|
-
if manifest_checksums != generated_checksums
|
249
|
-
cdh = checksums_diff_hash(manifest_checksums, generated_checksums, manifest_type, 'generated')
|
250
|
-
diff_hash[file_name] = cdh if cdh
|
251
|
-
end
|
252
|
-
end
|
253
|
-
return if diff_hash.empty?
|
254
|
-
err_data = {
|
255
|
-
manifest_type: manifest_type,
|
256
|
-
diffs: diff_hash
|
257
|
-
}
|
258
|
-
result_array << single_error_hash(CHECKSUM_MISMATCH, err_data)
|
259
|
-
end
|
260
|
-
|
261
|
-
def checksums_diff_hash(left_checksums, right_checksums, left_label, right_label)
|
262
|
-
diff_hash = {}
|
263
|
-
# NOTE: these are intentionally & and | instead of && and ||
|
264
|
-
checksum_types_to_compare = (left_checksums.keys & right_checksums.keys)
|
265
|
-
checksum_types_to_compare = (left_checksums.keys | right_checksums.keys) if checksum_types_to_compare.empty?
|
266
|
-
checksum_types_to_compare.each do |type|
|
267
|
-
left_checksum = left_checksums[type]
|
268
|
-
right_checksum = right_checksums[type]
|
269
|
-
if left_checksum != right_checksum
|
270
|
-
diff_hash[type] = { left_label => left_checksum, right_label => right_checksum }
|
271
|
-
end
|
272
|
-
end
|
273
|
-
diff_hash.empty? ? nil : diff_hash
|
274
|
-
end
|
275
|
-
|
276
|
-
def verify_payload_size
|
277
|
-
sizes_from_bag_info_file = bag_info_payload_size
|
278
|
-
generated_sizes = generated_payload_size
|
279
|
-
return if sizes_from_bag_info_file == generated_sizes
|
280
|
-
err_data = {
|
281
|
-
bag_info_sizes: sizes_from_bag_info_file,
|
282
|
-
generated_sizes: generated_sizes
|
283
|
-
}
|
284
|
-
result_array << single_error_hash(PAYLOAD_SIZE_MISMATCH, err_data)
|
285
|
-
end
|
286
|
-
|
287
|
-
def bag_info_payload_size
|
288
|
-
bag_info_txt_pathname = deposit_bag_pathname.join(BAG_INFO_TXT_BASENAME)
|
289
|
-
bag_info_txt_pathname.readlines.each do |line|
|
290
|
-
line.chomp!.strip!
|
291
|
-
key, value = line.split(':', 2)
|
292
|
-
if key.strip == 'Payload-Oxum'
|
293
|
-
num_bytes, num_files = value.strip.split('.') if value
|
294
|
-
return { bytes: num_bytes.to_i, files: num_files.to_i }
|
295
|
-
end
|
296
|
-
end
|
297
|
-
end
|
298
|
-
|
299
|
-
def generated_payload_size
|
300
|
-
payload_pathname = deposit_bag_pathname.join(DATA_DIR_BASENAME)
|
301
|
-
payload_pathname.find.select(&:file?).each_with_object(bytes: 0, files: 0) do |file, hash|
|
302
|
-
hash[:bytes] += file.size
|
303
|
-
hash[:files] += 1
|
304
|
-
hash
|
305
|
-
end
|
306
|
-
end
|
307
|
-
|
308
|
-
# checksums_hash: { fname => {:md5=>"xxx", :sha1=>"yyy"}, fname => ... }
|
309
|
-
def checksum_types_from_manifest_checksums_hash(checksums_hash)
|
310
|
-
types = []
|
311
|
-
checksums_hash.each_value { |v| v.each_key { |k| types << k unless types.include?(k) } }
|
312
|
-
types
|
313
|
-
end
|
314
|
-
|
315
|
-
def single_error_hash(error_code, err_data_hash)
|
316
|
-
{ error_code => error_code_msg(error_code, err_data_hash) }
|
317
|
-
end
|
318
|
-
|
319
|
-
def error_code_msg(error_code, err_data_hash)
|
320
|
-
ERROR_CODE_TO_MESSAGES[error_code] % err_data_hash
|
321
|
-
end
|
322
|
-
end
|
323
|
-
end
|
@@ -1,32 +0,0 @@
|
|
1
|
-
module Moab
|
2
|
-
# The descriptive information about a digital object's collection of versions
|
3
|
-
#
|
4
|
-
# ====Data Model
|
5
|
-
# * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
|
6
|
-
# * {VersionMetadataEntry} [1..*] = attributes of a digital object version
|
7
|
-
# * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
|
8
|
-
#
|
9
|
-
# @example {include:file:spec/fixtures/data/jq937jp0017/v3/metadata/versionMetadata.xml}
|
10
|
-
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
11
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
12
|
-
class VersionMetadata < Serializer::Manifest
|
13
|
-
include HappyMapper
|
14
|
-
|
15
|
-
# The name of the XML element used to serialize this objects data
|
16
|
-
tag 'versionMetadata'
|
17
|
-
|
18
|
-
# (see Serializable#initialize)
|
19
|
-
def initialize(opts = {})
|
20
|
-
@versions = []
|
21
|
-
super(opts)
|
22
|
-
end
|
23
|
-
|
24
|
-
# @attribute
|
25
|
-
# @return [String] The digital object identifier
|
26
|
-
attribute :digital_object_id, String, :tag => 'objectId'
|
27
|
-
|
28
|
-
# @attribute
|
29
|
-
# @return [Array<VersionMetadataEntry>] An array of version metadata entries, one per version
|
30
|
-
has_many :versions, VersionMetadataEntry, :tag => 'version'
|
31
|
-
end
|
32
|
-
end
|
@@ -1,40 +0,0 @@
|
|
1
|
-
module Moab
|
2
|
-
# A container element to record object version lifecycle events with timestamps
|
3
|
-
#
|
4
|
-
# ====Data Model
|
5
|
-
# * {VersionMetadata} = descriptive information about a digital object's versions
|
6
|
-
# * {VersionMetadataEntry} [1..*] = attributes of a digital object version
|
7
|
-
# * <b>{VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps</b>
|
8
|
-
#
|
9
|
-
# @see VersionMetadata
|
10
|
-
# @see VersionMetadataEntry
|
11
|
-
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
12
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
13
|
-
class VersionMetadataEvent < Serializer::Serializable
|
14
|
-
include HappyMapper
|
15
|
-
|
16
|
-
# The name of the XML element used to serialize this objects data
|
17
|
-
tag 'event'
|
18
|
-
|
19
|
-
# (see Serializable#initialize)
|
20
|
-
def initialize(opts = {})
|
21
|
-
super(opts)
|
22
|
-
end
|
23
|
-
|
24
|
-
# @attribute
|
25
|
-
# @return [String] The type of event
|
26
|
-
attribute :type, String
|
27
|
-
|
28
|
-
# @attribute
|
29
|
-
# @return [String] The date and time of an event
|
30
|
-
attribute :datetime, String
|
31
|
-
|
32
|
-
def datetime=(event_datetime)
|
33
|
-
@datetime = Moab::UtcTime.input(event_datetime)
|
34
|
-
end
|
35
|
-
|
36
|
-
def datetime
|
37
|
-
Moab::UtcTime.output(@datetime)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
@@ -1,28 +0,0 @@
|
|
1
|
-
module Stanford
|
2
|
-
# Utility Class for extracting content or other information from a Fedora Instance
|
3
|
-
#
|
4
|
-
# ====Data Model
|
5
|
-
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
|
6
|
-
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
|
7
|
-
# * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
|
8
|
-
#
|
9
|
-
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
10
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
11
|
-
class ActiveFedoraObject
|
12
|
-
# @param fedora_object [Object] The Active Fedora representation of the Fedora Object
|
13
|
-
# @return [Stanford::ActiveFedoraObject] Create a u
|
14
|
-
def initialize(fedora_object)
|
15
|
-
@fedora_object = fedora_object
|
16
|
-
end
|
17
|
-
|
18
|
-
# @return [Object] The Active Fedora representation of the Fedora Object
|
19
|
-
attr_accessor :fedora_object
|
20
|
-
|
21
|
-
# @api external
|
22
|
-
# @param ds_id [String] The datastream identifier
|
23
|
-
# @return [String] The content of the specified datastream
|
24
|
-
def get_datastream_content(ds_id)
|
25
|
-
@fedora_object.datastreams[ds_id].content
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
@@ -1,41 +0,0 @@
|
|
1
|
-
module Stanford
|
2
|
-
# Stanford-specific utility methods for interfacing with DOR metadata files
|
3
|
-
#
|
4
|
-
# ====Data Model
|
5
|
-
# * <b>{DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)</b>
|
6
|
-
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
|
7
|
-
# * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
|
8
|
-
#
|
9
|
-
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
10
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
11
|
-
class DorMetadata
|
12
|
-
# @return [String] The digital object identifier (druid)
|
13
|
-
attr_accessor :digital_object_id
|
14
|
-
|
15
|
-
# @return [Integer] \@versionId = The ordinal version number
|
16
|
-
attr_accessor :version_id
|
17
|
-
|
18
|
-
# @param digital_object_id [String] The digital object identifier
|
19
|
-
# @param version_id [Integer] The ordinal version number
|
20
|
-
# @return [Stanford::DorMetadata]
|
21
|
-
def initialize(digital_object_id, version_id = nil)
|
22
|
-
@digital_object_id = digital_object_id
|
23
|
-
@version_id = version_id
|
24
|
-
end
|
25
|
-
|
26
|
-
# @api internal
|
27
|
-
# @param directory [String] The location of the directory to be inventoried
|
28
|
-
# @param version_id (see #initialize)
|
29
|
-
# @return [FileInventory] Inventory of the files under the specified directory
|
30
|
-
def inventory_from_directory(directory, version_id = nil)
|
31
|
-
version_id ||= @version_id
|
32
|
-
version_inventory = Moab::FileInventory.new(type: 'version', digital_object_id: @digital_object_id, version_id: version_id)
|
33
|
-
content_metadata = IO.read(File.join(directory, 'contentMetadata.xml'))
|
34
|
-
content_group = Stanford::ContentInventory.new.group_from_cm(content_metadata, 'preserve')
|
35
|
-
version_inventory.groups << content_group
|
36
|
-
metadata_group = Moab::FileGroup.new(:group_id => 'metadata').group_from_directory(directory)
|
37
|
-
version_inventory.groups << metadata_group
|
38
|
-
version_inventory
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
@@ -1,36 +0,0 @@
|
|
1
|
-
require 'find'
|
2
|
-
|
3
|
-
module Stanford
|
4
|
-
##
|
5
|
-
# methods for dealing with a directory which stores Moab objects
|
6
|
-
class MoabStorageDirectory
|
7
|
-
DRUID_TREE_REGEXP = '[[:lower:]]{2}/\\d{3}/[[:lower:]]{2}/\\d{4}'.freeze
|
8
|
-
DRUID_REGEXP = '[[:lower:]]{2}\\d{3}[[:lower:]]{2}\\d{4}'.freeze
|
9
|
-
|
10
|
-
def self.find_moab_paths(storage_dir)
|
11
|
-
Find.find(storage_dir) do |path|
|
12
|
-
Find.prune unless File.directory?(path) # don't bother with a matching on files, we only care about directories
|
13
|
-
path_match_data = storage_dir_regexp(storage_dir).match(path)
|
14
|
-
if path_match_data
|
15
|
-
yield path_match_data[1], path, path_match_data # yield the druid, the full path, and the MatchData object
|
16
|
-
Find.prune # we don't care about what's in the moab dir, we just want the paths that look like moabs
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def self.list_moab_druids(storage_dir)
|
22
|
-
druids = []
|
23
|
-
find_moab_paths(storage_dir) { |druid, _path, _path_match_data| druids << druid }
|
24
|
-
druids
|
25
|
-
end
|
26
|
-
|
27
|
-
private_class_method def self.storage_dir_regexps
|
28
|
-
@storage_dir_regexps ||= {}
|
29
|
-
end
|
30
|
-
|
31
|
-
# this regexp caching makes things many times faster (e.g. went from ~2200 s to crawl disk11, down to ~300 s)
|
32
|
-
private_class_method def self.storage_dir_regexp(storage_dir)
|
33
|
-
storage_dir_regexps[storage_dir] ||= Regexp.new("^#{storage_dir}/#{DRUID_TREE_REGEXP}/(#{DRUID_REGEXP})$")
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|