moab-versioning 4.4.2 → 5.0.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4b576c73187205ae4548c6460bac2b68cedab5dc7c3b94a66a5d8ec13729c6ae
4
- data.tar.gz: e714a158d2e1388dec94482c1fa858a2ee29a6eca6e0f9004658446801088476
3
+ metadata.gz: 1d109f89af3228b70fe8e8b8cf394830b9397f4815a8c2324bb7e09e849432d6
4
+ data.tar.gz: 9695c1dfb048663ff1b56084597b8ece2973966704a07d610dd7e537da8be61b
5
5
  SHA512:
6
- metadata.gz: 251925f9e4c45f20f6ae4270fc73adfe9f4645689bda08ebd42c2b707909177ab575e0427c83e321c2ebc039c735f9bc141b29037f28cb65249c25bd9e76d520
7
- data.tar.gz: '053391d53a3a3a46bbfedce8e7a04ea0f28a7f5c43689c2bde3c1901f9303eae7356d507904ec1892d9da61f2d4c9c6e58ed3fec894985e19b9ab85dd5fa65fd'
6
+ metadata.gz: 85be7b541e0056fd50296092448b01c4d83195529c00bd4aaa244cce6596336b0ae7b41ba855f54b618666bf265a9986844eaf6a0f7a78eb811b6ccdaf1329ae
7
+ data.tar.gz: 72dc3ada7f22d891e883ce08fe55c7c783689f73b8868e6c29bb5040c2bcc45197c044a3b5823a94f3e8ade7de6b16ccc291902ee142ab0e68036824623c6695
data/lib/moab/stanford.rb CHANGED
@@ -2,20 +2,10 @@
2
2
 
3
3
  require 'moab'
4
4
  require 'stanford/content_inventory'
5
- require 'stanford/dor_metadata'
6
5
  require 'stanford/storage_repository'
7
6
  require 'stanford/storage_services'
8
- require 'stanford/active_fedora_object'
9
- require 'stanford/moab_storage_directory'
10
7
  require 'stanford/storage_object_validator'
11
8
 
12
9
  # Stanford is a module that isolates classes specific to the Stanford Digital Repository
13
- #
14
- # ====Data Model
15
- # * <b>{DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)</b>
16
- # * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
17
- #
18
- # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
19
- # All rights reserved. See {file:LICENSE.rdoc} for details.
20
10
  module Stanford
21
11
  end
@@ -83,12 +83,6 @@ module Moab
83
83
  @@repository.storage_object(object_id).current_version_id
84
84
  end
85
85
 
86
- # @param [String] object_id The digital object identifier of the object
87
- # @return [Pathname] Pathname object containing the full path for the specified file
88
- def self.version_metadata(object_id)
89
- retrieve_file('metadata', 'versionMetadata.xml', object_id)
90
- end
91
-
92
86
  # @param [String] object_id The digital object identifier of the object
93
87
  # @param [Integer] version_id The ID of the version, if nil use latest version
94
88
  # @return [FileInventory] the file inventory for the specified object version
data/lib/moab.rb CHANGED
@@ -20,10 +20,6 @@
20
20
  # * {FileInstanceDifference} [1..*] = contains difference information at the file level
21
21
  # * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
22
22
  #
23
- # * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
24
- # * {VersionMetadataEntry} [1..*] = attributes of a digital object version
25
- # * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
26
- #
27
23
  # * <b>{StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods</b>
28
24
  # * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
29
25
  # * {Bagger} [1] = utility for creating bagit packages for ingest or dissemination
@@ -48,9 +44,6 @@ require 'moab/file_instance_difference'
48
44
  require 'moab/file_group_difference_subset'
49
45
  require 'moab/file_group_difference'
50
46
  require 'moab/file_inventory_difference'
51
- require 'moab/version_metadata_event'
52
- require 'moab/version_metadata_entry'
53
- require 'moab/version_metadata'
54
47
  require 'moab/bagger'
55
48
  require 'moab/storage_object'
56
49
  require 'moab/storage_object_version'
@@ -59,4 +52,3 @@ require 'moab/storage_services'
59
52
  require 'moab/exceptions'
60
53
  require 'moab/verification_result'
61
54
  require 'moab/storage_object_validator'
62
- require 'moab/deposit_bag_validator'
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: moab-versioning
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.4.2
4
+ version: 5.0.0.beta1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Darren Weber
8
8
  - Richard Anderson
9
9
  - Lynn McRae
10
10
  - Hannah Frost
11
- autorequire:
11
+ autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2021-06-01 00:00:00.000000000 Z
14
+ date: 2021-10-19 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: druid-tools
@@ -177,7 +177,6 @@ files:
177
177
  - lib/moab.rb
178
178
  - lib/moab/bagger.rb
179
179
  - lib/moab/config.rb
180
- - lib/moab/deposit_bag_validator.rb
181
180
  - lib/moab/exceptions.rb
182
181
  - lib/moab/file_group.rb
183
182
  - lib/moab/file_group_difference.rb
@@ -198,16 +197,11 @@ files:
198
197
  - lib/moab/storage_services.rb
199
198
  - lib/moab/utc_time.rb
200
199
  - lib/moab/verification_result.rb
201
- - lib/moab/version_metadata.rb
202
200
  - lib/moab/version_metadata_entry.rb
203
- - lib/moab/version_metadata_event.rb
204
201
  - lib/serializer.rb
205
202
  - lib/serializer/manifest.rb
206
203
  - lib/serializer/serializable.rb
207
- - lib/stanford/active_fedora_object.rb
208
204
  - lib/stanford/content_inventory.rb
209
- - lib/stanford/dor_metadata.rb
210
- - lib/stanford/moab_storage_directory.rb
211
205
  - lib/stanford/storage_object_validator.rb
212
206
  - lib/stanford/storage_repository.rb
213
207
  - lib/stanford/storage_services.rb
@@ -215,7 +209,7 @@ homepage: https://github.com/sul-dlss/moab-versioning
215
209
  licenses:
216
210
  - Apache-2.0
217
211
  metadata: {}
218
- post_install_message:
212
+ post_install_message:
219
213
  rdoc_options: []
220
214
  require_paths:
221
215
  - lib
@@ -226,12 +220,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
226
220
  version: '2.6'
227
221
  required_rubygems_version: !ruby/object:Gem::Requirement
228
222
  requirements:
229
- - - ">="
223
+ - - ">"
230
224
  - !ruby/object:Gem::Version
231
- version: '0'
225
+ version: 1.3.1
232
226
  requirements: []
233
227
  rubygems_version: 3.1.4
234
- signing_key:
228
+ signing_key:
235
229
  specification_version: 4
236
230
  summary: Ruby implementation of digital object versioning toolkit used by the SULAIR
237
231
  Digital Library
@@ -1,328 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Moab
4
- # Given a deposit bag, ensures the contents valid for becoming a StorageObjectVersion
5
- # this is a Shameless Green implementation, combining code from:
6
- # - sdr-preservation-core/lib/sdr_ingest/validate_bag <-- old preservation robots
7
- # - archive-utils/lib/bagit_bag <-- gem only used by sdr-preservation-robots
8
- # - archive-utils/lib/file_fixity
9
- # - archive-utils/lib/fixity
10
- # this code adds duplication to this gem (see github issue #119);
11
- # for example, computing checksums is done
12
- # - deposit_bag_validator
13
- # - file_signature
14
- class DepositBagValidator
15
- BAG_DIR_NOT_FOUND = :bag_dir_not_found
16
- CHECKSUM_MISMATCH = :checksum_mismatch
17
- CHECKSUM_TYPE_UNRECOGNIZED = :checksum_type_unrecognized
18
- INVALID_VERSION_XXX_XML = :invalid_versionXxx_xml
19
- PAYLOAD_SIZE_MISMATCH = :payload_size_mismatch
20
- REQUIRED_FILE_NOT_FOUND = :required_file_not_found
21
- VERSION_MISMATCH_TO_MOAB = :version_mismatch_to_moab
22
- VERSION_MISSING_FROM_FILE = :version_missing_from_file
23
-
24
- ERROR_CODE_TO_MESSAGES = {
25
- BAG_DIR_NOT_FOUND => "Deposit bag directory %{bag_dir} does not exist",
26
- CHECKSUM_MISMATCH => "Failed %{manifest_type} verification. Differences: \n%{diffs}",
27
- CHECKSUM_TYPE_UNRECOGNIZED => "Checksum type unrecognized: %{checksum_type}; file: %{filename}",
28
- INVALID_VERSION_XXX_XML => "Unable to parse %{file_pathname}: %{err_info}",
29
- PAYLOAD_SIZE_MISMATCH => "Failed payload size verification. Expected: %{bag_info_sizes}; found: %{generated_sizes}",
30
- REQUIRED_FILE_NOT_FOUND => "Deposit bag required file %{file_pathname} not found",
31
- VERSION_MISMATCH_TO_MOAB => "Version mismatch in %{file_pathname}: Moab expected %{new_version}; found %{file_version}",
32
- VERSION_MISSING_FROM_FILE => "Version xml file %{version_file} missing data at %{xpath} containing version id"
33
- }.freeze
34
-
35
- REQUIRED_MANIFEST_CHECKSUM_TYPE = 'sha256'
36
- RECOGNIZED_CHECKSUM_ALGORITHMS = %i[md5 sha1 sha256 sha384 sha512].freeze
37
-
38
- TAGMANIFEST = 'tagmanifest'
39
- MANIFEST = 'manifest'
40
- DATA_DIR_BASENAME = 'data'
41
- BAG_INFO_TXT_BASENAME = 'bag-info.txt'
42
- VERSION_ADDITIONS_BASENAME = 'versionAdditions.xml'
43
- VERSION_INVENTORY_BASENAME = 'versionInventory.xml'
44
- VERSION_METADATA_PATH = "#{DATA_DIR_BASENAME}/metadata/versionMetadata.xml"
45
-
46
- REQUIRED_BAG_FILES = [
47
- DATA_DIR_BASENAME,
48
- 'bagit.txt',
49
- BAG_INFO_TXT_BASENAME,
50
- "#{MANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt",
51
- "#{TAGMANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt",
52
- VERSION_ADDITIONS_BASENAME,
53
- VERSION_INVENTORY_BASENAME,
54
- VERSION_METADATA_PATH
55
- ].freeze
56
-
57
- attr_reader :deposit_bag_pathname, :expected_new_version, :result_array
58
-
59
- def initialize(storage_object)
60
- @deposit_bag_pathname = storage_object.deposit_bag_pathname
61
- @expected_new_version = storage_object.current_version_id + 1
62
- @result_array = []
63
- end
64
-
65
- # returns Array of tiny error hashes, allowing multiple occurrences of a single error code
66
- def validation_errors
67
- return [single_error_hash(BAG_DIR_NOT_FOUND, bag_dir: deposit_bag_pathname)] unless deposit_bag_pathname.exist?
68
- return result_array unless required_bag_files_exist?
69
-
70
- verify_version
71
- verify_tagmanifests
72
- verify_payload_size
73
- verify_payload_manifests
74
- result_array # attr that accumulates any errors encountered along the way
75
- end
76
-
77
- private
78
-
79
- def bag_dir_exists?
80
- deposit_bag_pathname.exist?
81
- end
82
-
83
- # assumes this is called when result_array is empty, as subsequent checks will use these required files
84
- def required_bag_files_exist?
85
- REQUIRED_BAG_FILES.each do |filename|
86
- pathname = deposit_bag_pathname.join(filename)
87
- result_array << single_error_hash(REQUIRED_FILE_NOT_FOUND, file_pathname: pathname) unless pathname.exist?
88
- end
89
- result_array.empty?
90
- end
91
-
92
- def verify_version
93
- version_md_pathname = deposit_bag_pathname.join(VERSION_METADATA_PATH)
94
- version_from_file = last_version_id_from_version_md_xml(version_md_pathname)
95
- verify_version_from_xml_file(version_md_pathname, version_from_file) if version_from_file
96
-
97
- version_additions_pathname = deposit_bag_pathname.join(VERSION_ADDITIONS_BASENAME)
98
- version_from_file = version_id_from_version_manifest_xml(version_additions_pathname)
99
- verify_version_from_xml_file(version_additions_pathname, version_from_file) if version_from_file
100
-
101
- version_inventory_pathname = deposit_bag_pathname.join(VERSION_INVENTORY_BASENAME)
102
- version_from_file = version_id_from_version_manifest_xml(version_inventory_pathname)
103
- verify_version_from_xml_file(version_inventory_pathname, version_from_file) if version_from_file
104
- end
105
-
106
- def last_version_id_from_version_md_xml(version_md_pathname)
107
- last_version_id_from_xml(version_md_pathname, '/versionMetadata/version/@versionId')
108
- end
109
-
110
- def version_id_from_version_manifest_xml(version_manifest_xml_pathname)
111
- last_version_id_from_xml(version_manifest_xml_pathname, '/fileInventory/@versionId')
112
- end
113
-
114
- def last_version_id_from_xml(pathname, xpath)
115
- doc = Nokogiri::XML(File.open(pathname.to_s), &:strict)
116
- version_id = doc.xpath(xpath).last.text unless doc.xpath(xpath).empty?
117
- return version_id.to_i if version_id
118
-
119
- err_data = {
120
- version_file: pathname,
121
- xpath: xpath
122
- }
123
- result_array << single_error_hash(VERSION_MISSING_FROM_FILE, err_data) unless version_id
124
- nil
125
- rescue StandardError => e
126
- err_data = {
127
- file_pathname: pathname,
128
- err_info: "#{e}\n#{e.backtrace}"
129
- }
130
- result_array << single_error_hash(INVALID_VERSION_XXX_XML, err_data)
131
- nil
132
- end
133
-
134
- def verify_version_from_xml_file(file_pathname, found)
135
- return if found == expected_new_version
136
-
137
- err_data = {
138
- file_pathname: file_pathname,
139
- new_version: expected_new_version,
140
- file_version: found
141
- }
142
- result_array << single_error_hash(VERSION_MISMATCH_TO_MOAB, err_data)
143
- end
144
-
145
- # adds to result_array if tagmanifest checksums don't match generated checksums
146
- def verify_tagmanifests
147
- tagmanifests_checksums_hash = checksums_hash_from_manifest_files(TAGMANIFEST)
148
- types_to_generate = checksum_types_from_manifest_checksums_hash(tagmanifests_checksums_hash)
149
- generated_checksums_hash = generate_tagmanifest_checksums_hash(types_to_generate)
150
- verify_manifest_checksums(TAGMANIFEST, tagmanifests_checksums_hash, generated_checksums_hash)
151
- end
152
-
153
- # adds to result_array if manifest checksums don't match generated checksums
154
- def verify_payload_manifests
155
- manifests_checksums_hash = checksums_hash_from_manifest_files(MANIFEST)
156
- types_to_generate = checksum_types_from_manifest_checksums_hash(manifests_checksums_hash)
157
- generated_checksums_hash = generate_payload_checksums(types_to_generate)
158
- verify_manifest_checksums(MANIFEST, manifests_checksums_hash, generated_checksums_hash)
159
- end
160
-
161
- # construct hash based on manifest_type-alg.txt files in bag home dir
162
- # key: file_name, relative to base_path, value: hash of checksum alg => checksum value
163
- def checksums_hash_from_manifest_files(manifest_type)
164
- checksums_hash = {}
165
- deposit_bag_pathname.children.each do |child_pathname|
166
- if child_pathname.file?
167
- child_fname = child_pathname.basename.to_s
168
- match_result = child_fname.match("^#{manifest_type}-(.*).txt")
169
- if match_result
170
- checksum_type = match_result.captures.first.to_sym
171
- if RECOGNIZED_CHECKSUM_ALGORITHMS.include?(checksum_type)
172
- child_pathname.readlines.each do |line|
173
- line.chomp!.strip!
174
- checksum, file_name = line.split(/[\s*]+/, 2)
175
- file_checksums = checksums_hash[file_name] || {}
176
- file_checksums[checksum_type] = checksum
177
- checksums_hash[file_name] = file_checksums
178
- end
179
- else
180
- result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: child_pathname)
181
- end
182
- end
183
- end
184
- end
185
- checksums_hash
186
- end
187
-
188
- # generate hash of checksums by file name for bag home dir files
189
- def generate_tagmanifest_checksums_hash(types_to_generate)
190
- # all names in the bag home dir except those starting with 'tagmanifest'
191
- home_dir_pathnames = deposit_bag_pathname.children.reject { |file| file.basename.to_s.start_with?(TAGMANIFEST) }
192
- hash_with_full_pathnames = generate_checksums_hash(home_dir_pathnames, types_to_generate)
193
- # return hash keys as basenames only
194
- hash_with_full_pathnames.transform_keys { |k| Pathname.new(k).basename.to_s }
195
- end
196
-
197
- # generate hash of checksums by file name for bag data dir files
198
- def generate_payload_checksums(types_to_generate)
199
- data_pathnames = deposit_bag_pathname.join(DATA_DIR_BASENAME).find
200
- hash_with_full_pathnames = generate_checksums_hash(data_pathnames, types_to_generate)
201
- # return hash keys beginning with 'data/'
202
- hash_with_full_pathnames.transform_keys { |k| Pathname.new(k).relative_path_from(deposit_bag_pathname).to_s }
203
- end
204
-
205
- def generate_checksums_hash(pathnames, types_to_generate)
206
- file_checksums_hash = {}
207
- pathnames.each do |pathname|
208
- file_checksums_hash[pathname.to_s] = generated_checksums(pathname, types_to_generate) if pathname.file?
209
- end
210
- file_checksums_hash
211
- end
212
-
213
- def generated_checksums(pathname, types_to_generate)
214
- my_digester_hash = digester_hash(types_to_generate)
215
- pathname.open('r') do |stream|
216
- while (buffer = stream.read(8192))
217
- my_digester_hash.each_value { |digest| digest.update(buffer) }
218
- end
219
- end
220
- file_checksums = {}
221
- my_digester_hash.each do |checksum_type, digest|
222
- file_checksums[checksum_type] = digest.hexdigest
223
- end
224
- file_checksums
225
- end
226
-
227
- def digester_hash(types_to_generate = DEFAULT_CHECKSUM_TYPES)
228
- types_to_generate.each_with_object({}) do |checksum_type, digester_hash|
229
- case checksum_type
230
- when :md5
231
- digester_hash[checksum_type] = Digest::MD5.new
232
- when :sha1
233
- digester_hash[checksum_type] = Digest::SHA1.new
234
- when :sha256
235
- digester_hash[checksum_type] = Digest::SHA2.new(256)
236
- when :sha384
237
- digesters[checksum_type] = Digest::SHA2.new(384)
238
- when :sha512
239
- digesters[checksum_type] = Digest::SHA2.new(512)
240
- else
241
- result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: nil)
242
- end
243
- digester_hash
244
- end
245
- end
246
-
247
- def verify_manifest_checksums(manifest_type, manifests_checksum_hash, generated_checksum_hash)
248
- diff_hash = {}
249
- # NOTE: this is intentionally | instead of ||
250
- (manifests_checksum_hash.keys | generated_checksum_hash.keys).each do |file_name|
251
- manifest_checksums = manifests_checksum_hash[file_name] || {}
252
- generated_checksums = generated_checksum_hash[file_name] || {}
253
- if manifest_checksums != generated_checksums
254
- cdh = checksums_diff_hash(manifest_checksums, generated_checksums, manifest_type, 'generated')
255
- diff_hash[file_name] = cdh if cdh
256
- end
257
- end
258
- return if diff_hash.empty?
259
-
260
- err_data = {
261
- manifest_type: manifest_type,
262
- diffs: diff_hash
263
- }
264
- result_array << single_error_hash(CHECKSUM_MISMATCH, err_data)
265
- end
266
-
267
- def checksums_diff_hash(left_checksums, right_checksums, left_label, right_label)
268
- diff_hash = {}
269
- # NOTE: these are intentionally & and | instead of && and ||
270
- checksum_types_to_compare = (left_checksums.keys & right_checksums.keys)
271
- checksum_types_to_compare = (left_checksums.keys | right_checksums.keys) if checksum_types_to_compare.empty?
272
- checksum_types_to_compare.each do |type|
273
- left_checksum = left_checksums[type]
274
- right_checksum = right_checksums[type]
275
- diff_hash[type] = { left_label => left_checksum, right_label => right_checksum } if left_checksum != right_checksum
276
- end
277
- diff_hash.empty? ? nil : diff_hash
278
- end
279
-
280
- def verify_payload_size
281
- sizes_from_bag_info_file = bag_info_payload_size
282
- generated_sizes = generated_payload_size
283
- return if sizes_from_bag_info_file == generated_sizes
284
-
285
- err_data = {
286
- bag_info_sizes: sizes_from_bag_info_file,
287
- generated_sizes: generated_sizes
288
- }
289
- result_array << single_error_hash(PAYLOAD_SIZE_MISMATCH, err_data)
290
- end
291
-
292
- def bag_info_payload_size
293
- bag_info_txt_pathname = deposit_bag_pathname.join(BAG_INFO_TXT_BASENAME)
294
- bag_info_txt_pathname.readlines.each do |line|
295
- line.chomp!.strip!
296
- key, value = line.split(':', 2)
297
- if key.strip == 'Payload-Oxum'
298
- num_bytes, num_files = value.strip.split('.') if value
299
- return { bytes: num_bytes.to_i, files: num_files.to_i }
300
- end
301
- end
302
- end
303
-
304
- def generated_payload_size
305
- payload_pathname = deposit_bag_pathname.join(DATA_DIR_BASENAME)
306
- payload_pathname.find.select(&:file?).each_with_object(bytes: 0, files: 0) do |file, hash|
307
- hash[:bytes] += file.size
308
- hash[:files] += 1
309
- hash
310
- end
311
- end
312
-
313
- # checksums_hash: { fname => {:md5=>"xxx", :sha1=>"yyy"}, fname => ... }
314
- def checksum_types_from_manifest_checksums_hash(checksums_hash)
315
- types = []
316
- checksums_hash.each_value { |v| v.each_key { |k| types << k unless types.include?(k) } }
317
- types
318
- end
319
-
320
- def single_error_hash(error_code, err_data_hash)
321
- { error_code => error_code_msg(error_code, err_data_hash) }
322
- end
323
-
324
- def error_code_msg(error_code, err_data_hash)
325
- ERROR_CODE_TO_MESSAGES[error_code] % err_data_hash
326
- end
327
- end
328
- end
@@ -1,34 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Moab
4
- # The descriptive information about a digital object's collection of versions
5
- #
6
- # ====Data Model
7
- # * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
8
- # * {VersionMetadataEntry} [1..*] = attributes of a digital object version
9
- # * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
10
- #
11
- # @example {include:file:spec/fixtures/data/jq937jp0017/v3/metadata/versionMetadata.xml}
12
- # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
13
- # All rights reserved. See {file:LICENSE.rdoc} for details.
14
- class VersionMetadata < Serializer::Manifest
15
- include HappyMapper
16
-
17
- # The name of the XML element used to serialize this objects data
18
- tag 'versionMetadata'
19
-
20
- # (see Serializable#initialize)
21
- def initialize(opts = {})
22
- @versions = []
23
- super(opts)
24
- end
25
-
26
- # @attribute
27
- # @return [String] The digital object identifier
28
- attribute :digital_object_id, String, tag: 'objectId'
29
-
30
- # @attribute
31
- # @return [Array<VersionMetadataEntry>] An array of version metadata entries, one per version
32
- has_many :versions, VersionMetadataEntry, tag: 'version'
33
- end
34
- end
@@ -1,42 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Moab
4
- # A container element to record object version lifecycle events with timestamps
5
- #
6
- # ====Data Model
7
- # * {VersionMetadata} = descriptive information about a digital object's versions
8
- # * {VersionMetadataEntry} [1..*] = attributes of a digital object version
9
- # * <b>{VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps</b>
10
- #
11
- # @see VersionMetadata
12
- # @see VersionMetadataEntry
13
- # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
14
- # All rights reserved. See {file:LICENSE.rdoc} for details.
15
- class VersionMetadataEvent < Serializer::Serializable
16
- include HappyMapper
17
-
18
- # The name of the XML element used to serialize this objects data
19
- tag 'event'
20
-
21
- # (see Serializable#initialize)
22
- def initialize(opts = {})
23
- super(opts)
24
- end
25
-
26
- # @attribute
27
- # @return [String] The type of event
28
- attribute :type, String
29
-
30
- # @attribute
31
- # @return [String] The date and time of an event
32
- attribute :datetime, String
33
-
34
- def datetime=(event_datetime)
35
- @datetime = Moab::UtcTime.input(event_datetime)
36
- end
37
-
38
- def datetime
39
- Moab::UtcTime.output(@datetime)
40
- end
41
- end
42
- end
@@ -1,30 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Stanford
4
- # Utility Class for extracting content or other information from a Fedora Instance
5
- #
6
- # ====Data Model
7
- # * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
8
- # * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
9
- # * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
10
- #
11
- # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
12
- # All rights reserved. See {file:LICENSE.rdoc} for details.
13
- class ActiveFedoraObject
14
- # @param fedora_object [Object] The Active Fedora representation of the Fedora Object
15
- # @return [Stanford::ActiveFedoraObject] Create a u
16
- def initialize(fedora_object)
17
- @fedora_object = fedora_object
18
- end
19
-
20
- # @return [Object] The Active Fedora representation of the Fedora Object
21
- attr_accessor :fedora_object
22
-
23
- # @api external
24
- # @param ds_id [String] The datastream identifier
25
- # @return [String] The content of the specified datastream
26
- def get_datastream_content(ds_id)
27
- @fedora_object.datastreams[ds_id].content
28
- end
29
- end
30
- end
@@ -1,44 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Stanford
4
- # Stanford-specific utility methods for interfacing with DOR metadata files
5
- #
6
- # ====Data Model
7
- # * <b>{DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)</b>
8
- # * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
9
- # * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
10
- #
11
- # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
12
- # All rights reserved. See {file:LICENSE.rdoc} for details.
13
- class DorMetadata
14
- # @return [String] The digital object identifier (druid)
15
- attr_accessor :digital_object_id
16
-
17
- # @return [Integer] \@versionId = The ordinal version number
18
- attr_accessor :version_id
19
-
20
- # @param digital_object_id [String] The digital object identifier
21
- # @param version_id [Integer] The ordinal version number
22
- # @return [Stanford::DorMetadata]
23
- def initialize(digital_object_id, version_id = nil)
24
- @digital_object_id = digital_object_id
25
- @version_id = version_id
26
- end
27
-
28
- # @api internal
29
- # @param directory [String] The location of the directory to be inventoried
30
- # @param version_id (see #initialize)
31
- # @return [FileInventory] Inventory of the files under the specified directory
32
- def inventory_from_directory(directory, version_id = nil)
33
- version_id ||= @version_id
34
- version_inventory = Moab::FileInventory.new(type: 'version', digital_object_id: @digital_object_id,
35
- version_id: version_id)
36
- content_metadata = IO.read(File.join(directory, 'contentMetadata.xml'))
37
- content_group = Stanford::ContentInventory.new.group_from_cm(content_metadata, 'preserve')
38
- version_inventory.groups << content_group
39
- metadata_group = Moab::FileGroup.new(group_id: 'metadata').group_from_directory(directory)
40
- version_inventory.groups << metadata_group
41
- version_inventory
42
- end
43
- end
44
- end
@@ -1,38 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'find'
4
-
5
- module Stanford
6
- ##
7
- # methods for dealing with a directory which stores Moab objects
8
- class MoabStorageDirectory
9
- DRUID_TREE_REGEXP = '[[:lower:]]{2}/\\d{3}/[[:lower:]]{2}/\\d{4}'
10
- DRUID_REGEXP = '[[:lower:]]{2}\\d{3}[[:lower:]]{2}\\d{4}'
11
-
12
- def self.find_moab_paths(storage_dir)
13
- Find.find(storage_dir) do |path|
14
- Find.prune unless File.directory?(path) # don't bother with a matching on files, we only care about directories
15
- path_match_data = storage_dir_regexp(storage_dir).match(path)
16
- if path_match_data
17
- yield path_match_data[1], path, path_match_data # yield the druid, the full path, and the MatchData object
18
- Find.prune # we don't care about what's in the moab dir, we just want the paths that look like moabs
19
- end
20
- end
21
- end
22
-
23
- def self.list_moab_druids(storage_dir)
24
- druids = []
25
- find_moab_paths(storage_dir) { |druid, _path, _path_match_data| druids << druid }
26
- druids
27
- end
28
-
29
- private_class_method def self.storage_dir_regexps
30
- @storage_dir_regexps ||= {}
31
- end
32
-
33
- # this regexp caching makes things many times faster (e.g. went from ~2200 s to crawl disk11, down to ~300 s)
34
- private_class_method def self.storage_dir_regexp(storage_dir)
35
- storage_dir_regexps[storage_dir] ||= Regexp.new("^#{storage_dir}/#{DRUID_TREE_REGEXP}/(#{DRUID_REGEXP})$")
36
- end
37
- end
38
- end