moab-versioning 4.3.0 → 5.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/lib/moab/bagger.rb +7 -2
  3. data/lib/moab/config.rb +40 -7
  4. data/lib/moab/exceptions.rb +6 -0
  5. data/lib/moab/file_group.rb +12 -9
  6. data/lib/moab/file_group_difference.rb +26 -23
  7. data/lib/moab/file_group_difference_subset.rb +5 -3
  8. data/lib/moab/file_instance.rb +4 -1
  9. data/lib/moab/file_instance_difference.rb +5 -3
  10. data/lib/moab/file_inventory.rb +13 -9
  11. data/lib/moab/file_inventory_difference.rb +8 -6
  12. data/lib/moab/file_manifestation.rb +5 -2
  13. data/lib/moab/file_signature.rb +12 -7
  14. data/lib/moab/signature_catalog.rb +13 -13
  15. data/lib/moab/signature_catalog_entry.rb +6 -4
  16. data/lib/moab/stanford.rb +2 -10
  17. data/lib/moab/storage_object.rb +11 -5
  18. data/lib/moab/storage_object_validator.rb +24 -10
  19. data/lib/moab/storage_object_version.rb +19 -12
  20. data/lib/moab/storage_repository.rb +49 -7
  21. data/lib/moab/storage_services.rb +12 -9
  22. data/lib/moab/utc_time.rb +2 -0
  23. data/lib/moab/verification_result.rb +4 -3
  24. data/lib/moab/version_metadata_entry.rb +6 -4
  25. data/lib/moab.rb +2 -9
  26. data/lib/serializer/manifest.rb +4 -2
  27. data/lib/serializer/serializable.rb +6 -1
  28. data/lib/serializer.rb +2 -0
  29. data/lib/stanford/content_inventory.rb +23 -19
  30. data/lib/stanford/storage_object_validator.rb +2 -0
  31. data/lib/stanford/storage_repository.rb +6 -2
  32. data/lib/stanford/storage_services.rb +2 -0
  33. metadata +22 -42
  34. data/lib/moab/deposit_bag_validator.rb +0 -323
  35. data/lib/moab/version_metadata.rb +0 -32
  36. data/lib/moab/version_metadata_event.rb +0 -40
  37. data/lib/stanford/active_fedora_object.rb +0 -28
  38. data/lib/stanford/dor_metadata.rb +0 -41
  39. data/lib/stanford/moab_storage_directory.rb +0 -36
@@ -1,323 +0,0 @@
1
module Moab
  # Given a deposit bag, ensures the contents are valid for becoming a StorageObjectVersion.
  #
  # This is a Shameless Green implementation, combining code from:
  #  - sdr-preservation-core/lib/sdr_ingest/validate_bag  <-- old preservation robots
  #  - archive-utils/lib/bagit_bag  <-- gem only used by sdr-preservation-robots
  #  - archive-utils/lib/file_fixity
  #  - archive-utils/lib/fixity
  # This code adds duplication to this gem (see github issue #119);
  # for example, computing checksums is done in
  #  - deposit_bag_validator
  #  - file_signature
  class DepositBagValidator
    BAG_DIR_NOT_FOUND = :bag_dir_not_found
    CHECKSUM_MISMATCH = :checksum_mismatch
    CHECKSUM_TYPE_UNRECOGNIZED = :checksum_type_unrecognized
    INVALID_VERSION_XXX_XML = :invalid_versionXxx_xml
    PAYLOAD_SIZE_MISMATCH = :payload_size_mismatch
    REQUIRED_FILE_NOT_FOUND = :required_file_not_found
    VERSION_MISMATCH_TO_MOAB = :version_mismatch_to_moab
    VERSION_MISSING_FROM_FILE = :version_missing_from_file

    # format-string templates; filled in by #error_code_msg via String#%
    ERROR_CODE_TO_MESSAGES = {
      BAG_DIR_NOT_FOUND => "Deposit bag directory %{bag_dir} does not exist",
      CHECKSUM_MISMATCH => "Failed %{manifest_type} verification. Differences: \n%{diffs}",
      CHECKSUM_TYPE_UNRECOGNIZED => "Checksum type unrecognized: %{checksum_type}; file: %{filename}",
      INVALID_VERSION_XXX_XML => "Unable to parse %{file_pathname}: %{err_info}",
      PAYLOAD_SIZE_MISMATCH => "Failed payload size verification. Expected: %{bag_info_sizes}; found: %{generated_sizes}",
      REQUIRED_FILE_NOT_FOUND => "Deposit bag required file %{file_pathname} not found",
      VERSION_MISMATCH_TO_MOAB => "Version mismatch in %{file_pathname}: Moab expected %{new_version}; found %{file_version}",
      VERSION_MISSING_FROM_FILE => "Version xml file %{version_file} missing data at %{xpath} containing version id"
    }.freeze

    REQUIRED_MANIFEST_CHECKSUM_TYPE = 'sha256'.freeze
    RECOGNIZED_CHECKSUM_ALGORITHMS = %i[md5 sha1 sha256 sha384 sha512].freeze

    TAGMANIFEST = 'tagmanifest'.freeze
    MANIFEST = 'manifest'.freeze
    DATA_DIR_BASENAME = 'data'.freeze
    BAG_INFO_TXT_BASENAME = 'bag-info.txt'.freeze
    VERSION_ADDITIONS_BASENAME = 'versionAdditions.xml'.freeze
    VERSION_INVENTORY_BASENAME = 'versionInventory.xml'.freeze
    VERSION_METADATA_PATH = "#{DATA_DIR_BASENAME}/metadata/versionMetadata.xml".freeze

    REQUIRED_BAG_FILES = [
      DATA_DIR_BASENAME,
      'bagit.txt'.freeze,
      BAG_INFO_TXT_BASENAME,
      "#{MANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt".freeze,
      "#{TAGMANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt".freeze,
      VERSION_ADDITIONS_BASENAME,
      VERSION_INVENTORY_BASENAME,
      VERSION_METADATA_PATH
    ].freeze

    attr_reader :deposit_bag_pathname, :expected_new_version, :result_array

    # @param storage_object [#deposit_bag_pathname, #current_version_id] the object whose deposit bag
    #   is validated; the bag is expected to carry version current_version_id + 1
    def initialize(storage_object)
      @deposit_bag_pathname = storage_object.deposit_bag_pathname
      @expected_new_version = storage_object.current_version_id + 1
      @result_array = []
    end

    # @return [Array<Hash>] tiny error hashes (error code => message), allowing multiple
    #   occurrences of a single error code; empty when no problem was found
    def validation_errors
      return [single_error_hash(BAG_DIR_NOT_FOUND, bag_dir: deposit_bag_pathname)] unless bag_dir_exists?
      # the remaining checks read the required files, so stop here if any is missing
      return result_array unless required_bag_files_exist?

      verify_version
      verify_tagmanifests
      verify_payload_size
      verify_payload_manifests
      result_array # attr that accumulates any errors encountered along the way
    end

    private

    def bag_dir_exists?
      deposit_bag_pathname.exist?
    end

    # assumes this is called when result_array is empty, as subsequent checks will use these required files
    def required_bag_files_exist?
      REQUIRED_BAG_FILES.each do |filename|
        pathname = deposit_bag_pathname.join(filename)
        result_array << single_error_hash(REQUIRED_FILE_NOT_FOUND, file_pathname: pathname) unless pathname.exist?
      end
      result_array.empty?
    end

    # compares the version id found in each of the three version xml files to expected_new_version
    def verify_version
      version_md_pathname = deposit_bag_pathname.join(VERSION_METADATA_PATH)
      version_from_file = last_version_id_from_version_md_xml(version_md_pathname)
      verify_version_from_xml_file(version_md_pathname, version_from_file) if version_from_file

      [VERSION_ADDITIONS_BASENAME, VERSION_INVENTORY_BASENAME].each do |basename|
        manifest_pathname = deposit_bag_pathname.join(basename)
        version_from_file = version_id_from_version_manifest_xml(manifest_pathname)
        verify_version_from_xml_file(manifest_pathname, version_from_file) if version_from_file
      end
    end

    def last_version_id_from_version_md_xml(version_md_pathname)
      last_version_id_from_xml(version_md_pathname, '/versionMetadata/version/@versionId')
    end

    def version_id_from_version_manifest_xml(version_manifest_xml_pathname)
      last_version_id_from_xml(version_manifest_xml_pathname, '/fileInventory/@versionId')
    end

    # @return [Integer, nil] the integer value of the last node matching xpath; nil (after recording
    #   an error in result_array) when no node matches or the file cannot be read/parsed
    def last_version_id_from_xml(pathname, xpath)
      # block form of File.open so the handle is closed as soon as parsing finishes
      doc = File.open(pathname.to_s) { |io| Nokogiri::XML(io, &:strict) }
      nodes = doc.xpath(xpath)
      return nodes.last.text.to_i unless nodes.empty?

      result_array << single_error_hash(VERSION_MISSING_FROM_FILE, version_file: pathname, xpath: xpath)
      nil
    rescue StandardError => e
      err_data = {
        file_pathname: pathname,
        err_info: "#{e}\n#{e.backtrace}"
      }
      result_array << single_error_hash(INVALID_VERSION_XXX_XML, err_data)
      nil
    end

    def verify_version_from_xml_file(file_pathname, found)
      return if found == expected_new_version

      err_data = {
        file_pathname: file_pathname,
        new_version: expected_new_version,
        file_version: found
      }
      result_array << single_error_hash(VERSION_MISMATCH_TO_MOAB, err_data)
    end

    # adds to result_array if tagmanifest checksums don't match generated checksums
    def verify_tagmanifests
      tagmanifests_checksums_hash = checksums_hash_from_manifest_files(TAGMANIFEST)
      types_to_generate = checksum_types_from_manifest_checksums_hash(tagmanifests_checksums_hash)
      generated_checksums_hash = generate_tagmanifest_checksums_hash(types_to_generate)
      verify_manifest_checksums(TAGMANIFEST, tagmanifests_checksums_hash, generated_checksums_hash)
    end

    # adds to result_array if manifest checksums don't match generated checksums
    def verify_payload_manifests
      manifests_checksums_hash = checksums_hash_from_manifest_files(MANIFEST)
      types_to_generate = checksum_types_from_manifest_checksums_hash(manifests_checksums_hash)
      generated_checksums_hash = generate_payload_checksums(types_to_generate)
      verify_manifest_checksums(MANIFEST, manifests_checksums_hash, generated_checksums_hash)
    end

    # construct hash based on manifest_type-alg.txt files in bag home dir
    # key: file_name, relative to base_path, value: hash of checksum alg => checksum value
    def checksums_hash_from_manifest_files(manifest_type)
      # anchored and escaped so only "<manifest_type>-<alg>.txt" matches (not e.g. "...-sha256.txt.bak")
      manifest_name_regexp = /\A#{Regexp.escape(manifest_type)}-(.*)\.txt\z/
      checksums_hash = {}
      deposit_bag_pathname.children.each do |child_pathname|
        next unless child_pathname.file?

        match_result = manifest_name_regexp.match(child_pathname.basename.to_s)
        next unless match_result

        checksum_type = match_result[1].to_sym
        if RECOGNIZED_CHECKSUM_ALGORITHMS.include?(checksum_type)
          add_manifest_checksums(checksums_hash, child_pathname, checksum_type)
        else
          result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: child_pathname)
        end
      end
      checksums_hash
    end

    # merges the "checksum filename" lines of one manifest file into checksums_hash
    def add_manifest_checksums(checksums_hash, manifest_pathname, checksum_type)
      manifest_pathname.each_line do |raw_line|
        # String#strip also drops the line terminator and, unlike the bang variants, never returns nil
        line = raw_line.strip
        next if line.empty? # a blank line carries no checksum and must not create a nil entry

        checksum, file_name = line.split(/[\s*]+/, 2)
        (checksums_hash[file_name] ||= {})[checksum_type] = checksum
      end
    end

    # generate hash of checksums by file name for bag home dir files
    def generate_tagmanifest_checksums_hash(types_to_generate)
      # all names in the bag home dir except those starting with 'tagmanifest'
      home_dir_pathnames = deposit_bag_pathname.children.reject { |file| file.basename.to_s.start_with?(TAGMANIFEST) }
      hash_with_full_pathnames = generate_checksums_hash(home_dir_pathnames, types_to_generate)
      # return hash keys as basenames only
      hash_with_full_pathnames.map { |k, v| [Pathname.new(k).basename.to_s, v] }.to_h
    end

    # generate hash of checksums by file name for bag data dir files
    def generate_payload_checksums(types_to_generate)
      data_pathnames = deposit_bag_pathname.join(DATA_DIR_BASENAME).find
      hash_with_full_pathnames = generate_checksums_hash(data_pathnames, types_to_generate)
      # return hash keys beginning with 'data/'
      hash_with_full_pathnames.map { |k, v| [Pathname.new(k).relative_path_from(deposit_bag_pathname).to_s, v] }.to_h
    end

    # @return [Hash] full path string => { checksum type => hex digest }, for regular files only
    def generate_checksums_hash(pathnames, types_to_generate)
      file_checksums_hash = {}
      pathnames.each do |pathname|
        file_checksums_hash[pathname.to_s] = generated_checksums(pathname, types_to_generate) if pathname.file?
      end
      file_checksums_hash
    end

    # reads the file once, feeding every requested digest from the same buffer
    def generated_checksums(pathname, types_to_generate)
      my_digester_hash = digester_hash(types_to_generate)
      pathname.open('rb') do |stream|
        while (buffer = stream.read(8192))
          my_digester_hash.each_value { |digest| digest.update(buffer) }
        end
      end
      my_digester_hash.map { |checksum_type, digest| [checksum_type, digest.hexdigest] }.to_h
    end

    # @return [Hash] checksum type => fresh Digest instance; unrecognized types are recorded in
    #   result_array and left out of the hash
    # NOTE(review): DEFAULT_CHECKSUM_TYPES is not defined in this class; presumably it resolves from an
    #   enclosing namespace -- confirm. Every caller in this class passes the types explicitly.
    def digester_hash(types_to_generate = DEFAULT_CHECKSUM_TYPES)
      types_to_generate.each_with_object({}) do |checksum_type, digesters|
        case checksum_type
        when :md5
          digesters[checksum_type] = Digest::MD5.new
        when :sha1
          digesters[checksum_type] = Digest::SHA1.new
        when :sha256
          digesters[checksum_type] = Digest::SHA2.new(256)
        when :sha384
          digesters[checksum_type] = Digest::SHA2.new(384)
        when :sha512
          digesters[checksum_type] = Digest::SHA2.new(512)
        else
          result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: nil)
        end
      end
    end

    def verify_manifest_checksums(manifest_type, manifests_checksum_hash, generated_checksum_hash)
      diff_hash = {}
      # NOTE: this is intentionally | (array union) instead of ||
      (manifests_checksum_hash.keys | generated_checksum_hash.keys).each do |file_name|
        manifest_checksums = manifests_checksum_hash[file_name] || {}
        generated_checksums = generated_checksum_hash[file_name] || {}
        next if manifest_checksums == generated_checksums

        cdh = checksums_diff_hash(manifest_checksums, generated_checksums, manifest_type, 'generated')
        diff_hash[file_name] = cdh if cdh
      end
      return if diff_hash.empty?

      result_array << single_error_hash(CHECKSUM_MISMATCH, manifest_type: manifest_type, diffs: diff_hash)
    end

    # @return [Hash, nil] checksum type => { left_label => checksum, right_label => checksum } for each
    #   compared type whose values differ; nil when none differ
    def checksums_diff_hash(left_checksums, right_checksums, left_label, right_label)
      # NOTE: these are intentionally & and | (array intersection / union) instead of && and ||
      checksum_types_to_compare = (left_checksums.keys & right_checksums.keys)
      # with no type in common, compare every type so the one-sided entries are reported
      checksum_types_to_compare = (left_checksums.keys | right_checksums.keys) if checksum_types_to_compare.empty?
      diff_hash = checksum_types_to_compare.each_with_object({}) do |type, diffs|
        left_checksum = left_checksums[type]
        right_checksum = right_checksums[type]
        diffs[type] = { left_label => left_checksum, right_label => right_checksum } if left_checksum != right_checksum
      end
      diff_hash.empty? ? nil : diff_hash
    end

    def verify_payload_size
      sizes_from_bag_info_file = bag_info_payload_size
      generated_sizes = generated_payload_size
      return if sizes_from_bag_info_file == generated_sizes

      err_data = {
        bag_info_sizes: sizes_from_bag_info_file,
        generated_sizes: generated_sizes
      }
      result_array << single_error_hash(PAYLOAD_SIZE_MISMATCH, err_data)
    end

    # @return [Hash, nil] { bytes:, files: } parsed from the first Payload-Oxum line of bag-info.txt;
    #   nil when the file has no such line (which verify_payload_size then reports as a mismatch)
    def bag_info_payload_size
      deposit_bag_pathname.join(BAG_INFO_TXT_BASENAME).each_line do |raw_line|
        key, value = raw_line.strip.split(':', 2)
        next unless key && key.strip == 'Payload-Oxum' # key is nil for a blank line

        num_bytes, num_files = value.strip.split('.') if value
        return { bytes: num_bytes.to_i, files: num_files.to_i }
      end
      nil
    end

    # @return [Hash] { bytes:, files: } totals for the regular files found under the data directory
    def generated_payload_size
      payload_pathname = deposit_bag_pathname.join(DATA_DIR_BASENAME)
      payload_pathname.find.select(&:file?).each_with_object({ bytes: 0, files: 0 }) do |file, totals|
        totals[:bytes] += file.size
        totals[:files] += 1
      end
    end

    # checksums_hash: { fname => {:md5=>"xxx", :sha1=>"yyy"}, fname => ... }
    # @return [Array<Symbol>] the distinct checksum types used, in first-seen order
    def checksum_types_from_manifest_checksums_hash(checksums_hash)
      checksums_hash.values.flat_map(&:keys).uniq
    end

    def single_error_hash(error_code, err_data_hash)
      { error_code => error_code_msg(error_code, err_data_hash) }
    end

    def error_code_msg(error_code, err_data_hash)
      ERROR_CODE_TO_MESSAGES[error_code] % err_data_hash
    end
  end
end
@@ -1,32 +0,0 @@
1
module Moab
  # The descriptive information about a digital object's collection of versions
  #
  # ====Data Model
  # * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
  #   * {VersionMetadataEntry} [1..*] = attributes of a digital object version
  #     * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
  #
  # @example {include:file:spec/fixtures/data/jq937jp0017/v3/metadata/versionMetadata.xml}
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved.  See {file:LICENSE.rdoc} for details.
  class VersionMetadata < Serializer::Manifest
    include HappyMapper

    # The name of the XML element used to serialize this object's data
    tag 'versionMetadata'

    # Starts with an empty list of versions, then hands opts to the superclass initializer.
    # (see Serializable#initialize)
    def initialize(opts = {})
      @versions = []
      super(opts)
    end

    # @attribute
    # @return [String] The digital object identifier
    attribute :digital_object_id, String, tag: 'objectId'

    # @attribute
    # @return [Array<VersionMetadataEntry>] An array of version metadata entries, one per version
    has_many :versions, VersionMetadataEntry, tag: 'version'
  end
end
@@ -1,40 +0,0 @@
1
module Moab
  # A container element to record object version lifecycle events with timestamps
  #
  # ====Data Model
  # * {VersionMetadata} = descriptive information about a digital object's versions
  #   * {VersionMetadataEntry} [1..*] = attributes of a digital object version
  #     * <b>{VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps</b>
  #
  # @see VersionMetadata
  # @see VersionMetadataEntry
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved.  See {file:LICENSE.rdoc} for details.
  class VersionMetadataEvent < Serializer::Serializable
    include HappyMapper

    # The name of the XML element used to serialize this object's data
    tag 'event'

    # Passes opts straight through to the superclass initializer.
    # (see Serializable#initialize)
    def initialize(opts = {})
      super(opts)
    end

    # @attribute
    # @return [String] The type of event
    attribute :type, String

    # @attribute
    # @return [String] The date and time of an event
    attribute :datetime, String

    # Stores the value after passing it through Moab::UtcTime.input
    # @param event_datetime the date and time of the event
    def datetime=(event_datetime)
      converted = Moab::UtcTime.input(event_datetime)
      @datetime = converted
    end

    # @return the stored date and time, passed through Moab::UtcTime.output
    def datetime
      stored = @datetime
      Moab::UtcTime.output(stored)
    end
  end
end
@@ -1,28 +0,0 @@
1
module Stanford
  # Utility Class for extracting content or other information from a Fedora Instance
  #
  # ====Data Model
  # * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
  #   * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
  #   * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
  #
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved.  See {file:LICENSE.rdoc} for details.
  class ActiveFedoraObject
    # @return [Object] The Active Fedora representation of the Fedora Object
    attr_accessor :fedora_object

    # @param fedora_object [Object] The Active Fedora representation of the Fedora Object
    # @return [Stanford::ActiveFedoraObject] A wrapper holding the given fedora object
    def initialize(fedora_object)
      @fedora_object = fedora_object
    end

    # @api external
    # @param ds_id [String] The datastream identifier
    # @return [String] The content of the specified datastream
    def get_datastream_content(ds_id)
      datastream = @fedora_object.datastreams[ds_id]
      datastream.content
    end
  end
end
@@ -1,41 +0,0 @@
1
module Stanford
  # Stanford-specific utility methods for interfacing with DOR metadata files
  #
  # ====Data Model
  # * <b>{DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)</b>
  #   * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
  #   * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
  #
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved.  See {file:LICENSE.rdoc} for details.
  class DorMetadata
    # @return [String] The digital object identifier (druid)
    attr_accessor :digital_object_id

    # @return [Integer] \@versionId = The ordinal version number
    attr_accessor :version_id

    # @param digital_object_id [String] The digital object identifier
    # @param version_id [Integer] The ordinal version number
    # @return [Stanford::DorMetadata]
    def initialize(digital_object_id, version_id = nil)
      @digital_object_id = digital_object_id
      @version_id = version_id
    end

    # Builds a 'version' inventory with two groups, in this order: the content group derived from
    # the directory's contentMetadata.xml ('preserve' subset), then a 'metadata' group built from
    # the directory itself.
    #
    # @api internal
    # @param directory [String] The location of the directory to be inventoried
    # @param version_id (see #initialize) Falls back to the instance's version_id when nil
    # @return [FileInventory] Inventory of the files under the specified directory
    # @raise [Errno::ENOENT] if the directory has no contentMetadata.xml
    def inventory_from_directory(directory, version_id = nil)
      version_id ||= @version_id
      version_inventory = Moab::FileInventory.new(type: 'version', digital_object_id: @digital_object_id, version_id: version_id)
      # File.read rather than IO.read: IO.read treats a path starting with "|" as a command to run
      content_metadata = File.read(File.join(directory, 'contentMetadata.xml'))
      content_group = Stanford::ContentInventory.new.group_from_cm(content_metadata, 'preserve')
      version_inventory.groups << content_group
      metadata_group = Moab::FileGroup.new(group_id: 'metadata').group_from_directory(directory)
      version_inventory.groups << metadata_group
      version_inventory
    end
  end
end
@@ -1,36 +0,0 @@
1
- require 'find'
2
-
3
- module Stanford
4
- ##
5
- # methods for dealing with a directory which stores Moab objects
6
- class MoabStorageDirectory
7
- DRUID_TREE_REGEXP = '[[:lower:]]{2}/\\d{3}/[[:lower:]]{2}/\\d{4}'.freeze
8
- DRUID_REGEXP = '[[:lower:]]{2}\\d{3}[[:lower:]]{2}\\d{4}'.freeze
9
-
10
- def self.find_moab_paths(storage_dir)
11
- Find.find(storage_dir) do |path|
12
- Find.prune unless File.directory?(path) # don't bother with a matching on files, we only care about directories
13
- path_match_data = storage_dir_regexp(storage_dir).match(path)
14
- if path_match_data
15
- yield path_match_data[1], path, path_match_data # yield the druid, the full path, and the MatchData object
16
- Find.prune # we don't care about what's in the moab dir, we just want the paths that look like moabs
17
- end
18
- end
19
- end
20
-
21
- def self.list_moab_druids(storage_dir)
22
- druids = []
23
- find_moab_paths(storage_dir) { |druid, _path, _path_match_data| druids << druid }
24
- druids
25
- end
26
-
27
- private_class_method def self.storage_dir_regexps
28
- @storage_dir_regexps ||= {}
29
- end
30
-
31
- # this regexp caching makes things many times faster (e.g. went from ~2200 s to crawl disk11, down to ~300 s)
32
- private_class_method def self.storage_dir_regexp(storage_dir)
33
- storage_dir_regexps[storage_dir] ||= Regexp.new("^#{storage_dir}/#{DRUID_TREE_REGEXP}/(#{DRUID_REGEXP})$")
34
- end
35
- end
36
- end