moab-versioning 4.4.2 → 5.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4b576c73187205ae4548c6460bac2b68cedab5dc7c3b94a66a5d8ec13729c6ae
4
- data.tar.gz: e714a158d2e1388dec94482c1fa858a2ee29a6eca6e0f9004658446801088476
3
+ metadata.gz: 6e9810b925a8e475771162b8f367bea2555e1a9e663993040f84256d1897a1bc
4
+ data.tar.gz: b329ea89a3b4ba5705cb63a5201d35125772d2dc872942b0df7cf62fc3c260c6
5
5
  SHA512:
6
- metadata.gz: 251925f9e4c45f20f6ae4270fc73adfe9f4645689bda08ebd42c2b707909177ab575e0427c83e321c2ebc039c735f9bc141b29037f28cb65249c25bd9e76d520
7
- data.tar.gz: '053391d53a3a3a46bbfedce8e7a04ea0f28a7f5c43689c2bde3c1901f9303eae7356d507904ec1892d9da61f2d4c9c6e58ed3fec894985e19b9ab85dd5fa65fd'
6
+ metadata.gz: 399fdfdef956fbec356495e4346b7a4114c921bf84df1f24bb1595795d045c6ef2ad5618b148c53364375ef7178fd99ac1be57a9aac4c9a2e184334387031d57
7
+ data.tar.gz: 29610e9c9a72e57ca5340e5685d6582897bda53f79904bdeccc0bc62f485827eb72eff4afb86ca2258bce47052f56f204c8ec987b94bef55579002fa045caa3c
@@ -100,10 +100,10 @@ module Moab
100
100
  # @param (see #compare)
101
101
  # @return [String] Returns either the common digitial object ID, or a concatenation of both inventory's IDs
102
102
  def common_object_id(basis_inventory, other_inventory)
103
- if basis_inventory.digital_object_id != other_inventory.digital_object_id
104
- "#{basis_inventory.digital_object_id}|#{other_inventory.digital_object_id}"
105
- else
103
+ if basis_inventory.digital_object_id == other_inventory.digital_object_id
106
104
  basis_inventory.digital_object_id.to_s
105
+ else
106
+ "#{basis_inventory.digital_object_id}|#{other_inventory.digital_object_id}"
107
107
  end
108
108
  end
109
109
 
@@ -77,7 +77,7 @@ module Moab
77
77
  def self.from_file(pathname, algos_to_use = active_algos)
78
78
  raise(MoabRuntimeError, 'Unrecognized algorithm requested') unless algos_to_use.all? { |a| KNOWN_ALGOS.include?(a) }
79
79
 
80
- signatures = algos_to_use.map { |k| [k, KNOWN_ALGOS[k].call] }.to_h
80
+ signatures = algos_to_use.to_h { |k| [k, KNOWN_ALGOS[k].call] }
81
81
 
82
82
  pathname.open("r") do |stream|
83
83
  while (buffer = stream.read(8192))
@@ -84,7 +84,7 @@ module Moab
84
84
 
85
85
  def block_count
86
86
  block_size = 1024
87
- entries.inject(0) { |sum, entry| sum + (entry.signature.size.to_i + block_size - 1) / block_size }
87
+ entries.inject(0) { |sum, entry| sum + ((entry.signature.size.to_i + block_size - 1) / block_size) }
88
88
  end
89
89
 
90
90
  # @return [Array<String>] The data fields to include in summary reports
data/lib/moab/stanford.rb CHANGED
@@ -2,20 +2,10 @@
2
2
 
3
3
  require 'moab'
4
4
  require 'stanford/content_inventory'
5
- require 'stanford/dor_metadata'
6
5
  require 'stanford/storage_repository'
7
6
  require 'stanford/storage_services'
8
- require 'stanford/active_fedora_object'
9
- require 'stanford/moab_storage_directory'
10
7
  require 'stanford/storage_object_validator'
11
8
 
12
9
  # Stanford is a module that isolates classes specific to the Stanford Digital Repository
13
- #
14
- # ====Data Model
15
- # * <b>{DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)</b>
16
- # * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
17
- #
18
- # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
19
- # All rights reserved. See {file:LICENSE.rdoc} for details.
20
10
  module Stanford
21
11
  end
@@ -57,9 +57,10 @@ module Moab
57
57
 
58
58
  # @api external
59
59
  # @param bag_dir [Pathname,String] The location of the bag to be ingested
60
+ # @param use_links [Boolean] If true, use hard links; if false, make copies
60
61
  # @return [void] Ingest a new object version contained in a bag into this objects storage area
61
62
  # @example {include:file:spec/features/storage/ingest_spec.rb}
62
- def ingest_bag(bag_dir = deposit_bag_pathname)
63
+ def ingest_bag(bag_dir = deposit_bag_pathname, use_links: true)
63
64
  bag_dir = Pathname(bag_dir)
64
65
  current_version = StorageObjectVersion.new(self, current_version_id)
65
66
  current_inventory = current_version.file_inventory('version')
@@ -70,7 +71,7 @@ module Moab
70
71
  new_inventory = versionize_bag(bag_dir, current_version, new_version)
71
72
  end
72
73
  validate_new_inventory(new_inventory)
73
- new_version.ingest_bag_data(bag_dir)
74
+ new_version.ingest_bag_data(bag_dir, use_links: use_links)
74
75
  new_version.update_catalog(current_version.signature_catalog, new_inventory)
75
76
  new_version.generate_differences_report(current_inventory, new_inventory)
76
77
  new_version.generate_manifest_inventory
@@ -137,15 +137,16 @@ module Moab
137
137
 
138
138
  # @api internal
139
139
  # @param bag_dir [Pathname,String] The location of the bag to be ingested
140
+ # @param use_links [Boolean] If true, use hard links; if false, make copies
140
141
  # @return [void] Create the version subdirectory and move files into it
141
- def ingest_bag_data(bag_dir)
142
+ def ingest_bag_data(bag_dir, use_links: true)
142
143
  raise(MoabRuntimeError, "Version already exists: #{@version_pathname}") if @version_pathname.exist?
143
144
 
144
145
  @version_pathname.join('manifests').mkpath
145
146
  bag_dir = Pathname(bag_dir)
146
- ingest_dir(bag_dir.join('data'), @version_pathname.join('data'))
147
- ingest_file(bag_dir.join(FileInventory.xml_filename('version')), @version_pathname.join('manifests'))
148
- ingest_file(bag_dir.join(FileInventory.xml_filename('additions')), @version_pathname.join('manifests'))
147
+ ingest_dir(bag_dir.join('data'), @version_pathname.join('data'), use_links)
148
+ ingest_file(bag_dir.join(FileInventory.xml_filename('version')), @version_pathname.join('manifests'), use_links)
149
+ ingest_file(bag_dir.join(FileInventory.xml_filename('additions')), @version_pathname.join('manifests'), use_links)
149
150
  end
150
151
 
151
152
  # @api internal
@@ -286,10 +287,10 @@ module Moab
286
287
  file.instances.each do |instance|
287
288
  relative_path = File.join(group.group_id, instance.path)
288
289
  catalog_entry = signature_catalog.signature_hash[file.signature]
289
- if !catalog_entry.nil?
290
- found += 1
291
- else
290
+ if catalog_entry.nil?
292
291
  missing << relative_path.to_s
292
+ else
293
+ found += 1
293
294
  end
294
295
  end
295
296
  end
@@ -83,12 +83,6 @@ module Moab
83
83
  @@repository.storage_object(object_id).current_version_id
84
84
  end
85
85
 
86
- # @param [String] object_id The digital object identifier of the object
87
- # @return [Pathname] Pathname object containing the full path for the specified file
88
- def self.version_metadata(object_id)
89
- retrieve_file('metadata', 'versionMetadata.xml', object_id)
90
- end
91
-
92
86
  # @param [String] object_id The digital object identifier of the object
93
87
  # @param [Integer] version_id The ID of the version, if nil use latest version
94
88
  # @return [FileInventory] the file inventory for the specified object version
data/lib/moab/utc_time.rb CHANGED
@@ -7,9 +7,7 @@ module Moab
7
7
  # @return [void] Convert input datetime to a Time object, or nil if input is empty.
8
8
  def self.input(datetime)
9
9
  case datetime
10
- when nil
11
- nil
12
- when ""
10
+ when nil, ""
13
11
  nil
14
12
  when String
15
13
  Time.parse(datetime)
@@ -66,7 +66,7 @@ module Moab
66
66
  # @param level [Integer] Used to increment the depth of recursion
67
67
  # @return [Hash] The verification result of subentities serialized to a hash
68
68
  def subentities_to_hash(verbose, level)
69
- subentities.map { |s| [s.entity, s.to_hash(verbose, level + 1)] }.to_h
69
+ subentities.to_h { |s| [s.entity, s.to_hash(verbose, level + 1)] }
70
70
  end
71
71
  end
72
72
  end
data/lib/moab.rb CHANGED
@@ -20,10 +20,6 @@
20
20
  # * {FileInstanceDifference} [1..*] = contains difference information at the file level
21
21
  # * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
22
22
  #
23
- # * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
24
- # * {VersionMetadataEntry} [1..*] = attributes of a digital object version
25
- # * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
26
- #
27
23
  # * <b>{StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods</b>
28
24
  # * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
29
25
  # * {Bagger} [1] = utility for creating bagit packages for ingest or dissemination
@@ -48,9 +44,6 @@ require 'moab/file_instance_difference'
48
44
  require 'moab/file_group_difference_subset'
49
45
  require 'moab/file_group_difference'
50
46
  require 'moab/file_inventory_difference'
51
- require 'moab/version_metadata_event'
52
- require 'moab/version_metadata_entry'
53
- require 'moab/version_metadata'
54
47
  require 'moab/bagger'
55
48
  require 'moab/storage_object'
56
49
  require 'moab/storage_object_version'
@@ -59,4 +52,3 @@ require 'moab/storage_services'
59
52
  require 'moab/exceptions'
60
53
  require 'moab/verification_result'
61
54
  require 'moab/storage_object_validator'
62
- require 'moab/deposit_bag_validator'
@@ -157,7 +157,7 @@ module Serializer
157
157
  diff[k] = if left[k].is_a?(Hash) && right[k].is_a?(Hash)
158
158
  deep_diff(ltag, left[k], rtag, right[k])
159
159
  else
160
- Hash.[](ltag, left[k], rtag, right[k])
160
+ { ltag => left[k], rtag => right[k] }
161
161
  end
162
162
  end
163
163
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: moab-versioning
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.4.2
4
+ version: 5.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Darren Weber
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2021-06-01 00:00:00.000000000 Z
14
+ date: 2022-07-07 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: druid-tools
@@ -69,20 +69,6 @@ dependencies:
69
69
  - - ">="
70
70
  - !ruby/object:Gem::Version
71
71
  version: '0'
72
- - !ruby/object:Gem::Dependency
73
- name: coveralls
74
- requirement: !ruby/object:Gem::Requirement
75
- requirements:
76
- - - ">="
77
- - !ruby/object:Gem::Version
78
- version: '0'
79
- type: :development
80
- prerelease: false
81
- version_requirements: !ruby/object:Gem::Requirement
82
- requirements:
83
- - - ">="
84
- - !ruby/object:Gem::Version
85
- version: '0'
86
72
  - !ruby/object:Gem::Dependency
87
73
  name: equivalent-xml
88
74
  requirement: !ruby/object:Gem::Requirement
@@ -167,6 +153,20 @@ dependencies:
167
153
  - - "~>"
168
154
  - !ruby/object:Gem::Version
169
155
  version: '2.1'
156
+ - !ruby/object:Gem::Dependency
157
+ name: simplecov
158
+ requirement: !ruby/object:Gem::Requirement
159
+ requirements:
160
+ - - ">="
161
+ - !ruby/object:Gem::Version
162
+ version: '0'
163
+ type: :development
164
+ prerelease: false
165
+ version_requirements: !ruby/object:Gem::Requirement
166
+ requirements:
167
+ - - ">="
168
+ - !ruby/object:Gem::Version
169
+ version: '0'
170
170
  description: Contains classes to process digital object version content and metadata
171
171
  email:
172
172
  - darren.weber@stanford.edu
@@ -177,7 +177,6 @@ files:
177
177
  - lib/moab.rb
178
178
  - lib/moab/bagger.rb
179
179
  - lib/moab/config.rb
180
- - lib/moab/deposit_bag_validator.rb
181
180
  - lib/moab/exceptions.rb
182
181
  - lib/moab/file_group.rb
183
182
  - lib/moab/file_group_difference.rb
@@ -198,23 +197,19 @@ files:
198
197
  - lib/moab/storage_services.rb
199
198
  - lib/moab/utc_time.rb
200
199
  - lib/moab/verification_result.rb
201
- - lib/moab/version_metadata.rb
202
200
  - lib/moab/version_metadata_entry.rb
203
- - lib/moab/version_metadata_event.rb
204
201
  - lib/serializer.rb
205
202
  - lib/serializer/manifest.rb
206
203
  - lib/serializer/serializable.rb
207
- - lib/stanford/active_fedora_object.rb
208
204
  - lib/stanford/content_inventory.rb
209
- - lib/stanford/dor_metadata.rb
210
- - lib/stanford/moab_storage_directory.rb
211
205
  - lib/stanford/storage_object_validator.rb
212
206
  - lib/stanford/storage_repository.rb
213
207
  - lib/stanford/storage_services.rb
214
208
  homepage: https://github.com/sul-dlss/moab-versioning
215
209
  licenses:
216
210
  - Apache-2.0
217
- metadata: {}
211
+ metadata:
212
+ rubygems_mfa_required: 'true'
218
213
  post_install_message:
219
214
  rdoc_options: []
220
215
  require_paths:
@@ -223,14 +218,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
223
218
  requirements:
224
219
  - - ">="
225
220
  - !ruby/object:Gem::Version
226
- version: '2.6'
221
+ version: '2.7'
227
222
  required_rubygems_version: !ruby/object:Gem::Requirement
228
223
  requirements:
229
224
  - - ">="
230
225
  - !ruby/object:Gem::Version
231
226
  version: '0'
232
227
  requirements: []
233
- rubygems_version: 3.1.4
228
+ rubygems_version: 3.2.32
234
229
  signing_key:
235
230
  specification_version: 4
236
231
  summary: Ruby implementation of digital object versioning toolkit used by the SULAIR
@@ -1,328 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Moab
4
- # Given a deposit bag, ensures the contents valid for becoming a StorageObjectVersion
5
- # this is a Shameless Green implementation, combining code from:
6
- # - sdr-preservation-core/lib/sdr_ingest/validate_bag <-- old preservation robots
7
- # - archive-utils/lib/bagit_bag <-- gem only used by sdr-preservation-robots
8
- # - archive-utils/lib/file_fixity
9
- # - archive-utils/lib/fixity
10
- # this code adds duplication to this gem (see github issue #119);
11
- # for example, computing checksums is done
12
- # - deposit_bag_validator
13
- # - file_signature
14
- class DepositBagValidator
15
- BAG_DIR_NOT_FOUND = :bag_dir_not_found
16
- CHECKSUM_MISMATCH = :checksum_mismatch
17
- CHECKSUM_TYPE_UNRECOGNIZED = :checksum_type_unrecognized
18
- INVALID_VERSION_XXX_XML = :invalid_versionXxx_xml
19
- PAYLOAD_SIZE_MISMATCH = :payload_size_mismatch
20
- REQUIRED_FILE_NOT_FOUND = :required_file_not_found
21
- VERSION_MISMATCH_TO_MOAB = :version_mismatch_to_moab
22
- VERSION_MISSING_FROM_FILE = :version_missing_from_file
23
-
24
- ERROR_CODE_TO_MESSAGES = {
25
- BAG_DIR_NOT_FOUND => "Deposit bag directory %{bag_dir} does not exist",
26
- CHECKSUM_MISMATCH => "Failed %{manifest_type} verification. Differences: \n%{diffs}",
27
- CHECKSUM_TYPE_UNRECOGNIZED => "Checksum type unrecognized: %{checksum_type}; file: %{filename}",
28
- INVALID_VERSION_XXX_XML => "Unable to parse %{file_pathname}: %{err_info}",
29
- PAYLOAD_SIZE_MISMATCH => "Failed payload size verification. Expected: %{bag_info_sizes}; found: %{generated_sizes}",
30
- REQUIRED_FILE_NOT_FOUND => "Deposit bag required file %{file_pathname} not found",
31
- VERSION_MISMATCH_TO_MOAB => "Version mismatch in %{file_pathname}: Moab expected %{new_version}; found %{file_version}",
32
- VERSION_MISSING_FROM_FILE => "Version xml file %{version_file} missing data at %{xpath} containing version id"
33
- }.freeze
34
-
35
- REQUIRED_MANIFEST_CHECKSUM_TYPE = 'sha256'
36
- RECOGNIZED_CHECKSUM_ALGORITHMS = %i[md5 sha1 sha256 sha384 sha512].freeze
37
-
38
- TAGMANIFEST = 'tagmanifest'
39
- MANIFEST = 'manifest'
40
- DATA_DIR_BASENAME = 'data'
41
- BAG_INFO_TXT_BASENAME = 'bag-info.txt'
42
- VERSION_ADDITIONS_BASENAME = 'versionAdditions.xml'
43
- VERSION_INVENTORY_BASENAME = 'versionInventory.xml'
44
- VERSION_METADATA_PATH = "#{DATA_DIR_BASENAME}/metadata/versionMetadata.xml"
45
-
46
- REQUIRED_BAG_FILES = [
47
- DATA_DIR_BASENAME,
48
- 'bagit.txt',
49
- BAG_INFO_TXT_BASENAME,
50
- "#{MANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt",
51
- "#{TAGMANIFEST}-#{REQUIRED_MANIFEST_CHECKSUM_TYPE}.txt",
52
- VERSION_ADDITIONS_BASENAME,
53
- VERSION_INVENTORY_BASENAME,
54
- VERSION_METADATA_PATH
55
- ].freeze
56
-
57
- attr_reader :deposit_bag_pathname, :expected_new_version, :result_array
58
-
59
- def initialize(storage_object)
60
- @deposit_bag_pathname = storage_object.deposit_bag_pathname
61
- @expected_new_version = storage_object.current_version_id + 1
62
- @result_array = []
63
- end
64
-
65
- # returns Array of tiny error hashes, allowing multiple occurrences of a single error code
66
- def validation_errors
67
- return [single_error_hash(BAG_DIR_NOT_FOUND, bag_dir: deposit_bag_pathname)] unless deposit_bag_pathname.exist?
68
- return result_array unless required_bag_files_exist?
69
-
70
- verify_version
71
- verify_tagmanifests
72
- verify_payload_size
73
- verify_payload_manifests
74
- result_array # attr that accumulates any errors encountered along the way
75
- end
76
-
77
- private
78
-
79
- def bag_dir_exists?
80
- deposit_bag_pathname.exist?
81
- end
82
-
83
- # assumes this is called when result_array is empty, as subsequent checks will use these required files
84
- def required_bag_files_exist?
85
- REQUIRED_BAG_FILES.each do |filename|
86
- pathname = deposit_bag_pathname.join(filename)
87
- result_array << single_error_hash(REQUIRED_FILE_NOT_FOUND, file_pathname: pathname) unless pathname.exist?
88
- end
89
- result_array.empty?
90
- end
91
-
92
- def verify_version
93
- version_md_pathname = deposit_bag_pathname.join(VERSION_METADATA_PATH)
94
- version_from_file = last_version_id_from_version_md_xml(version_md_pathname)
95
- verify_version_from_xml_file(version_md_pathname, version_from_file) if version_from_file
96
-
97
- version_additions_pathname = deposit_bag_pathname.join(VERSION_ADDITIONS_BASENAME)
98
- version_from_file = version_id_from_version_manifest_xml(version_additions_pathname)
99
- verify_version_from_xml_file(version_additions_pathname, version_from_file) if version_from_file
100
-
101
- version_inventory_pathname = deposit_bag_pathname.join(VERSION_INVENTORY_BASENAME)
102
- version_from_file = version_id_from_version_manifest_xml(version_inventory_pathname)
103
- verify_version_from_xml_file(version_inventory_pathname, version_from_file) if version_from_file
104
- end
105
-
106
- def last_version_id_from_version_md_xml(version_md_pathname)
107
- last_version_id_from_xml(version_md_pathname, '/versionMetadata/version/@versionId')
108
- end
109
-
110
- def version_id_from_version_manifest_xml(version_manifest_xml_pathname)
111
- last_version_id_from_xml(version_manifest_xml_pathname, '/fileInventory/@versionId')
112
- end
113
-
114
- def last_version_id_from_xml(pathname, xpath)
115
- doc = Nokogiri::XML(File.open(pathname.to_s), &:strict)
116
- version_id = doc.xpath(xpath).last.text unless doc.xpath(xpath).empty?
117
- return version_id.to_i if version_id
118
-
119
- err_data = {
120
- version_file: pathname,
121
- xpath: xpath
122
- }
123
- result_array << single_error_hash(VERSION_MISSING_FROM_FILE, err_data) unless version_id
124
- nil
125
- rescue StandardError => e
126
- err_data = {
127
- file_pathname: pathname,
128
- err_info: "#{e}\n#{e.backtrace}"
129
- }
130
- result_array << single_error_hash(INVALID_VERSION_XXX_XML, err_data)
131
- nil
132
- end
133
-
134
- def verify_version_from_xml_file(file_pathname, found)
135
- return if found == expected_new_version
136
-
137
- err_data = {
138
- file_pathname: file_pathname,
139
- new_version: expected_new_version,
140
- file_version: found
141
- }
142
- result_array << single_error_hash(VERSION_MISMATCH_TO_MOAB, err_data)
143
- end
144
-
145
- # adds to result_array if tagmanifest checksums don't match generated checksums
146
- def verify_tagmanifests
147
- tagmanifests_checksums_hash = checksums_hash_from_manifest_files(TAGMANIFEST)
148
- types_to_generate = checksum_types_from_manifest_checksums_hash(tagmanifests_checksums_hash)
149
- generated_checksums_hash = generate_tagmanifest_checksums_hash(types_to_generate)
150
- verify_manifest_checksums(TAGMANIFEST, tagmanifests_checksums_hash, generated_checksums_hash)
151
- end
152
-
153
- # adds to result_array if manifest checksums don't match generated checksums
154
- def verify_payload_manifests
155
- manifests_checksums_hash = checksums_hash_from_manifest_files(MANIFEST)
156
- types_to_generate = checksum_types_from_manifest_checksums_hash(manifests_checksums_hash)
157
- generated_checksums_hash = generate_payload_checksums(types_to_generate)
158
- verify_manifest_checksums(MANIFEST, manifests_checksums_hash, generated_checksums_hash)
159
- end
160
-
161
- # construct hash based on manifest_type-alg.txt files in bag home dir
162
- # key: file_name, relative to base_path, value: hash of checksum alg => checksum value
163
- def checksums_hash_from_manifest_files(manifest_type)
164
- checksums_hash = {}
165
- deposit_bag_pathname.children.each do |child_pathname|
166
- if child_pathname.file?
167
- child_fname = child_pathname.basename.to_s
168
- match_result = child_fname.match("^#{manifest_type}-(.*).txt")
169
- if match_result
170
- checksum_type = match_result.captures.first.to_sym
171
- if RECOGNIZED_CHECKSUM_ALGORITHMS.include?(checksum_type)
172
- child_pathname.readlines.each do |line|
173
- line.chomp!.strip!
174
- checksum, file_name = line.split(/[\s*]+/, 2)
175
- file_checksums = checksums_hash[file_name] || {}
176
- file_checksums[checksum_type] = checksum
177
- checksums_hash[file_name] = file_checksums
178
- end
179
- else
180
- result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: child_pathname)
181
- end
182
- end
183
- end
184
- end
185
- checksums_hash
186
- end
187
-
188
- # generate hash of checksums by file name for bag home dir files
189
- def generate_tagmanifest_checksums_hash(types_to_generate)
190
- # all names in the bag home dir except those starting with 'tagmanifest'
191
- home_dir_pathnames = deposit_bag_pathname.children.reject { |file| file.basename.to_s.start_with?(TAGMANIFEST) }
192
- hash_with_full_pathnames = generate_checksums_hash(home_dir_pathnames, types_to_generate)
193
- # return hash keys as basenames only
194
- hash_with_full_pathnames.transform_keys { |k| Pathname.new(k).basename.to_s }
195
- end
196
-
197
- # generate hash of checksums by file name for bag data dir files
198
- def generate_payload_checksums(types_to_generate)
199
- data_pathnames = deposit_bag_pathname.join(DATA_DIR_BASENAME).find
200
- hash_with_full_pathnames = generate_checksums_hash(data_pathnames, types_to_generate)
201
- # return hash keys beginning with 'data/'
202
- hash_with_full_pathnames.transform_keys { |k| Pathname.new(k).relative_path_from(deposit_bag_pathname).to_s }
203
- end
204
-
205
- def generate_checksums_hash(pathnames, types_to_generate)
206
- file_checksums_hash = {}
207
- pathnames.each do |pathname|
208
- file_checksums_hash[pathname.to_s] = generated_checksums(pathname, types_to_generate) if pathname.file?
209
- end
210
- file_checksums_hash
211
- end
212
-
213
- def generated_checksums(pathname, types_to_generate)
214
- my_digester_hash = digester_hash(types_to_generate)
215
- pathname.open('r') do |stream|
216
- while (buffer = stream.read(8192))
217
- my_digester_hash.each_value { |digest| digest.update(buffer) }
218
- end
219
- end
220
- file_checksums = {}
221
- my_digester_hash.each do |checksum_type, digest|
222
- file_checksums[checksum_type] = digest.hexdigest
223
- end
224
- file_checksums
225
- end
226
-
227
- def digester_hash(types_to_generate = DEFAULT_CHECKSUM_TYPES)
228
- types_to_generate.each_with_object({}) do |checksum_type, digester_hash|
229
- case checksum_type
230
- when :md5
231
- digester_hash[checksum_type] = Digest::MD5.new
232
- when :sha1
233
- digester_hash[checksum_type] = Digest::SHA1.new
234
- when :sha256
235
- digester_hash[checksum_type] = Digest::SHA2.new(256)
236
- when :sha384
237
- digesters[checksum_type] = Digest::SHA2.new(384)
238
- when :sha512
239
- digesters[checksum_type] = Digest::SHA2.new(512)
240
- else
241
- result_array << single_error_hash(CHECKSUM_TYPE_UNRECOGNIZED, checksum_type: checksum_type, filename: nil)
242
- end
243
- digester_hash
244
- end
245
- end
246
-
247
- def verify_manifest_checksums(manifest_type, manifests_checksum_hash, generated_checksum_hash)
248
- diff_hash = {}
249
- # NOTE: this is intentionally | instead of ||
250
- (manifests_checksum_hash.keys | generated_checksum_hash.keys).each do |file_name|
251
- manifest_checksums = manifests_checksum_hash[file_name] || {}
252
- generated_checksums = generated_checksum_hash[file_name] || {}
253
- if manifest_checksums != generated_checksums
254
- cdh = checksums_diff_hash(manifest_checksums, generated_checksums, manifest_type, 'generated')
255
- diff_hash[file_name] = cdh if cdh
256
- end
257
- end
258
- return if diff_hash.empty?
259
-
260
- err_data = {
261
- manifest_type: manifest_type,
262
- diffs: diff_hash
263
- }
264
- result_array << single_error_hash(CHECKSUM_MISMATCH, err_data)
265
- end
266
-
267
- def checksums_diff_hash(left_checksums, right_checksums, left_label, right_label)
268
- diff_hash = {}
269
- # NOTE: these are intentionally & and | instead of && and ||
270
- checksum_types_to_compare = (left_checksums.keys & right_checksums.keys)
271
- checksum_types_to_compare = (left_checksums.keys | right_checksums.keys) if checksum_types_to_compare.empty?
272
- checksum_types_to_compare.each do |type|
273
- left_checksum = left_checksums[type]
274
- right_checksum = right_checksums[type]
275
- diff_hash[type] = { left_label => left_checksum, right_label => right_checksum } if left_checksum != right_checksum
276
- end
277
- diff_hash.empty? ? nil : diff_hash
278
- end
279
-
280
- def verify_payload_size
281
- sizes_from_bag_info_file = bag_info_payload_size
282
- generated_sizes = generated_payload_size
283
- return if sizes_from_bag_info_file == generated_sizes
284
-
285
- err_data = {
286
- bag_info_sizes: sizes_from_bag_info_file,
287
- generated_sizes: generated_sizes
288
- }
289
- result_array << single_error_hash(PAYLOAD_SIZE_MISMATCH, err_data)
290
- end
291
-
292
- def bag_info_payload_size
293
- bag_info_txt_pathname = deposit_bag_pathname.join(BAG_INFO_TXT_BASENAME)
294
- bag_info_txt_pathname.readlines.each do |line|
295
- line.chomp!.strip!
296
- key, value = line.split(':', 2)
297
- if key.strip == 'Payload-Oxum'
298
- num_bytes, num_files = value.strip.split('.') if value
299
- return { bytes: num_bytes.to_i, files: num_files.to_i }
300
- end
301
- end
302
- end
303
-
304
- def generated_payload_size
305
- payload_pathname = deposit_bag_pathname.join(DATA_DIR_BASENAME)
306
- payload_pathname.find.select(&:file?).each_with_object(bytes: 0, files: 0) do |file, hash|
307
- hash[:bytes] += file.size
308
- hash[:files] += 1
309
- hash
310
- end
311
- end
312
-
313
- # checksums_hash: { fname => {:md5=>"xxx", :sha1=>"yyy"}, fname => ... }
314
- def checksum_types_from_manifest_checksums_hash(checksums_hash)
315
- types = []
316
- checksums_hash.each_value { |v| v.each_key { |k| types << k unless types.include?(k) } }
317
- types
318
- end
319
-
320
- def single_error_hash(error_code, err_data_hash)
321
- { error_code => error_code_msg(error_code, err_data_hash) }
322
- end
323
-
324
- def error_code_msg(error_code, err_data_hash)
325
- ERROR_CODE_TO_MESSAGES[error_code] % err_data_hash
326
- end
327
- end
328
- end
@@ -1,34 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Moab
4
- # The descriptive information about a digital object's collection of versions
5
- #
6
- # ====Data Model
7
- # * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
8
- # * {VersionMetadataEntry} [1..*] = attributes of a digital object version
9
- # * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
10
- #
11
- # @example {include:file:spec/fixtures/data/jq937jp0017/v3/metadata/versionMetadata.xml}
12
- # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
13
- # All rights reserved. See {file:LICENSE.rdoc} for details.
14
- class VersionMetadata < Serializer::Manifest
15
- include HappyMapper
16
-
17
- # The name of the XML element used to serialize this objects data
18
- tag 'versionMetadata'
19
-
20
- # (see Serializable#initialize)
21
- def initialize(opts = {})
22
- @versions = []
23
- super(opts)
24
- end
25
-
26
- # @attribute
27
- # @return [String] The digital object identifier
28
- attribute :digital_object_id, String, tag: 'objectId'
29
-
30
- # @attribute
31
- # @return [Array<VersionMetadataEntry>] An array of version metadata entries, one per version
32
- has_many :versions, VersionMetadataEntry, tag: 'version'
33
- end
34
- end
@@ -1,42 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Moab
4
- # A container element to record object version lifecycle events with timestamps
5
- #
6
- # ====Data Model
7
- # * {VersionMetadata} = descriptive information about a digital object's versions
8
- # * {VersionMetadataEntry} [1..*] = attributes of a digital object version
9
- # * <b>{VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps</b>
10
- #
11
- # @see VersionMetadata
12
- # @see VersionMetadataEntry
13
- # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
14
- # All rights reserved. See {file:LICENSE.rdoc} for details.
15
- class VersionMetadataEvent < Serializer::Serializable
16
- include HappyMapper
17
-
18
- # The name of the XML element used to serialize this objects data
19
- tag 'event'
20
-
21
- # (see Serializable#initialize)
22
- def initialize(opts = {})
23
- super(opts)
24
- end
25
-
26
- # @attribute
27
- # @return [String] The type of event
28
- attribute :type, String
29
-
30
- # @attribute
31
- # @return [String] The date and time of an event
32
- attribute :datetime, String
33
-
34
- def datetime=(event_datetime)
35
- @datetime = Moab::UtcTime.input(event_datetime)
36
- end
37
-
38
- def datetime
39
- Moab::UtcTime.output(@datetime)
40
- end
41
- end
42
- end
@@ -1,30 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Stanford
4
- # Utility Class for extracting content or other information from a Fedora Instance
5
- #
6
- # ====Data Model
7
- # * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
8
- # * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
9
- # * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
10
- #
11
- # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
12
- # All rights reserved. See {file:LICENSE.rdoc} for details.
13
- class ActiveFedoraObject
14
- # @param fedora_object [Object] The Active Fedora representation of the Fedora Object
15
- # @return [Stanford::ActiveFedoraObject] Create a u
16
- def initialize(fedora_object)
17
- @fedora_object = fedora_object
18
- end
19
-
20
- # @return [Object] The Active Fedora representation of the Fedora Object
21
- attr_accessor :fedora_object
22
-
23
- # @api external
24
- # @param ds_id [String] The datastream identifier
25
- # @return [String] The content of the specified datastream
26
- def get_datastream_content(ds_id)
27
- @fedora_object.datastreams[ds_id].content
28
- end
29
- end
30
- end
@@ -1,44 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Stanford
4
- # Stanford-specific utility methods for interfacing with DOR metadata files
5
- #
6
- # ====Data Model
7
- # * <b>{DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)</b>
8
- # * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
9
- # * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
10
- #
11
- # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
12
- # All rights reserved. See {file:LICENSE.rdoc} for details.
13
- class DorMetadata
14
- # @return [String] The digital object identifier (druid)
15
- attr_accessor :digital_object_id
16
-
17
- # @return [Integer] \@versionId = The ordinal version number
18
- attr_accessor :version_id
19
-
20
- # @param digital_object_id [String] The digital object identifier
21
- # @param version_id [Integer] The ordinal version number
22
- # @return [Stanford::DorMetadata]
23
- def initialize(digital_object_id, version_id = nil)
24
- @digital_object_id = digital_object_id
25
- @version_id = version_id
26
- end
27
-
28
- # @api internal
29
- # @param directory [String] The location of the directory to be inventoried
30
- # @param version_id (see #initialize)
31
- # @return [FileInventory] Inventory of the files under the specified directory
32
- def inventory_from_directory(directory, version_id = nil)
33
- version_id ||= @version_id
34
- version_inventory = Moab::FileInventory.new(type: 'version', digital_object_id: @digital_object_id,
35
- version_id: version_id)
36
- content_metadata = IO.read(File.join(directory, 'contentMetadata.xml'))
37
- content_group = Stanford::ContentInventory.new.group_from_cm(content_metadata, 'preserve')
38
- version_inventory.groups << content_group
39
- metadata_group = Moab::FileGroup.new(group_id: 'metadata').group_from_directory(directory)
40
- version_inventory.groups << metadata_group
41
- version_inventory
42
- end
43
- end
44
- end
@@ -1,38 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'find'
4
-
5
- module Stanford
6
- ##
7
- # methods for dealing with a directory which stores Moab objects
8
- class MoabStorageDirectory
9
- DRUID_TREE_REGEXP = '[[:lower:]]{2}/\\d{3}/[[:lower:]]{2}/\\d{4}'
10
- DRUID_REGEXP = '[[:lower:]]{2}\\d{3}[[:lower:]]{2}\\d{4}'
11
-
12
- def self.find_moab_paths(storage_dir)
13
- Find.find(storage_dir) do |path|
14
- Find.prune unless File.directory?(path) # don't bother with a matching on files, we only care about directories
15
- path_match_data = storage_dir_regexp(storage_dir).match(path)
16
- if path_match_data
17
- yield path_match_data[1], path, path_match_data # yield the druid, the full path, and the MatchData object
18
- Find.prune # we don't care about what's in the moab dir, we just want the paths that look like moabs
19
- end
20
- end
21
- end
22
-
23
- def self.list_moab_druids(storage_dir)
24
- druids = []
25
- find_moab_paths(storage_dir) { |druid, _path, _path_match_data| druids << druid }
26
- druids
27
- end
28
-
29
- private_class_method def self.storage_dir_regexps
30
- @storage_dir_regexps ||= {}
31
- end
32
-
33
- # this regexp caching makes things many times faster (e.g. went from ~2200 s to crawl disk11, down to ~300 s)
34
- private_class_method def self.storage_dir_regexp(storage_dir)
35
- storage_dir_regexps[storage_dir] ||= Regexp.new("^#{storage_dir}/#{DRUID_TREE_REGEXP}/(#{DRUID_REGEXP})$")
36
- end
37
- end
38
- end