ocfl-tools 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,493 @@
1
+ # frozen_string_literal: true
2
+
3
module OcflTools
  # Class to verify that an instance of {OcflTools::OcflObject} or {OcflTools::OcflInventory} is composed of valid data and structures.
  #
  # Every check appends its findings to a single {OcflTools::OcflResults} instance
  # (see {#results}) and returns that same instance, so checks can be run
  # individually or all together via {#check_all}.
  class OcflVerify # < OcflTools::OcflObject
    # @return {OcflTools::OcflResults} containing check results.
    attr_reader :my_results

    # Create a new OCFLVerify object, using an OcflTools::Ocflobject as source.
    # @param {OcflTools::OcflObject} ocfl_object an ocfl object or inventory to verify.
    # @raise [RuntimeError] if the object lacks an expected instance variable or method (see {#preflight}).
    def initialize(ocfl_object)
      @my_victim = ocfl_object
      @my_results = OcflTools::OcflResults.new

      # check .respond_to? first for all expected methods.
      preflight
    end

    # @return {OcflTools::OcflResults} containing information about actions taken
    # against this object.
    def results
      @my_results
    end

    # Performs all checks on the given object and reports results.
    # @return {OcflTools::OcflResults} of results.
    def check_all
      # Duck-typing the heck out of this, assuming @my_victim will respond to ocflobject methods.
      check_id
      check_type
      check_head
      check_fixity
      check_manifest
      check_versions
      crosscheck_digests
      check_digestAlgorithm
      @my_results
    end

    # Checks OCFL Object for valid value in the id attribute.
    # Id value MUST be present and SHOULD be a URI.
    # @return {OcflTools::OcflResults} of results.
    def check_id
      id = @my_victim.id

      if id.nil?
        @my_results.error('E202', 'check_id', 'OCFL 3.5.1 Object ID cannot be nil')
      elsif id == 0 || id == ''
        # Integer 0 is kept for backward compatibility; the empty String is the
        # case the "0 length" message is actually about.
        @my_results.error('E201', 'check_id', 'OCFL 3.5.1 Object ID cannot be 0 length')
      elsif !id.is_a?(String)
        @my_results.error('E201', 'check_id', 'OCFL 3.5.1 Object ID must be a string.')
      elsif check_for_uri(id)
        @my_results.ok('O200', 'check_id', 'OCFL 3.5.1 Inventory ID is OK.')
      else
        @my_results.warn('W201', 'check_id', 'OCFL 3.5.1 Inventory ID present, but does not appear to be a URI.')
      end
      @my_results
    end

    # Checks OCFL Object for valid value in the head attribute.
    # The head must be a String whose version number equals the highest
    # version number reported by the object's version_id_list.
    # @return {OcflTools::OcflResults} of results.
    def check_head
      head = @my_victim.head

      case head
      when nil
        @my_results.error('E212', 'check_head', 'OCFL 3.5.1 @head cannot be nil')
      when Integer
        @my_results.error('E213', 'check_head', 'OCFL 3.5.1 @head cannot be an Integer')
      when String
        version = OcflTools::Utils.version_string_to_int(head)
        target_version = @my_victim.version_id_list.max
        if version == target_version
          @my_results.ok('O200', 'check_head', 'OCFL 3.5.1 Inventory Head is OK.')
          @my_results.info('I200', 'check_head', "OCFL 3.5.1 Inventory Head version #{version} matches highest version in versions.")
        else
          @my_results.error('E214', 'check_head', "OCFL 3.5.1 Inventory Head version #{version} does not match expected version #{target_version}")
        end
      else
        # default case error
        @my_results.error('E911', 'check_head', 'An unknown error has occurred.')
      end
      @my_results
    end

    # Checks OCFL Object for valid value in the type attribute.
    # @return {OcflTools::OcflResults} of results.
    def check_type
      case @my_victim.type
      when nil
        @my_results.error('E230', 'check_type', 'OCFL 3.5.1 Required OCFL key type not found.')
      when 'https://ocfl.io/1.0/spec/#inventory'
        @my_results.ok('O200', 'check_type', 'OCFL 3.5.1 Inventory Type is OK.')
      else
        @my_results.error('E231', 'check_type', 'OCFL 3.5.1 Required OCFL key type does not match expected value.')
      end
      @my_results
    end

    # Checks OCFL Object for valid value in the digestAlgorithm attribute.
    # sha512 is accepted silently, sha256 is accepted with a warning, anything
    # else (including non-String values) is an error.
    # @return {OcflTools::OcflResults} of results.
    def check_digestAlgorithm
      algorithm = @my_victim.digestAlgorithm

      # If there's no digestAlgorithm set in the inventory, that's a showstopper.
      if algorithm.nil?
        @my_results.error('E222', 'check_digestAlgorithm', 'Algorithm cannot be nil')
        return @my_results
      end

      # Only Strings can name an algorithm; anything else falls to the error branch
      # instead of raising NoMethodError on #downcase.
      normalized = algorithm.is_a?(String) ? algorithm.downcase : nil

      # must be one of sha256 or sha512
      case normalized
      when 'sha256'
        @my_results.ok('O200', 'check_digestAlgorithm', 'OCFL 3.5.1 Inventory Algorithm is OK.')
        @my_results.info('I220', 'check_digestAlgorithm', "OCFL 3.5.1 #{normalized} is a supported digest algorithm.")
        @my_results.warn('W220', 'check_digestAlgorithm', "OCFL 3.5.1 #{normalized} SHOULD be Sha512.")
      when 'sha512'
        @my_results.ok('O200', 'check_digestAlgorithm', 'OCFL 3.5.1 Inventory Algorithm is OK.')
        @my_results.info('I220', 'check_digestAlgorithm', "OCFL 3.5.1 #{normalized} is a supported digest algorithm.")
      else
        @my_results.error('E223', 'check_digestAlgorithm', "OCFL 3.5.1 Algorithm #{algorithm} is not valid for OCFL use.")
      end
      @my_results
    end

    # Checks OCFL Object for a well-formed manifest block.
    # Only presence and non-emptiness are checked here; digest consistency is
    # covered by {#crosscheck_digests}.
    # @return {OcflTools::OcflResults} of results.
    def check_manifest
      manifest = @my_victim.manifest

      # TODO: Should check that it's a hash of digests and filepaths somehow...?
      # Get digest Algo type, use that to get key length.
      # check all keys in manifest to make sure they're all that length.
      if manifest.nil?
        @my_results.error('E250', 'check_manifest', 'OCFL 3.5.2 there MUST be a manifest block.')
      elsif manifest == {}
        @my_results.error('E251', 'check_manifest', 'OCFL 3.5.2 manifest block cannot be empty.')
      else
        @my_results.ok('O200', 'check_manifest', 'OCFL 3.5.2 Inventory Manifest syntax is OK.')
      end

      @my_results
    end

    # Checks OCFL Object for a well-formed versions block.
    # Verifies that version numbers are contiguous starting at 1, and that every
    # version block is a Hash holding valid created, message, user and state entries.
    # @return {OcflTools::OcflResults} of results.
    def check_versions
      my_versions = @my_victim.version_id_list.sort
      version_count = my_versions.length
      highest_version = my_versions.last

      # @version_check flips to true as soon as any versions-related error is
      # recorded, including by the private check_version_* helpers.
      @version_check = nil
      if version_count == highest_version
        @my_results.ok('O200', 'check_versions', "OCFL 3.5.3 Found #{version_count} versions, highest version is #{highest_version}")
      else
        @my_results.error('E014', 'check_versions', "OCFL 3.5.3 Found #{version_count} versions, but highest version is #{highest_version}")
        @version_check = true
      end

      # should be contiguous version numbers starting at 1.
      # nil.to_i is 0, so an object without any versions simply skips this loop.
      1.upto(highest_version.to_i) do |expected|
        # (expected - 1) is the index of this version in the sorted list.
        found = my_versions[expected - 1]
        next if found == expected

        @my_results.error('E015', 'check_versions', "OCFL 3.5.3 Expected version sequence not found. Expected version #{expected}, found version #{found || 'none'}.")
        @version_check = true
      end
      # We do NOT need to check the @versions.keys here for 'v0001', etc.
      # That's already been done when we looked at version_id_list and
      # checked for contiguous version numbers in my_versions.

      @my_victim.versions.each do |version, block|
        # Without this guard a malformed block would raise on #key? below.
        unless block.is_a?(Hash)
          @my_results.error('E111', 'check_versions', "version #{version} block is wrong type.")
          @version_check = true
          next
        end

        %w[created message user state].each do |key|
          unless block.key?(key)
            @my_results.error('E016', 'check_versions', "OCFL 3.5.3.1 version #{version} is missing #{key} block.")
            @version_check = true
            next
          end

          # key is present, does it conform?
          case key
          when 'created' then check_version_created(block['created'], version)
          when 'user'    then check_version_user(block['user'], version)
          when 'state'   then check_version_state(block['state'], version)
          when 'message' then check_version_message(block['message'], version)
          end
        end
      end

      if @version_check.nil?
        @my_results.ok('O200', 'check_versions', 'OCFL 3.5.3.1 version syntax is OK.')
      end
      @my_results
    end

    # Checks OCFL Object for a well-formed fixity block, if present. We do not compute fixity here; only check existence.
    # An absent or empty fixity block produces no results at all.
    # @return {OcflTools::OcflResults} of results.
    def check_fixity
      fixity = @my_victim.fixity
      return @my_results if fixity.nil? || fixity.empty?

      @my_results.info('I111', 'check_fixity', 'Fixity block is present.')

      # Set OcflTools.config.fixity_algorithms for what to look for.
      errors = nil
      fixity.each do |algorithm, _digest|
        next if OcflTools.config.fixity_algorithms.include?(algorithm)

        @my_results.error('E111', 'check_fixity', "Fixity block contains unsupported algorithm #{algorithm}")
        errors = true
      end

      if errors.nil?
        @my_results.ok('O111', 'check_fixity', 'Fixity block is present and contains valid algorithms.')
      end

      @my_results
    end

    # Checks the contents of the manifest block against the files and digests in the versions block to verify all
    # files necessary to re-constitute the object at any version are correctly referenced in the OCFL Object.
    # @return {OcflTools::OcflResults} of results.
    def crosscheck_digests
      # requires values in @versions and @manifest.
      # verifies that every digest in @versions can be found in @manifest.
      errors = nil
      my_checksums = []

      # A missing manifest is reported by check_manifest; here it is treated as
      # empty so the comparison below reports mismatches instead of raising.
      manifest = @my_victim.manifest || {}

      @my_victim.versions.each do |version, block|
        unless block.is_a?(Hash)
          @my_results.error('E111', 'crosscheck_digests', "version #{version} block is wrong type.")
          errors = true # a malformed block must suppress the final OK
          next
        end

        version_digests = block['state']
        unless version_digests.is_a?(Hash)
          @my_results.error('E111', 'crosscheck_digests', "version #{version} state block is wrong type.")
          errors = true
          next
        end

        my_checksums.concat(version_digests.keys)
      end

      unique_checksums = my_checksums.uniq

      # First check; there should be the same number of entries on both sides.
      if unique_checksums.length != manifest.length
        @my_results.error('E050', 'crosscheck_digests', "OCFL 3.5.3.1 Digests missing! #{unique_checksums.length} digests in versions vs. #{manifest.length} digests in manifest.")
        errors = true
      end

      # Second check; each entry in unique_checksums should have a match in @manifest.
      unique_checksums.each do |checksum|
        next if manifest.member?(checksum)

        @my_results.error('E051', 'crosscheck_digests', "OCFL 3.5.3.1 Checksum #{checksum} not found in manifest!")
        errors = true
      end

      if errors.nil?
        @my_results.ok('O200', 'crosscheck_digests', 'OCFL 3.5.3.1 Digests are OK.')
      end
      @my_results
    end

    # Verifies that the object passed to this class at instantiation responds to the expected
    # methods and attributes. Raises an exception on failure.
    # @raise [RuntimeError] naming the first missing instance variable or method.
    # @return [Boolean] true
    def preflight
      # check for expected instance_variables with .instance_variable_defined?(@some_var)
      %w[@id @head @type @digestAlgorithm @contentDirectory @manifest @versions @fixity].each do |var|
        next if @my_victim.instance_variable_defined?(var)

        raise "Object does not have instance var #{var} defined"
      end

      # check for all methods we need to validate OCFL structure
      %w[get_files get_current_files get_state version_id_list get_digest].each do |mthd|
        next if @my_victim.respond_to?(mthd)

        raise "Object does not respond to #{mthd}"
      end

      true
    end

    private

    # version.message must be a String.
    def check_version_message(value, version)
      return if value.is_a?(String) # version.message is valid!

      @my_results.error('E111', 'check_version', "Value in version #{version} message block is wrong type.")
      @version_check = true
    end

    # 'user' must be a hash.
    # 'user'.'name' must be present and contain a non-empty string value.
    # 'user'.'address' should contain value
    def check_version_user(value, version)
      unless value.is_a?(Hash)
        @my_results.error('E111', 'check_version', "Value in version #{version} user block is wrong type.")
        @version_check = true
        return # No point in processing further.
      end

      # 'user' must contain 'name'
      unless value.key?('name')
        @my_results.error('E111', 'check_version', "Value in version #{version} user block is missing name.")
        @version_check = true
      end

      value.each do |user_key, user_value|
        case user_key
        when 'name'
          check_user_name(user_value, version)
        when 'address'
          check_user_address(user_value, version)
        else # unexpected value in user block.
          @my_results.error('E111', 'check_version', "Unexpected value in version #{version} user block #{user_key}.")
          @version_check = true
        end
      end
    end

    # user.name must be a String with content.
    def check_user_name(user_value, version)
      unless user_value.is_a?(String)
        @my_results.error('E111', 'check_version', "Value in version #{version} user name block is not a String.")
        @version_check = true
        return
      end
      return unless user_value.empty?

      @my_results.error('E111', 'check_version', "Value in version #{version} user name block cannot be empty.")
      @version_check = true
    end

    # user.address must be a String, SHOULD have content, and should be either
    # a mailto: address or a URI.
    def check_user_address(user_value, version)
      unless user_value.is_a?(String)
        @my_results.error('E111', 'check_version', "Value in version #{version} user address block is not a String.")
        @version_check = true
        return
      end

      if user_value.empty?
        @my_results.warn('W111', 'check_version', "Value in version #{version} user address block SHOULD NOT be empty.")
        return
      end

      # A value that announces itself as mailto: must carry a real e-mail address;
      # anything else may be either a bare e-mail address or a URI.
      acceptable = if user_value.start_with?('mailto:')
                     check_for_mailto(user_value)
                   else
                     check_for_mailto(user_value) || check_for_uri(user_value)
                   end
      return if acceptable

      # If we get to here, it wasn't a mailto or a URI.
      @my_results.error('E111', 'check_version', "Value in #{version} #{user_value} is not a valid URI or mailto: format.")
      @version_check = true
    end

    # used by user.address validation. RFC6068.
    # Works on a copy, so the inventory's own string is never modified.
    # @return [Boolean] true if value, minus any leading 'mailto:', is an e-mail address.
    def check_for_mailto(value)
      value.sub(/\Amailto:/, '').match?(URI::MailTo::EMAIL_REGEXP)
    end

    # used by check_id and user.address validation. RFC3986.
    # @return [Boolean] true if the entire value matches the URI pattern.
    def check_for_uri(value)
      value.match?(/\A#{URI::DEFAULT_PARSER.make_regexp}\z/)
    end

    # 'state' must be a hash.
    # 'state' must contain at least 1 key/value pair
    # Each value must be an Array of Strings that are valid logical paths.
    def check_version_state(value, version)
      unless value.is_a?(Hash)
        @my_results.error('E111', 'check_version', "Value in version #{version} state block is wrong type.")
        @version_check = true
        return # No point in processing further.
      end

      # State hash must have content.
      if value.empty?
        @my_results.error('E111', 'check_version', "Version #{version} state block is empty.")
        @version_check = true
        return # No point in processing further.
      end

      # Now that we have a prima facie valid state block, check for logical path.
      value.each do |digest, logical_path|
        # logical_path must be an Array.
        unless logical_path.is_a?(Array)
          @my_results.error('E260', 'check_version', "OCFL 3.5.3.1 logical path syntax error: Version #{version} state block key #{digest} contains a value that is not an array.")
          @version_check = true
          next # No point in processing this digest key further.
        end

        # Each value in this array must be a String that conforms to logical_path content restrictions.
        logical_path.each do |content|
          # Must be a String (and not an Array or Hash)
          unless content.is_a?(String)
            @my_results.error('E260', 'check_version', "OCFL 3.5.3.1 logical path syntax error: Value in version #{version} state block key #{digest} contains an array value that is not a String.")
            @version_check = true
            next
          end

          logical_path_result = check_logical_path(content)
          next if logical_path_result.empty? # all is well; evaluate next logical_path content string.

          # All is not well. What is wrong?
          @my_results.error('E260', 'check_version', "OCFL 3.5.3.1 logical path syntax error: Value in version #{version} state block key #{digest} contains logical_path #{content} with error: #{logical_path_result}")
          @version_check = true
        end
      end
    end

    # "[Logical] Path elements MUST NOT be ., .., or empty (//). Additionally, a logical path MUST NOT begin with a leading /."
    # Returns an Array of errors, or an Array of zero size if logical_path is fine.
    def check_logical_path(content)
      return ['logical_path content must not be empty.'] if content.empty?

      results = []

      # a logical path MUST NOT begin with a leading /.
      results << 'logical_path content must not start with a /' if content.start_with?('/')

      # The -1 limit keeps trailing empty elements, so 'a//' and 'a/' are caught too.
      content.split('/', -1).each do |element|
        case element
        when '.'
          results << "logical_path element must not be '.'"
        when '..'
          results << "logical_path element must not be '..'"
        when ''
          results << 'logical_path element must not be empty.'
        end
      end
      results
    end

    # 'created' block must be a String.
    # 'created' must contain rfc3339 value.
    def check_version_created(value, version)
      unless value.is_a?(String)
        @my_results.error('E111', 'check_version', "Value in version #{version} created address block is not a String.")
        @version_check = true
        return
      end

      # 'created' cannot be empty.
      if value.empty?
        @my_results.error('E111', 'check_version', "Version #{version} created block is empty.")
        @version_check = true
        return # No point in processing further.
      end

      # This throws an exception if 'value' isn't a String in rfc3339 notation.
      begin
        DateTime.rfc3339(value)
      rescue ArgumentError
        @my_results.error('E261', 'check_version', "OCFL 3.5.3.1 Version #{version} created block must be expressed in RFC3339 format.")
        @version_check = true
      end
    end
  end
end