ocfl-tools 0.9.14

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,493 @@
1
+ # frozen_string_literal: true
2
+
3
module OcflTools
  # Verifies that an instance of {OcflTools::OcflObject} or {OcflTools::OcflInventory}
  # is composed of valid data and structures.
  #
  # The object under test is duck-typed: it only has to define the instance
  # variables and respond to the methods that {#preflight} checks for. Every
  # check appends its findings to a single {OcflTools::OcflResults} instance,
  # which is also the return value of each public +check_*+ method.
  class OcflVerify # < OcflTools::OcflObject
    # The only value accepted for the inventory 'type' key.
    INVENTORY_TYPE = 'https://ocfl.io/1.0/spec/#inventory'

    # Keys that {#check_versions} requires in every version block.
    REQUIRED_VERSION_KEYS = %w[created message user state].freeze

    # Instance variables the object under test must define (see {#preflight}).
    REQUIRED_IVARS = %w[
      @id @head @type @digestAlgorithm @contentDirectory @manifest @versions @fixity
    ].freeze

    # Methods the object under test must respond to (see {#preflight}).
    REQUIRED_METHODS = %w[get_files get_current_files get_state version_id_list get_digest].freeze

    # @return [OcflTools::OcflResults] containing check results.
    attr_reader :my_results

    # Create a new OcflVerify object, using an OcflTools::OcflObject as source.
    # @param ocfl_object [OcflTools::OcflObject] an ocfl object or inventory to verify.
    # @raise [RuntimeError] if the object fails {#preflight}.
    def initialize(ocfl_object)
      @my_victim  = ocfl_object
      @my_results = OcflTools::OcflResults.new

      # Fail fast if the object cannot possibly be verified.
      preflight
    end

    # @return [OcflTools::OcflResults] containing information about actions taken
    #   against this object.
    def results
      @my_results
    end

    # Performs all checks on the given object and reports results.
    # @return [OcflTools::OcflResults] of results.
    def check_all
      # Duck-typing: @my_victim is assumed to respond to the OcflObject methods.
      check_id
      check_type
      check_head
      check_fixity
      check_manifest
      check_versions
      crosscheck_digests
      check_digestAlgorithm
      @my_results
    end

    # Checks OCFL Object for valid value in the id attribute.
    # Id value MUST be present, MUST be a non-empty String and SHOULD be a URI.
    # @return [OcflTools::OcflResults] of results.
    def check_id
      id = @my_victim.id
      case id
      when nil
        @my_results.error('E202', 'check_id', 'OCFL 3.5.1 Object ID cannot be nil')
      when 0, ''
        # 0 is kept for backward compatibility; '' is the real zero-length case.
        @my_results.error('E201', 'check_id', 'OCFL 3.5.1 Object ID cannot be 0 length')
      when String
        if check_for_uri(id)
          @my_results.ok('O200', 'check_id', 'OCFL 3.5.1 Inventory ID is OK.')
        else
          @my_results.warn('W201', 'check_id', 'OCFL 3.5.1 Inventory ID present, but does not appear to be a URI.')
        end
      else
        @my_results.error('E201', 'check_id', 'OCFL 3.5.1 Object ID must be a string.')
      end
      @my_results
    end

    # Checks OCFL Object for valid value in the head attribute.
    # The head must be a String whose version number equals the highest
    # version reported by the object's version_id_list.
    # @return [OcflTools::OcflResults] of results.
    def check_head
      head = @my_victim.head
      case head
      when nil
        @my_results.error('E212', 'check_head', 'OCFL 3.5.1 @head cannot be nil')
      when Integer
        @my_results.error('E213', 'check_head', 'OCFL 3.5.1 @head cannot be an Integer')
      when String
        version        = OcflTools::Utils.version_string_to_int(head)
        target_version = @my_victim.version_id_list.max
        if version == target_version
          @my_results.ok('O200', 'check_head', 'OCFL 3.5.1 Inventory Head is OK.')
          @my_results.info('I200', 'check_head', "OCFL 3.5.1 Inventory Head version #{version} matches highest version in versions.")
        else
          @my_results.error('E214', 'check_head', "OCFL 3.5.1 Inventory Head version #{version} does not match expected version #{target_version}")
        end
      else
        # Neither nil, Integer nor String: nothing sensible can be said.
        @my_results.error('E911', 'check_head', 'An unknown error has occurred.')
      end
      @my_results
    end

    # Checks OCFL Object for valid value in the type attribute.
    # @return [OcflTools::OcflResults] of results.
    def check_type
      case @my_victim.type
      when nil
        @my_results.error('E230', 'check_type', 'OCFL 3.5.1 Required OCFL key type not found.')
      when INVENTORY_TYPE
        @my_results.ok('O200', 'check_type', 'OCFL 3.5.1 Inventory Type is OK.')
      else
        @my_results.error('E231', 'check_type', 'OCFL 3.5.1 Required OCFL key type does not match expected value.')
      end
      @my_results
    end

    # Checks OCFL Object for valid value in the digestAlgorithm attribute.
    # Only sha256 and sha512 (case-insensitive) are accepted; sha256 also
    # draws a warning because sha512 is preferred.
    # @return [OcflTools::OcflResults] of results.
    def check_digestAlgorithm
      algorithm = @my_victim.digestAlgorithm

      # If there's no digestAlgorithm set in the inventory, that's a showstopper.
      if algorithm.nil?
        @my_results.error('E222', 'check_digestAlgorithm', 'Algorithm cannot be nil')
        return @my_results
      end

      # Values that cannot be downcased fall through to the "not valid" branch.
      normalized = algorithm.respond_to?(:downcase) ? algorithm.downcase : algorithm

      case normalized
      when 'sha256'
        @my_results.ok('O200', 'check_digestAlgorithm', 'OCFL 3.5.1 Inventory Algorithm is OK.')
        @my_results.info('I220', 'check_digestAlgorithm', "OCFL 3.5.1 #{normalized} is a supported digest algorithm.")
        @my_results.warn('W220', 'check_digestAlgorithm', "OCFL 3.5.1 #{normalized} SHOULD be Sha512.")
      when 'sha512'
        @my_results.ok('O200', 'check_digestAlgorithm', 'OCFL 3.5.1 Inventory Algorithm is OK.')
        @my_results.info('I220', 'check_digestAlgorithm', "OCFL 3.5.1 #{normalized} is a supported digest algorithm.")
      else
        @my_results.error('E223', 'check_digestAlgorithm', "OCFL 3.5.1 Algorithm #{algorithm} is not valid for OCFL use.")
      end
      @my_results
    end

    # Checks OCFL Object for a well-formed manifest block.
    # The manifest must be present and must not be empty.
    # @return [OcflTools::OcflResults] of results.
    def check_manifest
      # TODO: Should check that it's a hash of digests and filepaths somehow...?
      # Get digest Algo type, use that to get key length.
      # check all keys in manifest to make sure they're all that length.
      manifest = @my_victim.manifest

      if manifest.nil?
        @my_results.error('E250', 'check_manifest', 'OCFL 3.5.2 there MUST be a manifest block.')
      elsif manifest == {}
        @my_results.error('E251', 'check_manifest', 'OCFL 3.5.2 manifest block cannot be empty.')
      else
        @my_results.ok('O200', 'check_manifest', 'OCFL 3.5.2 Inventory Manifest syntax is OK.')
      end

      @my_results
    end

    # Checks OCFL Object for a well-formed versions block.
    #
    # Verifies that version numbers are contiguous starting at 1, that every
    # version block is a Hash containing the created, message, user and state
    # keys, and that each of those values is well formed.
    # @return [OcflTools::OcflResults] of results.
    def check_versions
      my_versions     = @my_victim.version_id_list.sort
      version_count   = my_versions.length
      highest_version = my_versions.last # nil when there are no versions at all.

      # @version_check flips to true as soon as any version-level error is
      # found; the private check_version_* helpers set it as well.
      @version_check = nil

      if version_count == highest_version
        @my_results.ok('O200', 'check_versions', "OCFL 3.5.3 Found #{version_count} versions, highest version is #{highest_version}")
      else
        @my_results.error('E014', 'check_versions', "OCFL 3.5.3 Found #{version_count} versions, but highest version is #{highest_version}")
        @version_check = true
      end

      # Version numbers should be contiguous, starting at 1. The walk is bounded
      # by an Integer so that an empty version list cannot loop forever.
      if highest_version.is_a?(Integer)
        1.upto(highest_version) do |expected|
          found = my_versions[expected - 1]
          next if found == expected

          @my_results.error('E015', 'check_versions', "OCFL 3.5.3 Expected version sequence not found. Expected version #{expected}, found version #{found}.")
          @version_check = true
        end
      end

      # We do NOT need to check the @versions.keys here for 'v0001', etc.
      # That's already been done when we looked at version_id_list and
      # checked for contiguous version numbers in my_versions.
      @my_victim.versions.each do |version, block|
        unless block.is_a?(Hash)
          @my_results.error('E111', 'check_versions', "version #{version} block is wrong type.")
          @version_check = true
          next
        end

        REQUIRED_VERSION_KEYS.each do |key|
          unless block.key?(key)
            @my_results.error('E016', 'check_versions', "OCFL 3.5.3.1 version #{version} is missing #{key} block.")
            @version_check = true
            next
          end

          # Key is present; does its value conform?
          case key
          when 'created' then check_version_created(block['created'], version)
          when 'user'    then check_version_user(block['user'], version)
          when 'state'   then check_version_state(block['state'], version)
          when 'message' then check_version_message(block['message'], version)
          end
        end
      end

      if @version_check.nil?
        @my_results.ok('O200', 'check_versions', 'OCFL 3.5.3.1 version syntax is OK.')
      end
      @my_results
    end

    # Checks OCFL Object for a well-formed fixity block, if present.
    # We do not compute fixity here; we only check that every algorithm named
    # in the block is listed in OcflTools.config.fixity_algorithms.
    # A nil or empty fixity block produces no results at all.
    # @return [OcflTools::OcflResults] of results.
    def check_fixity
      fixity = @my_victim.fixity || {}
      return @my_results if fixity.empty?

      @my_results.info('I111', 'check_fixity', 'Fixity block is present.')

      unsupported = fixity.keys.reject do |algorithm|
        OcflTools.config.fixity_algorithms.include?(algorithm)
      end
      unsupported.each do |algorithm|
        @my_results.error('E111', 'check_fixity', "Fixity block contains unsupported algorithm #{algorithm}")
      end

      if unsupported.empty?
        @my_results.ok('O111', 'check_fixity', 'Fixity block is present and contains valid algorithms.')
      end

      @my_results
    end

    # Checks the contents of the manifest block against the digests in the versions block to verify all
    # files necessary to re-constitute the object at any version are correctly referenced in the OCFL Object.
    # @return [OcflTools::OcflResults] of results.
    def crosscheck_digests
      errors        = false
      state_digests = []

      @my_victim.versions.each do |version, block|
        unless block.is_a?(Hash)
          @my_results.error('E111', 'crosscheck_digests', "version #{version} block is wrong type.")
          errors = true
          next
        end

        state = block['state']
        unless state.is_a?(Hash)
          @my_results.error('E111', 'crosscheck_digests', "version #{version} state block is wrong type.")
          errors = true
          next
        end

        state_digests.concat(state.keys)
      end

      unique_checksums = state_digests.uniq
      # A missing manifest is reported by check_manifest; treat it as empty
      # here so the cross-check reports mismatches instead of raising.
      manifest = @my_victim.manifest || {}

      # First check; there should be the same number of entries on both sides.
      if unique_checksums.length != manifest.length
        @my_results.error('E050', 'crosscheck_digests', "OCFL 3.5.3.1 Digests missing! #{unique_checksums.length} digests in versions vs. #{manifest.length} digests in manifest.")
        errors = true
      end

      # Second check; each entry in unique_checksums should have a match in the manifest.
      unique_checksums.each do |checksum|
        next if manifest.member?(checksum)

        @my_results.error('E051', 'crosscheck_digests', "OCFL 3.5.3.1 Checksum #{checksum} not found in manifest!")
        errors = true
      end

      @my_results.ok('O200', 'crosscheck_digests', 'OCFL 3.5.3.1 Digests are OK.') unless errors
      @my_results
    end

    # Verifies that the object passed to this class at instantiation defines the expected
    # instance variables and responds to the expected methods.
    # @raise [RuntimeError] naming the first missing instance variable or method.
    # @return [Boolean] true
    def preflight
      REQUIRED_IVARS.each do |var|
        next if @my_victim.instance_variable_defined?(var)

        raise "Object does not have instance var #{var} defined"
      end

      REQUIRED_METHODS.each do |mthd|
        next if @my_victim.respond_to?(mthd)

        raise "Object does not respond to #{mthd}"
      end

      true
    end

    private

    # Records a version-level error under the 'check_version' context and
    # marks the current check_versions run as failed.
    def version_error(code, message)
      @my_results.error(code, 'check_version', message)
      @version_check = true
    end

    # version.message must be a String.
    def check_version_message(value, version)
      return if value.is_a?(String)

      version_error('E111', "Value in version #{version} message block is wrong type.")
    end

    # 'user' must be a Hash.
    # 'user'.'name' must be present and contain a non-empty String.
    # 'user'.'address', if present, should contain a mailto: or URI value.
    # Any other key in the user block is reported as an error.
    def check_version_user(value, version)
      unless value.is_a?(Hash)
        version_error('E111', "Value in version #{version} user block is wrong type.")
        return # No point in processing further.
      end

      unless value.key?('name')
        version_error('E111', "Value in version #{version} user block is missing required name key.")
      end

      value.each do |user_key, user_value|
        case user_key
        when 'name'
          check_user_name(user_value, version)
        when 'address'
          check_user_address(user_value, version)
        else
          version_error('E111', "Unexpected value in version #{version} user block #{user_key}.")
        end
      end
    end

    # user.name must be a String with content.
    def check_user_name(name, version)
      if !name.is_a?(String)
        version_error('E111', "Value in version #{version} user name block is not a String.")
      elsif name.empty?
        version_error('E111', "Value in version #{version} user name block cannot be empty.")
      end
    end

    # user.address must be a String, SHOULD have content, and should be either
    # an e-mail address (optionally prefixed with mailto:) or a URI.
    def check_user_address(address, version)
      unless address.is_a?(String)
        version_error('E111', "Value in version #{version} user address block is not a String.")
        return
      end

      if address.empty?
        @my_results.warn('W111', 'check_version', "Value in version #{version} user address block SHOULD NOT be empty.")
        return
      end

      # The URI test runs on the value without its mailto: prefix, so a
      # malformed "mailto:..." is not accepted merely for being URI-shaped.
      return if check_for_mailto(address) || check_for_uri(strip_mailto(address))

      version_error('E111', "Value in #{version} #{address} is not a valid URI or mailto: format.")
    end

    # Returns a copy of value without a leading 'mailto:'; never mutates value.
    def strip_mailto(value)
      value.sub(/\Amailto:/, '')
    end

    # used by user.address validation. RFC6068.
    # @return [Boolean] true if value, minus any leading mailto:, is an e-mail address.
    def check_for_mailto(value)
      strip_mailto(value).match?(URI::MailTo::EMAIL_REGEXP)
    end

    # used by check_id and user.address validation. RFC3986.
    # @return [Boolean] true if the whole of value matches the stdlib URI pattern.
    def check_for_uri(value)
      value.match?(/\A#{URI::DEFAULT_PARSER.make_regexp}\z/)
    end

    # 'state' must be a hash.
    # 'state' must contain at least 1 key/value pair.
    # Every value must be an Array of valid logical path Strings.
    def check_version_state(value, version)
      unless value.is_a?(Hash)
        version_error('E111', "Value in version #{version} state block is wrong type.")
        return # No point in processing further.
      end

      if value.empty?
        version_error('E111', "Version #{version} state block is empty.")
        return # No point in processing further.
      end

      # Now that we have a prima facie valid state block, check each logical path.
      value.each do |digest, logical_paths|
        unless logical_paths.is_a?(Array)
          version_error('E260', "OCFL 3.5.3.1 logical path syntax error: Version #{version} state block key #{digest} contains a value that is not an array.")
          next # No point in processing this digest key further.
        end

        logical_paths.each do |content|
          unless content.is_a?(String)
            version_error('E260', "OCFL 3.5.3.1 logical path syntax error: Value in version #{version} state block key #{digest} contains an array value that is not a String.")
            next
          end

          problems = check_logical_path(content)
          next if problems.empty? # all is well; evaluate next logical path.

          version_error('E260', "OCFL 3.5.3.1 logical path syntax error: Value in version #{version} state block key #{digest} contains logical_path #{content} with error: #{problems}")
        end
      end
    end

    # "[Logical] Path elements MUST NOT be ., .., or empty (//). Additionally, a logical path MUST NOT begin with a leading /."
    # @return [Array<String>] one message per problem found; empty if the logical path is fine.
    def check_logical_path(content)
      return ['logical_path content must not be empty.'] if content.empty?

      results = []
      results << 'logical_path content must not start with a /' if content.start_with?('/')

      # A limit of -1 keeps trailing empty elements, so "a/" and "a//" are caught too.
      content.split('/', -1).each do |element|
        case element
        when '.'
          results << "logical_path element must not be '.'"
        when '..'
          results << "logical_path element must not be '..'"
        when ''
          results << 'logical_path element must not be empty.'
        end
      end
      results
    end

    # 'created' block must be a String.
    # 'created' must contain rfc3339 value.
    def check_version_created(value, version)
      unless value.is_a?(String)
        version_error('E111', "Value in version #{version} created block is not a String.")
        return
      end

      if value.empty?
        version_error('E111', "Version #{version} created block is empty.")
        return # No point in processing further.
      end

      # DateTime.rfc3339 raises if 'value' isn't in rfc3339 notation.
      begin
        DateTime.rfc3339(value)
      rescue ArgumentError
        version_error('E261', "OCFL 3.5.3.1 Version #{version} created block must be expressed in RFC3339 format.")
      end
    end
  end
end