ocfl-tools 0.9.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +53 -0
- data/.rubocop.yml +35 -0
- data/.rubocop_todo.yml +189 -0
- data/.travis.yml +12 -0
- data/Gemfile +5 -0
- data/README.md +647 -0
- data/Rakefile +13 -0
- data/VERSION +1 -0
- data/examples/list_files.rb +56 -0
- data/examples/validate_object.rb +23 -0
- data/lib/ocfl-tools.rb +19 -0
- data/lib/ocfl_tools.rb +17 -0
- data/lib/ocfl_tools/config.rb +27 -0
- data/lib/ocfl_tools/ocfl_actions.rb +146 -0
- data/lib/ocfl_tools/ocfl_delta.rb +250 -0
- data/lib/ocfl_tools/ocfl_deposit.rb +685 -0
- data/lib/ocfl_tools/ocfl_errors.rb +23 -0
- data/lib/ocfl_tools/ocfl_inventory.rb +95 -0
- data/lib/ocfl_tools/ocfl_object.rb +425 -0
- data/lib/ocfl_tools/ocfl_results.rb +272 -0
- data/lib/ocfl_tools/ocfl_validator.rb +799 -0
- data/lib/ocfl_tools/ocfl_verify.rb +493 -0
- data/lib/ocfl_tools/utils.rb +127 -0
- data/lib/ocfl_tools/utils_file.rb +195 -0
- data/lib/ocfl_tools/utils_inventory.rb +96 -0
- data/ocfl-tools.gemspec +31 -0
- data/results_codes.md +106 -0
- data/test-it.sh +11 -0
- metadata +191 -0
@@ -0,0 +1,272 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OcflTools
  # Class for collating results of validation and verification checks.
  #
  # Events are held in a nested hash shaped as
  #   level => code => context => [ descriptions ]
  # where level is one of 'error', 'warn', 'info' or 'ok'.
  #
  # NOTE: this class defines public instance methods named +print+ and +warn+;
  # inside an instance they take precedence over Kernel#print and Kernel#warn.
  class OcflResults
    # The levels tracked by every instance, in the order they are reported.
    LEVELS = %w[error warn info ok].freeze

    def initialize
      @my_results = LEVELS.each_with_object({}) { |level, store| store[level] = {} }
      # Lazily-built 'context first' view of @my_results; see {#get_contexts}.
      @my_contexts = {}
    end

    # Convenience method for obtaining a hash of results.
    # @return [Hash] of results stored in this instance (the live internal hash, not a copy).
    def results
      @my_results
    end

    # Convenience method for obtaining a hash of results; same as {#results}.
    # @return [Hash] of results stored in this instance (the live internal hash, not a copy).
    def all
      @my_results
    end

    # Convenience method to print out the results hash to stdout.
    # A level heading is only printed for levels that contain at least one code.
    # @return [Hash] the results hash that was printed.
    def print
      @my_results.each do |level, status_codes|
        puts level.upcase unless status_codes.empty?
        status_codes.each do |code, contexts|
          contexts.each do |context, descriptions|
            descriptions.each { |desc| puts " #{code}:#{context}:#{desc}" }
          end
        end
      end
    end

    # @return [Hash] a hash of all the 'error' entries stored in this instance.
    def get_errors
      @my_results['error']
    end

    # @return [Hash] a hash of all the 'warn' entries stored in this instance.
    def get_warnings
      @my_results['warn']
    end

    # @return [Hash] a hash of all the 'info' entries stored in this instance.
    def get_info
      @my_results['info']
    end

    # @return [Hash] a hash of all the 'OK' entries stored in this instance.
    def get_ok
      @my_results['ok']
    end

    # Processes all of @my_results and creates a nested hash of
    # context => level => code => [ descriptions ]
    # Useful if you want to get all the info/error/warn/ok results for a specific context.
    #
    # The description arrays are shared with @my_results rather than copied, so
    # descriptions recorded after an earlier call are visible through the hash
    # returned by that call.
    # @return [Hash] a nested hash of results, organized with 'context' as a top level key.
    def get_contexts
      @my_results.each do |level, codes| # levels are warn, info, ok, error
        codes.each do |code, contexts|
          contexts.each do |context, descriptions|
            by_level = (@my_contexts[context] ||= {})
            by_code = (by_level[level] ||= {})
            # An existing entry already points at the same array; leave it alone.
            by_code[code] ||= descriptions
          end
        end
      end
      @my_contexts
    end

    # Get all results for a specific context (e.g. 'verify_checksums')
    # @param [String] my_context a string value of the context (e.g. 'verify_checksums') to query
    # @return [Hash, nil] results for the specified context, arranged by
    #   level => code => [ descriptions ], or nil if nothing was recorded for that context.
    def get_context(my_context)
      get_contexts[my_context]
    end

    # Gets the total number of error events contained within this instance.
    # @return [Integer] the number of errors.
    def error_count
      count_events('error')
    end

    # Gets the total number of warning events contained within this instance.
    # @return [Integer] the number of warnings.
    def warn_count
      count_events('warn')
    end

    # Gets the total number of 'info' events contained within this instance.
    # @return [Integer] the number of informational messages.
    def info_count
      count_events('info')
    end

    # Gets the total number of 'ok' events contained within this instance.
    # @return [Integer] the number of OK messages.
    def ok_count
      count_events('ok')
    end

    # Creates an 'OK' message in the object with the specified code and context.
    # A description already present for that code and context is not stored twice.
    # @param [String] code the appropriate 'ok' code for this event.
    # @param [String] context the process or class that is creating this event.
    # @param [String] description the details of this specific event.
    # @return [String] description of posted OK statement.
    def ok(code, context, description)
      record('ok', code, context, description)
    end

    # Creates an 'info' message in the object with the specified code and context.
    # A description already present for that code and context is not stored twice.
    # @param [String] code the appropriate 'Info' code for this event.
    # @param [String] context the process or class that is creating this event.
    # @param [String] description the details of this specific event.
    # @return [String] description of posted Info statement.
    def info(code, context, description)
      record('info', code, context, description)
    end

    # Creates a 'Warn' message in the object with the specified code and context.
    # A description already present for that code and context is not stored twice.
    # @param [String] code the appropriate 'warn' code for this event.
    # @param [String] context the process or class that is creating this event.
    # @param [String] description the details of this specific event.
    # @return [String] description of posted Warn statement.
    def warn(code, context, description)
      record('warn', code, context, description)
    end

    # Creates an 'Error' message in the object with the specified code and context.
    # A description already present for that code and context is not stored twice.
    # @param [String] code the appropriate 'error' code for this event.
    # @param [String] context the process or class that is creating this event.
    # @param [String] description the details of this specific event.
    # @return [String] description of posted Error statement.
    def error(code, context, description)
      record('error', code, context, description)
    end

    # Given another {OcflTools::OcflResults} instance, copy that object's data into this one. Used to 'roll up' Results
    # from different levels of validation or process into a single results instance.
    # @param {OcflTools::OcflResults} source Results instance to copy into this instance.
    # @return {OcflTools::OcflResults} self
    # @raise [RuntimeError] if source is not an {OcflTools::OcflResults}.
    def add_results(source)
      raise "#{source} is not a Results object!" unless source.is_a?(OcflTools::OcflResults)

      incoming = {
        'ok' => source.get_ok,
        'info' => source.get_info,
        'warn' => source.get_warnings,
        'error' => source.get_errors
      }
      incoming.each do |level, codes|
        codes.each do |code, contexts|
          contexts.each do |context, descriptions|
            descriptions.each { |description| record(level, code, context, description) }
          end
        end
      end
      self
    end

    private

    # Stores description under level/code/context unless it is already there.
    # @return [String] the description, whether or not it was newly stored.
    def record(level, code, context, description)
      descriptions = ((@my_results[level][code] ||= {})[context] ||= [])
      descriptions << description unless descriptions.include?(description)
      description
    end

    # Totals the descriptions held under every code and context of one level.
    # @return [Integer] number of events recorded at that level.
    def count_events(level)
      @my_results[level].each_value.inject(0) do |total, contexts|
        contexts.each_value.inject(total) { |running, descriptions| running + descriptions.size }
      end
    end
  end
end
|
@@ -0,0 +1,799 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OcflTools
|
4
|
+
# Class to perform validation actions on POSIX directories that potentially contain OCFL objects.
|
5
|
+
class OcflValidator
|
6
|
+
# @return [Pathname] the full local filesystem path to the OCFL object root directory.
|
7
|
+
attr_reader :ocfl_object_root
|
8
|
+
|
9
|
+
# @return [String] the discovered version format of the object, found by inspecting version directory names.
|
10
|
+
attr_reader :version_format
|
11
|
+
|
12
|
+
# @return [String] the version of OCFL that this validator object is targeting.
|
13
|
+
attr_accessor :ocfl_version
|
14
|
+
|
15
|
+
# @return {OcflTools::OcflInventory} an OcflInventory instance that represents an inventory.json file, if the directory contains a valid OCFL object.
|
16
|
+
attr_reader :inventory
|
17
|
+
|
18
|
+
# @return {OcflTools::OcflVerify} an OcflVerify instance that represents the results of requesting verification of an OcflInventory.
|
19
|
+
attr_reader :verify
|
20
|
+
|
21
|
+
# Sets up a validator for one object directory. Only the existence of the
# directory is checked here; no validation is run until one of the
# verify/validate methods is called.
# @param [Pathname] ocfl_object_root the full local filesystem path to the object directory.
# @raise [RuntimeError] if ocfl_object_root is not an existing directory.
def initialize(ocfl_object_root)
  raise "#{ocfl_object_root} is not a directory!" unless File.directory?(ocfl_object_root)

  @ocfl_object_root = ocfl_object_root
  @my_results = OcflTools::OcflResults.new

  # Nothing is known about the object yet; these start out unset.
  @digest = nil
  @version_format = nil
  @ocfl_version = nil

  # Some checks create an inventory or a verify object; keep a handle on each
  # so that callers can get at them through the attribute readers.
  @inventory = nil
  @verify = nil
end
|
35
|
+
|
36
|
+
# Returns the accumulated results events for this instance. When a verify
# object has been created, its results are rolled up into this instance's
# results first.
# @return [OcflTools::OcflResults] current validation results.
def results
  return @my_results if @verify.nil?

  @my_results.add_results(@verify.results)
  @my_results
end
|
42
|
+
|
43
|
+
# Perform an OCFL-spec validation of the given object directory.
# Runs the structure, inventory and manifest checks, then checks file content:
# against the fixity block for the given digest if one is supplied, otherwise
# against the checksums in the inventory. A fixity check records an error if
# the digest is not found in the fixity block.
# This validates all versions and all files in the object_root. If you want to
# just check a specific version, call {verify_directory}.
# @param [String] digest optional digest to use, if one wishes to use values in the fixity block instead of the official OCFL digest values.
# @return {OcflTools::OcflResults} event results
def validate_ocfl_object_root(digest: nil)
  verify_structure
  verify_inventory # returns a different results object; merge it?
  verify_manifest

  if digest.nil?
    verify_checksums
  else
    verify_fixity(digest: digest)
  end

  # Also copies verify.results into our main results object, if it exists.
  results
end
|
59
|
+
|
60
|
+
# Performs checksum validation of files listed in the inventory's fixity block.
# Fixity blocks are not required to be complete, so only the files listed under
# the requested algorithm are checked: the block is inverted and expanded to
# per-file entries, and each file found on disk is digested and compared.
# @param [Pathname] inventory_file fully-qualified path to a valid OCFL inventory.json.
# @param [String] digest string value of the algorithm to use for this fixity check. This value must exist as a key in the object's fixity block.
# @return {OcflTools::OcflResults} of event results
def verify_fixity(inventory_file: "#{@ocfl_object_root}/inventory.json", digest: 'md5')
  context = "verify_fixity #{digest}"
  @inventory = OcflTools::OcflInventory.new.from_file(inventory_file)

  if @inventory.fixity.empty?
    @my_results.error('E111', context, "No fixity block in #{inventory_file}!")
    return @my_results
  end

  unless @inventory.fixity.key?(digest)
    @my_results.error('E111', context, "Requested algorithm #{digest} not found in fixity block.")
    return @my_results
  end

  fixity_checksums = OcflTools::Utils::Files.invert_and_expand_and_prepend(@inventory.fixity[digest], @ocfl_object_root)

  # Warn if there are fewer files in the requested fixity block than in the manifest.
  # NOTE(review): this compares the number of manifest keys with the number of
  # expanded fixity entries; confirm both sides count the same thing.
  if @inventory.manifest.keys.size > fixity_checksums.keys.size
    missing_files = @inventory.manifest.keys.size - fixity_checksums.keys.size
    @my_results.warn(
      'W111',
      context,
      "#{missing_files} files in manifest are missing from fixity block."
    )
  end

  # Check that the files exist on disk before trying to make checksums.
  # The list is split up front rather than deleting from it while iterating,
  # which would skip the entry following each deletion.
  files_present, files_absent = fixity_checksums.keys.partition { |file| File.file?(file) }
  files_absent.each do |file|
    @my_results.error('E111', context, "File #{file} in fixity block not found on disk.")
  end

  disk_checksums = OcflTools::Utils::Files.create_digests(files_present, digest)

  # And now we can compare values; the comparison reports into @my_results.
  OcflTools::Utils.compare_hash_checksums(
    disk_checksums: disk_checksums,
    inventory_checksums: fixity_checksums,
    results: @my_results,
    context: context
  )
end
|
112
|
+
|
113
|
+
# Given an inventory file, do the files mentioned in the manifest exist on disk?
# This is a basic file existence cross-check, in both directions: files in the
# manifest that are missing from disk, and files on disk that the manifest does
# not mention. It also checks each version for the presence (or expected
# absence) of its content directory, and finishes by calling
# verify_versions_across_inventories.
# @param [Pathname] inventory_file fully-qualified path to a valid OCFL inventory.json.
# @return {OcflTools::OcflResults} of event results
def verify_manifest(inventory_file = "#{@ocfl_object_root}/inventory.json")
  @my_results ||= OcflTools::OcflResults.new
  unless File.exist?(inventory_file)
    @my_results.error('E215', 'verify_manifest', "Expected inventory file #{inventory_file} not found.")
    return @my_results
  end

  if load_inventory(inventory_file) == true
    @inventory = OcflTools::OcflInventory.new.from_file(inventory_file)
  else
    @my_results.error('E210', 'verify_manifest', "Unable to process inventory file #{inventory_file}.")
    return @my_results
  end

  # These are arrays, not hashes, so they must be sorted for the comparison below to work.
  files_in_manifest = OcflTools::Utils::Files.invert_and_expand_and_prepend(@inventory.manifest, @ocfl_object_root).keys.sort
  files_on_disk = OcflTools::Utils::Files.get_versions_dir_files(@ocfl_object_root, @inventory.version_id_list.min, @inventory.version_id_list.max).sort

  # E012: Check that the contentDirectory value in inventory is present on disk.
  # 3.3.1 Content directory
  # Version directories MUST contain a designated content sub-directory if the version contains files to be preserved,
  # and SHOULD NOT contain this sub-directory otherwise.
  @inventory.versions.keys.each do |version|
    # Match on the whole "<root>/<version>/" prefix: a plain string test needs no
    # regex escaping of the path, and the trailing slash stops v1 matching v10.
    version_prefix = "#{@ocfl_object_root}/#{version}/"
    version_has_files = files_in_manifest.any? { |file| file.start_with?(version_prefix) }
    content_dir_exists = Dir.exist?("#{@ocfl_object_root}/#{version}/#{@inventory.contentDirectory}")

    if version_has_files
      if content_dir_exists
        @my_results.info('I200', 'verify_manifest', "OCFL 3.3.1 Expected content directory #{version}/#{@inventory.contentDirectory} found.")
      else
        # With this result, files_on_disk is unlikely to be right (it expects contentDirectory to be accurate).
        @my_results.error('E012', 'verify_manifest', "OCFL 3.3.1 Expected content directory #{version}/#{@inventory.contentDirectory} not found.")
      end
    elsif content_dir_exists
      @my_results.warn('W102', 'verify_manifest', "OCFL 3.3.1 version #{version} contentDirectory should not be empty.")
    else
      @my_results.info('I200', 'verify_manifest', "OCFL 3.3.1 version #{version} does not have (and should not have) a contentDirectory directory.")
    end
  end

  # get_versions_dir_files relies on the contentDirectory value being correct. Check!
  # Manifest paths look like version/contentDir/<foo>, so element 1 of the
  # '/'-split path is the content directory actually used by that entry.
  unless files_in_manifest.empty?
    working_files = OcflTools::Utils::Files.invert_and_expand(@inventory.manifest).keys
    manifest_content_dirs = working_files.map { |file| file.split('/')[1] }.uniq

    manifest_content_dirs.each do |mcd|
      next if mcd.to_s == @inventory.contentDirectory.to_s

      # The version_structure test will report this directory's presence; here
      # we only collect whatever content it holds on disk, for every version.
      @inventory.versions.keys.each do |version|
        other_dir = "#{@ocfl_object_root}/#{version}/#{mcd}"
        my_files = OcflTools::Utils::Files.get_dir_files(other_dir)
        OcflTools::Utils::Files.expand_filepaths(my_files, other_dir).each do |file|
          files_on_disk << file
        end
      end
      files_on_disk.uniq! # Just to make sure there's no funky business.
      # Appending broke the ordering that the array comparison below relies on.
      files_on_disk.sort!
    end
  end

  # We only need the files (keys), not the digests here.
  if files_on_disk == files_in_manifest
    @my_results.ok('O200', 'verify_manifest', "All files in inventory were found on disk.")
    @my_results.ok('O200', 'verify_manifest', "All discovered files on disk are referenced in inventory file.")
    # Check that all prior inventories match this one.
    verify_versions_across_inventories
    return @my_results
  end

  missing_from_disk = files_in_manifest - files_on_disk
  missing_from_manifest = files_on_disk - files_in_manifest

  unless missing_from_manifest.empty?
    missing_from_manifest.each do |missing|
      @my_results.error('E111', 'verify_manifest', "Unexpected file #{missing} discovered on disk.")
    end
    if missing_from_disk.empty?
      if files_in_manifest.size < 1
        @my_results.warn('W111', 'verify_manifest', 'No files were found in manifest.')
      else
        @my_results.ok('O200', 'verify_manifest', "All files in inventory were found on disk.")
      end
    end
  end

  unless missing_from_disk.empty?
    missing_from_disk.each do |missing|
      @my_results.error('E111', 'verify_manifest', "Expected file #{missing} not found on disk.")
    end
    if missing_from_manifest.empty?
      # "All discovered files are referenced" would be misleading if nothing was discovered at all.
      if files_on_disk.size < 1
        @my_results.warn('W111', 'verify_manifest', "No files discovered on disk.")
      else
        @my_results.ok('O200', 'verify_manifest', "All discovered files on disk are referenced in inventory file.")
      end
    end
  end

  # Check that all prior inventories match this one.
  verify_versions_across_inventories
  @my_results
end
|
242
|
+
|
243
|
+
# The default checksum test assumes you want to test all likely files on disk against
# whatever version of the inventory.json (hopefully the latest!) is in the root directory.
# Otherwise, if you give it a version 3 inventory, it'll check the v1...v3 directories on disk
# against the inventory's manifest, but won't check v4 or later.
# {#verify_structure} will, however, let you know if your most recent inventory goes to v3,
# but there's a v4 directory in your object root.
#
# Files found on disk are digested with the inventory's digestAlgorithm and
# cross-checked against the manifest; the outcome is reported via @my_results.
# @param [Pathname] inventory_file fully-qualified path to a valid OCFL inventory.json.
# @return {OcflTools::OcflResults} of event results
def verify_checksums(inventory_file = "#{@ocfl_object_root}/inventory.json")
  @my_results ||= OcflTools::OcflResults.new

  # No inventory file at all: record the failure and stop.
  unless File.exist?(inventory_file)
    @my_results.error('E215', 'verify_checksums', "Expected inventory file #{inventory_file} not found.")
    return @my_results
  end

  unless load_inventory(inventory_file) == true
    @my_results.error('E210', 'verify_checksums', "Unable to process inventory file #{inventory_file}.")
    return @my_results
  end
  @inventory = OcflTools::OcflInventory.new.from_file(inventory_file)

  # The inventory's contentDirectory value may be wrong, so work out the content
  # directories in use by inspecting the manifest paths themselves. Those look
  # like version/contentDir/<foo>: element 1 of the '/'-split path is the
  # content directory.
  manifest_paths = OcflTools::Utils::Files.invert_and_expand(@inventory.manifest).keys
  content_dirs = manifest_paths.map { |path| path.split('/')[1] }.uniq

  # Gather whatever is on disk under each of those directories, in every version.
  files_on_disk = []
  content_dirs.each do |content_dir|
    @inventory.versions.keys.each do |version|
      dir_on_disk = "#{@ocfl_object_root}/#{version}/#{content_dir}"
      relative_files = OcflTools::Utils::Files.get_dir_files(dir_on_disk)
      OcflTools::Utils::Files.expand_filepaths(relative_files, dir_on_disk).each do |full_path|
        files_on_disk << full_path
      end
    end
    files_on_disk.uniq! # Just to make sure there's no funky business.
  end

  # Digest the files found on disk, build the equivalent file => digest hash
  # from the manifest, and compare the two.
  disk_checksums = OcflTools::Utils::Files.create_digests(files_on_disk, @inventory.digestAlgorithm)
  manifest_checksums = OcflTools::Utils::Files.invert_and_expand_and_prepend(@inventory.manifest, @ocfl_object_root)
  OcflTools::Utils.compare_hash_checksums(disk_checksums: disk_checksums, inventory_checksums: manifest_checksums, results: @my_results)
end
|
304
|
+
|
305
|
+
# Do all the files and directories in the object_dir conform to spec?
|
306
|
+
# Are there inventory.json files in each version directory? (warn if not in version dirs)
|
307
|
+
# Deduce version dir naming convention by finding the v1 directory; apply that format to other dirs.
|
308
|
+
# @return {OcflTools::OcflResults} of event results
|
309
|
+
def verify_structure
|
310
|
+
error = nil
|
311
|
+
@my_results ||= OcflTools::OcflResults.new
|
312
|
+
# 1. use get_version_format to determine the format used for version directories.
|
313
|
+
# If we can't deduce it by inspection of the object_root, ERROR and try and process using site-wide defaults.
|
314
|
+
if get_version_format == false
|
315
|
+
error = true
|
316
|
+
end
|
317
|
+
|
318
|
+
object_root_dirs = []
|
319
|
+
object_root_files = []
|
320
|
+
|
321
|
+
Dir.chdir(@ocfl_object_root)
|
322
|
+
Dir.glob('*').select do |file|
|
323
|
+
object_root_dirs << file if File.directory? file
|
324
|
+
object_root_files << file if File.file? file
|
325
|
+
end
|
326
|
+
|
327
|
+
# 1b. What happens if some this directory is just completely empty?
|
328
|
+
if object_root_dirs.size == 0 && object_root_files.size == 0
|
329
|
+
@my_results.error('E100', 'verify_sructure', "Object root directory #{@ocfl_object_root} is empty.")
|
330
|
+
return @my_results
|
331
|
+
end
|
332
|
+
|
333
|
+
# 2. Check object root directory for required files.
|
334
|
+
# We have to check the top of inventory.json to get the appropriate digest algo.
|
335
|
+
# This is so we don't cause get_digestAlgorithm to throw up if inventory.json doesn't exist.
|
336
|
+
file_checks = ['inventory.json']
|
337
|
+
|
338
|
+
# 2a. What digest should the inventory.json sidecar be using? Ask inventory.json.
|
339
|
+
# 2b. What's the highest version we should find here?
|
340
|
+
# 2c. What should our contentDirectory value be?
|
341
|
+
if File.exist? "#{@ocfl_object_root}/inventory.json"
|
342
|
+
if load_inventory("#{@ocfl_object_root}/inventory.json") == true
|
343
|
+
json_digest = OcflTools::Utils::Inventory.get_digestAlgorithm("#{@ocfl_object_root}/inventory.json")
|
344
|
+
contentDirectory = OcflTools::Utils::Inventory.get_contentDirectory("#{@ocfl_object_root}/inventory.json")
|
345
|
+
expect_head = OcflTools::Utils::Inventory.get_value("#{@ocfl_object_root}/inventory.json", 'head')
|
346
|
+
file_checks << "inventory.json.#{json_digest}"
|
347
|
+
else
|
348
|
+
# We couldn't load up the inventory; use site defaults.
|
349
|
+
contentDirectory = OcflTools.config.content_directory
|
350
|
+
json_digest = OcflTools.config.digest_algorithm
|
351
|
+
file_checks << "inventory.json.#{json_digest}"
|
352
|
+
end
|
353
|
+
else
|
354
|
+
# If we can't get these values from a handy inventory.json, use the site defaults.
|
355
|
+
contentDirectory = OcflTools.config.content_directory
|
356
|
+
json_digest = OcflTools.config.digest_algorithm
|
357
|
+
file_checks << "inventory.json.#{json_digest}"
|
358
|
+
end
|
359
|
+
|
360
|
+
# Error if a required file is not found in the object root.
|
361
|
+
# This is now just the check for inventory.json and sidecar file.
|
362
|
+
file_checks.each do |file|
|
363
|
+
unless object_root_files.include? file
|
364
|
+
@my_results.error('E102', 'verify_structure', "Object root does not include required file #{file}")
|
365
|
+
error = true
|
366
|
+
end
|
367
|
+
object_root_files.delete(file)
|
368
|
+
end
|
369
|
+
|
370
|
+
# NamAsTe file checks:
|
371
|
+
# C1: There should be only 1 file in the root dir beginning with '0=ocfl_object_'
|
372
|
+
# C2: That file should match the expected value of OCFL_version (e.g. '0=ocfl_object_1.0')
|
373
|
+
# C3: The content of that file should match the filename, less the leading '0='
|
374
|
+
root_namaste_files = []
|
375
|
+
Dir.glob('0=ocfl_object_*').select do |file|
|
376
|
+
root_namaste_files << file if File.file? file
|
377
|
+
end
|
378
|
+
|
379
|
+
# C1: We need EXACTLY ONE of these files.
|
380
|
+
if root_namaste_files.size == 0
|
381
|
+
@my_results.error('E103', 'verify_structure', 'Object root does not include required NamAsTe file.')
|
382
|
+
error = true
|
383
|
+
end
|
384
|
+
|
385
|
+
if root_namaste_files.size > 1
|
386
|
+
@my_results.error('E104', 'verify_structure', "Object root contains multiple NamAsTe files: #{root_namaste_files}")
|
387
|
+
error = true
|
388
|
+
end
|
389
|
+
|
390
|
+
# C2 and C3 here.
|
391
|
+
# If we're dealing with 1 or more ocfl_object_files, process them for correctness.
|
392
|
+
unless root_namaste_files.size == 0 || root_namaste_files.size == nil
|
393
|
+
|
394
|
+
# What OCFL version are we looking for? Pull the default value if not otherwise set.
|
395
|
+
@ocfl_version ||= OcflTools.config.ocfl_version
|
396
|
+
|
397
|
+
root_namaste_files.each do | file |
|
398
|
+
|
399
|
+
# C2: Is this file the expected version?
|
400
|
+
if file != "0=ocfl_object_#{@ocfl_version}"
|
401
|
+
@my_results.error('E107', 'verify_structure', "Required NamAsTe file in object root is for unexpected OCFL version: #{file}")
|
402
|
+
error = true
|
403
|
+
end
|
404
|
+
|
405
|
+
# C3: does the file content match the file name?
|
406
|
+
# Cut the first 2 characters from the filename; what remains is the expected content.
|
407
|
+
expected_content = file.slice(2..file.size)
|
408
|
+
|
409
|
+
# We use &:gets here instead of &:readline so we don't throw an exception if the file doesn't have content.
|
410
|
+
first_line = File.open("#{@ocfl_object_root}/#{file}", &:gets)
|
411
|
+
|
412
|
+
# Handle 'the Namaste file is empty' case.
|
413
|
+
if first_line == nil
|
414
|
+
@my_results.error('E105', 'verify_structure', 'Required NamAsTe file in object root directory has no content!')
|
415
|
+
error = true
|
416
|
+
object_root_files.delete(file)
|
417
|
+
next
|
418
|
+
end
|
419
|
+
|
420
|
+
# it'll have a \n on the end. Remove it, then verify for correct content.
|
421
|
+
if first_line.chomp! != expected_content
|
422
|
+
@my_results.error('E106', 'verify_structure', 'Required NamAsTe file in object root directory does not contain expected string.')
|
423
|
+
error = true
|
424
|
+
end
|
425
|
+
object_root_files.delete(file)
|
426
|
+
end
|
427
|
+
end
|
428
|
+
|
429
|
+
# 3. Error if there are extraneous files in object root.
|
430
|
+
unless object_root_files.empty?
|
431
|
+
@my_results.error('E101', 'verify_structure', "Object root contains noncompliant files: #{object_root_files}")
|
432
|
+
error = true
|
433
|
+
end
|
434
|
+
|
435
|
+
# 4. Warn if the optional 'logs' directory is found in the object root.
|
436
|
+
if object_root_dirs.include? 'logs'
|
437
|
+
@my_results.warn('W111', 'verify_structure', 'OCFL 3.1 optional logs directory found in object root.')
|
438
|
+
object_root_dirs.delete('logs')
|
439
|
+
end
|
440
|
+
|
441
|
+
# 5. Warn if the optional 'extensions' directory is found in object root.
|
442
|
+
if object_root_dirs.include? 'extensions'
|
443
|
+
@my_results.warn('W111', 'verify_structure', 'OCFL 3.1 optional extensions directory found in object root.')
|
444
|
+
object_root_dirs.delete('extensions')
|
445
|
+
end
|
446
|
+
|
447
|
+
version_directories = OcflTools::Utils::Files.get_version_directories(@ocfl_object_root)
|
448
|
+
|
449
|
+
remaining_dirs = object_root_dirs - version_directories
|
450
|
+
|
451
|
+
# 6. Error if there are extraneous/unexpected directories in the object root.
|
452
|
+
unless remaining_dirs.empty?
|
453
|
+
@my_results.error('E100', 'verify_structure', "Object root contains noncompliant directories: #{remaining_dirs}")
|
454
|
+
error = true
|
455
|
+
end
|
456
|
+
|
457
|
+
# 7. Version directories must be a continuous sequence, starting at v1.
|
458
|
+
version_dir_count = version_directories.size
|
459
|
+
count = 0
|
460
|
+
|
461
|
+
until count == version_dir_count
|
462
|
+
count += 1
|
463
|
+
expected_directory = @version_format % count
|
464
|
+
# just check to see if it's in the array version_directories.
|
465
|
+
# We're not *SURE* that what we have is a continous sequence starting at 1;
|
466
|
+
# just that they're valid version dir names, sorted in ascending order, and they exist.
|
467
|
+
if version_directories.include? expected_directory
|
468
|
+
# Could verbose log this here.
|
469
|
+
# @my_results.ok('O200', 'verify_sructure', "Expected version directory #{expected_directory} found.")
|
470
|
+
else
|
471
|
+
@my_results.error('E013', 'verify_structure', "Expected version directory #{expected_directory} missing from directory list #{version_directories} ")
|
472
|
+
error = true
|
473
|
+
end
|
474
|
+
end
|
475
|
+
|
476
|
+
# 8. Error if the head version in the inventory does not match the highest version directory discovered in the object root.
|
477
|
+
unless expect_head.nil? # No point checking this is we've already failed the root inventory.json check.
|
478
|
+
if version_directories[-1] != expect_head
|
479
|
+
@my_results.error('E111', 'verify_structure', "Inventory file expects a highest version of #{expect_head} but directory list contains #{version_directories} ")
|
480
|
+
error = true
|
481
|
+
else
|
482
|
+
# could log an 'expected head version found' here.
|
483
|
+
end
|
484
|
+
end
|
485
|
+
|
486
|
+
# CHECK VERSION DIRECTORY CONTENTS
|
487
|
+
# This is setup for the next round of checks.
|
488
|
+
# For the version_directories we *do* have, are they cool?
|
489
|
+
version_directories.each do |ver|
|
490
|
+
version_dirs = []
|
491
|
+
version_files = []
|
492
|
+
|
493
|
+
Dir.chdir("#{@ocfl_object_root}/#{ver}")
|
494
|
+
Dir.glob('*').select do |file|
|
495
|
+
version_dirs << file if File.directory? file
|
496
|
+
version_files << file if File.file? file
|
497
|
+
end
|
498
|
+
|
499
|
+
# 9. Warn if inventory.json and sidecar are not present in version directory.
|
500
|
+
file_checks = []
|
501
|
+
if File.exist? "#{@ocfl_object_root}/#{ver}/inventory.json"
|
502
|
+
if load_inventory("#{@ocfl_object_root}/inventory.json") == true
|
503
|
+
json_digest = OcflTools::Utils::Inventory.get_digestAlgorithm("#{@ocfl_object_root}/#{ver}/inventory.json")
|
504
|
+
file_checks << 'inventory.json'
|
505
|
+
file_checks << "inventory.json.#{json_digest}"
|
506
|
+
versionContentDirectory = OcflTools::Utils::Inventory.get_contentDirectory("#{@ocfl_object_root}/#{ver}/inventory.json")
|
507
|
+
if versionContentDirectory != contentDirectory
|
508
|
+
@my_results.error('E111', 'verify_structure', "contentDirectory value #{versionContentDirectory} in version #{ver} does not match expected contentDirectory value #{contentDirectory}.")
|
509
|
+
error = true
|
510
|
+
end
|
511
|
+
else
|
512
|
+
json_digest = OcflTools.config.digest_algorithm
|
513
|
+
file_checks << 'inventory.json'
|
514
|
+
file_checks << "inventory.json.#{json_digest}"
|
515
|
+
end
|
516
|
+
|
517
|
+
else
|
518
|
+
file_checks << 'inventory.json' # We look for it, even though we know we won't find it, so we can log the omission.
|
519
|
+
file_checks << 'inventory.json.sha512' # We look for it, even though we know we won't find it, so we can log the omission.
|
520
|
+
end
|
521
|
+
|
522
|
+
file_checks.each do |file|
|
523
|
+
if version_files.include? file
|
524
|
+
# The inventory file in the highest version directory MUST match the inventory file in the object root.
|
525
|
+
case file
|
526
|
+
when 'inventory.json'
|
527
|
+
case ver
|
528
|
+
# expect_head is nil if there's no inventory.json in the object root, so this test won't run against nothing.
|
529
|
+
when expect_head
|
530
|
+
# expand_filepaths(@ocfl_object_root)
|
531
|
+
my_files = ["#{@ocfl_object_root}/inventory.json", "#{@ocfl_object_root}/#{ver}/inventory.json"]
|
532
|
+
# create_digests
|
533
|
+
my_digests = OcflTools::Utils::Files.create_digests(my_files, 'sha512')
|
534
|
+
# We know there must be only 2 values here, and they should be equal.
|
535
|
+
if my_digests.values[0] != my_digests.values[1]
|
536
|
+
@my_results.error('E111', 'verify_structure', "Inventory.json in root and highest version #{ver} MUST match.")
|
537
|
+
end
|
538
|
+
# Only 1 case block, no 'else'
|
539
|
+
end
|
540
|
+
# Only 1 case block, no 'else'
|
541
|
+
end
|
542
|
+
# Expected file exists; delete it from our check list and move on.
|
543
|
+
version_files.delete(file)
|
544
|
+
else
|
545
|
+
@my_results.warn('W111', 'verify_structure', "OCFL 3.1 optional #{file} missing from #{ver} directory")
|
546
|
+
version_files.delete(file)
|
547
|
+
end
|
548
|
+
end
|
549
|
+
|
550
|
+
# 10. Error if files other than inventory & sidecar found in version directory.
|
551
|
+
unless version_files.empty?
|
552
|
+
@my_results.error('E011', 'verify_structure', "non-compliant files #{version_files} in #{ver} directory")
|
553
|
+
error = true
|
554
|
+
end
|
555
|
+
|
556
|
+
###### CHECK THIS; I think this is superseded by the verify_manifest content_dir check.
|
557
|
+
# 11. WARN if a contentDirectory exists, but is empty.
|
558
|
+
if version_dirs.include? contentDirectory
|
559
|
+
version_dirs.delete(contentDirectory)
|
560
|
+
# if Dir.empty?(contentDirectory)
|
561
|
+
# @my_results.warn('W102', 'verify_structure', "OCFL 3.3.1 version #{ver} contentDirectory should not be empty.")
|
562
|
+
# end
|
563
|
+
# else
|
564
|
+
# # Informational message that contentDir does not exist. Not necssarily a problem!
|
565
|
+
# @my_results.info('I101', 'verify_structure', "OCFL 3.3.1 version #{ver} does not contain a contentDirectory.")
|
566
|
+
end
|
567
|
+
|
568
|
+
# 12. Warn if any directories other than the expected 'content' directory are found in the version directory.
|
569
|
+
# This is the "Moab Excepion" to allow for legacy Moab object migration - a 'manifests' directory would be here.
|
570
|
+
unless version_dirs.empty?
|
571
|
+
@my_results.warn('W101', 'version_structure', "OCFL 3.3 version directory should not contain any directories other than the designated content sub-directory. Additional directories found: #{version_dirs}")
|
572
|
+
error = true
|
573
|
+
end
|
574
|
+
end
|
575
|
+
|
576
|
+
# If we get here without errors (warnings are OK), we passed!
|
577
|
+
if error.nil?
|
578
|
+
@my_results.ok('O111', 'verify_structure', 'OCFL 3.1 Object root passed file structure test.')
|
579
|
+
end
|
580
|
+
@my_results
|
581
|
+
end
|
582
|
+
|
583
|
+
# We may also want to only verify a specific directory, not the entire object.
|
584
|
+
# For example, if we've just added a new version, we might want to just check those files
|
585
|
+
# and not the rest of the object (esp. if it has some very large version directories).
|
586
|
+
# @param [Integer] version directory to verify
|
587
|
+
# @return {OcflTools::OcflResults} of verify events
|
588
|
+
def verify_directory(version)
  # Verifies the files of ONE version directory against the inventory manifest.
  #
  # version: Integer (or anything responding to #to_i) naming the version to check.
  # Returns whatever OcflTools::Utils.compare_hash_checksums returns; findings are
  # recorded in @my_results under the context "verify_directory <version_name>".
  # Raises RuntimeError if the version directory does not exist on disk.
  # Side effect: @inventory is replaced by the inventory loaded here.

  # Start by getting the version format, unless it has already been determined.
  @version_format ||= OcflTools::Utils::Files.get_version_format(@ocfl_object_root)

  version_name = @version_format % version.to_i
  version_path = "#{@ocfl_object_root}/#{version_name}"

  # Make sure this directory actually exists.
  raise "Requested version directory doesn't exist!" unless File.directory?(version_path)

  # We need an inventory.json to tell us what the contentDirectory should be.
  # Prefer the version's own inventory; otherwise use the one in the object root.
  version_inventory = "#{version_path}/inventory.json"
  inventory_file = File.exist?(version_inventory) ? version_inventory : "#{@ocfl_object_root}/inventory.json"

  my_content_dir = OcflTools::Utils::Inventory.get_contentDirectory(inventory_file)
  @inventory = OcflTools::OcflInventory.new.from_file(inventory_file)

  # Get a list of fully-resolvable files for this version directory from disk.
  my_files_on_disk = OcflTools::Utils::Files.get_version_dir_files(@ocfl_object_root, version)

  # Flip the manifest so it can be looked up by fully-resolved file path.
  manifest_checksums = OcflTools::Utils::Files.invert_and_expand_and_prepend(@inventory.manifest, @ocfl_object_root)

  # Trim the manifest to entries under ocfl_object_root/version_name/content_dir.
  # A literal prefix comparison is used (not a regex built from the path) so that
  # regex metacharacters in directory names cannot alter the match, and the trailing
  # '/' stops sibling directories that merely share the prefix from matching.
  content_prefix = "#{version_path}/#{my_content_dir}/"
  filtered_checksums = manifest_checksums.select { |file, _digest| file.start_with?(content_prefix) }

  # Now generate checksums for the files we found on disk.
  disk_checksums = OcflTools::Utils::Files.create_digests(my_files_on_disk, @inventory.digestAlgorithm)

  # Finally! Pass them to the checksum checker.
  OcflTools::Utils.compare_hash_checksums(disk_checksums: disk_checksums, inventory_checksums: filtered_checksums, results: @my_results, context: "verify_directory #{version_name}")
end
|
632
|
+
|
633
|
+
# Different from verify_directory.
|
634
|
+
# Verify_version is *all* versions of the object, up to and including this one.
|
635
|
+
# Verify_directory is *just* check the files and checksums inside that particular version directory.
|
636
|
+
# Verify_version(@head) is the canonical way to check an entire object?
|
637
|
+
# @param [Integer] version of object to verify
|
638
|
+
# @return {OcflTools::OcflResults}
|
639
|
+
def verify_version(version)
  # Runs verify_directory for every version from 1 up to and including
  # the requested one, then hands back the accumulated @my_results.
  1.upto(version) { |version_number| verify_directory(version_number) }
  @my_results
end
|
648
|
+
|
649
|
+
# Creates an {OcflInventory} for the given inventory.json,
|
650
|
+
# then creates an {OcflVerify} instance of it and verifies it.
|
651
|
+
# @param [Pathname] inventory_file fully-qualified path to a valid OCFL inventory.json.
|
652
|
+
# @return {OcflTools::OcflResults} event results
|
653
|
+
def verify_inventory(inventory_file = "#{@ocfl_object_root}/inventory.json")
  # Loads the given inventory.json into an OcflInventory and pushes it through OcflVerify.
  #
  # inventory_file: path to the inventory to verify; defaults to the object root's inventory.json.
  # Returns the result of OcflVerify#check_all on success. If the file is missing (E215)
  # or cannot be loaded (E210), the problem is recorded and @my_results is returned instead.
  @my_results ||= OcflTools::OcflResults.new

  # Inventory file does not exist; record this epic fail and return.
  unless File.exist?(inventory_file)
    @my_results.error('E215', 'verify_inventory', "Expected inventory file #{inventory_file} not found.")
    return @my_results
  end

  # Pre-flight the file we were actually asked to verify (not unconditionally the
  # object root's inventory), so a broken inventory is reported rather than raised below.
  unless load_inventory(inventory_file) == true
    # The inventory had problems; we can't run verify.
    @my_results.error('E210', 'verify_inventory', "Unable to process inventory file #{inventory_file}.")
    return @my_results
  end

  @inventory = OcflTools::OcflInventory.new.from_file(inventory_file)
  @verify = OcflTools::OcflVerify.new(@inventory)
  @verify.check_all # creates & returns @results object from OcflVerify
end
|
672
|
+
|
673
|
+
private
|
674
|
+
# load up an inventory file and handle any errors.
|
675
|
+
# Returns true if the inventory file is syntactically correct; false otherwise.
|
676
|
+
def load_inventory(inventory_file)
  # Trial-loads the inventory file purely to see whether it can be parsed.
  # Gives back true when OcflInventory#from_file succeeds; otherwise records
  # the matching error code in @my_results and gives back false.
  @my_results ||= OcflTools::OcflResults.new
  OcflTools::OcflInventory.new.from_file(inventory_file)
  true
rescue RuntimeError
  @my_results.error('E210', 'load_inventory', "Unable to read Inventory file #{inventory_file}")
  false
rescue OcflTools::Errors::Error211
  @my_results.error('E211', 'load_inventory', "#{inventory_file} is not valid JSON.")
  false
rescue OcflTools::Errors::Error216 => failure
  @my_results.error('E216', 'load_inventory', "#{failure} in #{inventory_file}")
  false
rescue OcflTools::Errors::Error217 => failure
  @my_results.error('E217', 'load_inventory', "#{failure} in #{inventory_file}")
  false
end
|
695
|
+
|
696
|
+
# Compares the state blocks for all versions across all inventories in the object,
|
697
|
+
# and errors if the state block for a given version differs between inventory files.
|
698
|
+
# NOTE: this is a private method that should only be called by #verify_manifest.
|
699
|
+
# That way, we know that @inventory is valid and set by the time we get here.
|
700
|
+
def verify_versions_across_inventories
  # OCFL 3.7: In the case that prior version directories include an inventory file
  # there will be multiple inventory files describing prior versions within the OCFL
  # Object. Each version block in each prior inventory file must represent the same
  # object state as the corresponding version block in the current inventory file.
  # Additionally, the values of the created, message and user keys in each version
  # block in each prior inventory file should have the same values as the
  # corresponding keys in the corresponding version block in the current inventory file.
  #
  # Reads @inventory.version_id_list and calls compare_inventories for every version
  # from the highest down to 2. Always returns nil.
  head_version = @inventory.version_id_list.max

  # Nothing to do if there are no versions, or only 1 version. Other checks will catch
  # if the inventory in the v1 directory doesn't match the inventory in the object_root.
  return if head_version.nil? || head_version <= 1

  # Each call compares a version's inventory with the one in the directory below it.
  head_version.downto(2) { |version| compare_inventories(version) }
  nil
end
|
721
|
+
|
722
|
+
# PRIVATE METHOD called by verify_versions_across_inventories
|
723
|
+
# Tries to load up an inventory file from the given version directory,
|
724
|
+
# and the inventory in the previous version directory.
|
725
|
+
def compare_inventories(version)
  # Loads the inventory.json from the given version directory and from the version
  # directory below it, then hands both to compare_inventories_to_version.
  #
  # version: Integer version number whose inventory is compared with its predecessor's.
  # Records E111 in @my_results and returns early if the version format cannot be
  # determined, or if either of the two inventory files is not on disk.

  # @version_format is important here.
  if get_version_format == false
    @my_results.error('E111', 'compare_inventories', 'OCFL unable to determine version format by inspection of directories.')
    return
  end

  previous_version_int = version.to_i - 1
  current_path = "#{@ocfl_object_root}/#{@version_format % version.to_i}/inventory.json"
  previous_path = "#{@ocfl_object_root}/#{@version_format % previous_version_int}/inventory.json"

  # Check the current inventory exists before loading it, exactly as is done for the
  # previous one, so that a version directory without an inventory is recorded
  # instead of raising out of from_file.
  unless File.exist?(current_path)
    @my_results.error('E111', 'compare_inventories', "OCFL unable to locate inventory file at #{current_path}.")
    return
  end

  unless File.exist?(previous_path)
    @my_results.error('E111', 'compare_inventories', "OCFL unable to locate previous inventory file at #{previous_path}.")
    return
  end

  current_inventory = OcflTools::OcflInventory.new.from_file(current_path)
  previous_inventory = OcflTools::OcflInventory.new.from_file(previous_path)

  # Now we have two inventories, we can compare their versions blocks.
  compare_inventories_to_version(current_inventory, previous_inventory, previous_version_int)
end
|
750
|
+
|
751
|
+
# Given 2 inventories and a version, step down thru versions until you reach 1 and compare their states.
|
752
|
+
def compare_inventories_to_version(current_inventory, previous_inventory, version)
  # Steps DOWN from the given version to 1, calling compare_inventories_version
  # for each version number with the same pair of inventories. Always returns nil.
  #
  # downto(1) is a no-op for zero or negative starting values, where counting
  # down "until version == 0" would never terminate.
  version.to_i.downto(1) do |version_number|
    compare_inventories_version(current_inventory, previous_inventory, version_number)
  end
  nil
end
|
759
|
+
|
760
|
+
# Get the version state from each inventory and compare.
|
761
|
+
def compare_inventories_version(current_inventory, previous_inventory, version)
  # Fetches the block for one version from each inventory's versions hash and compares them.
  # message, user, created are WARN if different (W270, W272, W271). state is ERROR if
  # different (E270). All findings are recorded in @my_results.
  version_name = @version_format % version.to_i
  current_block = current_inventory.versions[version_name]
  previous_block = previous_inventory.versions[version_name]

  # If either inventory has no block at all for this version there is nothing to compare
  # field by field; record it as a state mismatch rather than failing on nil.
  if current_block.nil? || previous_block.nil?
    @my_results.error('E270', 'compare_inventories_version', "OCFL 3.7 version state mismatch between inventory files: version #{version} block is not present in both #{current_inventory.head}/inventory.json and previous inventory.json.")
    return
  end

  # Checked in this order so results are recorded in a stable sequence.
  [%w[message W270], %w[user W272], %w[created W271]].each do |field, code|
    next if current_block[field] == previous_block[field]

    @my_results.warn(code, 'compare_inventories_version', "OCFL 3.7 version #{field} mismatch between inventory files: version #{version} #{field} block in #{current_inventory.head}/inventory.json differs from previous inventory.json.")
  end

  return if current_block['state'] == previous_block['state']

  @my_results.error('E270', 'compare_inventories_version', "OCFL 3.7 version state mismatch between inventory files: version #{version} state block in #{current_inventory.head}/inventory.json differs from previous inventory.json.")
end
|
784
|
+
|
785
|
+
def get_version_format
  # Makes sure @version_format is populated, asking OcflTools::Utils::Files to work it
  # out from the object root when it is not already set. Records O111 and gives back
  # true when that goes through; on any StandardError it records E111 and W111,
  # switches to the configured default format, and gives back false.
  @version_format ||= OcflTools::Utils::Files.get_version_format(@ocfl_object_root)
  @my_results.ok('O111', 'version_format', 'OCFL conforming first version directory found.')
  true
rescue StandardError
  @my_results.error('E111', 'version_format', 'OCFL unable to determine version format by inspection of directories.')
  @version_format = OcflTools.config.version_format
  @my_results.warn('W111', 'version_format', "Attempting to process using default value: #{OcflTools.config.version_format}")
  false
end
|
797
|
+
|
798
|
+
end
|
799
|
+
end
|