ocfl-tools 0.9.14
- checksums.yaml +7 -0
- data/.gitignore +53 -0
- data/.rubocop.yml +35 -0
- data/.rubocop_todo.yml +189 -0
- data/.travis.yml +12 -0
- data/Gemfile +5 -0
- data/README.md +647 -0
- data/Rakefile +13 -0
- data/VERSION +1 -0
- data/examples/list_files.rb +56 -0
- data/examples/validate_object.rb +23 -0
- data/lib/ocfl-tools.rb +19 -0
- data/lib/ocfl_tools.rb +17 -0
- data/lib/ocfl_tools/config.rb +27 -0
- data/lib/ocfl_tools/ocfl_actions.rb +146 -0
- data/lib/ocfl_tools/ocfl_delta.rb +250 -0
- data/lib/ocfl_tools/ocfl_deposit.rb +685 -0
- data/lib/ocfl_tools/ocfl_errors.rb +23 -0
- data/lib/ocfl_tools/ocfl_inventory.rb +95 -0
- data/lib/ocfl_tools/ocfl_object.rb +425 -0
- data/lib/ocfl_tools/ocfl_results.rb +272 -0
- data/lib/ocfl_tools/ocfl_validator.rb +799 -0
- data/lib/ocfl_tools/ocfl_verify.rb +493 -0
- data/lib/ocfl_tools/utils.rb +127 -0
- data/lib/ocfl_tools/utils_file.rb +195 -0
- data/lib/ocfl_tools/utils_inventory.rb +96 -0
- data/ocfl-tools.gemspec +31 -0
- data/results_codes.md +106 -0
- data/test-it.sh +11 -0
- metadata +191 -0

data/lib/ocfl_tools/ocfl_deposit.rb
@@ -0,0 +1,685 @@
# frozen_string_literal: true

module OcflTools
  # Class to take new content from a deposit directory and marshal it
  # into a new version directory of a new or existing OCFL object dir.
  # Expects deposit_dir to be:
  #
  # <ocfl deposit directory>/
  #   |-- inventory.json (from object_directory root, if adding to existing version)
  #   |-- inventory.json.sha512 (matching sidecar from object_directory root)
  #   |-- head/
  #       |-- head.json
  #       |     OR a combination of the following files:
  #       |-- add_files.json (all proposed file add actions)
  #       |-- update_files.json (all proposed file update actions)
  #       |-- copy_files.json (all proposed file copy actions)
  #       |-- delete_files.json (all proposed file delete actions)
  #       |-- move_files.json (all proposed file move actions)
  #       |-- version.json (optional version metadata)
  #       |-- fixity_files.json (optional fixity information)
  #       |-- <content_dir>/
  #           |-- <files to add or update>
  #
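  # As an illustrative sketch (not taken from this gem's docs), a head.json that
  # combines several actions might look like the following; the digest and file
  # paths are hypothetical, and the keys mirror those read in #process_action_files
  # ('update_manifest', 'add', 'update', 'copy', 'move', 'delete', 'fixity', 'version'):
  #
  #   {
  #     "add": { "<sha512 of the new file>": [ "my_content/a_new_file.txt" ] },
  #     "delete": { "<sha512 of the old file>": [ "my_content/an_old_file.txt" ] },
  #     "version": {
  #       "created": "2020-01-01T00:00:00Z",
  #       "message": "Added a file",
  #       "user": { "name": "A. Depositor", "address": "mailto:depositor@example.org" }
  #     }
  #   }
  #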
  class OcflDeposit < OcflTools::OcflInventory
    # @return [String] the version of OCFL that this deposit object is targeting.
    attr_accessor :ocfl_version

    # @param [Pathname] deposit_directory fully-qualified path to a well-formed deposit directory.
    # @param [Pathname] object_directory fully-qualified path to either an empty directory in which to create a new OCFL object, or the existing OCFL object to which the new version directory should be added.
    # @return {OcflTools::OcflDeposit}
    def initialize(deposit_directory:, object_directory:)
      @deposit_dir = deposit_directory
      @object_dir = object_directory
      unless File.directory? deposit_directory
        raise "#{@deposit_dir} is not a valid directory!"
      end
      unless File.directory? object_directory
        raise "#{@object_dir} is not a valid directory!"
      end

      # Since we are overriding OcflObject's initialize block, we need to define these variables again.
      @id = nil
      @head = nil
      @type = OcflTools.config.content_type
      @digestAlgorithm = OcflTools.config.digest_algorithm # sha512 is recommended, Stanford uses sha256.
      @contentDirectory = OcflTools.config.content_directory # default is 'content', Stanford uses 'data'
      @manifest = {}
      @versions = {} # A hash of Version hashes.
      @fixity = {} # Optional. Same format as Manifest.

      @ocfl_version = nil

      @my_results = OcflTools::OcflResults.new

      # san_check works out if the deposit_dir and object_dir represent a
      # new object with a first version, or an update to an existing object.
      # It then verifies and stages all files so that, if it doesn't raise an
      # exception, the calling app can simply invoke #deposit_new_version to proceed.
      san_check
    end

    # Returns a {OcflTools::OcflResults} object containing information about actions taken during the staging and creation of this new version.
    # @return {OcflTools::OcflResults}
    def results
      @my_results
    end

    # Creates a new version of an OCFL object in the destination object directory.
    # This method can only be called if the {OcflTools::OcflDeposit} object passed all
    # necessary sanity checks, which occur when the object is initialized.
    # @return {OcflTools::OcflDeposit} self
    def deposit_new_version
      # verify that our object_directory head is still what we expect.
      # create the version and contentDirectory directories.
      # move or copy content over from deposit_directory
      # write the inventory.json & sidecar into version directory.
      # do a directory verify on the new directory.
      # write the new inventory.json to object root.
      # Can only be called if there are no errors in @my_results; raise exception if otherwise?
      set_head_version

      # Am I put together correctly?
      @my_results.add_results(OcflTools::OcflVerify.new(self).check_all)
      # If @my_results.error_count > 0, abort!
      if @my_results.error_count > 0
        raise "Errors detected in OCFL object verification. Cannot process deposit: #{@my_results.get_errors}"
      end

      if OcflTools::Utils.version_string_to_int(@head) == 1 && !Dir.empty?(@object_dir)
        raise "#{@object_dir} is not empty! Unable to create new object."
      end

      process_new_version
      self
    end
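
    # A minimal usage sketch (the paths are illustrative, not taken from this
    # gem's documentation): build the deposit against a prepared deposit
    # directory, then write the new version into the object directory.
    #
    #   deposit = OcflTools::OcflDeposit.new(
    #     deposit_directory: '/deposits/my_object',
    #     object_directory: '/ocfl_root/my_object'
    #   )
    #   deposit.deposit_new_version
    #   deposit.results # => an OcflTools::OcflResults describing what was done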

    private

    def san_check
      # If the deposit directory contains an inventory.json:
      # - it's an update to an existing object. Do existing_object_san_check.
      # If the deposit directory does not contain an inventory.json:
      # - it's a new object. Do a new_object_san_check.

      if File.file? "#{@deposit_dir}/inventory.json"
        @my_results.info('I111', 'san_check', "Existing inventory found at #{@deposit_dir}/inventory.json")
        existing_object_san_check
      else
        @my_results.info('I111', 'san_check', "No inventory.json found in #{@deposit_dir}; assuming new object workflow.")
        new_object_san_check
      end
    end

    def new_object_san_check
      # 1. Object directory must be empty.
      if Dir.empty?(@object_dir)
        @my_results.info('I111', 'new_object_san_check', "target dir #{@object_dir} is empty.")
      else
        @my_results.error('E111', 'new_object_san_check', "target dir #{@object_dir} is NOT empty!")
      end

      # 2. Deposit directory must contain 'head' directory.
      if File.directory?("#{@deposit_dir}/head")
        @my_results.info('I111', 'new_object_san_check', "Deposit dir #{@deposit_dir} contains a 'head' directory.")
      else
        @my_results.error('E111', 'new_object_san_check', "Deposit dir #{@deposit_dir} does NOT contain required 'head' directory.")
      end

      # 3. Deposit directory must contain ONE id namaste file. (4='id')
      deposit_root_files = []
      deposit_root_directories = []
      Dir.chdir(@deposit_dir)
      Dir.glob('*').select do |file|
        deposit_root_files << file if File.file? file
        deposit_root_directories << file if File.directory? file
      end

      namaste_file = nil
      deposit_root_files.each do |file|
        next unless file =~ /^4=/ # Looks like the start of a Namaste file.

        deposit_root_files.delete(file)
        if namaste_file.nil?
          namaste_file = file
          @my_results.info('I111', 'new_object_san_check', "Matching Namaste file #{file} found in #{@deposit_dir}.")
        else
          @my_results.error('E111', 'new_object_san_check', "More than one matching Namaste file found in #{@deposit_dir}!")
          raise "More than one matching Namaste file found in #{@deposit_dir}! #{namaste_file} & #{file}"
        end
      end

      # 3b. Verify namaste file is valid.
      object_id = namaste_file.split('=')[1]
      raise 'Object ID cannot be zero length!' if object_id.empty?

      File.readlines("#{@deposit_dir}/#{namaste_file}").each do |line|
        line.chomp!
        if object_id != line
          @my_results.error('E111', 'new_object_san_check', "Contents of Namaste ID file do not match filename! #{object_id} vs #{line}.")
          raise "Contents of Namaste ID file do not match filename! #{object_id} vs #{line}."
        end
      end
      # Really there should only be 1 line in namaste_file but so long as they all match, we're good.
      @namaste = object_id
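
      # Illustrative example (hypothetical object ID): a deposit for the object
      # 'druid:bb123cd4567' would carry a single ID Namaste file named
      # '4=druid:bb123cd4567' whose one line of content is 'druid:bb123cd4567'.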

      # 4. Deposit directory must NOT contain any other files.
      unless deposit_root_files.empty?
        @my_results.error('E111', 'new_object_san_check', "Deposit directory contains extraneous files: #{deposit_root_files}")
        raise "Deposit directory contains extraneous files: #{deposit_root_files}."
      end

      # 4b. Deposit directory MUST contain a 'head' directory.
      if deposit_root_directories.include? 'head'
        @my_results.info('I111', 'new_object_san_check', "#{@deposit_dir} contains expected 'head' directory.")
        deposit_root_directories.delete('head')
      else
        @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir} must contain 'head' directory!")
        raise "Deposit directory must contain a 'head' directory."
      end

      # 4c. Deposit directory MUST NOT contain any other directories.
      unless deposit_root_directories.empty?
        @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir} contains extraneous directories: #{deposit_root_directories}")
        raise "#{@deposit_dir} contains extraneous directories: #{deposit_root_directories}"
      end

      # Intermission: prepare deposit/head for inspection
      deposit_head_files = []
      deposit_head_directories = []
      Dir.chdir("#{@deposit_dir}/head")
      Dir.glob('*').select do |file|
        deposit_head_files << file if File.file? file
        deposit_head_directories << file if File.directory? file
      end

      # 5. 'head' directory must contain a 'content' directory that matches sitewide setting.
      if deposit_head_directories.include? OcflTools.config.content_directory
        @my_results.info('I111', 'new_object_san_check', "#{@deposit_dir}/head contains expected #{OcflTools.config.content_directory} directory.")
        deposit_head_directories.delete(OcflTools.config.content_directory)
      else
        @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir}/head does NOT contain expected #{OcflTools.config.content_directory} directory.")
        raise "#{@deposit_dir}/head does NOT contain expected #{OcflTools.config.content_directory} directory."
      end

      # 5b. 'head' directory MUST NOT contain any other directories.
      unless deposit_head_directories.empty?
        @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir}/head contains extraneous directories: #{deposit_head_directories}")
        raise "#{@deposit_dir}/head contains extraneous directories: #{deposit_head_directories}"
      end

      # 6. 'head' directory MUST contain either 'head.json' or 'add_files.json'
      found_me = nil
      require_one = ['head.json', 'add_files.json']
      require_one.each do |file|
        if deposit_head_files.include? file
          @my_results.info('I111', 'new_object_san_check', "#{@deposit_dir}/head contains required file #{file}")
          deposit_head_files.delete(file)
          found_me = true
        end
      end

      unless found_me
        @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir}/head requires either head.json or add_files.json, but not found.")
        raise "#{@deposit_dir}/head requires either head.json or add_files.json, but not found."
      end

      # 7. 'head' directory MAY contain one or more of these action files.
      action_files = ['head.json', 'add_files.json', 'update_files.json', 'version.json', 'update_manifest.json', 'delete_files.json', 'move_files.json', 'fixity_files.json']
      action_files.each do |file|
        if deposit_head_files.include? file
          @my_results.info('I111', 'new_object_san_check', "#{@deposit_dir}/head contains optional #{file}")
          deposit_head_files.delete(file)
        end
      end

      # 8. 'head' directory MUST NOT contain any other files.
      unless deposit_head_files.empty?
        @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir}/head contains extraneous files: #{deposit_head_files}")
        raise "#{@deposit_dir}/head contains extraneous files: #{deposit_head_files}"
      end
      # Only call this if we got here without errors.
      stage_new_object
    end

    def existing_object_san_check
      deposit_root_files = []
      deposit_root_directories = []
      Dir.chdir(@deposit_dir)
      Dir.glob('*').select do |file|
        deposit_root_files << file if File.file? file
        deposit_root_directories << file if File.directory? file
      end

      # 1. Deposit directory MUST contain an inventory.json
      if deposit_root_files.include? 'inventory.json'
        @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json found")
        deposit_root_files.delete('inventory.json')
      else
        @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json required, but not found.")
        raise "#{@deposit_dir}/inventory.json required, but not found."
      end

      # 2. Deposit directory MUST contain a matching inventory.json sidecar file.
      inventory_digest = OcflTools::Utils::Inventory.get_digestAlgorithm("#{@deposit_dir}/inventory.json")

      if deposit_root_files.include? "inventory.json.#{inventory_digest}"
        @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json.#{inventory_digest} found")
        deposit_root_files.delete("inventory.json.#{inventory_digest}")
      else
        @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json.#{inventory_digest} required, but not found")
        raise "#{@deposit_dir}/inventory.json.#{inventory_digest} required, but not found."
      end

      # 3. inventory.json MUST validate against sidecar digest value.
      generated_digest = OcflTools::Utils.generate_file_digest("#{@deposit_dir}/inventory.json", inventory_digest)
      sidecar_digest = File.open("#{@deposit_dir}/inventory.json.#{inventory_digest}", &:readline).split(' ')[0]

      if generated_digest == sidecar_digest
        @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json checksum matches generated value.")
      else
        @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json checksum does not match generated value.")
        raise "#{@deposit_dir}/inventory.json checksum does not match generated value."
      end
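
      # For reference, the sidecar's first line is expected to look something like
      # (hypothetical digest value):
      #   "a948904f2f0f...b42baeebd9e9  inventory.json"
      # i.e. the digest, whitespace, then the file name; only the first token is compared.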

      # 4. inventory.json MUST be a valid OCFL inventory (passes OcflVerify; copy results into our results instance).
      deposit_inventory = OcflTools::OcflInventory.new.from_file("#{@deposit_dir}/inventory.json")

      @my_results.add_results(OcflTools::OcflVerify.new(deposit_inventory).check_all)

      unless @my_results.error_count == 0
        raise 'Errors detected in deposit inventory verification!'
      end

      # 5. Deposit directory MUST NOT contain any other files.
      unless deposit_root_files.empty?
        @my_results.error('E111', 'existing_object_san_check', "Deposit directory contains extraneous files: #{deposit_root_files}")
        raise "Deposit directory contains extraneous files: #{deposit_root_files}."
      end

      # 6. Deposit directory MUST contain a 'head' directory.
      if deposit_root_directories.include? 'head'
        @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir} contains expected 'head' directory.")
        deposit_root_directories.delete('head')
      else
        @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir} must contain 'head' directory!")
        raise "Deposit directory must contain a 'head' directory."
      end

      # 7. Deposit directory MUST NOT contain any other directories.
      unless deposit_root_directories.empty?
        @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir} contains extraneous directories: #{deposit_root_directories}")
        raise "#{@deposit_dir} contains extraneous directories: #{deposit_root_directories}"
      end

      # Intermission: into the head directory!

      deposit_head_files = []
      deposit_head_directories = []
      Dir.chdir("#{@deposit_dir}/head")
      Dir.glob('*').select do |file|
        deposit_head_files << file if File.file? file
        deposit_head_directories << file if File.directory? file
      end

      # 8. 'head' directory must contain a 'content' directory that
      # matches the value in inventory.json, or the OCFL default if not otherwise set.
      content_directory = OcflTools::Utils::Inventory.get_contentDirectory("#{@deposit_dir}/inventory.json")

      if deposit_head_directories.include? content_directory
        @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/head contains expected #{content_directory} directory.")
        deposit_head_directories.delete(content_directory)
      else
        @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir}/head does NOT contain expected #{content_directory} directory.")
        raise "#{@deposit_dir}/head does NOT contain expected #{content_directory} directory."
      end

      # 9. 'head' MUST contain at least one of the 'actions' json files (inc. fixity).
      # Any one of these is needed.
      action_files = ['add_files.json', 'head.json', 'update_manifest.json', 'update_files.json', 'delete_files.json', 'move_files.json', 'fixity_files.json']
      action_found = nil

      deposit_head_files.each do |file|
        if action_files.include? file # We found an action file!
          deposit_head_files.delete(file)
          action_found = true
        end
      end

      if action_found == true
        @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/head contains at least 1 action file.")
      else
        @my_results.error('E111', 'existing_object_san_check', "Unable to find any action files in #{@deposit_dir}/head")
        raise "Unable to find any action files in #{@deposit_dir}/head"
      end

      # 9b. 'head' directory MAY contain a 'version.json' file.
      if deposit_head_files.include? 'version.json'
        @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/head contains optional version.json")
        deposit_head_files.delete('version.json')
      else
        @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/head does not contain optional version.json")
      end

      # 10. Object root MUST contain an inventory.json
      if File.exist? "#{@object_dir}/inventory.json"
        @my_results.info('I111', 'existing_object_san_check', "#{@object_dir}/inventory.json exists.")
      else
        @my_results.error('E111', 'existing_object_san_check', "#{@object_dir}/inventory.json does not exist.")
        raise "#{@object_dir}/inventory.json does not exist."
      end

      # 11. Object directory OCFL MUST match Deposit directory OCFL object (sidecar check)
      # - doing a digest check is the fastest way to ensure it's the same inventory file & contents.
      object_root_digest = OcflTools::Utils.generate_file_digest("#{@object_dir}/inventory.json", inventory_digest)

      if object_root_digest == generated_digest
        @my_results.info('I111', 'existing_object_san_check', "#{@object_dir}/inventory.json matches #{@deposit_dir}/inventory.json")
      else
        @my_results.error('E111', 'existing_object_san_check', "#{@object_dir}/inventory.json does not match #{@deposit_dir}/inventory.json")
        raise "#{@object_dir}/inventory.json does not match #{@deposit_dir}/inventory.json"
      end

      # 12. Object directory OCFL must pass a structure test (don't do checksum verification)
      destination_ocfl = OcflTools::OcflValidator.new(@object_dir)
      @my_results.add_results(destination_ocfl.verify_structure)
      unless @my_results.error_count == 0
        raise 'Errors detected in destination object structure!'
      end

      # Only call this if we got here without errors.
      stage_existing_object
    end

    def stage_new_object
      # read id.namaste file, set @id.
      # set new version
      # process action files
      self.id = @namaste
      @new_version = 1
      get_version(@new_version) # It's a new OCFL object; we start at version 1.
      process_action_files
    end

    def process_update_manifest(update_manifest_block)
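      # The update_manifest block, like the other action blocks below, is keyed by
      # digest with an array of filepaths as the value; an illustrative (hypothetical)
      # payload: { "<digest>" => ["my_content/file.txt"] }. Each referenced file must
      # exist under head/<content_dir> and hash to the stated digest.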
      # Process update_manifest, if present.
      update_manifest_block.each do |digest, filepaths|
        filepaths.each do |file|
          # Make sure it actually exists!
          unless File.exist? "#{@deposit_dir}/head/#{@contentDirectory}/#{file}"
            @my_results.error('E111', 'process_action_files', "File #{file} referenced in update_manifest.json not found in #{@deposit_dir}/head/#{@contentDirectory}")
            raise "File #{file} referenced in update_manifest.json not found in #{@deposit_dir}/head/#{@contentDirectory}"
          end
          # Here's where we'd compute checksum.
          if OcflTools::Utils.generate_file_digest("#{@deposit_dir}/head/#{@contentDirectory}/#{file}", @digestAlgorithm) == digest
            update_manifest(file, digest, @new_version)
            @my_results.info('I111', 'process_action_files', "#{@deposit_dir}/head/#{@contentDirectory}/#{file} added to manifest inventory.")
          else
            @my_results.error('E111', 'process_action_files', "#{@deposit_dir}/head/#{@contentDirectory}/#{file} computed checksum does not match provided digest.")
            raise "#{@deposit_dir}/head/#{@contentDirectory}/#{file} computed checksum does not match provided digest."
          end
        end
      end
    end

    def process_add_files(add_files_block)
      add_files_block.each do |digest, filepaths|
        filepaths.each do |file|
          unless manifest.key?(digest)
            # This digest does NOT exist in the manifest; check disk for ingest (because add_file's going to add it to manifest later).
            # It better be on disk, buck-o.
            unless File.exist? "#{@deposit_dir}/head/#{@contentDirectory}/#{file}"
              @my_results.error('E111', 'process_action_files', "File #{file} referenced in add_files block not found in #{@deposit_dir}/head/#{@contentDirectory}")
              raise "File #{file} referenced in add_files block not found in #{@deposit_dir}/head/#{@contentDirectory}"
            end

            unless OcflTools::Utils.generate_file_digest("#{@deposit_dir}/head/#{@contentDirectory}/#{file}", @digestAlgorithm) == digest
              # checksum failed, raise error.
              raise "#{@deposit_dir}/head/#{@contentDirectory}/#{file} computed checksum does not match provided digest in add_files block."
            end
          end
          # If we get to here, we're OK to add_file.
          add_file(file, digest, @new_version)
          @my_results.info('I111', 'process_action_files', "#{@deposit_dir}/head/#{@contentDirectory}/#{file} added to inventory.")
        end
      end
    end

    def process_update_files(update_files_block)
      update_files_block.each do |digest, filepaths|
        filepaths.each do |file|
          # Make sure it actually exists!
          unless File.exist? "#{@deposit_dir}/head/#{@contentDirectory}/#{file}"
            @my_results.error('E111', 'process_action_files', "File #{file} referenced in update_files.json not found in #{@deposit_dir}/head/#{@contentDirectory}")
            raise "File #{file} referenced in update_files.json not found in #{@deposit_dir}/head/#{@contentDirectory}"
          end
          # Here's where we'd compute checksum.
          if OcflTools::Utils.generate_file_digest("#{@deposit_dir}/head/#{@contentDirectory}/#{file}", @digestAlgorithm) == digest
            update_file(file, digest, @new_version)
            @my_results.info('I111', 'process_action_files', "#{@deposit_dir}/head/#{@contentDirectory}/#{file} added to inventory.")
          else
            @my_results.error('E111', 'process_action_files', "#{@deposit_dir}/head/#{@contentDirectory}/#{file} computed checksum does not match provided digest.")
            raise "#{@deposit_dir}/head/#{@contentDirectory}/#{file} computed checksum does not match provided digest."
          end
        end
      end
    end

    def process_move_files(move_files_block)
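      # Illustrative (hypothetical) move_files payload: a digest key mapping to an
      # array holding the file's existing path and its new path, e.g.
      # { "<digest>" => ["old/dir/file.txt", "new/dir/file.txt"] }; the existing path
      # must already be in state for that digest, and the second element is used as
      # the move destination.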
      move_files_block.each do |digest, filepaths|
        my_state = get_state(@new_version)
        unless my_state.key?(digest)
          @my_results.error('E111', 'process_action_files', "Unable to find digest #{digest} in state whilst processing a move request.")
          raise "Unable to find digest #{digest} in state whilst processing a move request."
        end
        previous_files = my_state[digest]
        # Disambiguation; we can only process a move if there is only 1 file here.
        if previous_files.size != 1
          @my_results.error('E111', 'process_action_files', "Disambiguation protection: unable to process move for digest #{digest}: more than 1 file uses this digest in prior version.")
          raise "Disambiguation protection: unable to process move for digest #{digest}: more than 1 file uses this digest in this version."
        end
        unless filepaths.include?(previous_files[0])
          @my_results.error('E111', 'process_action_files', "Unable to find source file #{previous_files[0]} digest #{digest} in state whilst processing a move request.")
          raise "Unable to find source file #{previous_files[0]} digest #{digest} in state whilst processing a move request."
        end
        source_file = previous_files[0]
        destination_file = filepaths[1]
        move_file(source_file, destination_file, @new_version)
      end
    end

    def process_copy_files(copy_files_block)
      my_state = get_state(@new_version)
      copy_files_block.each do |digest, filepaths|
        unless my_state.key?(digest)
          @my_results.error('E111', 'process_action_files', "Unable to find digest #{digest} in state whilst processing a copy request.")
          raise "Unable to find digest #{digest} in state whilst processing a copy request."
        end

        previous_files = my_state[digest]

        filepaths.each do |destination_file|
          copy_file(previous_files[0], destination_file, @new_version)
        end
      end
    end

    def process_delete_files(delete_files_block)
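      # The digest keys are not needed for deletes; every filepath listed in the
      # block is removed from the new version's state.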
      delete_files_block.each do |_digest, filepaths|
        filepaths.each do |filepath|
          delete_file(filepath, @new_version)
        end
      end
    end

    def process_version(version_block)
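      # An illustrative (hypothetical) version.json payload:
      #   {
      #     "created": "2020-01-01T00:00:00Z",
      #     "message": "Added two files",
      #     "user": { "name": "A. Depositor", "address": "mailto:depositor@example.org" }
      #   }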
      # Version block MUST contain keys 'created', 'message', 'user'
      %w[created message user].each do |req_key|
        unless version_block.key?(req_key)
          @my_results.error('E111', 'process_action_files', "#{@deposit_dir}/head/version.json does not contain expected key #{req_key}")
          raise "#{@deposit_dir}/head/version.json does not contain expected key #{req_key}"
        end
      end
      # user block MUST contain 'name', 'address'
      %w[name address].each do |req_key|
        unless version_block['user'].key?(req_key)
          @my_results.error('E111', 'process_action_files', "#{@deposit_dir}/head/version.json does not contain expected key #{req_key}")
          raise "#{@deposit_dir}/head/version.json does not contain expected key #{req_key}"
        end
      end
      # Now process!
      set_version_user(@new_version, version_block['user'])
      set_version_message(@new_version, version_block['message'])
      set_version_created(@new_version, version_block['created'])
    end

    def process_fixity(fixity_block)
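      # An illustrative (hypothetical) fixity_files.json payload, keyed by fixity
      # algorithm, then by the file's manifest checksum:
      #   { "md5": { "<manifest sha512 digest>": "<md5 digest of the same file>" } }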
      fixity_block.each do |algorithm, checksums|
        # check if algorithm is in list of acceptable fixity algos for this site.
        unless OcflTools.config.fixity_algorithms.include? algorithm
          @my_results.error('E111', 'process_action_files', "#{@deposit_dir}/head/fixity_files.json contains unsupported algorithm #{algorithm}")
          raise "#{@deposit_dir}/head/fixity_files.json contains unsupported algorithm #{algorithm}"
        end
        # Algo is permitted in the fixity block; add it.
        checksums.each do |manifest_checksum, fixity_checksum|
          update_fixity(manifest_checksum, algorithm, fixity_checksum)
        end
      end
      @my_results.info('I111', 'process_action_files', "#{@deposit_dir}/head/fixity_files.json successfully processed.")
    end

    def process_action_files
      # Moving towards just processing 1 big head.json file.

      if File.exist? "#{@deposit_dir}/head/head.json"
        head = read_json("#{@deposit_dir}/head/head.json")
        # Process keys here.
        process_update_manifest(head['update_manifest']) if head.key?('update_manifest')
        process_add_files(head['add']) if head.key?('add')
        process_update_files(head['update']) if head.key?('update')
        process_copy_files(head['copy']) if head.key?('copy')
        process_move_files(head['move']) if head.key?('move')
        process_delete_files(head['delete']) if head.key?('delete')
        process_fixity(head['fixity']) if head.key?('fixity')
        process_version(head['version']) if head.key?('version')
        return # don't process any more.
      end

      # Process update_manifest, if present.
      if File.exist? "#{@deposit_dir}/head/update_manifest.json"
        updates = read_json("#{@deposit_dir}/head/update_manifest.json")
        process_update_manifest(updates)
      end

      # Process add_files, if present.
      # add_files requires { "digest_value": [ "filepaths" ]}
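      # e.g. (hypothetical values): { "<sha512 of the new file>": [ "my_content/a_new_file.txt" ] }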
      if File.exist? "#{@deposit_dir}/head/add_files.json"
        add_files = read_json("#{@deposit_dir}/head/add_files.json")
        process_add_files(add_files)
      end

      # process update_files, if present.
      # update_files requires { "digest_value": [ "filepaths" ]}
      if File.exist? "#{@deposit_dir}/head/update_files.json"
        update_files = read_json("#{@deposit_dir}/head/update_files.json")
        process_update_files(update_files)
      end

      # Process move_files, if present.
      # move_file requires digest => [ filepaths ]
      if File.exist? "#{@deposit_dir}/head/move_files.json"
        move_files = read_json("#{@deposit_dir}/head/move_files.json")
        process_move_files(move_files)
      end

      # Process copy_files, if present.
      # copy_files requires digest => [ filepaths_of_copy_destinations ]
      if File.exist? "#{@deposit_dir}/head/copy_files.json"
        copy_files = read_json("#{@deposit_dir}/head/copy_files.json")
        process_copy_files(copy_files)
      end

      # Process delete_files, if present.
      # Do this last in case the same file is moved more than once.
      # { digest => [ filepaths_to_delete ] }
      if File.exist? "#{@deposit_dir}/head/delete_files.json"
        delete_files = read_json("#{@deposit_dir}/head/delete_files.json")
        process_delete_files(delete_files)
      end

      # If there's a fixity block, add it too.
      if File.file? "#{@deposit_dir}/head/fixity_files.json"
        fixity_files = read_json("#{@deposit_dir}/head/fixity_files.json")
        process_fixity(fixity_files)
      end

      # Process version.json, if present.
      if File.file? "#{@deposit_dir}/head/version.json"
        version_file = read_json("#{@deposit_dir}/head/version.json")
        process_version(version_file)
      end
    end

    def stage_existing_object
      # If we get here, we know that the local inventory.json is the same as the dest. inventory.json.
      from_file("#{@deposit_dir}/inventory.json")

      # Increment the version from the inventory.json by 1.
      @new_version = OcflTools::Utils.version_string_to_int(head) + 1

      get_version(@new_version) # Add this new version to our representation of this inventory in self.

      process_action_files # now process all our action files for this new version.
    end

    def process_new_version
      # We just passed OcflVerify to get here, so we're good to go.

      # Create version & content directory.
      target_content = "#{@object_dir}/#{@head}/#{@contentDirectory}"

      # Abort if target_content already exists!
      if Dir.exist? target_content
        @my_results.error('E111', 'process_new_version', "#{target_content} already exists! Unable to process new version.")
        raise "#{target_content} already exists! Unable to process new version."
      end

      unless FileUtils.mkdir_p target_content
        raise "Error creating #{target_content}!"
      end

      source_content = "#{@deposit_dir}/head/#{@contentDirectory}"

      # Copy [or move? make this behavior configurable] content across.
      # Why move? Well, if you're on the same filesystem root, and you're moving large files,
      # move is *much, much faster* and doesn't run the risk of bitstream corruption as it's
      # just a filesystem metadata operation.
      FileUtils.cp_r "#{source_content}/.", target_content
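      # A configurable "move" variant, as the comment above suggests, could be sketched
      # as (hypothetical setting; not part of the current config):
      #   FileUtils.mv(Dir.glob("#{source_content}/*"), target_content) if OcflTools.config.deposit_moves_content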

      # Add inventory.json to version directory.
      to_file("#{@object_dir}/#{@head}")
      # Verify version directory.
      validation = OcflTools::OcflValidator.new(@object_dir)
      validation.verify_directory(@new_version)

      @my_results.add_results(validation.results)
      raise 'Errors detected in validation!' unless @my_results.error_count == 0

      # What OCFL version are we targeting? Pull the default value if not otherwise set.
      @ocfl_version ||= OcflTools.config.ocfl_version

      # If this is version 1, there will not be a Namaste file in object root - add it.
      unless File.exist?("#{@object_dir}/0=ocfl_object_#{@ocfl_version}")
        namaste = File.open("#{@object_dir}/0=ocfl_object_#{@ocfl_version}", 'w')
        namaste.puts "ocfl_object_#{@ocfl_version}"
        namaste.close
      end

      # Add new inventory.json to object root directory. This should always be the final step.
      to_file(@object_dir)

      @my_results.ok('0111', 'process_new_version', "object #{id} version #{@new_version} successfully processed.")
    end
  end
end