ocfl-tools 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +53 -0
- data/.rubocop.yml +35 -0
- data/.rubocop_todo.yml +189 -0
- data/.travis.yml +12 -0
- data/Gemfile +5 -0
- data/README.md +647 -0
- data/Rakefile +13 -0
- data/VERSION +1 -0
- data/examples/list_files.rb +56 -0
- data/examples/validate_object.rb +23 -0
- data/lib/ocfl-tools.rb +19 -0
- data/lib/ocfl_tools.rb +17 -0
- data/lib/ocfl_tools/config.rb +27 -0
- data/lib/ocfl_tools/ocfl_actions.rb +146 -0
- data/lib/ocfl_tools/ocfl_delta.rb +250 -0
- data/lib/ocfl_tools/ocfl_deposit.rb +685 -0
- data/lib/ocfl_tools/ocfl_errors.rb +23 -0
- data/lib/ocfl_tools/ocfl_inventory.rb +95 -0
- data/lib/ocfl_tools/ocfl_object.rb +425 -0
- data/lib/ocfl_tools/ocfl_results.rb +272 -0
- data/lib/ocfl_tools/ocfl_validator.rb +799 -0
- data/lib/ocfl_tools/ocfl_verify.rb +493 -0
- data/lib/ocfl_tools/utils.rb +127 -0
- data/lib/ocfl_tools/utils_file.rb +195 -0
- data/lib/ocfl_tools/utils_inventory.rb +96 -0
- data/ocfl-tools.gemspec +31 -0
- data/results_codes.md +106 -0
- data/test-it.sh +11 -0
- metadata +191 -0
|
@@ -0,0 +1,685 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OcflTools
|
|
4
|
+
# Class to take new content from a deposit directory and marshal it
|
|
5
|
+
# into a new version directory of a new or existing OCFL object dir.
|
|
6
|
+
# Expects deposit_dir to be:
|
|
7
|
+
#
|
|
8
|
+
# <ocfl deposit directory>/
|
|
9
|
+
# |-- inventory.json (from object_directory root, if adding to existing version)
|
|
10
|
+
# |-- inventory.json.sha512 (matching sidecar from object_directory root)
|
|
11
|
+
# |-- head/
|
|
12
|
+
# |-- head.json
|
|
13
|
+
# | OR a combination of the following files:
|
|
14
|
+
# |-- add_files.json (all proposed file add actions)
|
|
15
|
+
# |-- update_files.json (all proposed file update actions)
|
|
16
|
+
# |-- copy_files.json (all proposed file copy actions)
|
|
17
|
+
# |-- delete_files.json (all proposed file delete actions)
|
|
18
|
+
# |-- move_files.json (all proposed file move actions)
|
|
19
|
+
# |-- version.json (optional version metadata)
|
|
20
|
+
# |-- fixity_files.json (optional fixity information)
|
|
21
|
+
# |-- <content_dir>/
|
|
22
|
+
# |-- <files to add or update>
|
|
23
|
+
#
|
|
24
|
+
class OcflDeposit < OcflTools::OcflInventory
|
|
25
|
+
|
|
26
|
+
# @return [String] the version of OCFL that this deposit object is targeting.
|
|
27
|
+
attr_accessor :ocfl_version
|
|
28
|
+
|
|
29
|
+
# @param [Pathname] deposit_directory fully-qualified path to a well-formed deposit directory.
# @param [Pathname] object_directory fully-qualified path to either an empty directory to create new OCFL object in, or the existing OCFL object to which the new version directory should be added.
# @return {OcflTools::OcflDeposit}
def initialize(deposit_directory:, object_directory:)
  @deposit_dir = deposit_directory
  @object_dir = object_directory

  # Both paths must already exist on disk before we do anything else.
  [@deposit_dir, @object_dir].each do |dir|
    raise "#{dir} is not a valid directory!" unless File.directory? dir
  end

  # Since we are overriding OcflObject's initialize block, we need to define these variables again.
  @id = nil
  @head = nil
  @type = OcflTools.config.content_type
  @digestAlgorithm = OcflTools.config.digest_algorithm # sha512 is recommended, Stanford uses sha256.
  @contentDirectory = OcflTools.config.content_directory # default is 'content', Stanford uses 'data'
  @manifest = {}
  @versions = {} # A hash of Version hashes.
  @fixity = {} # Optional. Same format as Manifest.
  @ocfl_version = nil
  @my_results = OcflTools::OcflResults.new

  # san_check decides whether deposit_dir/object_dir represent a brand-new
  # object (first version) or an update to an existing object, then verifies
  # and stages everything. If it returns without raising, the calling app can
  # simply invoke #deposit_new_version to proceed.
  san_check
end
|
|
62
|
+
|
|
63
|
+
# Returns a {OcflTools::OcflResults} object containing information about actions taken during the staging and creation of this new version.
# Read-only accessor: the returned object is the same instance the internal
# sanity checks append to, so its contents grow as processing proceeds.
# @return {OcflTools::OcflResults}
def results
  @my_results
end
|
|
68
|
+
|
|
69
|
+
# Creates a new version of an OCFL object in the destination object directory.
# This method can only be called if the {OcflTools::OcflDeposit} object passed all
# necessary sanity checks, which occur when the object is initialized.
# @return {OcflTools::OcflDeposit} self
def deposit_new_version
  # Workflow: pin @head, self-verify the staged inventory, confirm the
  # destination is usable, then hand off to process_new_version which writes
  # the version directory, content, and inventory/sidecar files.
  set_head_version

  # Am I put together correctly? (OcflVerify results are merged into ours.)
  @my_results.add_results(OcflTools::OcflVerify.new(self).check_all)

  # Any recorded error aborts the deposit before anything touches disk.
  raise "Errors detected in OCFL object verification. Cannot process deposit: #{@my_results.get_errors}" if @my_results.error_count > 0

  # A first version may only be written into an empty destination directory.
  raise "#{@object_dir} is not empty! Unable to create new object." if OcflTools::Utils.version_string_to_int(@head) == 1 && !Dir.empty?(@object_dir)

  process_new_version
  self
end
|
|
97
|
+
|
|
98
|
+
private
|
|
99
|
+
|
|
100
|
+
# Routes the deposit to the correct workflow: a deposit directory that ships
# an inventory.json is an update to an existing object; one without it is a
# brand-new object. Delegates all further checking to the chosen workflow.
def san_check
  unless File.file? "#{@deposit_dir}/inventory.json"
    @my_results.info('I111', 'san_check', "No inventory.json found in #{@deposit_dir}; assuming new object workflow.")
    return new_object_san_check
  end

  @my_results.info('I111', 'san_check', "Existing inventory found at #{@deposit_dir}/inventory.json")
  existing_object_san_check
end
|
|
114
|
+
|
|
115
|
+
# Sanity-checks a deposit that should create a brand-new OCFL object:
# the destination must be empty, the deposit must contain exactly one valid
# Namaste identity file (4=<id>), a 'head' directory with the site's content
# directory, and no extraneous files or directories. On success, stages the
# object via stage_new_object. Raises RuntimeError on any fatal problem;
# findings are also recorded in @my_results.
def new_object_san_check
  # 1. Object directory must be empty.
  if Dir.empty?(@object_dir)
    @my_results.info('I111', 'new_object_san_check', "target dir #{@object_dir} is empty.")
  else
    @my_results.error('E111', 'new_object_san_check', "target dir #{@object_dir} is NOT empty!")
  end

  # 2. Deposit directory must contain 'head' directory.
  if File.directory?("#{@deposit_dir}/head")
    @my_results.info('I111', 'new_object_san_check', "Deposit dir #{@deposit_dir} contains a 'head' directory.")
  else
    @my_results.error('E111', 'new_object_san_check', "Deposit dir #{@deposit_dir} does NOT contain required 'head' directory.")
  end

  # 3. Deposit directory must contain ONE id namaste file. (4='id')
  deposit_root_files = []
  deposit_root_directories = []
  Dir.chdir(@deposit_dir)
  Dir.glob('*').select do |file|
    deposit_root_files << file if File.file? file
    deposit_root_directories << file if File.directory? file
  end

  namaste_file = nil
  # BUG FIX: iterate over a copy. The original deleted from the array being
  # iterated, which skips the following element and could let a second
  # Namaste file escape detection.
  deposit_root_files.dup.each do |file|
    next unless file =~ /^4=/ # Looks like the start of a Namaste file.

    deposit_root_files.delete(file)
    if namaste_file.nil?
      namaste_file = file
      @my_results.info('I111', 'new_object_san_check', "Matching Namaste file #{file} found in #{@deposit_dir}.")
    else
      @my_results.error('E111', 'new_object_san_check', "More than one matching Namaste file found in #{@deposit_dir}!")
      raise "More than one matching Namaste file found in #{@deposit_dir}! #{namaste_file} & #{file}"
    end
  end

  # 3b. Verify namaste file is valid.
  # BUG FIX: the original crashed with NoMethodError (nil.split) when no
  # Namaste file was present; raise a clear error instead.
  if namaste_file.nil?
    @my_results.error('E111', 'new_object_san_check', "No matching Namaste file found in #{@deposit_dir}!")
    raise "No matching Namaste file found in #{@deposit_dir}!"
  end

  object_id = namaste_file.split('=')[1]
  # nil check also guards a bare '4=' filename, which split leaves without an id.
  raise 'Object ID cannot be zero length!' if object_id.nil? || object_id.empty?

  File.readlines("#{@deposit_dir}/#{namaste_file}").each do |line|
    line.chomp!
    if object_id != line
      @my_results.error('E111', 'new_object_san_check', "Contents of Namaste ID file do not match filename! #{object_id} vs #{line}.")
      raise "Contents of Namaste ID file do not match filename! #{object_id} vs #{line}."
    end
  end
  # Really there should only be 1 line in namaste_file but so long as they all match, we're good.
  @namaste = object_id

  # 4. Deposit directory must NOT contain any other files.
  unless deposit_root_files.empty?
    @my_results.error('E111', 'new_object_san_check', "Deposit directory contains extraneous files: #{deposit_root_files}")
    raise "Deposit directory contains extraneous files: #{deposit_root_files}."
  end

  # 4b. Deposit directory MUST contain a 'head' directory.
  if deposit_root_directories.include? 'head'
    @my_results.info('I111', 'new_object_san_check', "#{@deposit_dir} contains expected 'head' directory.")
    deposit_root_directories.delete('head')
  else
    @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir} must contain 'head' directory!")
    raise "Deposit directory must contain a 'head' directory."
  end

  # 4c. Deposit directory MUST NOT contain any other directories.
  unless deposit_root_directories.empty?
    @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir} contains extraneous directories: #{deposit_root_directories}")
    # BUG FIX: was bare `deposit_dir` (undefined local), which raised NameError
    # instead of this intended message.
    raise "#{@deposit_dir} contains extraneous directories: #{deposit_root_directories}"
  end

  # Intermission: prepare deposit/head for inspection
  deposit_head_files = []
  deposit_head_directories = []
  Dir.chdir("#{@deposit_dir}/head")
  Dir.glob('*').select do |file|
    deposit_head_files << file if File.file? file
    deposit_head_directories << file if File.directory? file
  end

  # 5. 'head' directory must contain a 'content' directory that matches sitewide setting.
  if deposit_head_directories.include? OcflTools.config.content_directory
    @my_results.info('I111', 'new_object_san_check', "#{@deposit_dir}/head contains expected #{OcflTools.config.content_directory} directory.")
    deposit_head_directories.delete(OcflTools.config.content_directory)
  else
    @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir}/head does NOT contain expected #{OcflTools.config.content_directory} directory.")
    raise "#{@deposit_dir}/head does NOT contain expected #{OcflTools.config.content_directory} directory."
  end

  # 5b. 'head' directory MUST NOT contain any other directories.
  unless deposit_head_directories.empty?
    @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir}/head contains extraneous directories: #{deposit_head_directories}")
    # BUG FIX: was bare `deposit_dir` (undefined local) -> NameError.
    raise "#{@deposit_dir}/head contains extraneous directories: #{deposit_head_directories}"
  end

  # 6. 'head' directory MUST contain either 'head.json' or 'add_files.json'
  found_me = nil
  require_one = ['head.json', 'add_files.json']
  require_one.each do |file|
    if deposit_head_files.include? file
      @my_results.info('I111', 'new_object_san_check', "#{@deposit_dir}/head contains required file #{file}")
      deposit_head_files.delete(file)
      found_me = true
    end
  end

  unless found_me
    @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir}/head requires either head.json or add_files.json, but not found.")
    raise "#{@deposit_dir}/head requires either head.json or add_files.json, but not found."
  end

  # 7. 'head' directory MAY contain one or more of these action files.
  action_files = ['head.json', 'add_files.json', 'update_files.json', 'version.json', 'update_manifest.json', 'delete_files.json', 'move_files.json', 'fixity_files.json']
  action_files.each do |file|
    if deposit_head_files.include? file
      @my_results.info('I111', 'new_object_san_check', "#{@deposit_dir}/head contains optional #{file}")
      deposit_head_files.delete(file)
    end
  end

  # 8. 'head' directory MUST NOT contain any other files.
  unless deposit_head_files.empty?
    @my_results.error('E111', 'new_object_san_check', "#{@deposit_dir}/head contains extraneous files: #{deposit_head_files}")
    raise "#{@deposit_dir}/head contains extraneous files: #{deposit_head_files}"
  end
  # Only call this if we got here without errors.
  stage_new_object
end
|
|
245
|
+
|
|
246
|
+
# Sanity-checks a deposit that updates an existing OCFL object: the deposit
# must ship the object's current inventory.json plus matching sidecar, the
# inventory must verify, the deposit layout must be well-formed, and the
# destination object must structurally validate and hold the same inventory.
# On success, stages the new version via stage_existing_object. Raises
# RuntimeError on any fatal problem; findings also land in @my_results.
def existing_object_san_check
  deposit_root_files = []
  deposit_root_directories = []
  Dir.chdir(@deposit_dir)
  Dir.glob('*').select do |file|
    deposit_root_files << file if File.file? file
    deposit_root_directories << file if File.directory? file
  end

  # 1. Deposit directory MUST contain an inventory.json
  if deposit_root_files.include? 'inventory.json'
    @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json found")
    deposit_root_files.delete('inventory.json')
  else
    @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json required, but not found.")
    raise "#{@deposit_dir}/inventory.json required, but not found."
  end

  # 2. Deposit directory MUST contain a matching inventory.json sidecar file.
  inventory_digest = OcflTools::Utils::Inventory.get_digestAlgorithm("#{@deposit_dir}/inventory.json")

  if deposit_root_files.include? "inventory.json.#{inventory_digest}"
    @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json.#{inventory_digest} found")
    deposit_root_files.delete("inventory.json.#{inventory_digest}")
  else
    @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json.#{inventory_digest} required, but not found")
    raise "#{@deposit_dir}/inventory.json.#{inventory_digest} required, but not found."
  end

  # 3. inventory.json MUST validate against sidecar digest value.
  generated_digest = OcflTools::Utils.generate_file_digest("#{@deposit_dir}/inventory.json", inventory_digest)
  # Sidecar format is "<digest> inventory.json"; take the digest token.
  sidecar_digest = File.open("#{@deposit_dir}/inventory.json.#{inventory_digest}", &:readline).split(' ')[0]

  if generated_digest == sidecar_digest
    @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json checksum matches generated value.")
  else
    @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir}/inventory.json checksum does not match generated value.")
    raise "#{@deposit_dir}/inventory.json checksum does not match generated value."
  end

  # 4. inventory.json MUST be a valid OCFL inventory (passes OcflVerify; copy results into our results instance).
  deposit_inventory = OcflTools::OcflInventory.new.from_file("#{@deposit_dir}/inventory.json")

  @my_results.add_results(OcflTools::OcflVerify.new(deposit_inventory).check_all)

  unless @my_results.error_count == 0
    raise 'Errors detected in deposit inventory verification!'
  end

  # 5. Deposit directory MUST NOT contain any other files.
  unless deposit_root_files.empty?
    @my_results.error('E111', 'existing_object_san_check', "Deposit directory contains extraneous files: #{deposit_root_files}")
    raise "Deposit directory contains extraneous files: #{deposit_root_files}."
  end

  # 6. Deposit directory MUST contain a 'head' directory.
  if deposit_root_directories.include? 'head'
    @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir} contains expected 'head' directory.")
    deposit_root_directories.delete('head')
  else
    @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir} must contain 'head' directory!")
    raise "Deposit directory must contain a 'head' directory."
  end

  # 7. Deposit directory MUST NOT contain any other directories.
  unless deposit_root_directories.empty?
    @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir} contains extraneous directories: #{deposit_root_directories}")
    # BUG FIX: was bare `deposit_dir` (undefined local), which raised NameError
    # instead of this intended message.
    raise "#{@deposit_dir} contains extraneous directories: #{deposit_root_directories}"
  end

  # Intermission: into the head directory!
  deposit_head_files = []
  deposit_head_directories = []
  Dir.chdir("#{@deposit_dir}/head")
  Dir.glob('*').select do |file|
    deposit_head_files << file if File.file? file
    deposit_head_directories << file if File.directory? file
  end

  # 8. 'head' directory must contain a 'content' directory that
  # matches value in inventory.json or OCFL default if not otherwise set.
  content_directory = OcflTools::Utils::Inventory.get_contentDirectory("#{@deposit_dir}/inventory.json")

  if deposit_head_directories.include? content_directory
    @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/head contains expected #{content_directory} directory.")
    deposit_head_directories.delete(content_directory)
  else
    @my_results.error('E111', 'existing_object_san_check', "#{@deposit_dir}/head does NOT contain expected #{content_directory} directory.")
    raise "#{@deposit_dir}/head does NOT contain expected #{content_directory} directory."
  end

  # 9. 'head' MUST contain at least one of the 'actions' json files (inc. fixity).
  # Any one of these is needed.
  action_files = ['add_files.json', 'head.json', 'update_manifest.json', 'update_files.json', 'delete_files.json', 'move_files.json', 'fixity_files.json']
  # BUG FIX: the original deleted from deposit_head_files while iterating it,
  # which skips elements and could leave recognized action files behind.
  # Set intersection removes them all and tells us whether any were found.
  matched_action_files = deposit_head_files & action_files
  deposit_head_files -= matched_action_files
  action_found = true unless matched_action_files.empty?

  if action_found == true
    @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/head contains at least 1 action file.")
  else
    @my_results.error('E111', 'existing_object_san_check', "Unable to find any action files in #{@deposit_dir}/head")
    raise "Unable to find any action files in #{@deposit_dir}/head"
  end

  # 9b. 'head' directory MAY contain a 'version.json' file.
  # CONSISTENCY FIX: these results were mislabeled 'new_object_san_check'
  # (copy-paste from the sibling method).
  if deposit_head_files.include? 'version.json'
    @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/head contains optional version.json")
    deposit_head_files.delete('version.json')
  else
    @my_results.info('I111', 'existing_object_san_check', "#{@deposit_dir}/head does not contain optional version.json")
  end

  # 10. Object root MUST contain an inventory.json
  if File.exist? "#{@object_dir}/inventory.json"
    @my_results.info('I111', 'existing_object_san_check', "#{@object_dir}/inventory.json exists.")
  else
    @my_results.error('E111', 'existing_object_san_check', "#{@object_dir}/inventory.json does not exist.")
    raise "#{@object_dir}/inventory.json does not exist."
  end

  # 11. Object directory OCFL MUST match Deposit directory OCFL object (sidecar check)
  # - doing a digest check is the fastest way to ensure it's the same inventory file & contents.
  object_root_digest = OcflTools::Utils.generate_file_digest("#{@object_dir}/inventory.json", inventory_digest)

  if object_root_digest == generated_digest
    @my_results.info('I111', 'existing_object_san_check', "#{@object_dir}/inventory.json matches #{@deposit_dir}/inventory.json")
  else
    @my_results.error('E111', 'existing_object_san_check', "#{@object_dir}/inventory.json does not match #{@deposit_dir}/inventory.json")
    raise "#{@object_dir}/inventory.json does not match #{@deposit_dir}/inventory.json"
  end

  # 12. Object directory OCFL must pass a structure test (don't do checksum verification)
  destination_ocfl = OcflTools::OcflValidator.new(@object_dir)
  @my_results.add_results(destination_ocfl.verify_structure)
  unless @my_results.error_count == 0
    raise 'Errors detected in destination object structure!'
  end

  # Only call this if we got here without errors.
  stage_existing_object
end
|
|
394
|
+
|
|
395
|
+
# Stages a brand-new object: the id comes from the Namaste file discovered by
# new_object_san_check, and all staged content is recorded into version 1.
def stage_new_object
  self.id = @namaste          # object id captured from the 4=<id> Namaste file.
  @new_version = 1            # It's a new OCFL object; we start at version 1.
  get_version(@new_version)   # materialize the version structure.
  process_action_files        # then apply add/update/copy/move/delete/fixity files.
end
|
|
404
|
+
|
|
405
|
+
# Processes an update_manifest block: { digest => [filepaths] }.
# Each listed file must exist under the deposit's content directory and hash
# to the provided digest; on success it is recorded via update_manifest.
# Raises (after logging) on a missing file or a checksum mismatch.
def process_update_manifest(update_manifest_block)
  update_manifest_block.each do |digest, filepaths|
    filepaths.each do |file|
      staged_path = "#{@deposit_dir}/head/#{@contentDirectory}/#{file}"

      # The referenced bitstream must actually be present on disk.
      unless File.exist? staged_path
        @my_results.error('E111', 'process_action_files', "File #{file} referenced in update_manifest.json not found in #{@deposit_dir}/head/#{@contentDirectory}")
        raise "File #{file} referenced in update_manifest.json not found in #{@deposit_dir}/head/#{@contentDirectory}"
      end

      # ...and must hash to the digest the client provided.
      computed_digest = OcflTools::Utils.generate_file_digest(staged_path, @digestAlgorithm)
      unless computed_digest == digest
        @my_results.error('E111', 'process_action_files', "#{staged_path} computed checksum does not match provided digest.")
        raise "#{staged_path} computed checksum does not match provided digest."
      end

      update_manifest(file, digest, @new_version)
      @my_results.info('I111', 'process_action_files', "#{staged_path} added to manifest inventory.")
    end
  end
end
|
|
425
|
+
|
|
426
|
+
# Processes an add_files block: { digest => [filepaths] }.
# If the digest is already in the manifest this is a dedupe add and no disk
# check is needed; otherwise the new bitstream must be present in the deposit
# content directory and hash to the provided digest. Each filepath is then
# recorded via add_file.
def process_add_files(add_files_block)
  add_files_block.each do |digest, filepaths|
    filepaths.each do |file|
      unless manifest.key?(digest)
        # This digest does NOT exist in the manifest; check disk for ingest (because add_file's going to add it to manifest later).
        unless File.exist? "#{@deposit_dir}/head/#{@contentDirectory}/#{file}"
          @my_results.error('E111', 'process_action_files', "File #{file} referenced in add_files block not found in #{@deposit_dir}/head/#{@contentDirectory}")
          raise "File #{file} referenced in add_files block not found in #{@deposit_dir}/head/#{@contentDirectory}"
        end

        # BUG FIX: the original wrote `if !generate_file_digest(...) == digest`,
        # which Ruby parses as `(!result) == digest` i.e. `false == digest` --
        # always false, so a checksum mismatch was NEVER detected and corrupt
        # or mislabeled files were silently accepted.
        unless OcflTools::Utils.generate_file_digest("#{@deposit_dir}/head/#{@contentDirectory}/#{file}", @digestAlgorithm) == digest
          # Log before raising, consistent with the sibling process_* methods.
          @my_results.error('E111', 'process_action_files', "#{@deposit_dir}/head/#{@contentDirectory}/#{file} computed checksum does not match provided digest in add_files block.")
          raise "#{@deposit_dir}/head/#{@contentDirectory}/#{file} computed checksum does not match provided digest in add_files block."
        end
      end
      # If we get to here, we're OK to add_file.
      add_file(file, digest, @new_version)
      @my_results.info('I111', 'process_action_files', "#{@deposit_dir}/head/#{@contentDirectory}/#{file} added to inventory.")
    end
  end
end
|
|
448
|
+
|
|
449
|
+
# Processes an update_files block: { digest => [filepaths] }.
# Each listed file must exist under the deposit's content directory and hash
# to the provided digest; on success it is recorded via update_file.
# Raises (after logging) on a missing file or a checksum mismatch.
def process_update_files(update_files_block)
  update_files_block.each do |digest, filepaths|
    filepaths.each do |file|
      staged_path = "#{@deposit_dir}/head/#{@contentDirectory}/#{file}"

      # The updated bitstream must actually be present on disk.
      unless File.exist? staged_path
        @my_results.error('E111', 'process_action_files', "File #{file} referenced in update_files.json not found in #{@deposit_dir}/head/#{@contentDirectory}")
        raise "File #{file} referenced in update_files.json not found in #{@deposit_dir}/head/#{@contentDirectory}"
      end

      # ...and must hash to the digest the client provided.
      computed_digest = OcflTools::Utils.generate_file_digest(staged_path, @digestAlgorithm)
      unless computed_digest == digest
        @my_results.error('E111', 'process_action_files', "#{staged_path} computed checksum does not match provided digest.")
        raise "#{staged_path} computed checksum does not match provided digest."
      end

      update_file(file, digest, @new_version)
      @my_results.info('I111', 'process_action_files', "#{staged_path} added to inventory.")
    end
  end
end
|
|
468
|
+
|
|
469
|
+
# Processes a move_files block: { digest => filepaths }.
# The digest must already exist in the new version's state and must map to
# exactly one file (disambiguation guard), and that file must appear in the
# provided filepaths; the file is then moved via move_file.
# NOTE(review): this assumes filepaths is ordered [source, destination] --
# the include? check would still pass if the destination were listed first,
# but filepaths[1] would then pick the wrong path. Confirm the deposit JSON
# contract before relying on ordering.
def process_move_files(move_files_block)
  move_files_block.each do |digest, filepaths|
    # State is re-read each iteration so earlier moves in this block are visible.
    my_state = get_state(@new_version)
    unless my_state.key?(digest)
      @my_results.error('E111', 'process_action_files', "Unable to find digest #{digest} in state whilst processing a move request.")
      raise "Unable to find digest #{digest} in state whilst processing a move request."
    end
    previous_files = my_state[digest]
    # Disambiguation; we can only process a move if there is only 1 file here.
    if previous_files.size != 1
      @my_results.error('E111', 'process_action_files', "Disambiguation protection: unable to process move for digest #{digest}: more than 1 file uses this digest in prior version.")
      raise "Disambiguation protection: unable to process move for digest #{digest}: more than 1 file uses this digest in this version."
    end
    # The single file currently holding this digest must be one of the listed paths.
    unless filepaths.include?(previous_files[0])
      @my_results.error('E111', 'process_action_files', "Unable to find source file #{previous_files[0]} digest #{digest} in state whilst processing a move request.")
      raise "Unable to find source file #{previous_files[0]} digest #{digest} in state whilst processing a move request."
    end
    source_file = previous_files[0]
    # See NOTE(review) above: second element is treated as the destination.
    destination_file = filepaths[1]
    move_file(source_file, destination_file, @new_version)
  end
end
|
|
491
|
+
|
|
492
|
+
# Processes a copy_files block: { digest => [destination filepaths] }.
# The digest must already exist in the new version's state; the first file
# currently holding it is used as the copy source for every destination.
def process_copy_files(copy_files_block)
  current_state = get_state(@new_version)
  copy_files_block.each do |digest, destinations|
    unless current_state.key?(digest)
      @my_results.error('E111', 'process_action_files', "Unable to find digest #{digest} in state whilst processing a copy request.")
      raise "Unable to find digest #{digest} in state whilst processing a copy request."
    end

    source_file = current_state[digest][0]
    destinations.each do |destination_file|
      copy_file(source_file, destination_file, @new_version)
    end
  end
end
|
|
507
|
+
|
|
508
|
+
# Processes a delete_files block: { digest => [filepaths] }.
# Deletion is by path only -- the digest keys are ignored; every listed
# filepath is removed from the new version's state via delete_file.
def process_delete_files(delete_files_block)
  delete_files_block.each_value do |filepaths|
    filepaths.each { |target| delete_file(target, @new_version) }
  end
end
|
|
515
|
+
|
|
516
|
+
# Processes a version.json block: validates the required metadata keys, then
# applies user, message and created to the new version.
# Raises (after logging) on the first missing key.
def process_version(version_block)
  # Version block MUST contain keys 'created', 'message', 'user'.
  %w[created message user].each do |req_key|
    next if version_block.key?(req_key)

    @my_results.error('E111', 'process_action_files', "#{@deposit_dir}/head/version.json does not contain expected key #{req_key}")
    raise "#{@deposit_dir}/head/version.json does not contain expected key #{req_key}"
  end

  # user block MUST contain 'name', 'address'.
  %w[name address].each do |req_key|
    next if version_block['user'].key?(req_key)

    @my_results.error('E111', 'process_action_files', "#{@deposit_dir}/head/version.json does not contain expected key #{req_key}")
    raise "#{@deposit_dir}/head/version.json does not contain expected key #{req_key}"
  end

  # All required keys present; apply the metadata to the new version.
  set_version_user(@new_version, version_block['user'])
  set_version_message(@new_version, version_block['message'])
  set_version_created(@new_version, version_block['created'])
end
|
|
536
|
+
|
|
537
|
+
# Processes a fixity_files.json block: { algorithm => { manifest_checksum => fixity_checksum } }.
# Each algorithm must be in the site's approved fixity list; approved pairs
# are registered via update_fixity. Raises (after logging) on an unsupported
# algorithm.
def process_fixity(fixity_block)
  site_algorithms = OcflTools.config.fixity_algorithms
  fixity_block.each_pair do |algorithm, checksums|
    if site_algorithms.include?(algorithm)
      # Approved algorithm: register each manifest-checksum -> fixity-checksum pair.
      checksums.each_pair do |manifest_checksum, fixity_checksum|
        update_fixity(manifest_checksum, algorithm, fixity_checksum)
      end
    else
      @my_results.error('E111', 'process_action_files', "#{@deposit_dir}/head/fixity_files.json contains unsupported algorithm #{algorithm}")
      raise "#{@deposit_dir}/head/fixity_files.json contains unsupported algorithm #{algorithm}"
    end
  end
  @my_results.info('I111', 'process_action_files', "#{@deposit_dir}/head/fixity_files.json successfully processed.")
end
|
|
551
|
+
|
|
552
|
+
# Processes the action files found in the deposit's head/ directory and
# applies them to the in-memory inventory for the new version.
#
# Preferred layout is a single consolidated head.json containing any of
# the keys: update_manifest, add, update, copy, move, delete, fixity,
# version. If head.json exists, it is processed and all legacy per-action
# files are ignored. Otherwise each legacy file (update_manifest.json,
# add_files.json, update_files.json, move_files.json, copy_files.json,
# delete_files.json, fixity_files.json, version.json) is processed if
# present.
def process_action_files
  # Moving towards just processing 1 big head.json file.
  if File.exist? "#{@deposit_dir}/head/head.json"
    head = read_json("#{@deposit_dir}/head/head.json")
    # Process keys here.
    process_update_manifest(head['update_manifest']) if head.key?('update_manifest')
    process_add_files(head['add']) if head.key?('add')
    process_update_files(head['update']) if head.key?('update')
    process_copy_files(head['copy']) if head.key?('copy')
    process_move_files(head['move']) if head.key?('move')
    # BUG FIX: the 'delete' key was previously routed to
    # process_move_files; it must call process_delete_files, matching
    # the legacy delete_files.json handling below.
    process_delete_files(head['delete']) if head.key?('delete')
    process_fixity(head['fixity']) if head.key?('fixity')
    process_version(head['version']) if head.key?('version')
    return # don't process any more.
  end

  # Process update_manifest, if present.
  if File.exist? "#{@deposit_dir}/head/update_manifest.json"
    updates = read_json("#{@deposit_dir}/head/update_manifest.json")
    process_update_manifest(updates)
  end

  # Process add_files, if present.
  # add_files requires { "digest_value": [ "filepaths" ]}
  if File.exist? "#{@deposit_dir}/head/add_files.json"
    add_files = read_json("#{@deposit_dir}/head/add_files.json")
    process_add_files(add_files)
  end

  # process update_files, if present.
  # update_files requires { "digest_value": [ "filepaths" ]}
  if File.exist? "#{@deposit_dir}/head/update_files.json"
    update_files = read_json("#{@deposit_dir}/head/update_files.json")
    process_update_files(update_files)
  end

  # Process move_files, if present.
  # move_file requires digest => [ filepaths ]
  if File.exist? "#{@deposit_dir}/head/move_files.json"
    move_files = read_json("#{@deposit_dir}/head/move_files.json")
    process_move_files(move_files)
  end

  # Process copy_files, if present.
  # copy_files requires digest => [ filepaths_of_copy_destinations ]
  if File.exist? "#{@deposit_dir}/head/copy_files.json"
    copy_files = read_json("#{@deposit_dir}/head/copy_files.json")
    process_copy_files(copy_files)
  end

  # Process delete_files, if present.
  # Do this last in case the same file is moved > 1.
  # { digest => [ filepaths_to_delete ] }
  if File.exist? "#{@deposit_dir}/head/delete_files.json"
    delete_files = read_json("#{@deposit_dir}/head/delete_files.json")
    process_delete_files(delete_files)
  end

  # If there's a fixity block, add it too.
  if File.file? "#{@deposit_dir}/head/fixity_files.json"
    fixity_files = read_json("#{@deposit_dir}/head/fixity_files.json")
    process_fixity(fixity_files)
  end

  # Process version.json, if present.
  if File.file? "#{@deposit_dir}/head/version.json"
    version_file = read_json("#{@deposit_dir}/head/version.json")
    process_version(version_file)
  end
end
|
|
623
|
+
|
|
624
|
+
# Stages a new version on top of an existing OCFL object.
#
# The caller has already confirmed that the deposit's inventory.json is
# identical to the destination object's inventory.json, so it is safe to
# load it directly and build the next version from it.
def stage_existing_object
  from_file("#{@deposit_dir}/inventory.json")

  # The deposit becomes the next version after the current head.
  current_version = OcflTools::Utils.version_string_to_int(head)
  @new_version = current_version + 1

  # Register the new version in our in-memory representation of the inventory.
  get_version(@new_version)

  # Apply all deposit action files against the freshly created version.
  process_action_files
end
|
|
635
|
+
|
|
636
|
+
# Writes the staged new version to disk: creates the version/content
# directory, copies payload files across, writes the version-level and
# object-root inventory.json (plus sidecar via to_file), validates the
# result, and drops the Namaste conformance file if this is version 1.
#
# Raises if the target content directory already exists or if the
# post-write validation reports any errors.
def process_new_version
  # We just passed OcflVerify to get here, so we're good to go.

  # Create version & content directory.
  target_content = "#{@object_dir}/#{@head}/#{@contentDirectory}"

  # Abort if target_content already exists!
  if Dir.exist? target_content
    @my_results.error('E111', 'process_new_version', "#{target_content} already exists! Unable to process new version.")
    raise "#{target_content} already exists! Unable to process new version."
  end

  # BUG FIX: the previous `unless FileUtils.mkdir_p(...)` guard was dead
  # code - mkdir_p always returns a (truthy) Array and raises
  # SystemCallError itself on failure, so no extra check is needed.
  FileUtils.mkdir_p target_content

  source_content = "#{@deposit_dir}/head/#{@contentDirectory}"

  # Copy [or move? make this behavior configurable] content across.
  # Why move? Well, if you're on the same filesystem root, and you're moving large files,
  # move is *much, much faster* and doesn't run the risk of bitstream corruption as it's
  # just a filesystem metadata operation.
  FileUtils.cp_r "#{source_content}/.", target_content

  # Add inventory.json to version directory.
  to_file("#{@object_dir}/#{@head}")

  # Verify version directory.
  validation = OcflTools::OcflValidator.new(@object_dir)
  validation.verify_directory(@new_version)

  @my_results.add_results(validation.results)
  raise 'Errors detected in validation!' unless @my_results.error_count == 0

  # What OCFL version are we targeting? Pull the default value if not otherwise set.
  @ocfl_version ||= OcflTools.config.ocfl_version

  # If this is version 1, there will not be a Namaste file in object root - add it.
  # Block form guarantees the file handle is closed even if puts raises.
  namaste_path = "#{@object_dir}/0=ocfl_object_#{@ocfl_version}"
  unless File.exist?(namaste_path)
    File.open(namaste_path, 'w') do |namaste|
      namaste.puts "ocfl_object_#{@ocfl_version}"
    end
  end

  # Add new inventory.json to object root directory. This should always be the final step.
  to_file(@object_dir)

  @my_results.ok('0111', 'process_new_version', "object #{id} version #{@new_version} successfully processed.")
end
|
|
684
|
+
end
|
|
685
|
+
end
|