ocfl-tools 0.9.14

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,23 @@
1
module OcflTools
  # Exception classes used to report problems with an inventory.json file.
  # Each class carries a default message that callers may override by
  # passing their own message when raising.
  module Errors
    # Signals that inventory.json could not be parsed as JSON.
    class Error211 < StandardError
      # @param [String] msg text describing the failure.
      def initialize(msg = "inventory.json is not valid JSON.")
        super(msg)
      end
    end

    # Signals that a required key is absent from inventory.json.
    class Error216 < StandardError
      # @param [String] msg text describing the failure.
      def initialize(msg = "Unable to find required key in inventory.json.")
        super(msg)
      end
    end

    # Signals that a required key is present in inventory.json but has no value.
    class Error217 < StandardError
      # @param [String] msg text describing the failure.
      def initialize(msg = "Required key in inventory.json must contain a value.")
        super(msg)
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
module OcflTools
  # Creates and manipulates an OCFL inventory file: serializes the in-memory
  # object to JSON, writes it (plus its digest sidecar) to disk, and loads an
  # existing inventory.json back into the object.
  class OcflInventory < OcflTools::OcflObject
    # Serializes all versions of the object to JSON.
    # Sets {#head} to the highest known version and, if no type has been set,
    # falls back to the site default from OcflTools.config.content_type.
    # The optional 'contentDirectory' and 'fixity' keys are omitted when they
    # are nil or empty.
    # @return [String] complete OCFL object as pretty-printed JSON, suitable
    #   for writing to a storage layer.
    def serialize
      set_head_version # We're about to make an OCFL. At least pretend it'll pass validation.

      # If you've not set type by now, set it to the site default.
      @type ||= OcflTools.config.content_type

      output_hash = {
        'id' => @id,
        'head' => @head,
        'type' => @type,
        'digestAlgorithm' => @digestAlgorithm
      }
      # Optional keys may be nil after importing a JSON null; treat that as "absent".
      unless @contentDirectory.nil? || @contentDirectory.empty?
        output_hash['contentDirectory'] = @contentDirectory
      end
      output_hash['manifest'] = @manifest
      output_hash['versions'] = @versions
      output_hash['fixity'] = @fixity unless @fixity.nil? || @fixity.empty?

      JSON.pretty_generate(output_hash)
    end

    # Sets @head to the highest version found in the object.
    # @return [String] current version name.
    # @note NOTE(review): when the object has no versions, nil is handed to
    #   set_head_from_version; confirm how the Utils conversion treats that.
    def set_head_version
      set_head_from_version(version_id_list.max)
    end

    # Writes the inventory file and its sidecar digest file to a directory.
    # The JSON is generated before any file is opened, so a serialization
    # failure cannot truncate an inventory.json that already exists, and both
    # files are closed as soon as they have been written.
    # @param [String] directory resolvable directory to write inventory.json to.
    # @return [Integer] number of bytes written to the sidecar digest file.
    def to_file(directory)
      payload = serialize
      inventory_path = "#{directory}/inventory.json"
      File.write(inventory_path, payload)

      checksum = OcflTools::Utils.generate_file_digest(inventory_path, @digestAlgorithm)

      File.write("#{inventory_path}.#{@digestAlgorithm}", "#{checksum} inventory.json")
    end

    # Reads a file in from disk and parses the JSON within.
    # @param [Pathname] file resolvable path to alleged inventory.json.
    # @return [Hash] of JSON keys & values.
    # @raise [OcflTools::Errors::Error211] if the file content is not valid JSON.
    # @raise [RuntimeError] for any other failure while reading the file; the
    #   underlying exception remains available through Exception#cause.
    def read_json(file)
      JSON.parse(File.read(file))
    rescue JSON::ParserError
      raise OcflTools::Errors::Error211
    rescue StandardError
      # Message text is kept as-is (including spelling) for callers that match on it.
      raise "An unknown error occured reading file #{file}"
    end

    # Reads in a file, parses the JSON and ingests it into an {OcflTools::OcflInventory}.
    # @param [Pathname] file fully-qualified filepath to a valid OCFL inventory.json.
    # @return [self]
    # @raise [OcflTools::Errors::Error216] if a required key is missing (this
    #   includes the case where the top-level JSON value is not an object).
    # @raise [OcflTools::Errors::Error217] if a required key is null or empty.
    def from_file(file)
      import_hash = read_json(file)

      # REQUIRED keys; raise exception if not found or without a value.
      %w[id head type digestAlgorithm manifest versions].each do |key|
        unless import_hash.is_a?(Hash) && import_hash.key?(key)
          raise OcflTools::Errors::Error216, "Required key #{key} not found"
        end

        value = import_hash[key]
        # JSON null and scalars do not respond to #empty?; only null counts as "no value".
        if value.nil? || (value.respond_to?(:empty?) && value.empty?)
          raise OcflTools::Errors::Error217, "Required key #{key} must contain a value"
        end
      end

      @id = import_hash['id']
      @head = import_hash['head']
      @type = import_hash['type']
      @digestAlgorithm = import_hash['digestAlgorithm']
      @manifest = import_hash['manifest']
      @versions = import_hash['versions']
      # Optional keys - contentDirectory and fixity block.
      @fixity = import_hash['fixity'] if import_hash.key?('fixity')
      @contentDirectory = import_hash['contentDirectory'] if import_hash.key?('contentDirectory')

      self
    end
  end
end
@@ -0,0 +1,425 @@
1
+ # frozen_string_literal: true
2
+
3
module OcflTools
  # Class that represents the data structures used by an OCFL inventory file:
  # the manifest, the per-version blocks (with their state), and the optional
  # fixity block, plus the file-level operations that keep them in step.
  class OcflObject
    # @return [Hash] manifest block of the OCFL object.
    attr_accessor :manifest

    # @return [Hash] versions block of the OCFL object.
    attr_accessor :versions

    # @return [Hash] fixity block of the OCFL object.
    attr_accessor :fixity

    # @return [String] id the unique identifier of the OCFL object, as defined by the local repository system.
    attr_accessor :id

    # @return [String] algorithm used by the OCFL object to generate digests for file manifests and versions.
    attr_accessor :digestAlgorithm

    # @return [String] the most recent version of the OCFL object, expressed as a string that conforms to the format defined in version_format.
    attr_accessor :head

    # @return [String] the version of the OCFL spec to which this object conforms, expressed as a URL, as required by the OCFL specification.
    attr_accessor :type

    # @return [String] the name of the directory, inside each version directory, that the OCFL object should use as the base directory for files.
    attr_accessor :contentDirectory

    # Builds an empty object; digest algorithm and content directory are
    # taken from the site configuration (OcflTools.config).
    def initialize
      # Parameters that must be serialized into JSON
      @id = nil
      @head = nil
      @type = nil # OcflTools.config.content_type
      @digestAlgorithm = OcflTools.config.digest_algorithm # sha512 is recommended, Stanford uses sha256.
      @contentDirectory = OcflTools.config.content_directory # default is 'content', Stanford uses 'data'
      @manifest = {}
      @versions = {} # A hash of Version hashes.
      @fixity = {} # Optional. Same format as Manifest.
    end

    # Sets @head in current string format, when given integer.
    # @param [Integer] version to set head to.
    # @return [String] value of most recent version.
    def set_head_from_version(version)
      @head = version_key(version)
    end

    # Sets the message field for a given version.
    # @param [Integer] version of OCFL object to set message for.
    # @param [String] message to set for given version.
    # @note will raise an exception if you attempt to query a non-existent version.
    def set_version_message(version, message)
      existing_version(version)['message'] = message
    end

    # Returns the message field for a given version.
    # @param [Integer] version of OCFL object to get the message for.
    # @return [String] message set for the given version, if any.
    # @note will raise an exception if you attempt to query a non-existent version.
    def get_version_message(version)
      existing_version(version)['message']
    end

    # Sets the created field for a given version.
    # @param [Integer] version of OCFL object to set value for.
    # @param [String] created value to set for given version.
    # @note will raise an exception if you attempt to query a non-existent version.
    def set_version_created(version, created)
      existing_version(version)['created'] = created
    end

    # Returns the created field for a given version.
    # @param [Integer] version of OCFL object to get value for.
    # @return [String] created value set for the given version, if any.
    # @note will raise an exception if you attempt to query a non-existent version.
    def get_version_created(version)
      existing_version(version)['created']
    end

    # Sets the user Hash for a given version. Expects a complete User hash (with sub-keys of name & address).
    # @param [Integer] version of OCFL object to set the user block for.
    # @param [Hash] user block to set for this version. Must be a hash with two keys 'name' and 'address'.
    # @note will raise an exception if you attempt to query a nonexistent version.
    def set_version_user(version, user)
      existing_version(version)['user'] = user
    end

    # Gets the user Hash for a given version.
    # @param [Integer] version of OCFL object to retrieve user block for.
    # @return [Hash] user block for this version, a hash consisting of two keys, 'name' and 'address'.
    # @note will raise an exception if you attempt to query a nonexistent version.
    def get_version_user(version)
      existing_version(version)['user']
    end

    # Gets an array of integers comprising all versions of this OCFL object. It is not guaranteed to be in numeric order.
    # @return [Array<Integer>] versions that exist in the object.
    def version_id_list
      @versions.keys.map { |key| OcflTools::Utils.version_string_to_int(key) }
    end

    # Gets the state block of a given version, comprising digest keys and an array of filenames associated with those digests.
    # @param [Integer] version of OCFL object to retrieve version state block of.
    # @return [Hash] of digests and array of pathnames associated with this version.
    # @note Creates new version and copies previous versions' state block over if requested version does not yet exist.
    def get_state(version)
      get_version(version)['state']
    end

    # Sets the state block for a given version when provided with a hash of digest keys and an array of associated filenames.
    # @param [Integer] version of object to set state for.
    # @param [Hash] hash of digests (keys) and an array of pathnames (values) associated with those digests.
    # @return [Hash] the state block that was stored.
    # @note It is preferred to update version state via add/update/delete/copy/move file operations.
    # @note will raise an exception if the version does not exist yet.
    def set_state(version, hash)
      existing_version(version)['state'] = hash
    end

    # Gets a hash of all logical files and their associated physical filepaths with the given version.
    # @param [Integer] version from which to generate file list.
    # @return [Hash] of files, with logical file as key, physical location within object dir as value.
    # @note will raise an exception if a digest in the version state has no manifest entry.
    def get_files(version)
      get_state(version).each_with_object({}) do |(digest, filepaths), my_files|
        filepaths.each do |logical_filepath|
          # Look up this file via digest in @manifest.
          physical_filepaths = @manifest[digest]
          raise "Unable to find digest #{digest} in manifest!" if physical_filepaths.nil?

          # The manifest holds an [Array] of paths; the first one is used as the representative.
          my_files[logical_filepath] = physical_filepaths[0]
        end
      end
    end

    # Gets all files for the current (highest) version of the OCFL object. Represents the state of the object at 'head',
    # with the logical files that make up the most recent version and their physical representations on disk, relative
    # to the object's root directory.
    # @return [Hash] of files from most recent version, with logical file as key, associated physical filepath as value.
    def get_current_files
      get_files(OcflTools::Utils.version_string_to_int(@head))
    end

    # Adds a file to a version.
    # @param [Pathname] file is the logical filename within the object.
    # @param [String] digest of filename, presumably computed with the {digestAlgorithm} for the object.
    # @param [Integer] version to add file to.
    # @return [Hash] state block reflecting the version after the changes.
    # @note will raise an error if an attempt is made to add a file to a prior (non-head) version. Will also raise an error if the requested file already exists in this version with a different digest: use {update_file} instead.
    def add_file(file, digest, version)
      # We use get_state here instead of asking @versions directly
      # because get_state will create version hash if it doesn't already exist.
      my_state = get_state(version)
      ensure_head_version(version)

      if my_state.key?(digest)
        # Digest already in this version: attach the file to it (without duplicates).
        update_manifest(file, digest, version) unless @manifest.key?(digest)
        my_state[digest] = (my_state[digest] | [file])
        return get_state(version)
      end

      # We don't do implicit / soft adds. This check runs BEFORE the manifest
      # is touched so a rejected add leaves the object unchanged.
      if state_contains?(my_state, file)
        raise 'File already exists with different digest in this version! Consider update instead.'
      end

      # If the digest is not in the manifest, assume that we meant to add it.
      update_manifest(file, digest, version) unless @manifest.key?(digest)
      my_state[digest] = [file]

      get_state(version)
    end

    # Updates an existing file with a new bitstream and digest.
    # @param [String] file filepath to update.
    # @param [String] digest of updated file.
    # @param [Integer] version of object to update.
    # @return [Hash] state block reflecting the version after the changes.
    # @note this method explicitly deletes the prior file if found, and re-creates it with a new digest via the {add_file} method.
    def update_file(file, digest, version)
      delete_file(file, version) if state_contains?(get_state(version), file)
      add_file(file, digest, version)
    end

    # Add a file and digest to the manifest at the given version.
    # @param [Pathname] file filepath to add to the manifest.
    # @param [String] digest of file being added to the manifest.
    # @param [Integer] version version of the OCFL object that the file is being added to.
    # @return [Array<String>] physical filepaths recorded for the digest.
    # @note internal API.
    def update_manifest(file, digest, version)
      # We only ever add to the manifest.
      physical_filepath = "#{version_key(version)}/#{@contentDirectory}/#{file}"

      # Append to the existing array for this bitstream, or start a new one.
      (@manifest[digest] ||= []) << physical_filepath
      @manifest[digest]
    end

    # Given a digest, fixityAlgo and fixityDigest, add to fixity block.
    # @param [String] digest value from Manifest for the file we are adding fixity info for.
    # @param [String] fixityAlgorithm a valid fixity algorithm for this site (see Config.fixity_algorithms).
    # @param [String] fixityDigest the digest value of the file, using the provided fixityAlgorithm.
    # @return [Hash] fixity block for the object.
    # @note will raise an exception if the digest is not present in the manifest.
    def update_fixity(digest, fixityAlgorithm, fixityDigest)
      raise "Unable to find digest #{digest} in manifest!" unless @manifest.key?(digest)

      # Construct the nested hash / array, if necessary.
      algorithm_block = (@fixity[fixityAlgorithm] ||= {})
      recorded_paths = (algorithm_block[fixityDigest] ||= [])

      # Append each manifest filepath to the fixityDigest, if it's not already there.
      @manifest[digest].each do |filepath|
        recorded_paths << filepath unless recorded_paths.include?(filepath)
      end
      @fixity
    end

    # Given a filepath, deletes that file from the given version. If multiple copies of the same file
    # (as identified by a common digest) exist in the version, only the requested filepath is removed.
    # @param [Pathname] file logical path of file to be deleted.
    # @param [Integer] version version of object to delete file from.
    # @return [Hash] state of version after delete has completed.
    # @note will raise an error if the version is not the head version, or if the file is not in it.
    def delete_file(file, version)
      my_state = get_state(version) # Creates version & copies state from prior version if doesn't exist.
      ensure_head_version(version)

      my_digest = get_digest(file, version) # raises if the file is not in this version.
      remaining = my_state[my_digest] # [Array] of files that have this digest in this version.
      remaining.delete(file)
      # Drop the digest entirely once its last file is gone.
      my_state.delete(my_digest) if remaining.empty?

      # put results back into State.
      set_state(version, my_state)
    end

    # Copies a file within the same version. If the destination file already exists with a different digest,
    # it is overwritten with the digest of the source file. Copying a file onto itself is a no-op.
    # @param [Filepath] source_file filepath of source file.
    # @param [Filepath] destination_file filepath of destination file.
    # @param [Integer] version version of OCFL object.
    # @return [Hash] state block of version after file copy has completed.
    # @note Raises an error if source_file does not exist in this version, or if the version is not the head version.
    def copy_file(source_file, destination_file, version)
      # Resolve the source first so a missing source fails before anything is modified.
      digest = get_digest(source_file, version)
      ensure_head_version(version)
      return get_state(version) if source_file == destination_file

      # If destination file already exists, overwrite it.
      delete_file(destination_file, version) if state_contains?(get_state(version), destination_file)

      # Should NOT call add_file, as add_file updates the manifest.
      # Instead JUST update current state with the new filepath.
      my_state = get_state(version)
      my_state[digest] = (my_state[digest] | [destination_file])
      get_state(version)
    end

    # Moves (renames) a file from one location to another within the same version.
    # @param [Pathname] old_file filepath to move.
    # @param [Pathname] new_file new filepath.
    # @param [Integer] version version of OCFL object.
    # @return [Hash] state block of version after file move has completed.
    # @note This is functionally a {copy_file} followed by a {delete_file}. Will raise an error if the source file does not exist in this version.
    def move_file(old_file, new_file, version)
      copy_file(old_file, new_file, version)
      # Renaming a file to itself must not delete it.
      return get_state(version) if old_file == new_file

      delete_file(old_file, version)
    end

    # When given a file path and version, return the associated digest from version state.
    # @param [Pathname] file filepath of file to return digest for.
    # @param [Integer] version version of OCFL object to search for the requested file.
    # @return [String] digest of requested file.
    # @note Will raise an exception if requested filepath is not in given version.
    def get_digest(file, version)
      match = get_state(version).find { |_digest, files| files.include?(file) }
      raise "Get_digest can't find requested file in given version!" unless match

      match.first
    end

    # Gets the existing version hash for the requested version, or else creates
    # and populates a new, empty version hash.
    # @param [Integer] version
    # @return [Hash] version block, if it exists, or creates new with prior version state in it.
    # @note If a (n-1) version exists in the object, and the requested version does not yet exist, this method will copy that version's state block into the new version.
    # @note will raise an exception unless version is greater than zero.
    def get_version(version)
      raise 'OCFL object version cannot be zero!' unless version > 0

      key = version_key(version)
      return @versions[key] if @versions.key?(key)

      # Otherwise, construct a new Version [Hash] and return that.
      new_version = create_version_hash
      prior_key = version_key(version - 1)
      if @versions.key?(prior_key)
        # Deep copy so edits to the new version never leak into the prior one.
        new_version['state'] = OcflTools::Utils.deep_copy(@versions[prior_key]['state'])
      end

      @versions[key] = new_version
    end

    # Returns a version hash with the correct keys created, ready for content to be added.
    # @return [Hash] empty version Hash with 'created', 'message', 'user' and 'state' keys.
    # @note internal API
    def create_version_hash
      {
        'created' => '',
        'message' => '',
        'user' => { 'name' => '', 'address' => '' },
        'state' => {}
      }
    end

    # When given a correctly-constructed hash, create a new OCFL version. See {create_version_hash} for more context.
    # @param [Integer] version create a new OCFL version block with this version number.
    # @param [Hash] hash use this hash for the content of the new OCFL version block.
    # @return [Hash] the version block that was stored.
    # @note will raise an exception if the hash lacks any of the 'created', 'message', 'user' or 'state' keys.
    def set_version(version, hash)
      %w[created message user state].each do |key|
        raise "version #{version} hash block is missing required #{key} key" unless hash.key?(key)
      end
      @versions[version_key(version)] = hash
    end

    private

    # Converts an integer version into the string key used in @versions.
    def version_key(version)
      OcflTools::Utils.version_int_to_string(version)
    end

    # Returns the version block for an already-existing version, or raises.
    def existing_version(version)
      @versions.fetch(version_key(version)) { raise "Version #{version} does not yet exist!" }
    end

    # Raises unless the given version is the highest version in the object.
    def ensure_head_version(version)
      head_version = version_id_list.max
      return if version == head_version

      raise "Can't edit prior versions! Only version #{head_version} can be modified now."
    end

    # True when the logical file appears under any digest of the state block.
    def state_contains?(state, file)
      state.each_value.any? { |files| files.include?(file) }
    end
  end
end