ocfl-tools 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
module OcflTools
  # Exception classes raised while reading and checking an inventory.json.
  # Every class descends from StandardError, so a bare `rescue` catches them,
  # and each accepts an optional message that overrides its default text.
  module Errors
    # Raised when inventory.json cannot be parsed as JSON.
    class Error211 < StandardError
      # @param [String] msg text reported by the exception.
      def initialize(msg = 'inventory.json is not valid JSON.')
        super
      end
    end

    # Raised when a required key is absent from inventory.json.
    class Error216 < StandardError
      # @param [String] msg text reported by the exception.
      def initialize(msg = 'Unable to find required key in inventory.json.')
        super
      end
    end

    # Raised when a required key is present in inventory.json but has no value.
    class Error217 < StandardError
      # @param [String] msg text reported by the exception.
      def initialize(msg = 'Required key in inventory.json must contain a value.')
        super
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
module OcflTools
  # Creates and manipulates an OCFL inventory file: serializes the in-memory
  # object to JSON, writes it (plus its digest sidecar) to disk, and loads an
  # existing inventory.json back into the object.
  class OcflInventory < OcflTools::OcflObject
    # Keys that {#from_file} insists on finding, with a value, in inventory.json.
    REQUIRED_KEYS = %w[id head type digestAlgorithm manifest versions].freeze

    # Serializes all versions of the object to JSON.
    # @return [String] complete OCFL object in pretty-printed JSON, suitable
    #   for writing to a storage layer.
    # @note updates @head to the highest version present, and falls back to the
    #   site default for @type when none has been set.
    def serialize
      set_head_version # We're about to make an OCFL. At least pretend it'll pass validation.

      # If you've not set type by now, set it to the site default.
      @type ||= OcflTools.config.content_type

      output_hash = {
        'id' => @id,
        'head' => @head,
        'type' => @type,
        'digestAlgorithm' => @digestAlgorithm
      }
      # contentDirectory and fixity are optional blocks; emit them only when set.
      output_hash['contentDirectory'] = @contentDirectory unless @contentDirectory.empty?
      output_hash['manifest'] = @manifest
      output_hash['versions'] = @versions
      output_hash['fixity'] = @fixity unless @fixity.empty?

      JSON.pretty_generate(output_hash)
    end

    # Sets @head to highest version found in object.
    # @return [String] current version name.
    def set_head_version
      set_head_from_version(version_id_list.max)
    end

    # Writes inventory file and inventory sidecar digest file to a directory.
    # @param [String] directory resolvable directory to write inventory.json to.
    # @return [Integer] number of bytes written to the sidecar digest file.
    def to_file(directory)
      inventory_path = "#{directory}/inventory.json"
      # File.write closes its handle, and evaluates serialize before opening the
      # file, so a failed serialization cannot leave a truncated inventory.json.
      File.write(inventory_path, serialize)

      checksum = OcflTools::Utils.generate_file_digest(inventory_path, @digestAlgorithm)

      File.write("#{inventory_path}.#{@digestAlgorithm}", "#{checksum} inventory.json")
    end

    # Reads a file in from disk and parses the JSON within.
    # @param [Pathname] file resolvable path to alleged inventory.json.
    # @return [Hash] of JSON keys & values.
    # @raise [OcflTools::Errors::Error211] if the file content is not valid JSON.
    # @raise [RuntimeError] for any other StandardError hit while reading or parsing.
    def read_json(file)
      JSON.parse(File.read(file))
    rescue JSON::ParserError
      raise OcflTools::Errors::Error211
    rescue StandardError
      raise "An unknown error occured reading file #{file}" # encapsulate any other parse or file I/O issue
    end

    # Reads in a file, parses the JSON and ingests it into an {OcflTools::OcflInventory}
    # @param [Pathname] file fully-qualified filepath to a valid OCFL inventory.json.
    # @return [self]
    # @raise [OcflTools::Errors::Error216] if a required key is missing (including
    #   when the top-level JSON value is not an object).
    # @raise [OcflTools::Errors::Error217] if a required key is null or empty.
    def from_file(file)
      import_hash = read_json(file)

      # REQUIRED keys; raise exception if not found.
      REQUIRED_KEYS.each do |key|
        unless import_hash.respond_to?(:key?) && import_hash.key?(key)
          raise OcflTools::Errors::Error216, "Required key #{key} not found"
        end

        # A JSON null or a number has no #empty?; treat null as "no value" and
        # let any other non-container value through rather than crashing.
        value = import_hash[key]
        if value.nil? || (value.respond_to?(:empty?) && value.empty?)
          raise OcflTools::Errors::Error217, "Required key #{key} must contain a value"
        end
      end

      @id = import_hash['id']
      @head = import_hash['head']
      @type = import_hash['type']
      @digestAlgorithm = import_hash['digestAlgorithm']
      @manifest = import_hash['manifest']
      @versions = import_hash['versions']
      # Optional keys - contentDirectory and fixity block.
      @fixity = import_hash['fixity'] if import_hash.key?('fixity')
      @contentDirectory = import_hash['contentDirectory'] if import_hash.key?('contentDirectory')

      self
    end
  end
end
@@ -0,0 +1,425 @@
1
+ # frozen_string_literal: true
2
+
3
module OcflTools
  # Class that represents the data structures used by an OCFL inventory file.
  class OcflObject
    # @return [Hash] manifest block of the OCFL object.
    attr_accessor :manifest

    # @return [Hash] versions block of the OCFL object.
    attr_accessor :versions

    # @return [Hash] fixity block of the OCFL object.
    attr_accessor :fixity

    # @return [String] id the unique identifier of the OCFL object, as defined by the local repository system.
    attr_accessor :id

    # @return [String] algorithm used by the OCFL object to generate digests for file manifests and versions.
    attr_accessor :digestAlgorithm

    # @return [String] the most recent version of the OCFL object, expressed as a string that conforms to the format defined in version_format.
    attr_accessor :head

    # @return [String] the version of the OCFL spec to which this object conforms, expressed as a URL, as required by the OCFL specification.
    attr_accessor :type

    # @return [String] the name of the directory, inside each version directory, that the OCFL object should use as the base directory for files.
    attr_accessor :contentDirectory

    # Builds an empty object; digest algorithm and content directory come from
    # the site configuration, everything else starts out unset or empty.
    def initialize
      # Parameters that must be serialized into JSON
      @id = nil
      @head = nil
      @type = nil # OcflTools.config.content_type
      @digestAlgorithm = OcflTools.config.digest_algorithm # sha512 is recommended, Stanford uses sha256.
      @contentDirectory = OcflTools.config.content_directory # default is 'content', Stanford uses 'data'
      @manifest = {}
      @versions = {} # A hash of Version hashes.
      @fixity = {} # Optional. Same format as Manifest.
    end

    # Sets @head in current string format, when given integer.
    # @param [Integer] version to set head to.
    # @return [String] the new value of @head.
    def set_head_from_version(version)
      @head = OcflTools::Utils.version_int_to_string(version)
    end

    # Sets the message field for a given version.
    # @param [Integer] version of OCFL object to set message for.
    # @param [String] message to set for given version.
    # @note will raise an exception if you attempt to query a non-existent version.
    def set_version_message(version, message)
      existing_version(version)['message'] = message
    end

    # Returns the message field for a given version.
    # @param [Integer] version of OCFL object to get the message for.
    # @return [String] message set for the given version, if any.
    # @note will raise an exception if you attempt to query a non-existent version.
    def get_version_message(version)
      existing_version(version)['message']
    end

    # Sets the created field for a given version.
    # @param [Integer] version of OCFL object to set value for.
    # @param [String] created value to set for given version.
    # @note will raise an exception if you attempt to query a non-existent version.
    def set_version_created(version, created)
      existing_version(version)['created'] = created
    end

    # Returns the created field for a given version.
    # @param [Integer] version of OCFL object to get value for.
    # @return [String] created value set for the given version, if any.
    # @note will raise an exception if you attempt to query a non-existent version.
    def get_version_created(version)
      existing_version(version)['created']
    end

    # Sets the user Hash for a given version. Expects a complete User hash (with sub-keys of name & address).
    # @param [Integer] version of OCFL object to set the user block for.
    # @param [Hash] user block to set for this version. Must be a hash with two keys 'name' and 'address'.
    # @note will raise an exception if you attempt to query a nonexistent version.
    def set_version_user(version, user)
      existing_version(version)['user'] = user
    end

    # Gets the user Hash for a given version.
    # @param [Integer] version of OCFL object to retrieve user block for.
    # @return [Hash] user block for this version, a hash consisting of two keys, 'name' and 'address'.
    # @note will raise an exception if you attempt to query a nonexistent version.
    def get_version_user(version)
      existing_version(version)['user']
    end

    # Gets an array of integers comprising all versions of this OCFL object. It is not guaranteed to be in numeric order.
    # @return [Array<Integer>] versions that exist in the object.
    def version_id_list
      @versions.keys.map { |key| OcflTools::Utils.version_string_to_int(key) }
    end

    # Gets the state block of a given version, comprising of digest keys and an array of filenames associated with those digests.
    # @param [Integer] version of OCFL object to retrieve version state block of.
    # @return [Hash] of digests and array of pathnames associated with this version.
    # @note Creates new version and copies previous versions' state block over if requested version does not yet exist.
    def get_state(version)
      get_version(version)['state']
    end

    # Sets the state block for a given version when provided with a hash of digest keys and an array of associated filenames.
    # @param [Integer] version of object to set state for.
    # @param [Hash] hash of digests (keys) and an array of pathnames (values) associated with those digests.
    # @return [Hash] the hash that was stored.
    # @note It is preferred to update version state via add/update/delete/copy/move file operations.
    def set_state(version, hash)
      # SAN Check needed here to make sure passed Hash has all expected keys.
      @versions[version_key(version)]['state'] = hash
    end

    # Gets a hash of all logical files and their associated physical filepaths with the given version.
    # @param [Integer] version from which to generate file list.
    # @return [Hash] of files, with logical file as key, physical location within object dir as value.
    def get_files(version)
      get_state(version).each_with_object({}) do |(digest, filepaths), files|
        filepaths.each do |logical_filepath|
          # The manifest holds an [Array] of physical paths per digest, but
          # they all carry the same bitstream, so the first one is enough.
          files[logical_filepath] = @manifest[digest].first
        end
      end
    end

    # Gets all files for the current (highest) version of the OCFL object. Represents the state of the object at 'head',
    # with the logical files that consist of the most recent version and their physical representations on disk, relative
    # to the object's root directory.
    # @return [Hash] of files from most recent version, with logical file as key, associated physical filepath as value.
    def get_current_files
      get_files(OcflTools::Utils.version_string_to_int(@head))
    end

    # Adds a file to a version.
    # @param [Pathname] file is the logical filename within the object.
    # @param [String] digest of filename, presumably computed with the {digestAlgorithm} for the object.
    # @param [Integer] version to add file to.
    # @return [Hash] state block reflecting the version after the changes.
    # @note will raise an error if an attempt is made to add a file to a prior (non-head) version. Will also raise an error if the requested file already exists in this version with a different digest: use {update_file} instead.
    def add_file(file, digest, version)
      # We use get_state here instead of asking @versions directly
      # because get_state will create version hash if it doesn't already exist.
      state = get_state(version)
      require_head_version(version)

      # Refuse before touching the manifest, so a rejected add leaves no trace.
      # We don't do implicit / soft adds: be explicit and call update_file.
      if state.any? { |other_digest, files| other_digest != digest && files.include?(file) }
        raise 'File already exists with different digest in this version! Consider update instead.'
      end

      # if the key is not in the manifest, assume that we meant to add it.
      update_manifest(file, digest, version) unless @manifest.key?(digest)

      # uniq: adding the same file under the same digest twice is a no-op.
      state[digest] = (state.fetch(digest, []) + [file]).uniq

      get_state(version)
    end

    # Updates an existing file with a new bitstream and digest.
    # @param [String] file filepath to update.
    # @param [String] digest of updated file.
    # @param [Integer] version of object to update.
    # @return [Hash] state block of the version after the update.
    # @note this method explicitly deletes the prior file if found, and re-creates it with a new digest via the {add_file} method.
    def update_file(file, digest, version)
      # Same filename, different digest: do a Delete, then an Add.
      delete_file(file, version) if get_files(version).key?(file)
      add_file(file, digest, version)
    end

    # Add a file and digest to the manifest at the given version.
    # @param [Pathname] file filepath to add to the manifest.
    # @param [String] digest of file being added to the manifest.
    # @param [Integer] version version of the OCFL object that the file is being added to.
    # @return [Array<String>] physical filepaths now recorded for the digest.
    # @note internal API.
    def update_manifest(file, digest, version)
      # We only ever add to the manifest.
      physical_filepath = "#{version_key(version)}/#{@contentDirectory}/#{file}"

      # Append to the digest's existing array, or start one with this path.
      (@manifest[digest] ||= []) << physical_filepath
      @manifest[digest]
    end

    # Given a digest, fixityAlgo and fixityDigest, add to fixity block.
    # @param [String] digest value from Manifest for the file we are adding fixity info for.
    # @param [String] fixityAlgorithm a valid fixity algorithm for this site (see Config.fixity_algorithms).
    # @param [String] fixityDigest the digest value of the file, using the provided fixityAlgorithm.
    # @return [Hash] fixity block for the object.
    # @note raises an error if the digest is not present in the manifest.
    def update_fixity(digest, fixityAlgorithm, fixityDigest)
      raise "Unable to find digest #{digest} in manifest!" unless @manifest.key?(digest)

      # Construct the nested hash and array, if necessary.
      algorithm_block = (@fixity[fixityAlgorithm] ||= {})
      recorded_paths = (algorithm_block[fixityDigest] ||= [])

      # Append each filepath to the fixityDigest, if it's not already there.
      @manifest[digest].each do |filepath|
        recorded_paths << filepath unless recorded_paths.include?(filepath)
      end
      @fixity
    end

    # Given a filepath, deletes that file from the given version. If multiple copies of the same file
    # (as identified by a common digest) exist in the version, only the requested filepath is removed.
    # @param [Pathname] file logical path of file to be deleted.
    # @param [Integer] version version of object to delete file from.
    # @return [Hash] state of version after delete has completed.
    # @note raises an error if the version is not the head version, or if the file is not in it.
    def delete_file(file, version)
      state = get_state(version) # Creates version & copies state from prior version if doesn't exist.
      require_head_version(version)

      digest = get_digest(file, version) # raises if the file is not in this version.
      remaining = state[digest]
      remaining.delete(file)
      # Drop the digest altogether once no logical file refers to it.
      state.delete(digest) if remaining.empty?

      # put results back into State.
      set_state(version, state)
    end

    # Copies a file within the same version. If the destination file already exists with a different digest,
    # it is overwritten with the digest of the source file.
    # @param [Filepath] source_file filepath of source file.
    # @param [Filepath] destination_file filepath of destination file.
    # @param [Integer] version version of OCFL object.
    # @return [Hash] state block of version after file copy has completed.
    # @note Raises an error if source_file does not exist in this version, or if the version is not the head version. Copying a file onto itself leaves the state untouched.
    def copy_file(source_file, destination_file, version)
      state = get_state(version)
      require_head_version(version)

      # Resolve the source first: if it is missing we must fail before the
      # destination is touched.
      digest = get_digest(source_file, version)
      return state if source_file == destination_file

      # If destination file already exists, overwrite it.
      if state.any? { |_digest, files| files.include?(destination_file) }
        delete_file(destination_file, version)
      end

      # Deliberately not add_file: the bitstream is already in the manifest,
      # so only the state gains a new logical path.
      state[digest] = (state[digest] + [destination_file]).uniq
      get_state(version)
    end

    # Moves (renames) a file from one location to another within the same version.
    # @param [Pathname] old_file filepath to move.
    # @param [Pathname] new_file new filepath.
    # @param [Integer] version version of OCFL object.
    # @return [Hash] state block of version after file move has completed.
    # @note This is functionally a {copy_file} followed by a {delete_file}. Will raise an error if the source file does not exist in this version. Moving a file onto itself leaves the state untouched.
    def move_file(old_file, new_file, version)
      state = copy_file(old_file, new_file, version)
      # Deleting the "old" path after a self-move would remove the file itself.
      return state if old_file == new_file

      delete_file(old_file, version)
    end

    # When given a file path and version, return the associated digest from version state.
    # @param [Pathname] file filepath of file to return digest for.
    # @param [Integer] version version of OCFL object to search for the requested file.
    # @return [String] digest of requested file.
    # @note Will raise an exception if requested filepath is not in given version.
    def get_digest(file, version)
      # Make a hash with each individual file as a key, with the appropriate digest as value.
      digest_by_file = get_state(version).each_with_object({}) do |(digest, files), index|
        files.each { |logical_filepath| index[logical_filepath] = digest }
      end

      digest_by_file.fetch(file) do
        raise "Get_digest can't find requested file in given version!"
      end
    end

    # Gets the existing version hash for the requested version, or else creates
    # and populates a new, empty version hash.
    # @param [Integer] version
    # @return [Hash] version block, if it exists, or creates new with prior version state in it.
    # @note If a (n-1) version exists in the object, and the requested version does not yet exist, this method will copy that version's state block into the new version. Raises an error unless version is greater than zero.
    def get_version(version)
      raise 'OCFL object version cannot be zero!' unless version > 0

      key = version_key(version)
      return @versions[key] if @versions.key?(key)

      # Otherwise, construct a new Version [Hash] and return that.
      new_version = create_version_hash

      # If version -1 exists, copy prior version state over.
      previous_key = version_key(version - 1)
      if @versions.key?(previous_key)
        new_version['state'] = OcflTools::Utils.deep_copy(@versions[previous_key]['state'])
      end

      @versions[key] = new_version
    end

    # Returns a version hash with the correct keys created, ready for content to be added.
    # @return [Hash] empty version Hash with 'created', 'message', 'user' and 'state' keys.
    # @note internal API
    def create_version_hash
      {
        'created' => '',
        'message' => '',
        'user' => { 'name' => '', 'address' => '' },
        'state' => {}
      }
    end

    # When given a correctly-constructed hash, create a new OCFL version. See {create_version_hash} for more context.
    # @param [Integer] version create a new OCFL version block with this version number.
    # @param [Hash] hash use this hash for the content of the new OCFL version block.
    # @return [Hash] the hash that was stored.
    # @note raises an error if the hash lacks any of the created, message, user or state keys.
    def set_version(version, hash)
      # SAN Check to make sure passed Hash has all expected keys.
      %w[created message user state].each do |key|
        raise "version #{version} hash block is missing required #{key} key" unless hash.key?(key)
      end
      @versions[version_key(version)] = hash
    end

    private

    # Converts an integer version into the string used as a key in @versions.
    def version_key(version)
      OcflTools::Utils.version_int_to_string(version)
    end

    # Returns the stored version block, raising if the version has not been created.
    def existing_version(version)
      @versions.fetch(version_key(version)) do
        raise "Version #{version} does not yet exist!"
      end
    end

    # Raises unless the given version is the highest version in the object.
    def require_head_version(version)
      head_version = version_id_list.max
      return if version == head_version

      raise "Can't edit prior versions! Only version #{head_version} can be modified now."
    end
  end
end