ocfl-tools 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
# frozen_string_literal: true

# Build tasks for the gem: Bundler's gem tasks, an RSpec `spec` task and the
# RuboCop tasks. `rake` runs the specs; `rake all` runs specs, then RuboCop.
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
require 'rubocop/rake_task'

RSpec::Core::RakeTask.new(:spec)
RuboCop::RakeTask.new

# Default task: specs only.
task default: [:spec]

# Full check: specs followed by linting.
task all: %i[spec rubocop]
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.9.14
@@ -0,0 +1,56 @@
1
# A simple example to demonstrate the relationship between logical content in an OCFL object
# and the fully-resolved path to those binaries on the local storage system.
#
# Options:
#   -d, --dir DIRECTORY    (required) a directory containing an OCFL object
#   -v, --version VERSION  (optional) integer version to report; defaults to the object's head

require 'ocfl-tools'
require 'optparse'

options = {}

# The parser is held in `parser` so the block parameter `opts` does not shadow an outer local.
parser = OptionParser.new do |opts|
  opts.on('-d DIRECTORY', '--dir DIRECTORY', 'A directory containing an OCFL object') do |dir|
    raise "#{dir} is not a valid directory path." unless Dir.exist?(dir)

    options[:object_root] = dir
  end

  opts.on('-v VERSION', '--version VERSION', 'An optional version number') do |ver|
    # Integer() rejects non-numeric input instead of silently turning it into
    # version 0 (which String#to_i would do). Base 10 keeps zero-padded input
    # such as "0010" decimal rather than octal.
    begin
      options[:version] = Integer(ver, 10)
    rescue ArgumentError
      raise OptionParser::InvalidArgument, ver
    end
  end
end

parser.parse(ARGV)

# The object root is mandatory; name the option so the error is actionable.
raise OptionParser::MissingArgument, '-d DIRECTORY' if options[:object_root].nil?

object_root = options[:object_root]

# The inventory we're working on might not conform to the site default version format.
# Inspect the object root to determine what version format we should use, and use it.
OcflTools.config.version_format = OcflTools::Utils::Files.get_version_format(object_root)

# Get the latest inventory file from the object root.
inventory_file = OcflTools::Utils::Files.get_latest_inventory(object_root)

# Create an ocfl object from that inventory.
ocfl_object = OcflTools::OcflInventory.new.from_file(inventory_file)

# If we've been asked for a specific version, use it; otherwise report on the head version.
version = options[:version] || OcflTools::Utils.version_string_to_int(ocfl_object.head)

# Prepend the object root path to each content_path to get fully-resolvable files.
# A new hash is built so the hash handed back by get_files is not modified while
# it is being iterated.
local_files = {}
ocfl_object.get_files(version).each do |logical_path, content_path|
  local_files[logical_path] = File.join(object_root, content_path)
end

# Output a pretty result, for demo purposes.
local_files.each do |logical_path, content_path|
  puts " #{logical_path} => #{content_path}"
end
@@ -0,0 +1,23 @@
1
# Validates the OCFL object found in a directory and prints the validation results.
#
# Usage: ruby ./validate_object.rb -d /path/to/directory/to/check
#        ruby ./validate_object.rb --dir /path/to/directory/to/check
require 'ocfl-tools'
require 'optparse'

options = {}

# The parser is held in `parser` so the block parameter `opts` does not shadow an outer local.
parser = OptionParser.new do |opts|
  opts.on('-d DIRECTORY', '--dir DIRECTORY', 'A directory containing an OCFL object') do |dir|
    raise "#{dir} is not a valid directory path." unless Dir.exist?(dir)

    options[:object_root] = dir
  end
end

parser.parse(ARGV)

# The directory option is mandatory; name it so the error is actionable.
raise OptionParser::MissingArgument, '-d DIRECTORY' if options[:object_root].nil?

object_root = options[:object_root]

# Validate the object root and print the collected results.
OcflTools::OcflValidator.new(object_root).validate_ocfl_object_root.print
data/lib/ocfl-tools.rb ADDED
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ # OcflTools is a module that provides a distinctive namespace for classes that create,
4
+ # maintain and read Oxford Common File Layout preservation objects.
5
+ #
6
+ # ====Data Model
7
+ #
8
+ # * <b>{OcflObject} = an object that models the internal data structures of an OCFL manifest.</b>
9
+ # * {OcflInventory} = An I/O interface for {OcflObject} allowing the reading and creation of OCFL inventory.json files.
10
+ #
11
+ # @note Copyright (c) 2019 by The Board of Trustees of the Leland Stanford Junior University.
12
+
13
+ require 'ocfl_tools'
14
+ require 'json'
15
+ require 'anyway'
16
+ require 'fileutils'
17
+ require 'digest'
18
+ require 'time' # for iso8601 checking.
19
+ require 'uri' # for, well, uri testing.
data/lib/ocfl_tools.rb ADDED
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace; loading this file pulls in every library component.
module OcflTools
  # Components are required one by one, in the order listed here.
  %w[
    ocfl_tools/ocfl_object
    ocfl_tools/ocfl_inventory
    ocfl_tools/ocfl_verify
    ocfl_tools/ocfl_deposit
    ocfl_tools/ocfl_validator
    ocfl_tools/ocfl_results
    ocfl_tools/ocfl_delta
    ocfl_tools/ocfl_actions
    ocfl_tools/ocfl_errors
    ocfl_tools/config
    ocfl_tools/utils
    ocfl_tools/utils_file
    ocfl_tools/utils_inventory
  ].each { |component| require component }
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'anyway'
4
+
5
module OcflTools
  # Site-wide configuration for OCFL-Tools, declared with the 'anyway' gem's
  # attr_config. The attributes and the defaults declared below are:
  #   version_format:    'v%04d'
  #   content_type:      'https://ocfl.io/1.0/spec/#inventory'
  #   content_directory: 'content'
  #   digest_algorithm:  'sha512'
  #   fixity_algorithms: ['md5', 'sha1', 'sha256']
  #   ocfl_version:      '1.0'
  class Config < Anyway::Config
    attr_config(
      version_format: 'v%04d',
      content_type: 'https://ocfl.io/1.0/spec/#inventory',
      content_directory: 'content',
      digest_algorithm: 'sha512',
      # site-specific allowable fixity algorithms
      fixity_algorithms: %w[md5 sha1 sha256],
      ocfl_version: '1.0'
    )
  end

  class << self
    # Returns the module-wide Config instance, building it on first use and
    # handing back the same instance afterwards.
    def config
      @config ||= Config.new
    end
  end
end
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
module OcflTools
  # Class for collating manifest actions, both for delta reporting and staging new versions.
  #
  # Actions are kept in a nested hash of the form
  #   { action_name => { digest => [filepath, ...] } }
  # plus an optional 'fixity' entry of the form
  #   { 'fixity' => { fixity_algorithm => { digest => fixity_digest } } }
  class OcflActions
    def initialize
      # Seed the standard action buckets up front so they are always reported
      # in this order. 'fixity' is only created when it is first used.
      @my_actions = {
        'update_manifest' => {},
        'add' => {},
        'update' => {},
        'copy' => {},
        'move' => {},
        'delete' => {}
      }
    end

    # Convenience method for obtaining a hash of recorded actions.
    # Action types with nothing recorded are removed from the stored hash
    # before it is returned; they are re-created on demand by later recordings.
    # @return [Hash] of actions stored in this instance.
    def actions
      # Don't return empty keys.
      @my_actions.delete_if { |_action, entries| entries.empty? }
    end

    # Convenience method for obtaining a hash of recorded actions.
    # Behaves exactly like {#actions}.
    # @return [Hash] of actions stored in this instance.
    def all
      actions
    end

    # Creates an 'update_manifest' entry in the actions hash.
    # @param [String] digest of the filepath being recorded.
    # @param [Pathname] filepath of file to record.
    # @return [Array] of unique filepaths recorded for this digest under 'update_manifest'.
    def update_manifest(digest, filepath)
      record('update_manifest', digest, filepath)
    end

    # Creates an 'add' entry in the actions hash.
    # @param [String] digest of the filepath being recorded.
    # @param [Pathname] filepath of file to record.
    # @return [Array] of unique filepaths recorded for this digest under 'add'.
    def add(digest, filepath)
      record('add', digest, filepath)
    end

    # Creates an 'update' entry in the actions hash.
    # @param [String] digest of the filepath being recorded.
    # @param [Pathname] filepath of file to record.
    # @return [Array] of unique filepaths recorded for this digest under 'update'.
    def update(digest, filepath)
      record('update', digest, filepath)
    end

    # Creates a 'copy' entry in the actions hash.
    # @param [String] digest of the filepath being recorded.
    # @param [Pathname] filepath of file to record.
    # @return [Array] of unique filepaths recorded for this digest under 'copy'.
    def copy(digest, filepath)
      record('copy', digest, filepath)
    end

    # Creates a 'move' entry in the actions hash.
    # @param [String] digest of the filepath being recorded.
    # @param [Pathname] filepath of file to record.
    # @return [Array] of unique filepaths recorded for this digest under 'move'.
    def move(digest, filepath)
      record('move', digest, filepath)
    end

    # Creates a 'delete' entry in the actions hash.
    # @param [String] digest of the filepath being recorded.
    # @param [Pathname] filepath of file to record.
    # @return [Array] of unique filepaths recorded for this digest under 'delete'.
    def delete(digest, filepath)
      record('delete', digest, filepath)
    end

    # Records an additional fixity value for a digest. The first value recorded
    # for a given algorithm and digest wins; later calls do not overwrite it.
    # @param [String] digest of the filepath that is getting additional fixity values.
    # @param [String] fixity_algorithm of the fixity digest being added (e.g. 'md5', 'sha1').
    # @param [String] fixity_digest to associate with this digest.
    # @return [String] the fixity digest stored for this algorithm and digest.
    def fixity(digest, fixity_algorithm, fixity_digest)
      # Only create the 'fixity' key (and the per-algorithm hash) if used.
      by_digest = ((@my_actions['fixity'] ||= {})[fixity_algorithm] ||= {})
      # Only add unique fixity digests.
      return by_digest[digest] if by_digest.key?(digest)

      by_digest[digest] = fixity_digest
    end

    private

    # Appends filepath to the unique list kept for digest under the given action.
    # The action bucket is re-created when missing, because #actions and #all
    # remove empty buckets from the stored hash; without this, recording after
    # one of those calls would fail on a nil bucket.
    # @param [String] action name of the bucket to record into.
    # @param [String] digest of the filepath being recorded.
    # @param [Pathname] filepath of file to record.
    # @return [Array] of unique filepaths recorded for this digest under the action.
    def record(action, digest, filepath)
      filepaths = ((@my_actions[action] ||= {})[digest] ||= [])
      # Only put unique values into filepaths.
      filepaths << filepath unless filepaths.include?(filepath)
      filepaths
    end
  end
end
@@ -0,0 +1,250 @@
1
+ # frozen_string_literal: true
2
+
3
module OcflTools
  # Given an inventory, show changes from previous versions.
  # OcflDelta takes in an OCFL Inventory object and creates a delta hash containing
  # the actions performed to assemble the requested version.
  class OcflDelta
    # @return [Hash] of version string (e.g. 'v0002') => actions hash, for every
    #   version whose delta has been computed so far.
    attr_reader :delta

    # @param ocfl_object an inventory-like object. It must define the instance
    #   variables @id, @head, @manifest, @versions and @fixity, and respond to
    #   get_state, version_id_list and get_digest. This class also calls
    #   versions, manifest and contentDirectory on it.
    # @raise [RuntimeError] if a required instance variable or method is missing.
    def initialize(ocfl_object)
      # Duck sanity check.
      ['@id', '@head', '@manifest', '@versions', '@fixity'].each do |var|
        unless ocfl_object.instance_variable_defined?(var)
          raise "Object #{ocfl_object} does not have instance var #{var} defined"
        end
      end

      %w[get_state version_id_list get_digest].each do |mthd|
        unless ocfl_object.respond_to?(mthd)
          raise "Object #{ocfl_object} does not respond to #{mthd}"
        end
      end

      @ocfl_object = ocfl_object
      @delta = {}
      # We need to get version format, for final report-out. Assume that the ocfl_object versions are
      # formatted correctly (starting with a 'v'). We can't trust the site config setting
      # for this, as there's no guarantee the inventory we are reading in was created at this site.
      first_version = @ocfl_object.versions.keys.min # should get us 'v0001' or 'v1'
      sliced_version = first_version.split('v')[1] # cut the leading 'v' from the string.
      # A length of 1 for the first version implies 'v1' (no zero padding).
      @version_format = sliced_version.length == 1 ? 'v%d' : "v%0#{sliced_version.length}d"
    end

    # Generates a complete delta hash for all versions of this object.
    # @return [Hash] of version string => actions hash.
    def all
      @ocfl_object.version_id_list.each do |version|
        get_version_delta(version)
      end
      @delta
    end

    # Given a version, get the delta from the previous version.
    # @param [Integer] version of object to get deltas for.
    # @return [Hash] of actions applied to previous version to create current version.
    # @raise [RuntimeError] if version is not 1 and is not in the object's version list.
    def previous(version)
      # Version 1 has no predecessor; everything in it is an 'add'.
      return get_first_version_delta if version == 1

      # Verify version exists, then...
      unless @ocfl_object.version_id_list.include?(version)
        raise "Version #{version} not found in #{@ocfl_object}!"
      end

      get_version_delta(version)
    end

    private

    # Formats an integer version using the format detected in #initialize.
    def version_string_for(version)
      format(@version_format, version.to_i)
    end

    # Computes, stores in @delta and returns the actions hash describing how
    # the given version differs from the one before it.
    def get_version_delta(version)
      return get_first_version_delta unless version > 1

      # State maps digest => [filepaths]; the inverted form is used as filepath => digest.
      current_digests = @ocfl_object.get_state(version)
      current_files = OcflTools::Utils::Files.invert_and_expand(current_digests)

      previous_digests = @ocfl_object.get_state(version - 1)
      previous_files = OcflTools::Utils::Files.invert_and_expand(previous_digests)

      # Digests / filepaths that only exist on one side of the comparison.
      missing_digests = previous_digests.reject { |digest, _| current_digests.key?(digest) }
      new_digests = current_digests.reject { |digest, _| previous_digests.key?(digest) }
      missing_files = previous_files.reject { |file, _| current_files.key?(file) }
      new_files = current_files.reject { |file, _| previous_files.key?(file) }
      # Digests may not have changed, but their filepaths can!
      unchanged_digests = current_digests.select { |digest, _| previous_digests.key?(digest) }

      actions = OcflTools::OcflActions.new

      record_adds_and_updates(actions, version, new_digests, new_files, current_files)
      record_copies_moves_and_deletes(actions, unchanged_digests, previous_digests, new_files, missing_files)
      record_deletes_of_missing_digests(actions, missing_digests, current_files)

      # Always replace any existing delta for this version.
      @delta[version_string_for(version)] = actions.all
    end

    # Records ADD (new digest, new filepath) and UPDATE (new digest, existing
    # filepath) actions, plus the matching manifest updates.
    def record_adds_and_updates(actions, version, new_digests, new_files, current_files)
      new_digests.each do |digest, filepaths|
        filepaths.each do |file|
          if new_files.key?(file) && new_files[file] == digest
            # New digest, new file: it's an ADD (and must not also count as an UPDATE).
            actions.add(digest, file)
          elsif current_files.key?(file) && current_files[file] == digest
            # New digest, existing file: it's an UPDATE.
            actions.update(digest, file)
          else
            next
          end
          update_manifest_action(digest, version, actions)
        end
      end
    end

    # Records COPY, MOVE and DELETE actions for digests present in both versions.
    def record_copies_moves_and_deletes(actions, unchanged_digests, previous_digests, new_files, missing_files)
      unchanged_digests.each do |digest, filepaths|
        previous_filepaths = previous_digests[digest]
        added_filepaths = filepaths - previous_filepaths
        removed_filepaths = previous_filepaths - filepaths

        if filepaths.size > previous_filepaths.size
          # COPY is unchanged digest, additional (new) filepath.
          added_filepaths.each { |copy_file| actions.copy(digest, copy_file) }
        elsif filepaths.size < previous_filepaths.size
          # One possible DELETE is unchanged digest, fewer filepaths. Only
          # filepaths that are gone from this version entirely are deletes;
          # a path that is still present now points at another digest.
          removed_filepaths.each do |delete_me|
            actions.delete(digest, delete_me) if missing_files.key?(delete_me)
          end
        elsif missing_files.value?(digest) && new_files.value?(digest)
          # MOVE is unchanged digest, 1 deleted filepath, 1 added filepath.
          # Pair the filepaths that actually changed, rather than the first
          # entry of each list, so a digest with several filepaths reports
          # the right ones.
          removed_filepaths.zip(added_filepaths).each do |old_filename, new_filename|
            actions.move(digest, old_filename)
            actions.move(digest, new_filename) if new_filename
          end
        end
      end
    end

    # Records DELETE actions for digests that no longer appear in this version:
    # each of their filepaths that is also gone from the current files.
    def record_deletes_of_missing_digests(actions, missing_digests, current_files)
      missing_digests.each do |digest, filepaths|
        filepaths.each do |filepath|
          actions.delete(digest, filepath) unless current_files.key?(filepath)
        end
      end
    end

    # Records an 'update_manifest' action for every manifest content path of
    # digest that lives under this version's content directory, i.e. the
    # bitstreams that were added to this version directory.
    def update_manifest_action(digest, version, action)
      prefix = "#{version_string_for(version)}/#{@ocfl_object.contentDirectory}/"
      # We need to make a deep copy here so content_paths edits don't screw up the ocfl_object's manifest.
      content_paths = OcflTools::Utils.deep_copy(@ocfl_object.manifest[digest])
      content_paths.each do |content_path|
        # Literal prefix comparison (including the trailing slash), so
        # characters in the version or directory name are never treated as a
        # pattern and a sibling directory sharing the name prefix is skipped.
        next unless content_path.start_with?(prefix)

        # Trim the version/content directory from the front of content_path.
        action.update_manifest(digest, content_path[prefix.length..-1])
      end
    end

    # Computes, stores in @delta and returns the actions hash for version 1,
    # where everything in the state is an 'add'.
    def get_first_version_delta
      version = 1
      actions = OcflTools::OcflActions.new

      @ocfl_object.get_state(version).each do |digest, filepaths|
        filepaths.each do |file|
          actions.add(digest, file)
          update_manifest_action(digest, version, actions)
        end
      end

      # Always replace any existing delta for this version.
      @delta[version_string_for(version)] = actions.all
    end
  end
end