aspire 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +59 -0
  3. data/.rbenv-gemsets +1 -0
  4. data/.travis.yml +5 -0
  5. data/CODE_OF_CONDUCT.md +74 -0
  6. data/Dockerfile +20 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +851 -0
  10. data/Rakefile +10 -0
  11. data/aspire.gemspec +40 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/entrypoint.sh +11 -0
  15. data/exe/build-cache +13 -0
  16. data/lib/aspire.rb +11 -0
  17. data/lib/aspire/api.rb +2 -0
  18. data/lib/aspire/api/base.rb +198 -0
  19. data/lib/aspire/api/json.rb +195 -0
  20. data/lib/aspire/api/linked_data.rb +214 -0
  21. data/lib/aspire/caching.rb +4 -0
  22. data/lib/aspire/caching/builder.rb +356 -0
  23. data/lib/aspire/caching/cache.rb +365 -0
  24. data/lib/aspire/caching/cache_entry.rb +296 -0
  25. data/lib/aspire/caching/cache_logger.rb +63 -0
  26. data/lib/aspire/caching/util.rb +210 -0
  27. data/lib/aspire/cli/cache_builder.rb +123 -0
  28. data/lib/aspire/cli/command.rb +20 -0
  29. data/lib/aspire/enumerator/base.rb +29 -0
  30. data/lib/aspire/enumerator/json_enumerator.rb +130 -0
  31. data/lib/aspire/enumerator/linked_data_uri_enumerator.rb +32 -0
  32. data/lib/aspire/enumerator/report_enumerator.rb +64 -0
  33. data/lib/aspire/exceptions.rb +36 -0
  34. data/lib/aspire/object.rb +7 -0
  35. data/lib/aspire/object/base.rb +155 -0
  36. data/lib/aspire/object/digitisation.rb +43 -0
  37. data/lib/aspire/object/factory.rb +87 -0
  38. data/lib/aspire/object/list.rb +590 -0
  39. data/lib/aspire/object/module.rb +36 -0
  40. data/lib/aspire/object/resource.rb +371 -0
  41. data/lib/aspire/object/time_period.rb +47 -0
  42. data/lib/aspire/object/user.rb +46 -0
  43. data/lib/aspire/properties.rb +20 -0
  44. data/lib/aspire/user_lookup.rb +103 -0
  45. data/lib/aspire/util.rb +185 -0
  46. data/lib/aspire/version.rb +3 -0
  47. data/lib/retry.rb +197 -0
  48. metadata +274 -0
@@ -0,0 +1,365 @@
1
+ require 'aspire/caching/cache_entry'
2
+ require 'aspire/caching/util'
3
+ require 'aspire/exceptions'
4
+
5
+ module Aspire
6
+ module Caching
7
+ # Reads and writes Aspire API data to and from a file-based cache
8
+ class Cache
9
+ include Aspire::Exceptions
10
+ include Util
11
+
12
+ # The default cache directory permissions
13
+ MODE = 0o0750
14
+
15
+ # The default cache root directory
16
+ PATH = '/tmp/aspire/cache'.freeze
17
+
18
+ # @!attribute [rw] json_api
19
+ # @return [Aspire::API::JSON] the JSON API instance
20
+ attr_accessor :json_api
21
+
22
+ # @!attribute [rw] ld_api
23
+ # @return [Aspire::API::LinkedData] the linked data API instance
24
+ attr_accessor :ld_api
25
+
26
+ # @!attribute [rw] logger
27
+ # @return [Aspire::Caching::CacheLogger] the cache activity logger
28
+ attr_accessor :logger
29
+
30
+ # @!attribute [rw] mode
31
+ # @return [String, Integer] the cache directory permissions
32
+ attr_accessor :mode
33
+
34
+ # @!attribute [rw] path
35
+ # @return [String] the cache root directory
36
+ attr_accessor :path
37
+
38
# Initialises a new Cache instance
# @param ld_api [Aspire::API::LinkedData] the linked data API instance
# @param json_api [Aspire::API::JSON] the JSON API instance
# @param path [String] the cache root directory (defaults to PATH)
# @param options [Hash] the cache options
# @option options [Integer] :api_retries the maximum number of retries
#   after an API call timeout
#   NOTE(review): this option is not read by this method - confirm whether
#   it is still supported
# @option options [Boolean] :clear if true, clear the cache, otherwise
#   leave any existing cache content intact
# @option options [Logger] :logger the cache activity logger
# @option options [String, Integer] :mode the cache directory permissions
#   (defaults to MODE)
# @return [void]
def initialize(ld_api = nil, json_api = nil, path = nil, **options)
  self.json_api = json_api
  self.ld_api = ld_api
  # The logger is assigned before the path because the path setter passes
  # the logger to mkdir when creating the cache directory
  self.logger = Aspire::Caching::CacheLogger.new(options[:logger])
  self.mode = options[:mode] || MODE
  self.path = path || PATH
  # Clear the cache contents if required
  clear if options[:clear]
end
60
+
61
# Builds the cache entry for an API object URL
# @param url [String] the URL of the API object
# @return [Aspire::Caching::CacheEntry] the cache entry for the canonical
#   form of the URL
def cache_entry(url)
  canonical = ld_api.canonical_url(url)
  CacheEntry.new(canonical, self)
end
67
+
68
# Converts a URL to its canonical form by delegating to the linked data API
# @param url [String] the URL of the API object
# @return [String] the canonical URL of the object
def canonical_url(url)
  api = ld_api
  api.canonical_url(url)
end
74
+
75
# Removes everything beneath the cache root directory, leaving the root
# directory itself in place. Does nothing if the cache path is not a valid
# directory.
# @return [void]
# @raise [Aspire::Exceptions::RemoveError] if the operation fails
def clear
  rm(File.join(path, '*'), logger, 'Cache cleared', 'Cache clear failed') if path?
end
82
+
83
# Removes the cache root directory itself. Does nothing if the cache path is
# not a valid directory.
# @return [void]
# @raise [Aspire::Exceptions::RemoveError] if the operation fails
def delete
  rm(path, logger, 'Cache deleted', 'Cache delete failed') if path?
end
90
+
91
# Tests whether the cache root directory has no content
# @return [Boolean] true if the cache is empty, false if not
def empty?
  root = path
  Dir.empty?(root)
end
96
+
97
# Tests whether an API object is present in the cache
# @param url [String] the URL (used only when no entry is given)
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @return [Boolean] true if the URL is in the cache, false if not
def include?(url = nil, entry: nil)
  (entry || cache_entry(url)).cached?
end
105
+
106
# Converts a URL to its linked data form by delegating to the linked data API
# @param url [String] the URL of the API object
# @return [String] the linked data URL of the object
def linked_data_url(url)
  api = ld_api
  api.linked_data_url(url)
end
112
+
113
# Iterates over the marked (in-progress) entries of a single cache object
# type and passes a cache entry for each one to the block
# @param type [String] the cache object type ('lists', 'resources' etc.)
#   or '**' for all object types
# @yield [entry] passes the cache entry to the block
# @yieldparam entry [Aspire::Caching::CacheEntry] the marked cache entry
# @return [void]
def marked_entry(type)
  # Nothing consumes the entries without a block, so skip the directory scan
  return unless block_given?
  # Status files are the linked data filenames prefixed with '.'
  pattern = File.join(path, type, '.[^.]*')
  Dir.glob(pattern) do |filename|
    begin
      # Convert the status filename to a URL and pass its entry to the block
      yield(CacheEntry.new(filename_to_url(filename), self))
    rescue NotCacheable
      # Skip files which do not map to a cacheable URL
      nil
    end
  end
end
131
+
132
# Iterates over marked (in-progress) cache entries and passes each cache
# entry to the block
# Positional parameters are the object types to include, e.g. 'lists',
# 'resources' etc. - default: all object types
# @yield [entry] passes the cache entry to the block
# @yieldparam entry [Aspire::Caching::CacheEntry] the marked cache entry
# @return [void]
def marked_entries(*types, &block)
  # A splat is always an Array, so only the empty case needs handling;
  # '**' matches every object type directory
  return marked_entry('**', &block) if types.empty?
  types.each { |type| marked_entry(type, &block) }
end
146
+
147
# Assigns the cache root directory, creating it first via mkdir
# @param dir [String] the root directory path of the cache
# @return [void]
# @raise [ArgumentError] if no path is specified
# @raise [CacheError] if the directory cannot be created
def path=(dir = nil)
  missing = dir.nil? || dir.empty?
  raise ArgumentError, 'directory expected' if missing
  # The instance variable is only updated once the directory exists
  mkdir(dir, logger, "Cache path set to #{dir}", 'Set cache path failed')
  @path = dir
end
157
+
158
# Tests whether the cache path is set and refers to an existing directory
# @return [Boolean] true if the cache path is a valid directory
def path?
  return false if path.nil?
  File.directory?(path)
end
163
+
164
# Reads an API data object from the cache or API
# @param url [String] the URL of the API object (used only when no entry is
#   given)
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param json [Boolean] if true, read the JSON API, otherwise read the
#   linked data API
# @param use_api [Boolean] if true, read from the Aspire API (and write the
#   result to the cache) when the cache returned nothing
# @param use_cache [Boolean] if true, try the cache before the Aspire API
# @yield [data, entry, from_cache, json] passes the data, cache entry and
#   flags to the block; the block is only called when data was read
# @yieldparam data [Hash] the parsed data from the cache or API call
# @yieldparam entry [Aspire::Caching::CacheEntry] the cache entry
# @yieldparam from_cache [Boolean] true if the data was read from the
#   cache, false if it was read from the API
# @yieldparam json [Boolean] true if the data is from the JSON API, false
#   if it is from the linked data API
# @return [Hash, nil] the parsed JSON data from the cache or API, or nil if
#   no data was read or the URL is not cacheable
# @raise [Aspire::Exceptions::APIError] if the API call fails
# @raise [Aspire::Exceptions::ReadError] if the cache read fails
# @raise [Aspire::Exceptions::WriteError] if the cache write fails
def read(url = nil,
         entry: nil, json: false, use_api: true, use_cache: true)
  entry ||= cache_entry(url)
  # Try the cache, data is nil on a cache miss
  data = read_cache(entry, json: json) if use_cache
  from_cache = !data.nil?
  # Try the API if nothing was returned from the cache
  data ||= write(entry: entry, json: json) if use_api
  # Call the block if the read was successful
  yield(data, entry, from_cache, json) if block_given? && data
  data
rescue NotCacheable
  # Uncacheable URLs have no data representation in the Aspire API
  nil
end
197
+
198
# Removes the URL from the cache
# @param url [String] the URL of the API object (used only when no entry is
#   given)
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param force [Boolean] if true, remove the URL even if it is marked
#   as in-progress; otherwise fails on marked entries
# @param remove_children [Boolean] if true, remove all children of the
#   object as well as the object itself, otherwise remove just the object
# @yield [data, entry] passes the data and cache entry to the block before
#   the cached files are removed
# @yieldparam data [Hash] the parsed JSON data read from the cache
# @yieldparam entry [Aspire::Caching::CacheEntry] the cache entry
# @return [Hash, nil] the parsed JSON data removed from the cache, or nil if
#   the URL is not cached or not cacheable
# @raise [Aspire::Exceptions::MarkedError] if the cache entry is
#   marked as in-progress and force is false
# @raise [Aspire::Exceptions::RemoveError] if the operation fails
def remove(url = nil, entry: nil, force: false, remove_children: false)
  entry ||= cache_entry(url)
  return nil unless entry.cached?
  # Read the linked data from the cache for the return value
  data = read_cache(entry)
  yield(data, entry) if block_given?
  # Remove the cached files
  entry.delete(force: force, remove_children: remove_children)
  data
rescue NotCacheable
  nil
end
226
+
227
# Looks up the Aspire tenancy host name from the linked data API
# @return [String, nil] the Aspire tenancy host name, or nil if no linked
#   data API instance is set
def tenancy_host
  return nil unless ld_api
  ld_api.tenancy_host
end
232
+
233
# Writes an API object to the cache, fetching it from the API when no data
# is supplied
# @param url [String] the URL of the API object (used only when no entry is
#   given)
# @param data [Hash, String, nil] parsed or unparsed data to be cached
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param json [Boolean] if true, read the JSON API, otherwise read the
#   linked data API
# @yield [data, entry] passes the data and cache entry to the block
# @yieldparam data [Hash] the parsed JSON data from the caller or API call
# @yieldparam entry [Aspire::Caching::CacheEntry] the cache entry
# @return [Hash, nil] the parsed JSON data written to the cache, or nil if
#   no data was available
# @raise [Aspire::Exceptions::WriteError] if the operation fails
def write(url = nil, data: nil, entry: nil, json: false)
  entry ||= cache_entry(url)
  # Prefer caller-supplied data, otherwise fetch it from the API
  pair = write_data(data) || read_api(entry, json: json)
  raw, parsed = pair
  return nil unless raw && parsed
  # The unparsed form is cached, the parsed form is returned
  write_cache(entry, raw, json: json)
  yield(parsed, entry) if block_given?
  parsed
end
257
+
258
+ private
259
+
260
# Converts a status filename to a linked data URL
# @param filename [String] the filename of a linked data object status
#   file in the cache
# @return [String] the Aspire linked data URL built by the linked data API
#   from the relative path
def filename_to_url(filename)
  # Remove the cache path
  relative = strip_prefix(filename, path)
  # Remove the leading . from the base filename
  relative = strip_filename_prefix(relative, '.')
  # Remove the leading / from the path without mutating the helper's return
  # value, which may be frozen or shared with the caller
  relative = relative[1..-1] if relative.start_with?('/')
  # Return the full Aspire linked data URL
  ld_api.api_url(relative)
end
273
+
274
# Fetches an object from either the JSON API or the linked data API and
# logs the outcome
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param json [Boolean] if true, read the JSON API, otherwise read the
#   linked data API
# @return [Array] the unparsed JSON string and parsed hash from the API
def read_api(entry, json: false)
  result = if json
             read_json_api(entry)
           else
             read_linked_data_api(entry)
           end
  logger.log(Logger::DEBUG, read_api_msg('read', entry, json))
  result
rescue APITimeout, APIError => e
  # API failures are handed to the logger together with the ReadError class
  logger.log_exception(read_api_msg('read failed', entry, json, e), ReadError)
end
287
+
288
# Formats a log/exception message for #read_api
# @param msg [String] the event message
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param json [Boolean] if true, return a JSON API message (which includes
#   the JSON API URL), otherwise return a linked data API message
# @param exception [Exception] the exception, appended when given
# @return [String] the formatted log message
def read_api_msg(msg, entry, json, exception = nil)
  api = json ? 'JSON' : 'LD'
  parts = ["#{entry.url} #{msg} from #{api} API"]
  parts << " [#{entry.json_api_url}]" if json
  parts << ": #{exception}" if exception
  parts.join
end
302
+
303
# Reads the parsed data for a cache entry from the cache and logs the read
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param json [Boolean] if true, read JSON API data, otherwise read
#   linked data API data
# @return [Hash, nil] the parsed JSON data from the cache or nil if the
#   URL is not cached
# @raise [Aspire::Exceptions::ReadError] if the cache read fails
def read_cache(entry, json: false)
  data = entry.read(json, parsed: true)
  suffix = json ? ' [JSON]' : ''
  logger.log(Logger::DEBUG, "#{entry.url}#{suffix} read from cache")
  data
rescue CacheMiss
  # A cache miss is reported to the caller as nil rather than an exception
  nil
end
318
+
319
# Calls the Aspire JSON API with the entry's JSON API URL and options
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @return [Array] the unparsed JSON string and parsed hash from the API
def read_json_api(entry)
  params = entry.json_api_opt || {}
  json_api.call(entry.json_api_url, **params) do |response, data|
    # Return from the method with the raw response body and the parsed data
    return response.body, data
  end
end
329
+
330
# Calls the Aspire linked data API with the entry's URL
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @return [Array] the unparsed JSON string and parsed hash from the API
def read_linked_data_api(entry)
  ld_api.call(entry.url) do |response, data|
    # Return from the method with the raw response body and the parsed data
    return response.body, data
  end
end
336
+
337
# Writes data to the cache and logs the outcome
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param data [String] the data to be written to the cache
# @param json [Boolean] if true, write to the entry's JSON API file,
#   otherwise write to its linked data file
# @return [void]
# @note A WriteError raised by the cache entry is rescued here and logged at
#   ERROR level; it is not re-raised to the caller.
#   NOTE(review): callers document WriteError as raised - confirm whether
#   swallowing the error is intended
def write_cache(entry, data = nil, json: false)
  entry.write(data, json)
  logger.log(Logger::INFO,
             "#{entry.url} written to cache [#{entry.path(json)}]")
rescue WriteError => e
  logger.log(Logger::ERROR, e.to_s)
end
349
+
350
# Normalises user-supplied data into its unparsed and parsed forms
# @param data [Hash, Array, String, nil] the data to be written to the cache
# @return [Array, nil] the unparsed JSON string and parsed data, or nil if
#   no data is supplied
def write_data(data = nil)
  case data
  when nil
    nil
  when Hash, Array
    # Already parsed: generate the JSON string to be cached
    [JSON.generate(data), data]
  else
    # Anything else is treated as a JSON string and parsed
    text = data.to_s
    [text, JSON.parse(text)]
  end
end
363
+ end
364
+ end
365
+ end
@@ -0,0 +1,296 @@
1
+ require 'aspire/caching/util'
2
+ require 'aspire/exceptions'
3
+ require 'aspire/util'
4
+
5
+ module Aspire
6
+ module Caching
7
+ # Represents an entry in the cache
8
+ class CacheEntry
9
+ include Aspire::Caching::Util
10
+ include Aspire::Exceptions
11
+ include Aspire::Util
12
+
13
+ # @!attribute [rw] cache
14
+ # @return [Aspire::Caching::Cache] the cache
15
+ attr_accessor :cache
16
+
17
+ # :!attribute [rw] draft
18
+ # @!attribute [rw] json_api_opt
19
+ # @return [Hash] #call parameters for the JSON API call
20
+ attr_accessor :json_api_opt
21
+
22
+ # @!attribute [rw] json_api_url
23
+ # @return [String] the JSON API #call URL
24
+ attr_accessor :json_api_url
25
+
26
+ # @!attribute [rw] parsed_url
27
+ # @return [MatchData] the parsed URL
28
+ attr_accessor :parsed_url
29
+
30
+ # @!attribute [rw] url
31
+ # @return [String] the URL
32
+ attr_accessor :url
33
+
34
# Creates a cache entry for an API object URL within a cache
# @param url [String] the URL of the API object
# @param cache [Aspire::Caching::Cache] the parent cache
# @return [void]
# @raise [Aspire::Exceptions::NotCacheable] if the URL is not
#   cacheable
def initialize(url, cache)
  # The cache must be assigned first: the URL setter uses it to
  # canonicalise the URL
  self.cache = cache
  self.url = url
end
44
+
45
# Compares this entry's URL with another cache entry or URL
# @param other [Aspire::Caching::CacheEntry, String] a cache entry or URL
# @return [Boolean] true if the entries refer to the same object
def ==(other)
  comparable = url_for_comparison(other, cache.ld_api)
  url == comparable
end
51
+
52
# Returns true if this cache entry is a child of the URL
# @param url [Aspire::Caching::CacheEntry, String] the URL to test
# @param strict [Boolean] if true, the URL must be a parent of this entry,
#   otherwise the URL must be a parent or the same as this entry
# @return [Boolean] true if this cache entry is a child of the URL, false
#   otherwise
def child_of?(url, strict: false)
  child_url?(parsed_url, url, cache.ld_api, strict: strict)
end
61
+
62
# Returns true if the object is in the cache, false if not
# @param json [Boolean] if true, test for the JSON API file, otherwise test
#   for the linked data file
# @return [Boolean] true if the object is cached, false if not (including
#   when JSON API data is requested but the entry has no JSON API file)
def cached?(json = false)
  filename = json ? json_file : file
  # Always answer with a strict boolean rather than nil for a missing name
  !filename.nil? && File.exist?(filename)
end
68
+
69
# Removes the entry's files from the cache while holding the in-progress
# mark
# @param force [Boolean] delete even if the entry is marked in-progress
# @param remove_children [Boolean] if true, remove children of the object
#   as well as the object, otherwise remove just the object
# @return [void]
# @raise [Aspire::Exceptions::MarkedError] if the entry is
#   marked in-progress and force = false
def delete(force: false, remove_children: false)
  mark(force: force) do |_status|
    delete_entry(file, remove_children)
  end
end
79
+
80
# Builds the linked data filename by joining the cache root and the URL path
# @return [String] the linked data filename in the cache
def file
  root = cache.path
  File.join(root, url_path)
end
85
+
86
# Tests whether a JSON API URL is set for the object
# @return [Boolean] true if the object has associated JSON API data, false
#   otherwise
def json?
  api_url = json_api_url
  !(api_url.nil? || api_url.empty?)
end
92
+
93
# Derives the JSON API data filename by adding a '-json' suffix to the
# linked data filename
# @param filename [String] the linked data filename in the cache (defaults
#   to this entry's linked data filename)
# @return [String, nil] the JSON API data filename or nil if there is no
#   JSON API data for the URL
def json_file(filename = nil)
  return nil unless json?
  add_filename_suffix(filename || file, '-json')
end
101
+
102
# Tests whether the cache entry is a list (or, when not strict, a child of
# a list)
# @param strict [Boolean] if true, the cache entry must be a list,
#   otherwise the cache entry must be a list or a child of a list
# @return [Boolean] true if the cache entry is a list, false otherwise
def list?(strict: true)
  # Anything outside the 'lists' type is neither a list nor a list child
  return false unless parsed_url[:type] == 'lists'
  # Under strict checking a child type disqualifies the entry
  !strict || parsed_url[:child_type].nil?
end
113
+
114
# Marks the cache entry as in-progress by creating its status file
# @param force [Boolean] if true, do not raise MarkedError when the entry
#   is already marked; otherwise, MarkedError is raised when the entry is
#   already marked.
# @return [Object, nil] the result of the block, or nil if no block is given
# @yield [file] passes the opened status file to the block
# @yieldparam file [File] the opened status file
# @raise [Aspire::Exceptions::MarkError] if the operation failed
# @raise [Aspire::Exceptions::MarkedError] if the cache entry is
#   already marked
def mark(force: false)
  filename = status_file
  flags = File::CREAT
  # EXCL makes the open fail with EEXIST when the status file already exists
  flags |= File::EXCL unless force
  # Always use the block form so the status file handle is closed even when
  # the caller supplies no block
  File.open(filename, flags) { |f| yield(f) if block_given? }
rescue Errno::EEXIST
  raise MarkedError, "#{url} already marked [#{filename}]"
rescue SystemCallError => e
  raise MarkError, "#{url} mark failed [#{filename}]: #{e}"
end
134
+
135
# Tests whether the entry's status file exists
# @return [Boolean] true if the cache entry is marked as in-progress,
#   false otherwise
def marked?
  status = status_file
  File.exist?(status)
end
141
+
142
# Returns true if this cache entry is the parent of the URL
# @param url [Aspire::Caching::CacheEntry, String] the URL to test
# @param strict [Boolean] if true, the URL must be a child of this entry,
#   otherwise the URL must be a child or the same as this entry
# @return [Boolean] true if this cache entry is the parent of the URL,
#   false otherwise
def parent_of?(url, strict: false)
  parent_url?(parsed_url, url, cache.ld_api, strict: strict)
end
151
+
152
# Selects the cache filename for the requested data type
# @param json [Boolean] if true, returns the JSON API filename, otherwise
#   returns the linked data API filename
# @return [String, nil] the filename; nil if the JSON API filename is
#   requested but the entry has none
def path(json = false)
  return json_file if json
  file
end
158
+
159
# Returns data from the cache
# @param json [Boolean] if true, read the JSON API file, otherwise read
#   the linked data API file
# @param parsed [Boolean] if true, return JSON-parsed data, otherwise
#   return a JSON string
# @return [Array, Hash, String, nil] the parsed JSON data or JSON string,
#   or nil if JSON API data is requested but not available for this entry
# @raise [Aspire::Exceptions::CacheMiss] when the data is not in the cache
# @raise [Aspire::Exceptions::ReadError] when the read operation fails or
#   the cached data cannot be parsed as JSON
def read(json = false, parsed: false)
  filename = json ? json_file : file
  return nil if filename.nil? || filename.empty?
  data = File.read(filename)
  parsed ? JSON.parse(data) : data
rescue Errno::ENOENT
  # A missing file is a cache miss rather than a read failure
  raise CacheMiss, "#{url} cache miss [#{filename}]"
rescue IOError, JSON::JSONError, SystemCallError => e
  # JSON errors are wrapped in the same way as #write wraps them
  raise ReadError, "#{url} cache read failed [#{filename}]: #{e}"
end
180
+
181
# Tests whether references from this object can be cached
# @return [Boolean] true if the object's references are cacheable, false
#   otherwise
def references?
  # Events are not JSON-LD so we can't cache references
  kinds = [parsed_url[:type], parsed_url[:child_type]]
  !kinds.include?('events')
end
188
+
189
# Derives the status filename by prefixing the linked data filename with '.'
# @param filename [String] the linked data filename in the cache (defaults
#   to this entry's linked data filename)
# @return [String] the status filename
def status_file(filename = nil)
  target = filename || file
  add_filename_prefix(target, '.')
end
195
+
196
# Represents the cache entry as its URL
# @return [String] the string representation (URL) of the cache entry
def to_s
  self.url
end
201
+
202
# Removes an in-progress mark from the cache entry by deleting its status
# file. An entry which is not marked is left untouched.
# @return [Integer, nil] the File.delete result, or nil if there was no
#   status file
# @raise [Aspire::Exceptions::UnmarkError] if the status file exists but
#   cannot be deleted
def unmark
  filename = status_file
  File.delete(filename)
rescue Errno::ENOENT
  # Already unmarked: rescuing here avoids the gap between an existence
  # check and the delete
  nil
rescue SystemCallError => e
  raise UnmarkError, "#{url} unmark failed [#{filename}]: #{e}"
end
209
+
210
# Sets the URL and associated flags
# @param u [String] the URL of the API object
# @return [void]
# @raise [Aspire::Exceptions::NotCacheable] if the URL is not
#   cacheable
def url=(u)
  # Convert the URL to canonical form for comparison
  u = cache.canonical_url(u)
  # Parse and check the URL
  # - this will raise NotCacheable if it is not a valid cacheable URL
  self.parsed_url = cacheable_url(u)
  # Set the URL properties
  @url = u
  if list_url?(parsed: parsed_url)
    # Only list URLs have an associated JSON API call
    self.json_api_opt = { bookjacket: 1, editions: 1, draft: 1, history: 1 }
    self.json_api_url = "lists/#{strip_ext(parsed_url[:id])}"
  else
    # Reset the JSON API properties so that values from a previously
    # assigned list URL do not persist
    self.json_api_opt = nil
    self.json_api_url = nil
  end
end
227
+
228
# Writes data to the cache
# @param data [Object] the data to write to the cache
# @param json [Boolean] if true, write the data as JSON API data,
#   otherwise write it as linked data
# @param parsed [Boolean] if true, treat data as a parsed JSON data
#   structure, otherwise treat it as a JSON string
# @return [void] nothing is written if JSON API data is requested but the
#   entry has no JSON API file
# @raise [Aspire::Exceptions::WriteError] when the write operation fails
def write(data, json = false, parsed: false)
  filename = json ? json_file : file
  return if filename.nil? || filename.empty?
  # Serialise before touching the file so that a JSON generation error
  # cannot leave a truncated cache file behind
  content = parsed ? JSON.generate(data) : data
  # Create the path to the file
  FileUtils.mkdir_p(File.dirname(filename), mode: cache.mode)
  # Open without truncating: the existing content is only discarded once
  # the exclusive lock is held
  File.open(filename, File::WRONLY | File::CREAT) do |f|
    f.flock(File::LOCK_EX)
    f.truncate(0)
    f.write(content)
  end
rescue IOError, JSON::JSONError, SystemCallError => e
  raise WriteError, "#{url} cache write failed [#{filename}]: #{e}"
end
249
+
250
+ private
251
+
252
# Deletes children of the cache entry
# @param filename [String] the linked data API filename
# @return [nil]
# @raise [Aspire::Exceptions::RemoveError] if the operation fails
def delete_children(filename)
  # Child objects of the cache entry are stored in a directory with the
  # same name as the linked data cache file without the '.json' extension
  dir = strip_ext(filename)
  # Never build a glob rooted at '/': bail out when there is no usable
  # directory name
  return if dir.nil? || dir.empty? || dir == '/'
  children = "#{dir}/*"
  # NOTE(review): this glob does not match dot-prefixed names, so status
  # files directly inside the directory are presumably left behind - confirm
  FileUtils.rm_rf(Dir.glob(children), secure: true)
  nil
rescue SystemCallError => e
  raise RemoveError, "#{url} remove failed [#{children}]: #{e}"
end
265
+
266
# Removes the linked data, JSON API and status files for the cache entry,
# optionally removes its children, then removes any empty directories on
# the cache file's path
# @param filename [String] the linked data filename in the cache
# @param remove_children [Boolean] if true, also delete the entry's children
# @return [nil]
def delete_entry(filename, remove_children = false)
  # Delete the files for the cache entry
  targets = [filename, json_file(filename), status_file(filename)]
  targets.each { |target| delete_file(target) }
  delete_children(filename) if remove_children
  # Delete any empty directories on the entry's file path
  rmdir_empty(filename, cache.path)
end
280
+
281
# Removes a single file, treating a nil/empty filename or a missing file as
# nothing to do
# @param filename [String] the filename to delete
# @return [void]
# @raise [Aspire::Exceptions::RemoveError] if the delete fails
#   for any reason other than the file not existing
def delete_file(filename)
  return if filename.nil? || filename.empty?
  File.delete(filename)
rescue Errno::ENOENT
  # Ignore file-does-not-exist errors
  nil
rescue SystemCallError => e
  raise RemoveError, "#{url} remove failed [#{filename}]: #{e}"
end
294
+ end
295
+ end
296
+ end