aspire 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +59 -0
- data/.rbenv-gemsets +1 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Dockerfile +20 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +851 -0
- data/Rakefile +10 -0
- data/aspire.gemspec +40 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/entrypoint.sh +11 -0
- data/exe/build-cache +13 -0
- data/lib/aspire.rb +11 -0
- data/lib/aspire/api.rb +2 -0
- data/lib/aspire/api/base.rb +198 -0
- data/lib/aspire/api/json.rb +195 -0
- data/lib/aspire/api/linked_data.rb +214 -0
- data/lib/aspire/caching.rb +4 -0
- data/lib/aspire/caching/builder.rb +356 -0
- data/lib/aspire/caching/cache.rb +365 -0
- data/lib/aspire/caching/cache_entry.rb +296 -0
- data/lib/aspire/caching/cache_logger.rb +63 -0
- data/lib/aspire/caching/util.rb +210 -0
- data/lib/aspire/cli/cache_builder.rb +123 -0
- data/lib/aspire/cli/command.rb +20 -0
- data/lib/aspire/enumerator/base.rb +29 -0
- data/lib/aspire/enumerator/json_enumerator.rb +130 -0
- data/lib/aspire/enumerator/linked_data_uri_enumerator.rb +32 -0
- data/lib/aspire/enumerator/report_enumerator.rb +64 -0
- data/lib/aspire/exceptions.rb +36 -0
- data/lib/aspire/object.rb +7 -0
- data/lib/aspire/object/base.rb +155 -0
- data/lib/aspire/object/digitisation.rb +43 -0
- data/lib/aspire/object/factory.rb +87 -0
- data/lib/aspire/object/list.rb +590 -0
- data/lib/aspire/object/module.rb +36 -0
- data/lib/aspire/object/resource.rb +371 -0
- data/lib/aspire/object/time_period.rb +47 -0
- data/lib/aspire/object/user.rb +46 -0
- data/lib/aspire/properties.rb +20 -0
- data/lib/aspire/user_lookup.rb +103 -0
- data/lib/aspire/util.rb +185 -0
- data/lib/aspire/version.rb +3 -0
- data/lib/retry.rb +197 -0
- metadata +274 -0
@@ -0,0 +1,365 @@
|
|
1
|
+
require 'aspire/caching/cache_entry'
|
2
|
+
require 'aspire/caching/util'
|
3
|
+
require 'aspire/exceptions'
|
4
|
+
|
5
|
+
module Aspire
|
6
|
+
module Caching
|
7
|
+
# Reads and writes Aspire API data to and from a file-based cache
|
8
|
+
class Cache
  include Aspire::Exceptions
  include Util

  # The default cache directory permissions
  MODE = 0o0750

  # The default cache root directory
  PATH = '/tmp/aspire/cache'.freeze

  # @!attribute [rw] json_api
  #   @return [Aspire::API::JSON] the JSON API instance
  attr_accessor :json_api

  # @!attribute [rw] ld_api
  #   @return [Aspire::API::LinkedData] the linked data API instance
  attr_accessor :ld_api

  # @!attribute [rw] logger
  #   @return [Aspire::Caching::CacheLogger] the cache activity logger
  attr_accessor :logger

  # @!attribute [rw] mode
  #   @return [String, Integer] the cache directory permissions
  attr_accessor :mode

  # @!attribute [rw] path
  #   @return [String] the cache root directory
  #   (the writer is #path= below, which also creates the directory)
  attr_reader :path

  # Initialises a new Cache instance
  # @param ld_api [Aspire::API::LinkedData] the linked data API instance
  # @param json_api [Aspire::API::JSON] the JSON API instance
  # @param path [String] the cache root directory (default: PATH)
  # @param options [Hash] the cache options
  # @option options [Boolean] :clear if true, clear the cache, otherwise
  #   leave any existing cache content intact
  # @option options [Logger] :logger the cache activity logger
  # @option options [String, Integer] :mode the cache directory permissions
  #   (default: MODE)
  # @return [void]
  def initialize(ld_api = nil, json_api = nil, path = nil, **options)
    self.json_api = json_api
    self.ld_api = ld_api
    # The logger is set before the path because #path= passes it to mkdir
    self.logger = Aspire::Caching::CacheLogger.new(options[:logger])
    self.mode = options[:mode] || MODE
    self.path = path || PATH
    # Clear the cache contents if required
    clear if options[:clear]
  end

  # Returns a CacheEntry instance for the URL
  # @param url [String] the URL of the API object
  # @return [Aspire::Caching::CacheEntry] the cache entry
  # @raise [Aspire::Exceptions::NotCacheable] if the URL is not cacheable
  def cache_entry(url)
    CacheEntry.new(ld_api.canonical_url(url), self)
  end

  # Returns the canonical form of the URL
  # @param url [String] the URL of the API object
  # @return [String] the canonical URL of the object
  def canonical_url(url)
    ld_api.canonical_url(url)
  end

  # Clears the cache contents, leaving the cache root directory in place.
  # Does nothing if the cache path is not an existing directory.
  # @return [void]
  # @raise [Aspire::Exceptions::RemoveError] if the operation fails
  def clear
    return unless path?
    rm(File.join(path, '*'), logger, 'Cache cleared', 'Cache clear failed')
  end

  # Deletes the cache, including the cache root directory.
  # Does nothing if the cache path is not an existing directory.
  # @return [void]
  # @raise [Aspire::Exceptions::RemoveError] if the operation fails
  def delete
    return unless path?
    rm(path, logger, 'Cache deleted', 'Cache delete failed')
  end

  # Returns true if the cache is empty, false if not
  # @return [Boolean] true if the cache is empty, false if not
  def empty?
    Dir.empty?(path)
  end

  # Returns true if the specified URL is in the cache, false if not
  # @param url [String] the URL (ignored when entry is given)
  # @param entry [Aspire::Caching::CacheEntry] the cache entry
  # @return [Boolean] true if the URL is in the cache, false if not
  def include?(url = nil, entry: nil)
    entry ||= cache_entry(url)
    entry.cached?
  end

  # Returns the linked data form of the URL
  # @param url [String] the URL of the API object
  # @return [String] the linked data URL of the object
  def linked_data_url(url)
    ld_api.linked_data_url(url)
  end

  # Iterates over the marked (in-progress) entries of a single cache object
  # type and passes each one to the block. Marked entries are found through
  # their status files (dot-prefixed files in the type directory).
  # @param type [String] the cache object type ('lists', 'resources' etc.)
  #   or '**' for all object types
  # @yield [entry] passes the marked cache entry to the block
  # @yieldparam entry [Aspire::Caching::CacheEntry] the marked cache entry
  # @return [void]
  def marked_entry(type)
    Dir.glob(File.join(path, type, '.[^.]*')) do |filename|
      begin
        # Convert the status filename to a URL and pass its entry to the block
        entry = CacheEntry.new(filename_to_url(filename), self)
        yield(entry) if block_given?
      rescue NotCacheable
        # Skip status files which do not map to a cacheable URL
        nil
      end
    end
  end

  # Iterates over marked (in-progress) cache entries and passes each one to
  # the block.
  # Positional parameters are the object types to include, e.g. 'lists',
  # 'resources' etc. - default: all object types
  # @yield [entry] passes the marked cache entry to the block
  # @yieldparam entry [Aspire::Caching::CacheEntry] the marked cache entry
  # @return [void]
  def marked_entries(*types, &block)
    # No types given: search every object type directory
    return marked_entry('**', &block) if types.empty?
    types.each { |type| marked_entry(type, &block) }
  end

  # Sets and creates the root directory of the cache
  # @param dir [String] the root directory path of the cache
  # @return [void]
  # @raise [ArgumentError] if no path is specified
  # @raise [CacheError] if the directory cannot be created
  def path=(dir = nil)
    raise ArgumentError, 'directory expected' if dir.nil? || dir.empty?
    mkdir(dir, logger, "Cache path set to #{dir}", 'Set cache path failed')
    @path = dir
  end

  # Returns true if the cache path is a valid directory
  # @return [Boolean] true if the cache path is a valid directory
  def path?
    !path.nil? && File.directory?(path)
  end

  # Reads an API data object from the cache or API
  # @param url [String] the URL of the API object (ignored when entry is
  #   given)
  # @param entry [Aspire::Caching::CacheEntry] the cache entry
  # @param json [Boolean] if true, read the JSON API, otherwise read the
  #   linked data API
  # @param use_api [Boolean] if true, call the Aspire API (and cache the
  #   result) when the cache does not supply the data
  # @param use_cache [Boolean] if true, try the cache before the Aspire API
  # @yield [data, entry, from_cache, json] called only if data was read
  # @yieldparam data [Hash] the parsed data from the cache or API call
  # @yieldparam entry [Aspire::Caching::CacheEntry] the cache entry
  # @yieldparam from_cache [Boolean] true if the data was read from the
  #   cache, false if it was read from the API
  # @yieldparam json [Boolean] true if the data is from the JSON API, false
  #   if it is from the linked data API
  # @return [Hash, nil] the parsed JSON data from the cache or API, or nil
  #   if the URL is not cacheable or no data was found
  # @raise [Aspire::Exceptions::ReadError] if the cache read fails
  def read(url = nil,
           entry: nil, json: false, use_api: true, use_cache: true)
    entry ||= cache_entry(url)
    # Try the cache, data is nil on a cache miss
    data = use_cache ? read_cache(entry, json: json) : nil
    from_cache = !data.nil?
    # Try the API if nothing was returned from the cache
    data ||= write(entry: entry, json: json) if use_api
    # Call the block if the read was successful
    yield(data, entry, from_cache, json) if block_given? && data
    # Return the data
    data
  rescue NotCacheable
    # Uncacheable URLs have no data representation in the Aspire API
    nil
  end

  # Removes the URL from the cache
  # @param url [String] the URL of the API object (ignored when entry is
  #   given)
  # @param entry [Aspire::Caching::CacheEntry] the cache entry
  # @param force [Boolean] if true, remove the URL even if it is marked
  #   as in-progress; otherwise fails on marked entries
  # @param remove_children [Boolean] if true, remove all children of the
  #   object as well as the object itself, otherwise remove just the object
  # @yield [data, entry] passes the data and cache entry to the block
  #   before the cached files are removed
  # @yieldparam data [Hash] the parsed JSON data from the cache
  # @yieldparam entry [Aspire::Caching::CacheEntry] the cache entry
  # @return [Hash, nil] the parsed JSON data removed from the cache, or nil
  #   if the URL is not cached or not cacheable
  # @raise [Aspire::Exceptions::MarkedError] if the cache entry is
  #   marked as in-progress and force is false
  # @raise [Aspire::Exceptions::RemoveError] if the operation fails
  def remove(url = nil, entry: nil, force: false, remove_children: false)
    entry ||= cache_entry(url)
    return nil unless entry.cached?
    # Read the data from the cache for the return value
    data = read_cache(entry)
    # Call the block
    yield(data, entry) if block_given?
    # Remove the cached files
    entry.delete(force: force, remove_children: remove_children)
    # Return the cached data
    data
  rescue NotCacheable
    nil
  end

  # Returns the Aspire tenancy host name
  # @return [String, nil] the Aspire tenancy host name, or nil if no linked
  #   data API instance is set
  def tenancy_host
    ld_api ? ld_api.tenancy_host : nil
  end

  # Writes an API object to the cache
  # @param url [String] the URL of the API object (ignored when entry is
  #   given)
  # @param data [Hash, String, nil] parsed or unparsed data to be cached;
  #   if nil, the data is fetched from the Aspire API
  # @param entry [Aspire::Caching::CacheEntry] the cache entry
  # @param json [Boolean] if true, read the JSON API, otherwise read the
  #   linked data API
  # @yield [data, entry] passes the data and cache entry to the block
  # @yieldparam data [Hash] the parsed JSON data from the caller or API call
  # @yieldparam entry [Aspire::Caching::CacheEntry] the cache entry
  # @return [Hash, nil] the parsed JSON data, or nil if no data was available
  # @note A failed cache write is logged by #write_cache and not raised, so
  #   the parsed data is still returned in that case.
  def write(url = nil, data: nil, entry: nil, json: false)
    # Get the cache entry
    entry ||= cache_entry(url)
    # Get the data from the API if not supplied
    raw, parsed = write_data(data) || read_api(entry, json: json)
    return nil unless raw && parsed
    # Write the data to the cache
    write_cache(entry, raw, json: json)
    # Call the block
    yield(parsed, entry) if block_given?
    # Return the data written to the cache
    parsed
  end

  private

  # Converts a status filename to a linked data URL
  # @param filename [String] the filename of a linked data object status
  #   file in the cache
  # @return [String] the linked data API URL for the status file
  def filename_to_url(filename)
    # Remove the cache path
    f = strip_prefix(filename, path)
    # Remove the leading . from the base filename
    f = strip_filename_prefix(f, '.')
    # Remove the leading / from the path
    f.slice!(0) if f.start_with?('/')
    # Return the full Aspire linked data URL
    ld_api.api_url(f)
  end

  # Reads data from the Aspire JSON or linked data APIs
  # @param entry [Aspire::Caching::CacheEntry] the cache entry
  # @param json [Boolean] if true, read the JSON API, otherwise read the
  #   linked data API
  # @return [Array] the unparsed JSON string and parsed hash from the API
  # @note API failures are passed to the logger's #log_exception with
  #   ReadError - presumably this logs and raises; confirm in CacheLogger.
  def read_api(entry, json: false)
    data = json ? read_json_api(entry) : read_linked_data_api(entry)
    logger.log(Logger::DEBUG, read_api_msg('read', entry, json))
    data
  rescue APITimeout, APIError => e
    msg = read_api_msg('read failed', entry, json, e)
    logger.log_exception(msg, ReadError)
  end

  # Returns a log/exception message for #read_api
  # @param msg [String] the event message
  # @param entry [Aspire::Caching::CacheEntry] the cache entry
  # @param json [Boolean] if true, return a JSON API message, otherwise
  #   return a linked data API message
  # @param exception [Exception] the exception
  # @return [String] the formatted log message
  def read_api_msg(msg, entry, json, exception = nil)
    text = "#{entry.url} #{msg} from #{json ? 'JSON' : 'LD'} API"
    # The JSON API URL differs from the entry URL, so include it
    text += " [#{entry.json_api_url}]" if json
    text += ": #{exception}" if exception
    text
  end

  # Reads an Aspire linked data URL from the cache
  # @param entry [Aspire::Caching::CacheEntry] the cache entry
  # @param json [Boolean] if true, read JSON API data, otherwise read
  #   linked data API data
  # @return [Hash, nil] the parsed JSON data from the cache or nil if the
  #   URL is not cached
  # @raise [Aspire::Exceptions::ReadError] if the cache read fails
  def read_cache(entry, json: false)
    data = entry.read(json, parsed: true)
    msg = "#{entry.url}#{json ? ' [JSON]' : ''} read from cache"
    logger.log(Logger::DEBUG, msg)
    data
  rescue CacheMiss
    nil
  end

  # Reads data from the Aspire JSON API
  # @param entry [Aspire::Caching::CacheEntry] the cache entry
  # @return [Array] the unparsed JSON string and parsed hash from the API
  def read_json_api(entry)
    opts = entry.json_api_opt || {}
    url = entry.json_api_url
    # The return inside the block exits this method with the pair
    json_api.call(url, **opts) { |response, data| return response.body, data }
  end

  # Reads data from the Aspire linked data API
  # @param entry [Aspire::Caching::CacheEntry] the cache entry
  # @return [Array] the unparsed JSON string and parsed hash from the API
  def read_linked_data_api(entry)
    # The return inside the block exits this method with the pair
    ld_api.call(entry.url) { |response, data| return response.body, data }
  end

  # Writes data to the cache. A WriteError raised by the entry is logged at
  # ERROR level and is not re-raised.
  # @param entry [Aspire::Caching::CacheEntry] the cache entry
  # @param data [String] the data to be written to the cache
  # @param json [Boolean] if true, write JSON API data, otherwise write
  #   linked data API data
  # @return [void]
  def write_cache(entry, data = nil, json: false)
    entry.write(data, json)
    file_path = entry.path(json)
    logger.log(Logger::INFO, "#{entry.url} written to cache [#{file_path}]")
  rescue WriteError => e
    logger.log(Logger::ERROR, e.to_s)
  end

  # Converts user-supplied data to a string for caching
  # @param data [Hash, Array, String] the data to be written to the cache
  # @return [Array, nil] the unparsed JSON string and parsed data, or nil if
  #   no data is supplied
  def write_data(data = nil)
    # Return nil if no data is supplied
    return nil if data.nil?
    # Already-parsed data: generate the JSON string from it
    return [JSON.generate(data), data] if data.is_a?(Hash) || data.is_a?(Array)
    # Otherwise treat the data as a JSON string and parse it
    raw = data.to_s
    [raw, JSON.parse(raw)]
  end
end
|
364
|
+
end
|
365
|
+
end
|
@@ -0,0 +1,296 @@
|
|
1
|
+
require 'aspire/caching/util'
|
2
|
+
require 'aspire/exceptions'
|
3
|
+
require 'aspire/util'
|
4
|
+
|
5
|
+
module Aspire
|
6
|
+
module Caching
|
7
|
+
# Represents an entry in the cache
|
8
|
+
class CacheEntry
  include Aspire::Caching::Util
  include Aspire::Exceptions
  include Aspire::Util

  # @!attribute [rw] cache
  #   @return [Aspire::Caching::Cache] the cache
  attr_accessor :cache

  # @!attribute [rw] json_api_opt
  #   @return [Hash] #call parameters for the JSON API call
  attr_accessor :json_api_opt

  # @!attribute [rw] json_api_url
  #   @return [String] the JSON API #call URL
  attr_accessor :json_api_url

  # @!attribute [rw] parsed_url
  #   @return [MatchData] the parsed URL
  attr_accessor :parsed_url

  # @!attribute [rw] url
  #   @return [String] the URL
  #   (the writer is #url= below, which also parses and validates the URL)
  attr_reader :url

  # Initialises a new CacheEntry instance
  # @param url [String] the URL of the API object
  # @param cache [Aspire::Caching::Cache] the parent cache
  # @return [void]
  # @raise [Aspire::Exceptions::NotCacheable] if the URL is not
  #   cacheable
  def initialize(url, cache)
    # The cache must be set first: #url= uses it to canonicalise the URL
    self.cache = cache
    self.url = url
  end

  # Returns true if cache entries refer to the same object
  # @param other [Aspire::Caching::CacheEntry, String] a cache entry or URL
  # @return [Boolean] true if the entries refer to the same object
  def ==(other)
    url == url_for_comparison(other, cache.ld_api)
  end

  # Returns true if this cache entry is a child of the URL
  # @param url [Aspire::Caching::CacheEntry, String] the URL to test
  # @param strict [Boolean] if true, the URL must be a parent of this entry,
  #   otherwise the URL must be a parent or the same as this entry
  # @return [Boolean] true if this cache entry is a child of the URL, false
  #   otherwise
  def child_of?(url, strict: false)
    child_url?(parsed_url, url, cache.ld_api, strict: strict)
  end

  # Returns true if the object is in the cache, false if not
  # @param json [Boolean] if true, test for the JSON API file, otherwise
  #   test for the linked data API file
  # @return [Boolean, nil] true if the object is cached, false if not, nil
  #   if JSON API data is requested but the entry has no JSON API data
  def cached?(json = false)
    filename = path(json)
    filename.nil? ? nil : File.exist?(filename)
  end

  # Deletes the object from the cache
  # @param force [Boolean] delete even if the entry is marked in-progress
  # @param remove_children [Boolean] if true, remove children of the object
  #   as well as the object, otherwise remove just the object
  # @return [void]
  # @raise [Aspire::Exceptions::MarkedError] if the entry is
  #   marked in-progress and force = false
  def delete(force: false, remove_children: false)
    # Hold the in-progress mark while deleting; #delete_entry removes the
    # status file along with the data files
    mark(force: force) { |_f| delete_entry(file, remove_children) }
  end

  # Returns the linked data filename in the cache
  # @return [String] the linked data filename in the cache
  def file
    File.join(cache.path, url_path)
  end

  # Returns true if the object has associated JSON API data, false if not
  # @return [Boolean] true if the object has associated JSON API data, false
  #   otherwise
  def json?
    !json_api_url.nil? && !json_api_url.empty?
  end

  # Returns the JSON API data filename in the cache or nil if there is no
  # JSON API data for the URL
  # @param filename [String] the linked data filename in the cache
  #   (default: this entry's linked data filename)
  # @return [String, nil] the JSON API data filename or nil if there is no
  #   JSON API data for the URL
  def json_file(filename = nil)
    json? ? add_filename_suffix(filename || file, '-json') : nil
  end

  # Returns true if the cache entry is a list, false otherwise
  # @param strict [Boolean] if true, the cache entry must be a list,
  #   otherwise the cache entry must be a list or a child of a list
  # @return [Boolean] true if the cache entry is a list, false otherwise
  def list?(strict: true)
    # The cache entry must be a list or the child of a list
    return false unless parsed_url[:type] == 'lists'
    # Strict checking requires that the cache entry is a list, not a child
    return false if strict && !parsed_url[:child_type].nil?
    true
  end

  # Marks the cache entry as in-progress by creating its status file
  # @param force [Boolean] if true, do not raise MarkedError when the entry
  #   is already marked; otherwise, MarkedError is raised when the entry is
  #   already marked.
  # @return [Object, nil] the value of the block, or nil without a block
  # @yield [file] passes the opened status file to the block
  # @yieldparam file [File] the opened status file
  # @raise [Aspire::Exceptions::MarkError] if the operation failed
  # @raise [Aspire::Exceptions::MarkedError] if the cache entry is
  #   already marked
  def mark(force: false, &block)
    filename = status_file
    flags = File::CREAT
    # EXCL makes the open fail with EEXIST if the status file already exists
    flags |= File::EXCL unless force
    # Always use the block form so the status file handle is closed even
    # when the caller supplies no block
    File.open(filename, flags) { |f| block.call(f) if block }
  rescue Errno::EEXIST
    raise MarkedError, "#{url} already marked [#{filename}]"
  rescue SystemCallError => e
    raise MarkError, "#{url} mark failed [#{filename}]: #{e}"
  end

  # Returns true if the cache entry is marked as in-progress
  # @return [Boolean] true if the cache entry is marked as in-progress,
  #   false otherwise
  def marked?
    File.exist?(status_file)
  end

  # Returns true if this cache entry is the parent of the URL
  # @param url [Aspire::Caching::CacheEntry, String] the URL to test
  # @param strict [Boolean] passed to Util#parent_url? - presumably, if
  #   true, the URL must be a child of this entry and not the same as it
  #   (TODO confirm against Aspire::Caching::Util)
  # @return [Boolean] true if this cache entry is the parent of the URL,
  #   false otherwise
  def parent_of?(url, strict: false)
    parent_url?(parsed_url, url, cache.ld_api, strict: strict)
  end

  # Returns the filename of the cache entry
  # @param json [Boolean] if true, returns the JSON API filename, otherwise
  #   returns the linked data API filename
  # @return [String, nil] the filename, or nil if the JSON API filename is
  #   requested but the entry has no JSON API data
  def path(json = false)
    json ? json_file : file
  end

  # Returns data from the cache
  # @param json [Boolean] if true, read the JSON API file, otherwise read
  #   the linked data API file
  # @param parsed [Boolean] if true, return JSON-parsed data, otherwise
  #   return a JSON string
  # @return [Array, Hash, String, nil] the parsed JSON data or JSON string,
  #   or nil if JSON API data is requested but not available for this entry
  # @raise [Aspire::Exceptions::CacheMiss] when the data is not in the cache
  # @raise [Aspire::Exceptions::ReadError] when the read operation fails
  # @note JSON parse errors on a corrupt cache file are not wrapped and
  #   propagate as raised by JSON.parse.
  def read(json = false, parsed: false)
    filename = path(json)
    return nil if filename.nil? || filename.empty?
    data = File.read(filename)
    parsed ? JSON.parse(data) : data
  rescue Errno::ENOENT
    raise CacheMiss, "#{url} cache miss [#{filename}]"
  rescue IOError, SystemCallError => e
    raise ReadError, "#{url} cache read failed [#{filename}]: #{e}"
  end

  # Returns true if the object's references are cacheable
  # @return [Boolean] true if the object's references are cacheable, false
  #   otherwise
  def references?
    # Events are not JSON-LD so we can't cache references
    parsed_url[:type] != 'events' && parsed_url[:child_type] != 'events'
  end

  # Returns the status filename in the cache
  # @param filename [String] the linked data filename in the cache
  #   (default: this entry's linked data filename)
  # @return [String] the status filename
  def status_file(filename = nil)
    # Prepend '.' to the filename
    add_filename_prefix(filename || file, '.')
  end

  # Returns a string representation of the cache entry
  # @return [String] the string representation (URL) of the cache entry
  def to_s
    url
  end

  # Removes an in-progress mark from the cache entry. Does nothing if the
  # entry is not marked.
  # @return [void]
  # @raise [Aspire::Exceptions::UnmarkError] if the status file cannot be
  #   deleted
  def unmark
    filename = status_file
    File.delete(filename)
  rescue Errno::ENOENT
    # Not marked (or unmarked concurrently): nothing to do
    nil
  rescue SystemCallError => e
    raise UnmarkError, "#{url} unmark failed [#{filename}]: #{e}"
  end

  # Sets the URL and the properties derived from it
  # @param u [String] the URL of the API object
  # @return [void]
  # @raise [Aspire::Exceptions::NotCacheable] if the URL is not
  #   cacheable
  def url=(u)
    # Convert the URL to canonical form for comparison
    u = cache.canonical_url(u)
    # Parse and check the URL
    # - this will raise NotCacheable if it is not a valid cacheable URL
    self.parsed_url = cacheable_url(u)
    # Set the URL properties
    @url = u
    if list_url?(parsed: parsed_url)
      self.json_api_opt = { bookjacket: 1, editions: 1, draft: 1, history: 1 }
      self.json_api_url = "lists/#{strip_ext(parsed_url[:id])}"
    else
      # Only list URLs have JSON API settings; clear any left over from a
      # previously assigned list URL
      self.json_api_opt = nil
      self.json_api_url = nil
    end
  end

  # Writes data to the cache
  # @param data [Object] the data to write to the cache
  # @param json [Boolean] if true, write the data as JSON API data,
  #   otherwise write it as linked data
  # @param parsed [Boolean] if true, treat data as a parsed JSON data
  #   structure, otherwise treat it as a JSON string
  # @return [void]
  # @raise [Aspire::Exceptions::WriteError] when the write operation fails
  def write(data, json = false, parsed: false)
    filename = path(json)
    return if filename.nil? || filename.empty?
    # Serialise first so a JSON error cannot leave an emptied cache file
    content = parsed ? JSON.generate(data) : data
    # Create the path to the file
    FileUtils.mkdir_p(File.dirname(filename), mode: cache.mode)
    # Open without truncating: the file must not be emptied until the
    # exclusive lock is held, or a concurrent writer's data could be wiped
    File.open(filename, File::WRONLY | File::CREAT) do |f|
      f.flock(File::LOCK_EX)
      f.truncate(0)
      f.write(content)
    end
  rescue IOError, JSON::JSONError, SystemCallError => e
    raise WriteError, "#{url} cache write failed [#{filename}]: #{e}"
  end

  private

  # Deletes children of the cache entry
  # @param filename [String] the linked data API filename
  # @return [nil]
  # @raise [Aspire::Exceptions::RemoveError] if the operation fails
  def delete_children(filename)
    # Child objects of the cache entry are stored in a directory with the
    # same name as the linked data cache file without the '.json' extension
    children = "#{strip_ext(filename)}/*"
    # Safety guard: never glob the filesystem root if the filename is empty
    return if children.empty? || children == '/*'
    FileUtils.rm_rf(Dir.glob(children), secure: true)
    nil
  rescue SystemCallError => e
    raise RemoveError, "#{url} remove failed [#{children}]: #{e}"
  end

  # Deletes the files for the cache entry and removes any empty directories
  # on the cache file's path
  # @param filename [String] the linked data filename in the cache
  # @param remove_children [Boolean] if true, also delete the children of
  #   the cache entry
  # @return [nil]
  def delete_entry(filename, remove_children = false)
    # Delete the files for the cache entry
    delete_file(filename)
    delete_file(json_file(filename))
    delete_file(status_file(filename))
    delete_children(filename) if remove_children
    # Delete any empty directories on the entry's file path
    rmdir_empty(filename, cache.path)
  end

  # Deletes the specified file
  # @param filename [String, nil] the filename to delete; nil or empty
  #   filenames are ignored
  # @return [void]
  # @raise [Aspire::Exceptions::RemoveError] if the delete fails
  #   for any reason other than the file not existing
  def delete_file(filename)
    File.delete(filename) unless filename.nil? || filename.empty?
  rescue Errno::ENOENT
    # Ignore file-does-not-exist errors
    nil
  rescue SystemCallError => e
    raise RemoveError, "#{url} remove failed [#{filename}]: #{e}"
  end
end
|
295
|
+
end
|
296
|
+
end
|