aspire 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +59 -0
  3. data/.rbenv-gemsets +1 -0
  4. data/.travis.yml +5 -0
  5. data/CODE_OF_CONDUCT.md +74 -0
  6. data/Dockerfile +20 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +851 -0
  10. data/Rakefile +10 -0
  11. data/aspire.gemspec +40 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/entrypoint.sh +11 -0
  15. data/exe/build-cache +13 -0
  16. data/lib/aspire.rb +11 -0
  17. data/lib/aspire/api.rb +2 -0
  18. data/lib/aspire/api/base.rb +198 -0
  19. data/lib/aspire/api/json.rb +195 -0
  20. data/lib/aspire/api/linked_data.rb +214 -0
  21. data/lib/aspire/caching.rb +4 -0
  22. data/lib/aspire/caching/builder.rb +356 -0
  23. data/lib/aspire/caching/cache.rb +365 -0
  24. data/lib/aspire/caching/cache_entry.rb +296 -0
  25. data/lib/aspire/caching/cache_logger.rb +63 -0
  26. data/lib/aspire/caching/util.rb +210 -0
  27. data/lib/aspire/cli/cache_builder.rb +123 -0
  28. data/lib/aspire/cli/command.rb +20 -0
  29. data/lib/aspire/enumerator/base.rb +29 -0
  30. data/lib/aspire/enumerator/json_enumerator.rb +130 -0
  31. data/lib/aspire/enumerator/linked_data_uri_enumerator.rb +32 -0
  32. data/lib/aspire/enumerator/report_enumerator.rb +64 -0
  33. data/lib/aspire/exceptions.rb +36 -0
  34. data/lib/aspire/object.rb +7 -0
  35. data/lib/aspire/object/base.rb +155 -0
  36. data/lib/aspire/object/digitisation.rb +43 -0
  37. data/lib/aspire/object/factory.rb +87 -0
  38. data/lib/aspire/object/list.rb +590 -0
  39. data/lib/aspire/object/module.rb +36 -0
  40. data/lib/aspire/object/resource.rb +371 -0
  41. data/lib/aspire/object/time_period.rb +47 -0
  42. data/lib/aspire/object/user.rb +46 -0
  43. data/lib/aspire/properties.rb +20 -0
  44. data/lib/aspire/user_lookup.rb +103 -0
  45. data/lib/aspire/util.rb +185 -0
  46. data/lib/aspire/version.rb +3 -0
  47. data/lib/retry.rb +197 -0
  48. metadata +274 -0
@@ -0,0 +1,214 @@
1
+ require 'uri'
2
+
3
+ require_relative 'base'
4
+
5
+ module Aspire
6
+ module API
7
+ # A wrapper class for the Aspire linked data API
8
+ class LinkedData < Base
9
+ # The tenancy domain
10
+ TENANCY_DOMAIN = 'myreadinglists.org'.freeze
11
+
12
+ # @!attribute [rw] linked_data_root
13
+ # @return [URI] the root URI of linked data URIs
14
+ attr_accessor :linked_data_root
15
+
16
+ # @!attribute [rw] tenancy_host_aliases
17
+ # @return [Array<String>] the list of non-canonical tenancy host names
18
+ attr_accessor :tenancy_host_aliases
19
+
20
+ # @!attribute [rw] tenancy_root
21
+ # @return [URI] the canonical root URI of the tenancy
22
+ attr_accessor :tenancy_root
23
+
24
# Creates a wrapper for the linked data API of a single Aspire tenancy
# @param tenancy_code [String] the Aspire tenancy code
# @param opts [Hash] the options hash; it is also forwarded unchanged to the
#   superclass initialiser
# @option opts [String] :linked_data_root the root URI of linked data URIs
# @option opts [Array<String>] :tenancy_host_aliases the list of host name
#   aliases for the tenancy
# @option opts [String] :tenancy_root the canonical root URI of the tenancy
# @return [void]
def initialize(tenancy_code, **opts)
  super(tenancy_code, **opts)
  ld_root, host_aliases, root =
    opts.values_at(:linked_data_root, :tenancy_host_aliases, :tenancy_root)
  # Assign through the writers so that missing values receive their defaults
  # and URLs are parsed into URI instances
  self.linked_data_root = ld_root
  self.tenancy_host_aliases = host_aliases
  self.tenancy_root = root
end
40
+
41
# Returns a full Aspire tenancy URL from a partial resource path
# @param path [String] the partial resource path, or a complete URL
# @return [String] the full tenancy URL; a path containing '//' is treated
#   as a complete URL and returned unchanged
def api_url(path)
  return path if path.include?('//')
  # Join with exactly one slash, whether or not the root ends with one or
  # the path starts with one
  root = tenancy_root.to_s.sub(%r{/+\z}, '')
  "#{root}/#{path.sub(%r{\A/+}, '')}"
end
47
+
48
# Fetches a resource through the Aspire linked data API
# @param url [String] the partial (minus the tenancy root) or complete
#   tenancy URL of the resource
# @return [Hash] the parsed JSON content from the API response
# @yield [response, data] Passes the REST client response and parsed JSON
#   hash to the block
# @yieldparam response [RestClient::Response] the REST client response
# @yieldparam data [Hash] the parsed JSON data from the response
def call(url)
  target = api_url(url)
  # The request is always made against the '.json' form of the URL
  target = "#{target}.json" unless target.end_with?('.json')
  response, data = call_api(**call_rest_options(target))
  yield(response, data) if block_given?
  data
end
64
+
65
# Builds the canonical host name of the tenancy by joining the tenancy code
# and TENANCY_DOMAIN with a dot
# @return [String] the canonical host name for the tenancy
def canonical_host
  format('%s.%s', tenancy_code, TENANCY_DOMAIN)
end
70
+
71
# Converts an Aspire tenancy alias or URL to canonical form
# @param url [String] an Aspire host name or URL
# @return [String, nil] the equivalent URL on the canonical tenancy host, or
#   nil if the host is not a valid tenancy host or alias
def canonical_url(url)
  # No format is passed, so rewrite_url applies its default format extension
  canonical = tenancy_host
  rewrite_url(url, canonical)
end
80
+
81
# Returns the host component of the linked data root URI
# @return [String] the linked data URI host name
def linked_data_host
  root = linked_data_root
  root.host
end
86
+
87
# Stores the linked data root, parsed into a URI instance by parse_url
# @param url [String] the linked data root URL
# @return [URI] the linked data root URI instance
# @raise [URI::InvalidComponentError] if the URL is invalid
# @raise [URI::InvalidURIError] if the URL is invalid
def linked_data_root=(url)
  parsed = parse_url(url)
  @linked_data_root = parsed
end
95
+
96
# Converts an Aspire URL to the form used in linked data APIs
# @param url [String] an Aspire URL
# @return [String, nil] the equivalent linked data URL, or nil if the URL's
#   host is not a valid tenancy host
def linked_data_url(url)
  # An empty format string asks rewrite_url to drop any format extension
  ld_host = linked_data_host
  rewrite_url(url, ld_host, '')
end
103
+
104
# Returns the host component of the canonical tenancy root URI
# @return [String] the canonical tenancy host name
def tenancy_host
  root = tenancy_root
  root.host
end
109
+
110
# Sets the list of tenancy host name aliases
# @param aliases [Array<String>, String, nil] the tenancy aliases (host
#   names or URLs); nil selects the canonical host as the only alias, an
#   empty value clears the list
# @return [void]
def tenancy_host_aliases=(aliases)
  @tenancy_host_aliases =
    if aliases.nil?
      [canonical_host]
    elsif aliases.empty?
      []
    else
      # Reduce every alias to its host name and drop those without one
      candidates = aliases.is_a?(Array) ? aliases : [aliases]
      hosts = candidates.map { |candidate| uri_host(candidate) }
      hosts.reject { |host| host.nil? || host.empty? }
    end
end
125
+
126
# Stores the tenancy root, parsed into a URI instance by parse_url
# @param url [String] the tenancy root URL
# @return [URI] the tenancy root URI instance
# @raise [URI::InvalidComponentError] if the URL is invalid
# @raise [URI::InvalidURIError] if the URL is invalid
def tenancy_root=(url)
  parsed = parse_url(url)
  @tenancy_root = parsed
end
134
+
135
# Tests whether a host name is the tenancy host or one of its aliases
# @param host [String, URI] the host name, or a URI whose host is tested
# @return [Boolean] true if the hostname is valid, false otherwise
def valid_host?(host)
  return false if host.nil?
  name = host.is_a?(URI) ? host.host : host
  return true if name == tenancy_host
  tenancy_host_aliases.include?(name)
end
143
+
144
# Tests whether a URL or host belongs to the tenancy
# @param url [String] the URL or host
# @return [Boolean] true if the URL or host is valid; false if it is nil,
#   cannot be parsed as a URI, or its host is not a tenancy host
def valid_url?(url)
  return false if url.nil?
  valid_host?(uri(url))
rescue URI::InvalidComponentError, URI::InvalidURIError
  false
end
152
+
153
+ private
154
+
155
# Returns a URI instance for a URL
# @param url [String, URI, nil] the URL; the canonical tenancy host is used
#   if this is nil or empty
# @return [URI] the URI instance
# @raise [URI::InvalidComponentError] if the URL is invalid
# @raise [URI::InvalidURIError] if the URL is invalid
def parse_url(url)
  # Work from the string form so that URI instances (the type the root
  # attributes return) are accepted as well as strings; nil becomes ''
  url = url.to_s
  # Use the default tenancy host name if no URI is specified
  url = canonical_host if url.empty?
  uri = URI.parse(url)
  # A bare host name parses with a nil host and the whole string in the
  # path, so move the path into the host
  if uri.host.nil? || uri.host.empty?
    uri.host = uri.path
    uri.path = ''
  end
  # Set the URI scheme if required
  uri.scheme ||= SCHEME
  uri
end
175
+
176
# Replaces the host name, and optionally the format extension, of a URL
# @param url [String] the URL
# @param host [String] the new host name
# @param format [String] the format suffix - defaults to '.json' if not
#   specified, specify an empty string to remove any format
# @return [String, nil] the new URL, or nil if the URL is invalid or its
#   host is not a valid tenancy host
def rewrite_url(url, host, format = nil)
  parsed = uri(url)
  # Only URLs on a known tenancy host are rewritten
  return nil unless valid_host?(parsed)
  parsed.host = host
  parsed.path = rewrite_url_format(parsed.path, format)
  parsed.to_s
rescue URI::InvalidComponentError, URI::InvalidURIError
  nil
end
195
+
196
# Replaces the format extension of a URL
# @param url [String] the URL
# @param format [String] the new format - defaults to '.json' if not given.
#   Specify an empty string to remove the existing format
# @return [String] the new URL
def rewrite_url_format(url, format = nil)
  suffix = format || '.json'
  # Cut the URL at the last occurrence of its current extension, if any
  extension = File.extname(url)
  stem = extension.nil? || extension.empty? ? url : url[0, url.rindex(extension)]
  # An empty URL is left alone, as is one already ending in the new format
  return stem if stem.empty? || stem.end_with?(suffix)
  "#{stem}#{suffix}"
end
212
+ end
213
+ end
214
+ end
@@ -0,0 +1,4 @@
1
+ require 'aspire/caching/builder'
2
+ require 'aspire/caching/cache'
3
+ require 'aspire/caching/cache_entry'
4
+ require 'aspire/exceptions'
@@ -0,0 +1,356 @@
1
+ require 'benchmark'
2
+ require 'json'
3
+ require 'logger'
4
+ require 'uri'
5
+
6
+ require 'aspire/caching/cache_entry'
7
+ require 'aspire/caching/cache_logger'
8
+ require 'aspire/caching/util'
9
+ require 'aspire/exceptions'
10
+
11
+ module Aspire
12
+ # Tools for building a cache from the Aspire APIs
13
+ module Caching
14
+ # Caches Aspire API objects and their references
15
+ class Builder
16
+ include Aspire::Exceptions
17
+ include Util
18
+
19
+ # @!attribute [rw] cache
20
+ # @return [Aspire::Caching::Cache] the Aspire cache
21
+ attr_accessor :cache
22
+
23
# Initialises a new Builder instance
# @param cache [Aspire::Caching::Cache, nil] the Aspire cache the builder
#   writes to
# @return [void]
def initialize(cache = nil)
  # Assigned through the accessor rather than the instance variable
  self.cache = cache
end
29
+
30
# Builds a cache of Aspire lists from the Aspire All Lists report
# @param enumerator [Aspire::Enumerator::ReportEnumerator] the Aspire
#   All Lists report enumerator; each row must provide a 'List Link' value
# @param clear [Boolean] if true, clear the cache before building
# @return [Integer] the number of lists processed from the report
def build(enumerator, clear: false)
  # Empty the cache if required
  cache.clear if clear
  # Cache the enumerated lists
  # - call with reload: false so that existing cache entries are ignored
  #   to speed up processing
  lists = 0
  time = Benchmark.measure do
    enumerator.each do |row|
      write_list(row['List Link'], reload: false)
      lists += 1
    end
  end
  # Log completion
  cache.logger.info("#{lists} lists cached in #{duration(time)}")
  # Return the count as documented, not the logger's return value
  lists
end
51
+
52
# Resumes an interrupted build
# @param enumerator [Aspire::Enumerator::ReportEnumerator] the Aspire
#   All Lists report enumerator
# @return [Object] the result of #build
def resume(enumerator)
  cache.logger.info('Resuming previous build')
  # Lists still marked as in-progress were interrupted part-way through, so
  # reload them before continuing
  reload_marked_lists
  # Continue without clearing, so lists already cached are not rebuilt
  build(enumerator, clear: false)
end
63
+
64
# Caches an Aspire linked data API object.
# Use write(url) to build a cache for the first time.
# Use write(url, reload: true) to reload parts of the cache.
#
# Parsed data from the Linked Data API has the following structure:
#   { url => {primary-object},
#     related-url1 => {related-object1}, ... }
# where url => {primary-object} is the object referenced by the url
# parameter, and the related URLs/objects are objects referenced by the
# primary object and included in the API response.
#
# The primary and related objects are written to the cache before any object
# references within the primary and related objects are followed. This
# should reduce unnecessary duplication of API calls.
#
# Some objects with a linked data URL are not accessible through that API
# (e.g. users /users/<user-id> are not accessible, but user notes
# /users/<user-id>/notes/<note-id> are accessible).
#
# Some objects with a linked data URL are accessible through the API but do
# not return JSON-LD (e.g. events /events/<event-id> return regular JSON
# rather than JSON-LD). These objects are cached but no attempt is made to
# follow LD references within them.
#
# @param url [String, Aspire::Caching::CacheEntry] the URL or cache entry
#   of the API object
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
#   if omitted, this is read from the API
# @param list [Aspire::Caching::CacheEntry] the parent list cache entry;
#   if present, this implies that references to other lists are ignored
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @param urls [Hash] the set of URLs handled in the current operation
# @return [void]
def write(url = nil, data = nil, list: nil, reload: true, urls: {})
  entry = cache_entry(url, list)
  if entry && write?(entry, urls, list, reload)
    write_data(entry, urls, data, list, reload)
  end
rescue NotCacheable
  # Objects that cannot be cached are skipped silently
  nil
rescue StandardError => error
  # Report and log the error, then let the caller continue processing
  Raven.capture_exception(error)
  cache.logger.error(error.to_s)
rescue Exception => fatal
  # Report and log anything more serious, then fail
  Raven.capture_exception(fatal)
  cache.logger.fatal(fatal.to_s)
  raise fatal
end
118
+
119
# Caches an Aspire linked data API list object and ignores any references
# to other lists
# @param url [String, Aspire::Caching::CacheEntry] the URL or cache entry
#   of the API list object
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
#   if omitted, this is read from the API
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [void]
# @raise [ArgumentError] if no URL is given or the URL is not a list
def write_list(url = nil, data = nil, reload: true)
  entry = cache_entry(url)
  # cache_entry returns nil when no URL is given; report that as the
  # documented ArgumentError rather than a NoMethodError on nil
  raise ArgumentError, 'List expected' unless entry && entry.list?
  # The list is its own parent, so references to other lists are ignored
  write(entry, data, list: entry, reload: reload)
rescue NotCacheable
  # Lists that cannot be cached are skipped silently
  nil
end
135
+
136
+ private
137
+
138
# Returns true if the entry should be skipped because of its cache state,
# false if it should be processed.
# When reloading, only entries marked as in-progress are skipped.
# When not reloading, every cached entry is skipped.
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [Boolean] true if the entry should be skipped, false otherwise
def already_cached?(entry, reload)
  skip_message =
    if entry.marked? && reload
      "#{entry.url} ignored, in progress (reload)"
    elsif entry.cached? && !reload
      "#{entry.url} ignored, in cache"
    end
  # No message means neither skip condition applied
  return false unless skip_message
  cache.logger.debug(skip_message)
  true
end
156
+
157
# Returns true if a URL has already been handled in this operation
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param urls [Hash] the set of URLs handled in the current operation,
#   keyed by URL
# @return [Boolean] true if the URL has already been handled, false if not
def already_handled?(entry, urls)
  urls.include?(entry.url) ? true : false
end
166
+
167
# Returns the CacheEntry instance for a URL
# @param url [String, Aspire::Caching::CacheEntry, nil] the URL or an
#   existing cache entry
# @param default [Aspire::Caching::CacheEntry, nil] the value returned if
#   no URL is given
# @return [Aspire::Caching::CacheEntry, nil] the default if url is nil, url
#   itself if it is already a cache entry, otherwise a new entry for the URL
def cache_entry(url, default = nil)
  case url
  when nil then default
  when CacheEntry then url
  else CacheEntry.new(url, cache)
  end
end
177
+
178
# Deletes a cache entry and writes it again with reload enabled
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @return [void]
def reload(entry)
  cache.logger.log(Logger::INFO, "Reloading #{entry.url}")
  entry.delete(force: true)
  # Lists go through write_list so that references to other lists are
  # ignored; anything else is written directly
  return write_list(entry, reload: true) if entry.list?(strict: true)
  write(entry, reload: true)
end
190
+
191
# Reloads every entry the cache reports as marked in-progress.
# Positional parameters are the object types to include, e.g. 'lists',
# 'resources' etc. - default: all object types
# @param types [Array<String>] the object types to include
# @return [void]
def reload_marked_entries(*types)
  cache.marked_entries(*types) do |marked|
    reload(marked)
  end
end
198
+
199
# Reloads every list the cache reports as marked in-progress
# @return [void]
def reload_marked_lists
  # Same as reloading marked entries restricted to the 'lists' type
  reload_marked_entries('lists')
end
204
+
205
# Returns true if the cache entry is a list which is unrelated to the
# parent list. This prevents unrelated lists being downloaded through
# paths such as list.usedBy -> module.usesList -> [unrelated lists].
# Returns false if:
#   no parent list is provided,
#   or the cache entry is not a list,
#   or it is the same as the parent list,
#   or it is a child of the parent list.
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
# @return [Boolean] true if the cache entry is a list unrelated to the
#   parent list, otherwise false
def unrelated_list?(entry, parent_list)
  # Only lists processed under a parent list can be unrelated
  candidate = parent_list && entry.list?(strict: false)
  return false unless candidate
  # Children of (or the same as) the parent list are related
  return false if entry.child_of?(parent_list, strict: false)
  cache.logger.debug(
    "#{entry.url} ignored, not related to #{parent_list.url}"
  )
  true
end
229
+
230
# Writes a linked data API object and its references to the cache
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param urls [Hash] the set of URLs handled in the current operation
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
#   if omitted, this is read from the API
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [Array] the linked data and JSON API data of the object
def write_data(entry, urls, data = nil, parent_list = nil, reload = true)
  linked_data, json_data = write_object(entry, urls, data, reload)
  result = [linked_data, json_data]
  # Nothing to follow without linked data or for entries without references
  return result unless linked_data && entry.references?
  # The entry stays marked while its related objects and references are
  # written; the mark is only removed once both steps return
  entry.mark
  write_related(entry, urls, linked_data, parent_list, reload)
  write_references(urls, linked_data, parent_list, reload)
  entry.unmark
  result
end
255
+
256
# Caches a linked data API object and any associated JSON API object, and
# flags the entry's URL as handled
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param urls [Hash] the set of URLs handled in the current operation;
#   updated in place
# @param data [Hash, nil] the parsed JSON linked data of the object; if
#   omitted, the data is read from the API URL
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [Array] the linked data and the JSON API data (nil if the entry
#   has none) of the object
def write_object(entry, urls, data = nil, reload = true)
  # Reloading means the cache must be bypassed
  from_cache = !reload
  linked_data = write_object_data(entry, data, from_cache)
  json_data = write_object_json(entry, from_cache)
  urls[entry.url] = true
  [linked_data, json_data]
end
276
+
277
# Writes a linked data API object to the cache
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param data [Hash, nil] the data to write to the cache; if omitted, the
#   object is read through the cache instead
# @param use_cache [Boolean] if true, return data from the cache,
#   otherwise update the cache with data from the API
# @return [Object] the result of the cache write or read
def write_object_data(entry, data, use_cache)
  # Supplied data is written as-is; use_cache only applies to reads
  return cache.write(data: data, entry: entry) if data
  cache.read(entry: entry, use_cache: use_cache)
end
289
+
290
# Writes a JSON API object to the cache
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param use_cache [Boolean] if true, return data from the cache,
#   otherwise update the cache with data from the API
# @return [Object, nil] the result of the cache read, or nil if the entry
#   has no JSON API object
def write_object_json(entry, use_cache)
  entry.json? ? cache.read(entry: entry, json: true, use_cache: use_cache) : nil
end
298
+
299
# Caches all the objects referenced by the argument object
# @param urls [Hash] the set of URLs handled in the current operation
# @param data [Hash] the parsed linked data object, keyed by URL
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [void]
def write_references(urls, data, parent_list = nil, reload = true)
  data.each do |object_url, object_data|
    # Follow each reference found in this object as soon as it is found
    references(object_url, object_data).each do |reference|
      write(reference, list: parent_list, reload: reload, urls: urls)
    end
  end
end
316
+
317
# Caches related linked data API objects included with the primary object
# @param entry [Aspire::Caching::CacheEntry] the cache entry of the primary
#   object
# @param urls [Hash] the set of URLs handled in the current operation
# @param data [Hash] the parsed linked data API object, keyed by URL
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [void]
def write_related(entry, urls, data, parent_list = nil, reload = true)
  data.each do |related_url, related_data|
    # Skip the primary object, identified by its canonical URL
    primary = entry.url == cache.canonical_url(related_url)
    next if primary
    # Each related object is written as a single-object data hash
    single = { related_url => related_data }
    write(related_url, single, list: parent_list, reload: reload, urls: urls)
  end
end
336
+
337
# Returns true if the URL should be written to the cache, false if not.
# A URL is rejected if it was already handled in the current operation, if
# its cache state says it should be skipped, or if it is a list unrelated
# to the parent list; the checks run in that order and stop at the first
# one that rejects the URL.
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param urls [Hash] the set of URLs handled in the current operation
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [Boolean] true if the URL should be written to the cache, false
#   if not
def write?(entry, urls, parent_list = nil, reload = true)
  rejected = already_handled?(entry, urls) ||
             already_cached?(entry, reload) ||
             unrelated_list?(entry, parent_list)
  !rejected
end
354
+ end
355
+ end
356
+ end