aspire 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +59 -0
- data/.rbenv-gemsets +1 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Dockerfile +20 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +851 -0
- data/Rakefile +10 -0
- data/aspire.gemspec +40 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/entrypoint.sh +11 -0
- data/exe/build-cache +13 -0
- data/lib/aspire.rb +11 -0
- data/lib/aspire/api.rb +2 -0
- data/lib/aspire/api/base.rb +198 -0
- data/lib/aspire/api/json.rb +195 -0
- data/lib/aspire/api/linked_data.rb +214 -0
- data/lib/aspire/caching.rb +4 -0
- data/lib/aspire/caching/builder.rb +356 -0
- data/lib/aspire/caching/cache.rb +365 -0
- data/lib/aspire/caching/cache_entry.rb +296 -0
- data/lib/aspire/caching/cache_logger.rb +63 -0
- data/lib/aspire/caching/util.rb +210 -0
- data/lib/aspire/cli/cache_builder.rb +123 -0
- data/lib/aspire/cli/command.rb +20 -0
- data/lib/aspire/enumerator/base.rb +29 -0
- data/lib/aspire/enumerator/json_enumerator.rb +130 -0
- data/lib/aspire/enumerator/linked_data_uri_enumerator.rb +32 -0
- data/lib/aspire/enumerator/report_enumerator.rb +64 -0
- data/lib/aspire/exceptions.rb +36 -0
- data/lib/aspire/object.rb +7 -0
- data/lib/aspire/object/base.rb +155 -0
- data/lib/aspire/object/digitisation.rb +43 -0
- data/lib/aspire/object/factory.rb +87 -0
- data/lib/aspire/object/list.rb +590 -0
- data/lib/aspire/object/module.rb +36 -0
- data/lib/aspire/object/resource.rb +371 -0
- data/lib/aspire/object/time_period.rb +47 -0
- data/lib/aspire/object/user.rb +46 -0
- data/lib/aspire/properties.rb +20 -0
- data/lib/aspire/user_lookup.rb +103 -0
- data/lib/aspire/util.rb +185 -0
- data/lib/aspire/version.rb +3 -0
- data/lib/retry.rb +197 -0
- metadata +274 -0
@@ -0,0 +1,214 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
require_relative 'base'
|
4
|
+
|
5
|
+
module Aspire
|
6
|
+
module API
|
7
|
+
# A wrapper class for the Aspire linked data API
|
8
|
+
class LinkedData < Base
|
9
|
+
# The tenancy domain
|
10
|
+
TENANCY_DOMAIN = 'myreadinglists.org'.freeze
|
11
|
+
|
12
|
+
# @!attribute [rw] linked_data_root
|
13
|
+
# @return [URI] the root URI of linked data URIs
|
14
|
+
attr_accessor :linked_data_root
|
15
|
+
|
16
|
+
# @!attribute [rw] tenancy_host_aliases
|
17
|
+
# @return [Array<String>] the list of non-canonical tenancy host names
|
18
|
+
attr_accessor :tenancy_host_aliases
|
19
|
+
|
20
|
+
# @!attribute [rw] tenancy_root
|
21
|
+
# @return [URI] the canonical root URI of the tenancy
|
22
|
+
attr_accessor :tenancy_root
|
23
|
+
|
24
|
+
# Initialises a new LinkedData instance
|
25
|
+
# @param tenancy_code [String] the Aspire tenancy code
|
26
|
+
# @param opts [Hash] the options hash
|
27
|
+
# @option opts [String] :linked_data_root the root URI of linked data URIs
|
28
|
+
# usually 'http://<tenancy-code>.myreadinglists.org'
|
29
|
+
# @option opts [Array<String>] :tenancy_host_aliases the list of host
|
30
|
+
# name aliases for the tenancy
|
31
|
+
# @option opts [String] :tenancy_root the canonical root URI of the
|
32
|
+
# tenancy, usually 'http://<tenancy-code>.rl.talis.com'
|
33
|
+
# @return [void]
|
34
|
+
def initialize(tenancy_code, **opts)
  # The base class handles the tenancy code and the shared options
  super(tenancy_code, **opts)
  # Pull out the linked-data-specific options in one go; missing keys are nil
  ld_root, host_aliases, root = opts.values_at(
    :linked_data_root, :tenancy_host_aliases, :tenancy_root
  )
  # Go through the writers (not the instance variables) so that nil values
  # receive each writer's default handling
  self.linked_data_root = ld_root
  self.tenancy_host_aliases = host_aliases
  self.tenancy_root = root
end
|
40
|
+
|
41
|
+
# Returns a full Aspire tenancy URL from a partial resource path
|
42
|
+
# @param path [String] the partial resource path
|
43
|
+
# @return [String] the full tenancy URL
|
44
|
+
def api_url(path)
  # Anything containing '//' is treated as an already-complete URL
  return path if path.include?('//')
  # Join root and path with exactly one slash: drop a trailing slash from
  # the root and a leading slash from the path so that neither
  # 'http://host/' nor '/lists/x' produces 'http://host//lists/x'
  root = tenancy_root.to_s.chomp('/')
  "#{root}/#{path.sub(%r{\A/}, '')}"
end
|
47
|
+
|
48
|
+
# Returns parsed JSON data for a URI using the Aspire linked data API
|
49
|
+
# @param url [String] the partial (minus the tenancy root) or complete
|
50
|
+
# tenancy URL of the resource
|
51
|
+
# @return [Hash] the parsed JSON content from the API response
|
52
|
+
# @yield [response, data] Passes the REST client response and parsed JSON
|
53
|
+
# hash to the block
|
54
|
+
# @yieldparam response [RestClient::Response] the REST client response
|
55
|
+
# @yieldparam data [Hash] the parsed JSON data from the response
|
56
|
+
def call(url)
  # Expand partial paths to a full tenancy URL
  target = api_url(url)
  # The linked data API is always queried for its JSON representation
  target = "#{target}.json" unless target.end_with?('.json')
  response, data = call_api(**call_rest_options(target))
  # Give the caller access to the raw response as well as the parsed data
  yield(response, data) if block_given?
  data
end
|
64
|
+
|
65
|
+
# Returns the canonical host name for an Aspire tenancy
|
66
|
+
# @return [String] the canonical host name for the tenancy
|
67
|
+
def canonical_host
  # <tenancy-code>.<tenancy-domain>
  [tenancy_code, TENANCY_DOMAIN].join('.')
end
|
70
|
+
|
71
|
+
# Converts an Aspire tenancy alias or URL to canonical form
|
72
|
+
# @param url [String] an Aspire host name or URL
|
73
|
+
# @return [String, nil] the equivalent canonical host name or URL using
|
74
|
+
# the tenancy base URL, or nil if the host is not a valid tenancy alias
|
75
|
+
def canonical_url(url)
  # Swap in the canonical tenancy host; a nil format makes rewrite_url
  # apply its default format extension
  canonical = tenancy_host
  rewrite_url(url, canonical)
end
|
80
|
+
|
81
|
+
# Returns the linked data URI host name
|
82
|
+
# @return [String] the linked data URI host name
|
83
|
+
def linked_data_host
  # The host component of the linked data root URI
  root = linked_data_root
  root.host
end
|
86
|
+
|
87
|
+
# Sets the linked data root URL
|
88
|
+
# @param url [String] the linked data root URL
|
89
|
+
# @return [URI] the linked data root URI instance
|
90
|
+
# @raise [URI::InvalidComponentError] if the URL is invalid
|
91
|
+
# @raise [URI::InvalidURIError] if the URL is invalid
|
92
|
+
def linked_data_root=(url)
  # Normalise the URL string to a URI instance before storing it
  parsed = parse_url(url)
  @linked_data_root = parsed
end
|
95
|
+
|
96
|
+
# Converts an Aspire URL to the form used in linked data APIs
|
97
|
+
# @param url [String] an Aspire URL
|
98
|
+
# @return [String, nil] the equivalent linked data URL
|
99
|
+
def linked_data_url(url)
  # Swap in the linked data host; the empty format strips any existing
  # format extension instead of adding one
  host = linked_data_host
  rewrite_url(url, host, '')
end
|
103
|
+
|
104
|
+
# Returns the canonical tenancy host name
|
105
|
+
# @return [String] the canonical tenancy host name
|
106
|
+
def tenancy_host
  # The host component of the canonical tenancy root URI
  root = tenancy_root
  root.host
end
|
109
|
+
|
110
|
+
# Sets the list of tenancy aliases
|
111
|
+
# @param aliases [Array<String>] the list of tenancy aliases
|
112
|
+
# @return [void]
|
113
|
+
def tenancy_host_aliases=(aliases)
  @tenancy_host_aliases =
    if aliases.nil?
      # No aliases given: fall back to the canonical host name
      [canonical_host]
    elsif aliases.empty?
      # An explicitly empty value means "no aliases"
      []
    else
      # Accept a single alias as well as a list, reduce each one to its
      # host name and drop any that yield no host
      candidates = aliases.is_a?(Array) ? aliases : [aliases]
      hosts = candidates.map { |candidate| uri_host(candidate) }
      hosts.reject { |host| host.nil? || host.empty? }
    end
end
|
125
|
+
|
126
|
+
# Sets the tenancy root URL
|
127
|
+
# @param url [String] the tenancy root URL
|
128
|
+
# @return [URI] the tenancy root URI instance
|
129
|
+
# @raise [URI::InvalidComponentError] if the URL is invalid
|
130
|
+
# @raise [URI::InvalidURIError] if the URL is invalid
|
131
|
+
def tenancy_root=(url)
  # Normalise the URL string to a URI instance before storing it
  parsed = parse_url(url)
  @tenancy_root = parsed
end
|
134
|
+
|
135
|
+
# Returns true if host is a valid tenancy hostname
|
136
|
+
# @param host [String, URI] the hostname
|
137
|
+
# @return [Boolean] true if the hostname is valid, false otherwise
|
138
|
+
def valid_host?(host)
  return false if host.nil?
  # Accept either a bare host name or a URI, from which the host is taken
  name = host.is_a?(URI) ? host.host : host
  # The canonical host is always valid; otherwise it must be a known alias
  return true if name == tenancy_host
  tenancy_host_aliases.include?(name)
end
|
143
|
+
|
144
|
+
# Returns true if URL is a valid tenancy URL or host
|
145
|
+
# @param url [String] the URL or host
|
146
|
+
# @return [Boolean] true if the URL or host is valid, false otherwise
|
147
|
+
def valid_url?(url)
  return false if url.nil?
  valid_host?(uri(url))
rescue URI::InvalidComponentError, URI::InvalidURIError
  # A URL that cannot be parsed cannot belong to the tenancy
  false
end
|
152
|
+
|
153
|
+
private
|
154
|
+
|
155
|
+
# Returns a URI instance for a URL
|
156
|
+
# @param url [String] the URL
|
157
|
+
# @return [URI] the URI instance
|
158
|
+
# @raise [URI::InvalidComponentError] if the URL is invalid
|
159
|
+
# @raise [URI::InvalidURIError] if the URL is invalid
|
160
|
+
def parse_url(url)
  # Fall back to the default tenancy host name when no URL is supplied
  source = url.nil? || url.empty? ? canonical_host : url
  URI.parse(source).tap do |parsed|
    # A bare host name such as 'example.com' parses with a nil host and the
    # whole string in the path, so move the path into the host
    if parsed.host.nil? || parsed.host.empty?
      parsed.host = parsed.path
      parsed.path = ''
    end
    # Apply the default scheme only when the URL did not specify one
    parsed.scheme ||= SCHEME
  end
end
|
175
|
+
|
176
|
+
# Replaces the host name of a URL
|
177
|
+
# @param url [String] the URL
|
178
|
+
# @param host [String] the new host name
|
179
|
+
# @param format [String] the format suffix - defaults to '.json' if not
|
180
|
+
# specified, specify an empty string to remove any format
|
181
|
+
# @return [String] the new URL
|
182
|
+
def rewrite_url(url, host, format = nil)
  parsed = uri(url)
  # Only URLs on a recognised tenancy host are rewritten; anything else
  # falls through and yields nil
  if valid_host?(parsed)
    parsed.host = host
    # Swap (or strip) the format extension on the path component only
    parsed.path = rewrite_url_format(parsed.path, format)
    parsed.to_s
  end
rescue URI::InvalidComponentError, URI::InvalidURIError
  # Unparseable URLs cannot be rewritten
  nil
end
|
195
|
+
|
196
|
+
# Replaces the format extension of the URL
|
197
|
+
# @param url [String] the URL
|
198
|
+
# @param format [String] the new format - defaults to '.json' if not given.
|
199
|
+
# Specify an empty string to remove the existing format
|
200
|
+
# @return [String] the new URL
|
201
|
+
def rewrite_url_format(url, format = nil)
  suffix = format || '.json'
  # Strip whatever extension the URL currently carries
  extension = File.extname(url)
  stem = (extension.nil? || extension.empty?) ? url : url.rpartition(extension)[0]
  # Nothing to append to an empty URL, or when the suffix is already there
  # (every string ends with '', so an empty format just removes the extension)
  return stem if stem.empty? || stem.end_with?(suffix)
  "#{stem}#{suffix}"
end
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
@@ -0,0 +1,356 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require 'json'
|
3
|
+
require 'logger'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
require 'aspire/caching/cache_entry'
|
7
|
+
require 'aspire/caching/cache_logger'
|
8
|
+
require 'aspire/caching/util'
|
9
|
+
require 'aspire/exceptions'
|
10
|
+
|
11
|
+
module Aspire
|
12
|
+
# Tools for building a cache from the Aspire APIs
|
13
|
+
module Caching
|
14
|
+
# Caches Aspire API objects and their references
|
15
|
+
class Builder
|
16
|
+
include Aspire::Exceptions
|
17
|
+
include Util
|
18
|
+
|
19
|
+
# @!attribute [rw] cache
|
20
|
+
# @return [Aspire::Caching::Cache] the Aspire cache
|
21
|
+
attr_accessor :cache
|
22
|
+
|
23
|
+
# Initialises a new Builder instance
|
24
|
+
# @param cache [Aspire::Caching::Cache] the Aspire cache
|
25
|
+
# @return [void]
|
26
|
+
def initialize(cache = nil)
  # Store the cache through the public writer declared on the class
  self.cache = cache
end
|
29
|
+
|
30
|
+
# Builds a cache of Aspire lists from the Aspire All Lists report
|
31
|
+
# @param enumerator [Aspire::Enumerator::ReportEnumerator] the Aspire
|
32
|
+
# All Lists report enumerator
|
33
|
+
# @param clear [Boolean] if true, clear the cache before building
|
34
|
+
# @return [Integer] the number of lists cached
|
35
|
+
def build(enumerator, clear: false)
  # Empty the cache first if requested
  cache.clear if clear
  # Cache each enumerated list, timing the whole run.
  # reload: false skips entries already in the cache, which is what makes
  # an interrupted build cheap to resume
  lists = 0
  time = Benchmark.measure do
    enumerator.each do |row|
      write_list(row['List Link'], reload: false)
      lists += 1
    end
  end
  # Log completion
  cache.logger.info("#{lists} lists cached in #{duration(time)}")
  # Return the count as documented (previously the logger's return value
  # leaked out as the result)
  lists
end
|
51
|
+
|
52
|
+
# Resumes an interrupted build
|
53
|
+
# @param enumerator [Aspire::Enumerator::ReportEnumerator] the Aspire
|
54
|
+
# All Lists report enumerator
|
55
|
+
def resume(enumerator)
  logger = cache.logger
  logger.info('Resuming previous build')
  # Lists still marked as in-progress were interrupted part-way through,
  # so reload them before continuing
  reload_marked_lists
  # Continue the build without clearing what is already cached
  build(enumerator, clear: false)
end
|
63
|
+
|
64
|
+
# Caches an Aspire linked data API object.
|
65
|
+
# Use write(url) to build a cache for the first time.
|
66
|
+
# Use write(url, reload: true) to reload parts of the cache.
|
67
|
+
# @param url [String, Aspire::Caching::CacheEntry] the URL or cache entry
|
68
|
+
#   of the API object
|
69
|
+
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
|
70
|
+
# if omitted, this is read from the API
|
71
|
+
# @param list [Aspire::Caching::CacheEntry] the parent list cache entry;
|
72
|
+
# if present, this implies that references to other lists are ignored
|
73
|
+
# @param reload [Boolean] if true, reload the cache entry from the API,
|
74
|
+
# otherwise do nothing if the entry is already in the cache
|
75
|
+
# @param urls [Hash] the set of URLs handled in the current operation
|
76
|
+
# @return [void]
|
77
|
+
def write(url = nil, data = nil, list: nil, reload: true, urls: {})
  # Parsed data from the Linked Data API has the following structure:
  #   { url => {primary-object},
  #     related-url1 => {related-object1}, ... }
  # where url => {primary-object} is the object referenced by the url
  # parameter, and the related URLs/objects are objects referenced by the
  # primary object and included in the API response.
  #
  # The primary and related objects are written to the cache before any
  # object references within them are followed, which should reduce
  # unnecessary duplication of API calls.
  #
  # Some objects with a linked data URL are not accessible through that
  # API (e.g. users /users/<user-id> are not accessible, but user notes
  # /users/<user-id>/notes<note-id> are accessible).
  #
  # Some objects with a linked data URL are accessible through the API but
  # do not return JSON-LD (e.g. events /events/<event-id> return regular
  # JSON rather than JSON-LD). These objects are cached but no attempt is
  # made to follow LD references within them.
  entry = cache_entry(url, list)
  # Nothing to do without an entry, or when the entry should be skipped
  return unless entry
  return unless write?(entry, urls, list, reload)
  write_data(entry, urls, data, list, reload)
rescue NotCacheable
  # Non-cacheable URLs are deliberately skipped without logging
rescue StandardError => error
  # Report and log the error, then carry on with the rest of the build
  Raven.capture_exception(error)
  cache.logger.error(error.to_s)
rescue Exception => error
  # Anything outside StandardError is reported, logged and re-raised
  Raven.capture_exception(error)
  cache.logger.fatal(error.to_s)
  raise error
end
|
118
|
+
|
119
|
+
# Caches an Aspire linked data API list object and ignores any references
|
120
|
+
# to other lists
|
121
|
+
# @param url [String, Aspire::Caching::CacheEntry] the URL or cache entry
|
122
|
+
# of the API list object
|
123
|
+
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
|
124
|
+
# if omitted, this is read from the API
|
125
|
+
# @param reload [Boolean] if true, reload the cache entry from the API,
|
126
|
+
# otherwise do nothing if the entry is already in the cache
|
127
|
+
# @return [void]
|
128
|
+
def write_list(url = nil, data = nil, reload: true)
  entry = cache_entry(url)
  # cache_entry returns nil when no URL is given (url defaults to nil), so
  # check for that as well; otherwise the caller gets a NoMethodError on
  # nil instead of the intended ArgumentError
  raise ArgumentError, 'List expected' unless entry && entry.list?
  # Passing the entry as its own parent list makes write ignore references
  # to other, unrelated lists
  write(entry, data, list: entry, reload: reload)
rescue NotCacheable
  # Non-cacheable URLs are deliberately skipped without logging
end
|
135
|
+
|
136
|
+
private
|
137
|
+
|
138
|
+
# Returns true if the cache entry should be skipped because it is already cached or in progress, false if not
|
139
|
+
# @param entry [Aspire::Caching::CacheEntry] the cache entry
|
140
|
+
# @param reload [Boolean] if true, reload the cache entry from the API,
|
141
|
+
# otherwise do nothing if the entry is already in the cache
|
142
|
+
def already_cached?(entry, reload)
  # Work out why (if at all) the entry should be skipped:
  # - when reloading, only entries marked as in-progress are skipped
  # - when not reloading, every cached entry is skipped
  skip_message =
    if entry.marked? && reload
      "#{entry.url} ignored, in progress (reload)"
    elsif entry.cached? && !reload
      "#{entry.url} ignored, in cache"
    end
  # No reason to skip means the entry still needs writing
  return false unless skip_message
  cache.logger.debug(skip_message)
  true
end
|
156
|
+
|
157
|
+
# Returns true if a URL has already been handled in this transaction
|
158
|
+
# @param entry [Aspire::Caching::CacheEntry] the cache entry
|
159
|
+
# @param urls [Hash] the set of URLs handled in the current operation
|
160
|
+
# @return [Boolean] true if the URL has already been handled, false if not
|
161
|
+
def already_handled?(entry, urls)
  # The handled set is keyed by entry URL
  urls.include?(entry.url) ? true : false
end
|
166
|
+
|
167
|
+
# Returns the CacheEntry instance for a URL
|
168
|
+
# @param url [String, Aspire::Caching::CacheEntry] the URL or cache entry
|
169
|
+
# @param default [Aspire::Caching::CacheEntry, nil] the default if URL is
|
170
|
+
# not given
|
171
|
+
# @return [Aspire::Caching::CacheEntry] the cache entry for the URL
|
172
|
+
def cache_entry(url, default = nil)
  case url
  when nil
    # No URL given: use the caller's fallback
    default
  when CacheEntry
    # Already a cache entry: pass it through untouched
    url
  else
    # Wrap a plain URL in a new entry bound to this builder's cache
    CacheEntry.new(url, cache)
  end
end
|
177
|
+
|
178
|
+
# Reloads a cache entry
|
179
|
+
# @param entry [Aspire::Caching::CacheEntry] the cache entry
|
180
|
+
# @return [void]
|
181
|
+
def reload(entry)
  cache.logger.log(Logger::INFO, "Reloading #{entry.url}")
  # Remove the existing entry before writing it again
  entry.delete(force: true)
  # Lists are rewritten through write_list so that references to other
  # lists are ignored; everything else goes through the general writer
  return write_list(entry, reload: true) if entry.list?(strict: true)
  write(entry, reload: true)
end
|
190
|
+
|
191
|
+
# Reloads any entry marked as in-progress
|
192
|
+
# Positional parameters are the object types to include, e.g. 'lists',
|
193
|
+
# 'resources' etc. - default: all object types
|
194
|
+
# @return [void]
|
195
|
+
def reload_marked_entries(*types)
  # The cache yields each marked entry of the requested types
  cache.marked_entries(*types) do |marked|
    reload(marked)
  end
end
|
198
|
+
|
199
|
+
# Reloads any list marked as in-progress
|
200
|
+
# @return [void]
|
201
|
+
def reload_marked_lists
  # Restrict the marked entries to list objects only
  cache.marked_entries('lists') do |marked|
    reload(marked)
  end
end
|
204
|
+
|
205
|
+
# Returns true if the cache entry is a list which is unrelated to the
|
206
|
+
# parent list. This prevents unrelated lists being downloaded through
|
207
|
+
# paths such as list.usedBy -> module.usesList -> [unrelated lists].
|
208
|
+
# Returns false if:
|
209
|
+
# no parent list is provided,
|
210
|
+
# or the cache entry is not a list,
|
211
|
+
# or it is the same as the parent list,
|
212
|
+
# or it is a child of the parent list.
|
213
|
+
# @param entry [Aspire::Caching::CacheEntry] the cache entry
|
214
|
+
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
|
215
|
+
# @return [Boolean] true if the cache entry is a list unrelated to the
|
216
|
+
# parent list, otherwise false
|
217
|
+
def unrelated_list?(entry, parent_list)
  # The entry is acceptable when there is no parent list to compare with,
  # when it is not a list at all, or when it is the parent list or one of
  # its children. The checks short-circuit in that order.
  acceptable = !parent_list ||
               !entry.list?(strict: false) ||
               entry.child_of?(parent_list, strict: false)
  return false if acceptable
  # Otherwise the entry is a list unrelated to the parent list
  cache.logger.debug("#{entry.url} ignored, not related to #{parent_list.url}")
  true
end
|
229
|
+
|
230
|
+
# Writes a linked data API object and its references to the cache
|
231
|
+
# @param entry [Aspire::Caching::CacheEntry] the cache entry
|
232
|
+
# @param urls [Hash] the set of URLs handled in the current operation
|
233
|
+
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
|
234
|
+
# if omitted, this is read from the API
|
235
|
+
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
|
236
|
+
# @param reload [Boolean] if true, reload the cache entry from the API,
|
237
|
+
# otherwise do nothing if the entry is already in the cache
|
238
|
+
# @return [void]
|
239
|
+
def write_data(entry, urls, data = nil, parent_list = nil, reload = true)
  # Write the object itself (linked data plus any JSON API data) first
  linked_data, json_data = write_object(entry, urls, data, reload)
  objects = [linked_data, json_data]
  # Stop here if there is no linked data or the entry has no references
  return objects unless linked_data && entry.references?
  # Mark the entry as in-progress while its related objects and references
  # are written. There is no ensure here, so a failure leaves the entry
  # marked - presumably so that an interrupted build can find it again.
  entry.mark
  write_related(entry, urls, linked_data, parent_list, reload)
  write_references(urls, linked_data, parent_list, reload)
  entry.unmark
  objects
end
|
255
|
+
|
256
|
+
# Caches a linked data API object and any associated JSON API object
|
257
|
+
# @param entry [Aspire::Caching::CacheEntry] the cache entry
|
258
|
+
# @param urls [Hash] the set of URLs handled in the current operation
|
259
|
+
# @param data [Hash, nil] the parsed JSON linked data of the object; if
|
260
|
+
# omitted, the data is read from the API URL
|
261
|
+
# @param reload [Boolean] if true, reload the cache entry from the API,
|
262
|
+
# otherwise do nothing if the entry is already in the cache
|
263
|
+
# @return [Array] the linked data object and the JSON API object (nil if there is none)
|
264
|
+
def write_object(entry, urls, data = nil, reload = true)
  # Reloading means bypassing whatever is already cached
  from_cache = !reload
  linked_data = write_object_data(entry, data, from_cache)
  json_data = write_object_json(entry, from_cache)
  # Flag the URL as handled only after both writes have gone through
  [linked_data, json_data].tap { urls[entry.url] = true }
end
|
276
|
+
|
277
|
+
# Writes a linked data API object to the cache
|
278
|
+
# @param entry [Aspire::Caching::CacheEntry] the cache entry
|
279
|
+
# @param data [Hash] the data to write to the cache
|
280
|
+
# @param use_cache [Boolean] if true, return data from the cache,
|
281
|
+
# otherwise update the cache with data from the API
|
282
|
+
def write_object_data(entry, data, use_cache)
  # Data supplied by the caller is written straight to the cache
  return cache.write(data: data, entry: entry) if data
  # Otherwise the data comes from a cache read for the entry
  cache.read(entry: entry, use_cache: use_cache)
end
|
289
|
+
|
290
|
+
# Writes a JSON API object to the cache
|
291
|
+
# @param entry [Aspire::Caching::CacheEntry] the cache entry
|
292
|
+
# @param use_cache [Boolean] if true, return data from the cache,
|
293
|
+
# otherwise update the cache with data from the API
|
294
|
+
def write_object_json(entry, use_cache)
  # Only entries with a JSON API counterpart have JSON data to fetch
  entry.json? ? cache.read(entry: entry, json: true, use_cache: use_cache) : nil
end
|
298
|
+
|
299
|
+
# Caches all the objects referenced by the argument object
|
300
|
+
# @param urls [Hash] the set of URLs handled in the current operation
|
301
|
+
# @param data [Hash] the parsed linked data object
|
302
|
+
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
|
303
|
+
# @param reload [Boolean] if true, reload the cache entry from the API,
|
304
|
+
# otherwise do nothing if the entry is already in the cache
|
305
|
+
# @return [void]
|
306
|
+
def write_references(urls, data, parent_list = nil, reload = true)
  # Objects are handled one at a time: each object's references are written
  # before the next object's references are collected
  data.each do |object_url, object|
    referenced = references(object_url, object)
    referenced.each do |reference|
      write(reference, list: parent_list, reload: reload, urls: urls)
    end
  end
end
|
316
|
+
|
317
|
+
# Caches related linked data API objects included with the primary object
|
318
|
+
# @param entry [Aspire::Caching::CacheEntry] the cache entry
|
319
|
+
# @param urls [Hash] the set of URLs handled in the current operation
|
320
|
+
# @param data [Hash] the parsed linked data API object
|
321
|
+
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
|
322
|
+
# @param reload [Boolean] if true, reload the cache entry from the API,
|
323
|
+
# otherwise do nothing if the entry is already in the cache
|
324
|
+
# @return [void]
|
325
|
+
def write_related(entry, urls, data, parent_list = nil, reload = true)
  # Every related object is written before any references are followed
  data.each do |object_url, object|
    # The primary object is skipped: it should already have been written
    is_primary = entry.url == cache.canonical_url(object_url)
    unless is_primary
      write(object_url, { object_url => object },
            list: parent_list, reload: reload, urls: urls)
    end
  end
end
|
336
|
+
|
337
|
+
# Returns true if the URL should be written to the cache, false if not
|
338
|
+
# @param entry [Aspire::Caching::CacheEntry] the cache entry
|
339
|
+
# @param urls [Hash] the set of URLs handled in the current operation
|
340
|
+
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
|
341
|
+
# @param reload [Boolean] if true, reload the cache entry from the API,
|
342
|
+
# otherwise do nothing if the entry is already in the cache
|
343
|
+
# @return [Boolean] true if the URL should be written to the cache, false
|
344
|
+
# if not
|
345
|
+
def write?(entry, urls, parent_list = nil, reload = true)
  # An entry is written unless one of the checks says to skip it. The
  # checks short-circuit in this order: handled in this operation, already
  # cached, list unrelated to the parent list.
  skip = already_handled?(entry, urls) ||
         already_cached?(entry, reload) ||
         unrelated_list?(entry, parent_list)
  !skip
end
|
354
|
+
end
|
355
|
+
end
|
356
|
+
end
|