aspire 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +59 -0
- data/.rbenv-gemsets +1 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Dockerfile +20 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +851 -0
- data/Rakefile +10 -0
- data/aspire.gemspec +40 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/entrypoint.sh +11 -0
- data/exe/build-cache +13 -0
- data/lib/aspire.rb +11 -0
- data/lib/aspire/api.rb +2 -0
- data/lib/aspire/api/base.rb +198 -0
- data/lib/aspire/api/json.rb +195 -0
- data/lib/aspire/api/linked_data.rb +214 -0
- data/lib/aspire/caching.rb +4 -0
- data/lib/aspire/caching/builder.rb +356 -0
- data/lib/aspire/caching/cache.rb +365 -0
- data/lib/aspire/caching/cache_entry.rb +296 -0
- data/lib/aspire/caching/cache_logger.rb +63 -0
- data/lib/aspire/caching/util.rb +210 -0
- data/lib/aspire/cli/cache_builder.rb +123 -0
- data/lib/aspire/cli/command.rb +20 -0
- data/lib/aspire/enumerator/base.rb +29 -0
- data/lib/aspire/enumerator/json_enumerator.rb +130 -0
- data/lib/aspire/enumerator/linked_data_uri_enumerator.rb +32 -0
- data/lib/aspire/enumerator/report_enumerator.rb +64 -0
- data/lib/aspire/exceptions.rb +36 -0
- data/lib/aspire/object.rb +7 -0
- data/lib/aspire/object/base.rb +155 -0
- data/lib/aspire/object/digitisation.rb +43 -0
- data/lib/aspire/object/factory.rb +87 -0
- data/lib/aspire/object/list.rb +590 -0
- data/lib/aspire/object/module.rb +36 -0
- data/lib/aspire/object/resource.rb +371 -0
- data/lib/aspire/object/time_period.rb +47 -0
- data/lib/aspire/object/user.rb +46 -0
- data/lib/aspire/properties.rb +20 -0
- data/lib/aspire/user_lookup.rb +103 -0
- data/lib/aspire/util.rb +185 -0
- data/lib/aspire/version.rb +3 -0
- data/lib/retry.rb +197 -0
- metadata +274 -0
@@ -0,0 +1,214 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
require_relative 'base'
|
4
|
+
|
5
|
+
module Aspire
|
6
|
+
module API
|
7
|
+
# A wrapper class for the Aspire linked data API
|
8
|
+
class LinkedData < Base
|
9
|
+
# The tenancy domain
TENANCY_DOMAIN = 'myreadinglists.org'.freeze

# NOTE: only readers are generated for the attributes below. Each attribute
# has an explicit writer method in this class which normalises the assigned
# value, so attr_accessor would generate a writer that is immediately
# redefined (Ruby reports "method redefined" when warnings are enabled).

# @!attribute [rw] linked_data_root
#   @return [URI] the root URI of linked data URIs
attr_reader :linked_data_root

# @!attribute [rw] tenancy_host_aliases
#   @return [Array<String>] the list of non-canonical tenancy host names
attr_reader :tenancy_host_aliases

# @!attribute [rw] tenancy_root
#   @return [URI] the canonical root URI of the tenancy
attr_reader :tenancy_root
|
23
|
+
|
24
|
+
# Creates a linked data API wrapper for an Aspire tenancy
# @param tenancy_code [String] the Aspire tenancy code
# @param opts [Hash] the options hash, also passed on to the superclass
# @option opts [String] :linked_data_root the root URI of linked data URIs
#   usually 'http://<tenancy-code>.myreadinglists.org'
# @option opts [Array<String>] :tenancy_host_aliases the list of host
#   name aliases for the tenancy
# @option opts [String] :tenancy_root the canonical root URI of the
#   tenancy, usually 'http://<tenancy-code>.rl.talis.com'
# @return [void]
def initialize(tenancy_code, **opts)
  super(tenancy_code, **opts)
  # Assign through the attribute writers, in the same left-to-right order
  settings = opts.values_at(:linked_data_root, :tenancy_host_aliases,
                            :tenancy_root)
  self.linked_data_root, self.tenancy_host_aliases, self.tenancy_root =
    settings
end
|
40
|
+
|
41
|
+
# Returns a full Aspire tenancy URL from a partial resource path
# @param path [String] the partial resource path, or a complete URL
# @return [String] the full tenancy URL; a path containing '//' is treated
#   as a complete URL and returned unchanged
def api_url(path)
  return path if path.include?('//')
  # Join with exactly one slash, whether or not the tenancy root ends with
  # a slash or the path starts with one
  root = tenancy_root.to_s.chomp('/')
  "#{root}/#{path.sub(%r{\A/}, '')}"
end
|
47
|
+
|
48
|
+
# Fetches a resource through the Aspire linked data API and returns the
# parsed JSON data
# @param url [String] the partial (minus the tenancy root) or complete
#   tenancy URL of the resource
# @return [Hash] the parsed JSON content from the API response
# @yield [response, data] Passes the REST client response and parsed JSON
#   hash to the block
# @yieldparam response [RestClient::Response] the REST client response
# @yieldparam data [Hash] the parsed JSON data from the response
def call(url)
  # Expand to a full URL and make sure the JSON format is requested
  target = api_url(url)
  target = "#{target}.json" unless target.end_with?('.json')
  response, data = call_api(**call_rest_options(target))
  yield(response, data) if block_given?
  data
end
|
64
|
+
|
65
|
+
# Builds the canonical host name for an Aspire tenancy from the tenancy
# code and the tenancy domain
# @return [String] the canonical host name for the tenancy
def canonical_host
  [tenancy_code, TENANCY_DOMAIN].join('.')
end
|
70
|
+
|
71
|
+
# Converts an Aspire tenancy alias or URL to canonical form
# @param url [String] an Aspire host name or URL
# @return [String, nil] the equivalent canonical host name or URL using
#   the tenancy base URL, or nil if the host is not a valid tenancy alias
def canonical_url(url)
  # Rewriting with no explicit format applies the default format extension
  target_host = tenancy_host
  rewrite_url(url, target_host)
end
|
80
|
+
|
81
|
+
# Returns the host name component of the linked data root URI
# @return [String] the linked data URI host name
def linked_data_host
  root = linked_data_root
  root.host
end
|
86
|
+
|
87
|
+
# Parses and stores the linked data root URL
# @param url [String] the linked data root URL
# @return [URI] the linked data root URI instance
# @raise [URI::InvalidComponentError] if the URL is invalid
# @raise [URI::InvalidURIError] if the URL is invalid
def linked_data_root=(url)
  root = parse_url(url)
  @linked_data_root = root
end
|
95
|
+
|
96
|
+
# Converts an Aspire URL to the form used in linked data APIs
# @param url [String] an Aspire URL
# @return [String, nil] the equivalent linked data URL
def linked_data_url(url)
  # An empty format removes any format extension from the URL
  no_format = ''
  rewrite_url(url, linked_data_host, no_format)
end
|
103
|
+
|
104
|
+
# Returns the host name component of the canonical tenancy root URI
# @return [String] the canonical tenancy host name
def tenancy_host
  root = tenancy_root
  root.host
end
|
109
|
+
|
110
|
+
# Sets the list of tenancy aliases
# nil selects the default (the canonical host name), an empty value gives
# an empty list, otherwise the host name of each alias is stored.
# @param aliases [Array<String>, String, nil] the tenancy alias or aliases
# @return [void]
def tenancy_host_aliases=(aliases)
  @tenancy_host_aliases =
    if aliases.nil?
      [canonical_host]
    elsif aliases.empty?
      []
    else
      # Accept a single alias as well as a list, and keep only the aliases
      # which yield a non-empty host name
      candidates = aliases.is_a?(Array) ? aliases : [aliases]
      hosts = candidates.map { |candidate| uri_host(candidate) }
      hosts.reject { |name| name.nil? || name.empty? }
    end
end
|
125
|
+
|
126
|
+
# Parses and stores the tenancy root URL
# @param url [String] the tenancy root URL
# @return [URI] the tenancy root URI instance
# @raise [URI::InvalidComponentError] if the URL is invalid
# @raise [URI::InvalidURIError] if the URL is invalid
def tenancy_root=(url)
  root = parse_url(url)
  @tenancy_root = root
end
|
134
|
+
|
135
|
+
# Tests whether a host name is the tenancy host or one of its aliases
# @param host [String, URI] the hostname, or a URI whose host is checked
# @return [Boolean] true if the hostname is valid, false otherwise
def valid_host?(host)
  return false if host.nil?
  name = host.is_a?(URI) ? host.host : host
  return true if name == tenancy_host
  tenancy_host_aliases.include?(name)
end
|
143
|
+
|
144
|
+
# Tests whether a URL or host name belongs to the tenancy
# @param url [String] the URL or host
# @return [Boolean] true if the URL or host is valid, false otherwise
#   (including when the URL is nil or cannot be parsed)
def valid_url?(url)
  return false if url.nil?
  valid_host?(uri(url))
rescue URI::InvalidComponentError, URI::InvalidURIError
  false
end
|
152
|
+
|
153
|
+
private
|
154
|
+
|
155
|
+
# Returns a URI instance for a URL
# A nil or empty URL selects the canonical host name. A URL given without
# a scheme ('host' or 'host/path') has its host name taken from the first
# path segment, and the default scheme is applied.
# @param url [String, nil] the URL
# @return [URI] the URI instance
# @raise [URI::InvalidComponentError] if the URL is invalid
# @raise [URI::InvalidURIError] if the URL is invalid
def parse_url(url)
  # Use the default tenancy host name if no URI is specified
  url = canonical_host if url.nil? || url.empty?
  uri = URI.parse(url)
  # A URL without a scheme is parsed with a nil host and the whole string
  # in uri.path, so split the host name off the front of the path
  if uri.host.nil? || uri.host.empty?
    host, slash, rest = uri.path.to_s.partition('/')
    if host.empty?
      raise URI::InvalidComponentError, "no host name in URL: #{url}"
    end
    uri.host = host
    uri.path = "#{slash}#{rest}"
  end
  # Set the URI scheme if required
  uri.scheme ||= SCHEME
  uri
end
|
175
|
+
|
176
|
+
# Replaces the host name and format extension of a tenancy URL
# @param url [String] the URL
# @param host [String] the new host name
# @param format [String] the format suffix - defaults to '.json' if not
#   specified, specify an empty string to remove any format
# @return [String, nil] the new URL, or nil if the URL's host is not a
#   valid tenancy host or a URI error is raised
def rewrite_url(url, host, format = nil)
  parsed = uri(url)
  # Only URLs on a valid tenancy host are rewritten
  return nil unless valid_host?(parsed)
  parsed.host = host
  parsed.path = rewrite_url_format(parsed.path, format)
  parsed.to_s
rescue URI::InvalidComponentError, URI::InvalidURIError
  nil
end
|
195
|
+
|
196
|
+
# Replaces the format extension of a URL
# @param url [String] the URL
# @param format [String] the new format - defaults to '.json' if not given.
#   Specify an empty string to remove the existing format
# @return [String] the new URL
def rewrite_url_format(url, format = nil)
  suffix = format || '.json'
  # Strip the existing extension, if there is one
  extension = File.extname(url)
  stem = extension.empty? ? url : url.rpartition(extension).first
  # Nothing is appended to an empty URL or one already ending in the suffix
  return stem if stem.empty? || stem.end_with?(suffix)
  "#{stem}#{suffix}"
end
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
@@ -0,0 +1,356 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require 'json'
|
3
|
+
require 'logger'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
require 'aspire/caching/cache_entry'
|
7
|
+
require 'aspire/caching/cache_logger'
|
8
|
+
require 'aspire/caching/util'
|
9
|
+
require 'aspire/exceptions'
|
10
|
+
|
11
|
+
module Aspire
|
12
|
+
# Tools for building a cache from the Aspire APIs
|
13
|
+
module Caching
|
14
|
+
# Caches Aspire API objects and their references
|
15
|
+
class Builder
|
16
|
+
include Aspire::Exceptions
|
17
|
+
include Util
|
18
|
+
|
19
|
+
# @!attribute [rw] cache
|
20
|
+
# @return [Aspire::Caching::Cache] the Aspire cache
|
21
|
+
attr_accessor :cache
|
22
|
+
|
23
|
+
# Initialises a new Builder instance
# @param cache [Aspire::Caching::Cache, nil] the Aspire cache that this
#   builder writes to
# @return [void]
def initialize(cache = nil)
  self.cache = cache
end
|
29
|
+
|
30
|
+
# Builds a cache of Aspire lists from the Aspire All Lists report
# @param enumerator [Aspire::Enumerator::ReportEnumerator] the Aspire
#   All Lists report enumerator; each row's 'List Link' value is cached
# @param clear [Boolean] if true, clear the cache before building
# @return [Integer] the number of lists processed
def build(enumerator, clear: false)
  # Empty the cache if required
  cache.clear if clear
  # Cache the enumerated lists
  # - call with reload: false so that existing cache entries are ignored
  #   to speed up processing
  lists = 0
  time = Benchmark.measure do
    enumerator.each do |row|
      write_list(row['List Link'], reload: false)
      lists += 1
    end
  end
  # Log completion
  cache.logger.info("#{lists} lists cached in #{duration(time)}")
  # Return the count, as documented, rather than the logger's result
  lists
end
|
51
|
+
|
52
|
+
# Resumes an interrupted build: lists still marked as in-progress are
# reloaded, then the build continues without clearing the cache
# @param enumerator [Aspire::Enumerator::ReportEnumerator] the Aspire
#   All Lists report enumerator
def resume(enumerator)
  log = cache.logger
  log.info('Resuming previous build')
  reload_marked_lists
  build(enumerator, clear: false)
end
|
63
|
+
|
64
|
+
# Caches an Aspire linked data API object.
# Use write(url) to build a cache for the first time.
# Use write(url, reload: true) to reload parts of the cache.
# @param url [String, Aspire::Caching::CacheEntry] the URL or cache entry
#   of the API object
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
#   if omitted, this is read from the API
# @param list [Aspire::Caching::CacheEntry] the parent list cache entry;
#   if present, this implies that references to other lists are ignored
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @param urls [Hash] the set of URLs handled in the current operation
# @return [void]
def write(url = nil, data = nil, list: nil, reload: true, urls: {})
  #
  # Parsed data from the Linked Data API has the following structure:
  #   { url => {primary-object},
  #     related-url1 => {related-object1}, ... }
  # where url => {primary-object} is the object referenced by the url
  # parameter, and the related URLs/objects are objects referenced by
  # the primary object and included in the API response.
  #
  # The primary and related objects are written to the cache before any
  # object references within the primary and related objects are followed.
  # This should reduce unnecessary duplication of API calls.
  #
  # Some objects with a linked data URL are not accessible through that
  # API (e.g. users /users/<user-id> are not accessible, but user notes
  # /users/<user-id>/notes/<note-id> are accessible).
  #
  # Some objects with a linked data URL are accessible through the API but
  # do not return JSON-LD (e.g. events /events/<event-id> return regular
  # JSON rather than JSON-LD). These objects are cached but no attempt is
  # made to follow LD references within them.
  #
  entry = cache_entry(url, list)
  return unless entry && write?(entry, urls, list, reload)
  write_data(entry, urls, data, list, reload)
rescue NotCacheable
  # Objects which cannot be cached are skipped silently
rescue StandardError => e
  # Log the error and continue processing.
  # Raven is not required by this file, so only report to it when it has
  # been loaded; otherwise a NameError raised here would mask the error.
  Raven.capture_exception(e) if defined?(Raven)
  cache.logger.error(e.to_s)
rescue Exception => e
  # Log the error and fail. Exception is rescued deliberately so that fatal
  # conditions are logged, and the exception is always re-raised.
  Raven.capture_exception(e) if defined?(Raven)
  cache.logger.fatal(e.to_s)
  raise
end
|
118
|
+
|
119
|
+
# Caches an Aspire linked data API list object and ignores any references
# to other lists
# @param url [String, Aspire::Caching::CacheEntry] the URL or cache entry
#   of the API list object
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
#   if omitted, this is read from the API
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @raise [ArgumentError] if the URL is missing or is not a list URL
# @return [void]
def write_list(url = nil, data = nil, reload: true)
  entry = cache_entry(url)
  # cache_entry returns nil when no URL is given, so check for that as well
  # rather than failing with NoMethodError on nil
  raise ArgumentError, 'List expected' unless entry && entry.list?
  # The list is its own parent list, so references to other lists are ignored
  write(entry, data, list: entry, reload: reload)
rescue NotCacheable
  # Lists which cannot be cached are skipped silently
end
|
135
|
+
|
136
|
+
private
|
137
|
+
|
138
|
+
# Returns true if a cache entry should be skipped because of its cache
# state, false if it should be processed
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [Boolean] true if the entry is to be ignored, false otherwise
def already_cached?(entry, reload)
  # When reloading, only entries marked as in-progress are skipped
  # When not reloading, every cached entry is skipped
  message =
    if entry.marked? && reload
      "#{entry.url} ignored, in progress (reload)"
    elsif entry.cached? && !reload
      "#{entry.url} ignored, in cache"
    end
  return false unless message
  cache.logger.debug(message)
  true
end
|
156
|
+
|
157
|
+
# Tests whether the entry's URL is in the set of URLs already handled in
# the current operation
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param urls [Hash] the set of URLs handled in the current operation
# @return [Boolean] true if the URL has already been handled, false if not
def already_handled?(entry, urls)
  urls.include?(entry.url) ? true : false
end
|
166
|
+
|
167
|
+
# Returns the CacheEntry instance for a URL
# @param url [String, Aspire::Caching::CacheEntry] the URL or cache entry
# @param default [Aspire::Caching::CacheEntry, nil] the value returned if
#   the URL is nil
# @return [Aspire::Caching::CacheEntry] the cache entry for the URL; an
#   existing cache entry is returned as it is
def cache_entry(url, default = nil)
  case url
  when nil then default
  when CacheEntry then url
  else CacheEntry.new(url, cache)
  end
end
|
177
|
+
|
178
|
+
# Reloads a cache entry: the entry is deleted and then written again,
# as a list if it is strictly a list and as a plain object otherwise
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @return [void]
def reload(entry)
  cache.logger.log(Logger::INFO, "Reloading #{entry.url}")
  entry.delete(force: true)
  entry.list?(strict: true) ? write_list(entry, reload: true) : write(entry, reload: true)
end
|
190
|
+
|
191
|
+
# Reloads every entry marked as in-progress
# Positional parameters are the object types to include, e.g. 'lists',
# 'resources' etc. - default: all object types
# @return [void]
def reload_marked_entries(*types)
  cache.marked_entries(*types) do |marked|
    reload(marked)
  end
end
|
198
|
+
|
199
|
+
# Reloads every list marked as in-progress
# @return [void]
def reload_marked_lists
  # Lists are the 'lists' object type of the general marked-entry reload
  reload_marked_entries('lists')
end
|
204
|
+
|
205
|
+
# Returns true if the cache entry is a list which is unrelated to the
# parent list. This prevents unrelated lists being downloaded through
# paths such as list.usedBy -> module.usesList -> [unrelated lists].
# Returns false if:
#   no parent list is provided,
#   or the cache entry is not a list,
#   or it is the same as the parent list,
#   or it is a child of the parent list.
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
# @return [Boolean] true if the cache entry is a list unrelated to the
#   parent list, otherwise false
def unrelated_list?(entry, parent_list)
  # The checks short-circuit in order: parent given, entry is a list,
  # entry is not a child of (or the same as) the parent list
  unrelated = parent_list &&
              entry.list?(strict: false) &&
              !entry.child_of?(parent_list, strict: false)
  return false unless unrelated
  cache.logger.debug("#{entry.url} ignored, not related to #{parent_list.url}")
  true
end
|
229
|
+
|
230
|
+
# Writes a linked data API object and its references to the cache
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param urls [Hash] the set of URLs handled in the current operation
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
#   if omitted, this is read from the API
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [Array] the linked data and JSON API objects
def write_data(entry, urls, data = nil, parent_list = nil, reload = true)
  # Read the linked data and associated JSON API data into the cache
  linked_data, json_data = write_object(entry, urls, data, reload)
  objects = [linked_data, json_data]
  # Nothing more to do unless there is linked data with references to follow
  return objects unless linked_data && entry.references?
  # The entry is marked while its related and referenced objects are
  # written. NOTE(review): unmark is not in an ensure block - presumably so
  # an interrupted entry stays marked for a later resume; confirm.
  entry.mark
  write_related(entry, urls, linked_data, parent_list, reload)
  write_references(urls, linked_data, parent_list, reload)
  entry.unmark
  objects
end
|
255
|
+
|
256
|
+
# Caches a linked data API object and any associated JSON API object, and
# flags the entry's URL as handled
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param urls [Hash] the set of URLs handled in the current operation
# @param data [Hash, nil] the parsed JSON linked data of the object; if
#   omitted, the data is read from the API URL
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [Array] the linked data object and the JSON API object (nil if
#   the entry has no JSON API object)
def write_object(entry, urls, data = nil, reload = true)
  # The cache is bypassed when reloading
  from_cache = !reload
  objects = [write_object_data(entry, data, from_cache),
             write_object_json(entry, from_cache)]
  urls[entry.url] = true
  objects
end
|
276
|
+
|
277
|
+
# Writes a linked data API object to the cache
# Data supplied by the caller is written directly; otherwise the object is
# read through the cache.
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param data [Hash, nil] the data to write to the cache
# @param use_cache [Boolean] if true, return data from the cache,
#   otherwise update the cache with data from the API
def write_object_data(entry, data, use_cache)
  return cache.write(data: data, entry: entry) if data
  cache.read(entry: entry, use_cache: use_cache)
end
|
289
|
+
|
290
|
+
# Writes a JSON API object to the cache
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param use_cache [Boolean] if true, return data from the cache,
#   otherwise update the cache with data from the API
# @return [Object, nil] the result of the cache read, or nil if entry.json?
#   is false
def write_object_json(entry, use_cache)
  entry.json? ? cache.read(entry: entry, json: true, use_cache: use_cache) : nil
end
|
298
|
+
|
299
|
+
# Caches all the objects referenced by the argument object
# @param urls [Hash] the set of URLs handled in the current operation
# @param data [Hash] the parsed linked data object
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [void]
def write_references(urls, data, parent_list = nil, reload = true)
  data.each do |object_url, object|
    # Write every URI referenced by this object to the cache
    references(object_url, object).each do |reference|
      write(reference, list: parent_list, reload: reload, urls: urls)
    end
  end
end
|
316
|
+
|
317
|
+
# Caches related linked data API objects included with the primary object
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param urls [Hash] the set of URLs handled in the current operation
# @param data [Hash] the parsed linked data API object
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [void]
def write_related(entry, urls, data, parent_list = nil, reload = true)
  # Write all related objects to the cache before caching references
  data.each do |related_url, related_data|
    # Skip the primary object, which should already have been written
    next if entry.url == cache.canonical_url(related_url)
    related_object = { related_url => related_data }
    write(related_url, related_object,
          list: parent_list, reload: reload, urls: urls)
  end
end
|
336
|
+
|
337
|
+
# Returns true if the URL should be written to the cache, false if not
# @param entry [Aspire::Caching::CacheEntry] the cache entry
# @param urls [Hash] the set of URLs handled in the current operation
# @param parent_list [Aspire::Caching::CacheEntry] the parent list entry
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [Boolean] true if the URL should be written to the cache, false
#   if not
def write?(entry, urls, parent_list = nil, reload = true)
  # Skip URLs already handled in this operation, URLs ignored because of
  # their cache state, and lists unrelated to the parent list (the checks
  # short-circuit in that order)
  skip = already_handled?(entry, urls) ||
         already_cached?(entry, reload) ||
         unrelated_list?(entry, parent_list)
  !skip
end
|
354
|
+
end
|
355
|
+
end
|
356
|
+
end
|