aspire 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +59 -0
- data/.rbenv-gemsets +1 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Dockerfile +20 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +851 -0
- data/Rakefile +10 -0
- data/aspire.gemspec +40 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/entrypoint.sh +11 -0
- data/exe/build-cache +13 -0
- data/lib/aspire.rb +11 -0
- data/lib/aspire/api.rb +2 -0
- data/lib/aspire/api/base.rb +198 -0
- data/lib/aspire/api/json.rb +195 -0
- data/lib/aspire/api/linked_data.rb +214 -0
- data/lib/aspire/caching.rb +4 -0
- data/lib/aspire/caching/builder.rb +356 -0
- data/lib/aspire/caching/cache.rb +365 -0
- data/lib/aspire/caching/cache_entry.rb +296 -0
- data/lib/aspire/caching/cache_logger.rb +63 -0
- data/lib/aspire/caching/util.rb +210 -0
- data/lib/aspire/cli/cache_builder.rb +123 -0
- data/lib/aspire/cli/command.rb +20 -0
- data/lib/aspire/enumerator/base.rb +29 -0
- data/lib/aspire/enumerator/json_enumerator.rb +130 -0
- data/lib/aspire/enumerator/linked_data_uri_enumerator.rb +32 -0
- data/lib/aspire/enumerator/report_enumerator.rb +64 -0
- data/lib/aspire/exceptions.rb +36 -0
- data/lib/aspire/object.rb +7 -0
- data/lib/aspire/object/base.rb +155 -0
- data/lib/aspire/object/digitisation.rb +43 -0
- data/lib/aspire/object/factory.rb +87 -0
- data/lib/aspire/object/list.rb +590 -0
- data/lib/aspire/object/module.rb +36 -0
- data/lib/aspire/object/resource.rb +371 -0
- data/lib/aspire/object/time_period.rb +47 -0
- data/lib/aspire/object/user.rb +46 -0
- data/lib/aspire/properties.rb +20 -0
- data/lib/aspire/user_lookup.rb +103 -0
- data/lib/aspire/util.rb +185 -0
- data/lib/aspire/version.rb +3 -0
- data/lib/retry.rb +197 -0
- metadata +274 -0
@@ -0,0 +1,130 @@
|
|
1
|
+
module Aspire
|
2
|
+
# Enumerator classes for Aspire reading list processing
|
3
|
+
module Enumerator
|
4
|
+
# Enumerates over the properties of a JSON data structure
|
5
|
+
class JSONEnumerator
  # @!attribute [rw] hooks
  #   @return [Hash] the callback hooks, keyed by hook name
  attr_accessor :hooks

  # The Enumerator::Yielder instance from an Enumerator.new call
  # @!attribute [rw] yielder
  #   @return [Enumerator::Yielder] the yielder instance from an Enumerator
  attr_accessor :yielder

  # Initialises a new JSONEnumerator instance
  # @param yielder [Enumerator::Yielder] the yielder from an Enumerator
  # @param hooks [Hash] a hash of executable callback hooks:
  #   {
  #     after_array: proc { |key,value,index| }
  #     after_hash: proc { |key,value,index| }
  #     after_yield: proc { |key,value,index| }
  #     before_array: proc { |key,value,index| }
  #     before_hash: proc { |key,value,index| }
  #     before_yield: proc { |key,value,index| }
  #   }
  #
  #   Each callback accepts a property key (name), value, and the numeric
  #   index of the property in its parent array (nil if the property is not
  #   an array member).
  #
  #   Value is an array for after/before_array, a hash for after/before_hash
  #   and any type for after/before_yield.
  #
  #   All before hooks must return a truthy value to allow processing of
  #   the value, or a falsey value to prevent processing of the value.
  #   Filters should therefore be implemented in before hooks.
  #
  #   Before hooks can also be used to process arrays and hashes as a whole.
  #   They should return false if property-level processing is not required.
  # @return [void]
  def initialize(yielder = nil, **hooks)
    self.hooks = hooks
    self.yielder = yielder
  end

  # Runs the named callback hook
  # @param hook [Symbol] the hook name
  # @param args [Array] positional arguments passed to the hook
  # @param kwargs [Hash] keyword arguments passed to the hook
  # @return [Boolean] true if no callable hook is registered under the name,
  #   otherwise the hook's result coerced to true or false
  def [](hook, *args, **kwargs)
    callback = hooks[hook]
    # A missing (or non-callable) hook never blocks processing
    return true unless callback.respond_to?(:call)
    callback.call(*args, **kwargs) ? true : false
  end

  # Registers a callback hook
  # @param hook [Symbol] the hook name
  # @param proc [Proc, Method] the callback
  # @raise [ArgumentError] if the callback is neither a Proc nor a Method
  # @return [void]
  def []=(hook, proc)
    unless proc.is_a?(Proc) || proc.is_a?(Method)
      raise ArgumentError, 'Proc or Method expected'
    end
    hooks[hook] = proc
  end

  # Enumerates an array of JSON data structures. Every member is enumerated
  # under the array's own key together with its position in the array.
  # @param key [String] the property name
  # @param array [Array] the property value
  # @param index [Integer] the index of the property in its parent array, or
  #   nil if not part of an array
  # @return [Boolean, nil] nil if the before_array hook rejected the array,
  #   otherwise the result of the after_array hook
  def array(key, array, index)
    return unless self[:before_array, key, array, index]
    array.each_with_index { |member, position| enumerate(key, member, position) }
    self[:after_array, key, array, index]
  end

  # Enumerates the property/value pairs of a JSON data structure by
  # dispatching on the type of the value
  # @param key [String] the property name
  # @param value [Object] the property value
  # @param index [Integer] the index of the property in its parent array, or
  #   nil if not part of an array
  # @return [Boolean, nil] the result of the array, hash or property handler
  def enumerate(key, value, index = nil)
    case value
    when Array then array(key, value, index)
    when Hash then hash(key, value, index)
    else property(key, value, index)
    end
  end

  # Returns an enumerator enumerating property/value pairs of JSON data.
  # The enumerator's yielder replaces this instance's yielder each time the
  # enumerator is run.
  # @param key [String] the initial key of the data
  # @param value [Object] the initial value of the data
  # @return [Enumerator] the enumerator
  def enumerator(key, value)
    ::Enumerator.new do |yielder|
      self.yielder = yielder
      enumerate(key, value)
    end
  end

  # Enumerates the property/value pairs of a JSON hash
  #
  # NOTE: this method replaces Object#hash with an incompatible signature,
  # so instances of this class cannot be used as Hash keys.
  # @param key [String] the property name
  # @param hash [Hash] the hash to enumerate
  # @param index [Integer] the index of the property in its parent array, or
  #   nil if not part of an array
  # @return [Boolean, nil] nil if the before_hash hook rejected the hash,
  #   otherwise the result of the after_hash hook
  def hash(key, hash, index = nil)
    return unless self[:before_hash, key, hash, index]
    hash.each do |k, v|
      if v.is_a?(Array) || v.is_a?(Hash)
        enumerate(k, v)
      else
        property(k, v)
      end
    end
    self[:after_hash, key, hash, index]
  end

  # Yields a property/value pair as a [key, value, index] triple
  # @param key [String] the property name
  # @param value [Object] the property value
  # @param index [Integer] the index of the property in its parent array, or
  #   nil if not part of an array
  # @return [Boolean, nil] nil if the before_yield hook rejected the
  #   property, otherwise the result of the after_yield hook
  def property(key, value, index = nil)
    return unless self[:before_yield, key, value, index]
    yielder << [key, value, index]
    # The hook receives the documented (key, value, index) arguments only
    self[:after_yield, key, value, index]
  end
end
|
129
|
+
end
|
130
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'aspire/enumerator/json_enumerator'
|
2
|
+
|
3
|
+
module Aspire
|
4
|
+
# Enumerator classes for Aspire reading list processing
|
5
|
+
module Enumerator
|
6
|
+
# Enumerates the URI properties of a linked data API object
|
7
|
+
class LinkedDataURIEnumerator < JSONEnumerator
  # Initialises a new LinkedDataURIEnumerator instance
  #
  # Installs before_hash and before_yield hooks after the superclass has
  # stored the supplied hooks, so hooks passed under those two names are
  # replaced. The enumerator yields [key, hash, index] for every hash of the
  # form { "type" => "uri", "value" => "..." } with a non-empty value, and
  # never yields plain properties.
  # @param yielder [Enumerator::Yielder] the yielder from an Enumerator
  # @param hooks [Hash] the callback hooks
  # @return [void]
  def initialize(yielder = nil, **hooks)
    super(yielder, **hooks)
    # Yield URI hashes whole and do not descend into them; descend into
    # every other hash
    self[:before_hash] = proc do |key, hash, index|
      uri_node = hash['type'] == 'uri' && hash['value'] && !hash['value'].empty?
      self.yielder << [key, hash, index] if uri_node
      !uri_node
    end
    # Do not yield properties
    self[:before_yield] = proc { |*_ignored| false }
  end
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
require 'aspire/enumerator/base'
|
4
|
+
|
5
|
+
module Aspire
|
6
|
+
# Enumerator classes for Aspire reading list processing
|
7
|
+
module Enumerator
|
8
|
+
# Enumerates rows from an exported Aspire report CSV (All Lists, All User
|
9
|
+
# Profiles etc.) with optional filtering
|
10
|
+
class ReportEnumerator < Base
  # @!attribute [rw] file
  #   @return [String] the filename of the report
  attr_accessor :file

  # @!attribute [rw] filters
  #   @return [Array<Proc>] a list of filters to select rows for processing
  attr_accessor :filters

  # Initialises a new ReportEnumerator instance
  # @param file [String] the filename of the report
  # @param filters [Array<Proc>] a list of filters to select rows for
  #   processing. Each proc accepts a parsed row from the CSV file and
  #   returns true to accept it or false to reject it. All filters must
  #   return true for the row to be yielded.
  # @return [void]
  def initialize(file = nil, filters = nil)
    self.file = file
    self.filters = filters
  end

  # Enumerates the report rows, yielding each row accepted by the filters.
  # The file is read as ISO-8859-1 with a header row, and YYYY-MM-DD fields
  # are converted to Date instances. Arguments are accepted and ignored.
  # NOTE(review): yielder is not defined in this class - presumably it is
  # provided by the Base superclass; confirm.
  # @return [void]
  def enumerate(*_args, **_kwargs)
    csv_options = {
      converters: date_converter, headers: true, encoding: 'ISO-8859-1'
    }
    CSV.foreach(file, **csv_options) do |row|
      next unless filter(row)
      yielder << row
    end
  end

  private

  # Returns a YYYY-MM-DD date converter for the CSV processor
  # @return [Proc] the date converter
  def date_converter
    ->(text) { parse_report_date(text) }
  end

  # Returns true if the row passes all filters, false otherwise
  # @param row [CSV::Row] the parsed row from the report
  # @return [Boolean] true if there are no filters or every filter accepts
  #   the row, false as soon as one filter rejects it
  def filter(row)
    active = filters
    return true if active.nil? || active.empty?
    active.all? { |accept| accept.call(row) }
  end

  # Parses a YYYY-MM-DD field, leaving fields in any other format unchanged
  # @param text [String] the CSV field
  # @return [Date, String] the parsed date, or the original field if it is
  #   not a valid YYYY-MM-DD date
  def parse_report_date(text)
    Date.strptime(text, '%Y-%m-%d')
  rescue ArgumentError
    text
  end
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Aspire
|
2
|
+
module Exceptions
  # The root of the Aspire exception hierarchy: every exception defined in
  # this module descends from it
  class Error < StandardError
  end

  # Raised when a requested URL is not present in the cache
  class CacheMiss < Error
  end

  # Raised when an Aspire API call fails
  class APIError < Error
  end

  # Raised when an Aspire API call times out (a kind of APIError)
  class APITimeout < APIError
  end

  # Raised when a cache entry mark operation fails
  class MarkError < Error
  end

  # Raised when trying to mark an already-marked cache entry
  class MarkedError < Error
  end

  # Raised when a URL is not cacheable
  class NotCacheable < Error
  end

  # Raised when data cannot be read from the cache
  class ReadError < Error
  end

  # Raised when data cannot be removed from the cache
  class RemoveError < Error
  end

  # Raised when a cache entry unmark operation fails
  class UnmarkError < Error
  end

  # Raised when data cannot be written to the cache
  class WriteError < Error
  end
end
|
36
|
+
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
require 'loofah'
|
4
|
+
|
5
|
+
require 'aspire/util'
|
6
|
+
|
7
|
+
module Aspire
|
8
|
+
module Object
|
9
|
+
# The base class for Aspire API objects
|
10
|
+
class Base
  include Aspire::Util

  # Aspire properties containing HTML markup will have the markup stripped
  # if STRIP_HTML = true
  STRIP_HTML = true

  # @!attribute [rw] factory
  #   @return [Aspire::Object::Factory] the factory for creating
  #     Aspire::Object instances
  attr_accessor :factory

  # @!attribute [rw] uri
  #   @return [String] the URI of the object
  attr_accessor :uri

  # Initialises a new Aspire::Object instance
  # @param uri [String] the URI of the object
  # @param factory [Aspire::Object::Factory] the factory for creating
  #   Aspire::Object instances
  # @return [void]
  def initialize(uri, factory)
    self.factory = factory
    # Normalise the URL to the linked data form when a factory is available
    self.uri = factory ? factory.cache.linked_data_url(uri) : uri
  end

  # Returns a Boolean property value
  # @param property [String] the property name
  # @param data [Hash] the data hash containing the property
  # @param single [Boolean] if true, return a single value, otherwise return
  #   an array of values
  # @return [Boolean, Array<Boolean>] the property value(s)
  def get_boolean(property, data, single: true)
    get_property(property, data, single: single) do |value, _type|
      value ? true : false
    end
  end

  # Returns a DateTime instance for a timestamp property
  # @param property [String] the property name
  # @param data [Hash] the data hash containing the property
  # @param single [Boolean] if true, return a single value, otherwise return
  #   an array of values
  # @return [DateTime, Array<DateTime>, nil] the property value(s); a
  #   missing value is returned as nil
  # @raise [ArgumentError] if a value is present but is not a valid date
  def get_date(property, data, single: true)
    get_property(property, data, single: single) do |value, _type|
      # DateTime.parse(nil) raises TypeError, so a missing property is
      # passed through as nil, matching get_property
      value.nil? ? nil : DateTime.parse(value)
    end
  end

  # Returns the value of a property
  # @param property [String] the property name
  # @param data [Hash] the data hash containing the property; if nil, the
  #   property value is treated as nil
  # @param is_url [Boolean] if true, the property value is a URL
  # @param single [Boolean] if true, return a single value (the first
  #   member when the property is an array), otherwise return an array of
  #   values
  # @return [Object, Array<Object>] the property value(s)
  # @yield [value, type] passes the value and type to the block
  # @yieldparam value [Object] the property value
  # @yieldparam type [String] the type of the property value
  # @yieldreturn [Object] the transformed property value
  def get_property(property, data, is_url: false, single: true, &block)
    raw = data ? data[property] : nil
    if raw.is_a?(Array)
      converted = raw.map do |item|
        get_property_value(item, is_url: is_url, &block)
      end
      single ? converted.first : converted
    else
      converted = get_property_value(raw, is_url: is_url, &block)
      single ? converted : [converted]
    end
  end

  # Returns a string representation of the instance (the URI)
  # @return [String] the URI, or the empty string if no URI is set
  def to_s
    uri.to_s
  end

  # Sets the URI of the object, removing any format extension (.json, .rdf
  # etc.) reported by File.extname
  # @param u [String, nil] the URI of the object
  # @return [void]
  def uri=(u)
    # File.extname raises TypeError for nil; store nil as-is
    ext = u.nil? ? '' : File.extname(u)
    @uri = ext.empty? ? u : u.rpartition(ext)[0]
  end

  protected

  # Retrieves and transforms the property value
  # @param value [Object] the property value from the Aspire API; a Hash
  #   is assumed to be a type/value pair
  # @param is_url [Boolean] if true, the property value is a URL
  # @yield [value, type] Passes the property value and type URI to the block
  # @yieldparam value [Object] the property value
  # @yieldparam type [String] the property value's type URI
  # @yieldreturn [Object] the transformed property value
  # @return [Object] the result of the block if one is given, otherwise the
  #   (transformed) property value
  def get_property_value(value, is_url: false)
    type = nil
    # Assume hash values are a type/value pair
    type, value = value['type'], value['value'] if value.is_a?(Hash)
    # Apply transformations to string properties
    value = transform(value, is_url: is_url) if value.is_a?(String)
    # If a block is present, return the result of the block
    block_given? ? yield(value, type) : value
  end

  # Removes HTML markup from property values
  # @param value [String] the property value from the Aspire API
  # @param is_url [Boolean] if true, the property value is a URL
  # @return [String] the property value
  def transform(value, is_url: false)
    # Remove HTML-escaped encodings from URLs without full HTML-stripping
    return CGI.unescape_html(value) if is_url
    # Return value as-is when stripping is disabled
    return value unless STRIP_HTML
    # Strip HTML preserving block-level whitespace. Loofah seems to leave
    # HTML entities in its text output, so CGI.unescape_html decodes them.
    text = CGI.unescape_html(Loofah.fragment(value).to_text)
    # Collapse runs of whitespace to a single space and trim both ends
    text.gsub(/\s+/, ' ').strip
  end
end
|
154
|
+
end
|
155
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'aspire/object/base'
|
2
|
+
|
3
|
+
module Aspire
|
4
|
+
module Object
|
5
|
+
# Represents a digitisation record in the Aspire API
|
6
|
+
class Digitisation < Base
  # @!attribute [rw] bundle_id
  #   @return [String] the digitisation bundle ID
  attr_accessor :bundle_id

  # @!attribute [rw] request_id
  #   @return [String] the digitisation request ID
  attr_accessor :request_id

  # @!attribute [rw] request_status
  #   @return [String] the digitisation request status
  attr_accessor :request_status

  # Initialises a new Digitisation instance from the JSON API data. All
  # three attributes are nil when no JSON data is given. The superclass
  # initialiser is not called, so uri and factory are left unset.
  # @param json [Hash] the parsed JSON data from the JSON API
  # @param ld [Hash] the parsed JSON data from the linked data API
  #   (accepted but not used by this initialiser)
  # @return [void]
  def initialize(json: nil, ld: nil)
    # An empty hash yields nil for every key, covering the no-data case
    source = json || {}
    self.bundle_id = source['bundleId']
    self.request_id = source['requestId']
    self.request_status = source['requestStatus']
  end

  # Returns a string representation of the Digitisation instance (the
  # request ID)
  # @return [String] the request ID, or the empty string if it is not set
  def to_s
    request_id.to_s
  end
end
|
42
|
+
end
|
43
|
+
end
|