aspire 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +59 -0
  3. data/.rbenv-gemsets +1 -0
  4. data/.travis.yml +5 -0
  5. data/CODE_OF_CONDUCT.md +74 -0
  6. data/Dockerfile +20 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +851 -0
  10. data/Rakefile +10 -0
  11. data/aspire.gemspec +40 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/entrypoint.sh +11 -0
  15. data/exe/build-cache +13 -0
  16. data/lib/aspire.rb +11 -0
  17. data/lib/aspire/api.rb +2 -0
  18. data/lib/aspire/api/base.rb +198 -0
  19. data/lib/aspire/api/json.rb +195 -0
  20. data/lib/aspire/api/linked_data.rb +214 -0
  21. data/lib/aspire/caching.rb +4 -0
  22. data/lib/aspire/caching/builder.rb +356 -0
  23. data/lib/aspire/caching/cache.rb +365 -0
  24. data/lib/aspire/caching/cache_entry.rb +296 -0
  25. data/lib/aspire/caching/cache_logger.rb +63 -0
  26. data/lib/aspire/caching/util.rb +210 -0
  27. data/lib/aspire/cli/cache_builder.rb +123 -0
  28. data/lib/aspire/cli/command.rb +20 -0
  29. data/lib/aspire/enumerator/base.rb +29 -0
  30. data/lib/aspire/enumerator/json_enumerator.rb +130 -0
  31. data/lib/aspire/enumerator/linked_data_uri_enumerator.rb +32 -0
  32. data/lib/aspire/enumerator/report_enumerator.rb +64 -0
  33. data/lib/aspire/exceptions.rb +36 -0
  34. data/lib/aspire/object.rb +7 -0
  35. data/lib/aspire/object/base.rb +155 -0
  36. data/lib/aspire/object/digitisation.rb +43 -0
  37. data/lib/aspire/object/factory.rb +87 -0
  38. data/lib/aspire/object/list.rb +590 -0
  39. data/lib/aspire/object/module.rb +36 -0
  40. data/lib/aspire/object/resource.rb +371 -0
  41. data/lib/aspire/object/time_period.rb +47 -0
  42. data/lib/aspire/object/user.rb +46 -0
  43. data/lib/aspire/properties.rb +20 -0
  44. data/lib/aspire/user_lookup.rb +103 -0
  45. data/lib/aspire/util.rb +185 -0
  46. data/lib/aspire/version.rb +3 -0
  47. data/lib/retry.rb +197 -0
  48. metadata +274 -0
@@ -0,0 +1,130 @@
1
module Aspire
  # Enumerator classes for Aspire reading list processing
  module Enumerator
    # Enumerates over the properties of a JSON data structure.
    #
    # Nested hashes and arrays are walked recursively; every other value is
    # pushed to the yielder as a [key, value, index] triple. Optional callback
    # hooks run before and after each array, hash and yielded property.
    #
    # NOTE: this class defines #hash with required arguments, which replaces
    # the zero-argument Object#hash for its instances, so instances cannot be
    # used as Hash keys.
    class JSONEnumerator
      # @!attribute [rw] hooks
      #   @return [Hash] the callback hooks
      attr_accessor :hooks

      # The Enumerator::Yielder instance from an Enumerator.new call
      # @!attribute [rw] yielder
      #   @return [Enumerator::Yielder] the yielder instance from an Enumerator
      attr_accessor :yielder

      # Initialises a new JSONEnumerator instance
      # @param yielder [Enumerator::Yielder] the yielder from an Enumerator
      # @param hooks [Hash] a hash of executable callback hooks:
      #   {
      #     after_array: proc { |key,value,index| }
      #     after_hash: proc { |key,value,index| }
      #     after_yield: proc { |key,value,index| }
      #     before_array: proc { |key,value,index| }
      #     before_hash: proc { |key,value,index| }
      #     before_yield: proc { |key,value,index| }
      #   }
      #
      #   Each callback is a Proc accepting a property key (name), value, and
      #   optionally the numeric index of the property in its parent array (this
      #   is nil if the property is not an array member).
      #
      #   Value is an array for after/before_array, a hash for after/before_hash
      #   and any type for after/before_yield.
      #
      #   All before hooks must return a truthy value to allow processing of
      #   the value, or a falsey value to prevent processing of the value.
      #
      #   Filters should be implemented in before hooks.
      #
      #   Before hooks can also be used to process arrays and hashes as a whole.
      #   They should return false if property-level processing is not required.
      # @return [void]
      def initialize(yielder = nil, **hooks)
        self.hooks = hooks
        self.yielder = yielder
      end

      # Invokes a callback hook and reports its result as a Boolean
      # @param hook [Symbol] the name of the hook
      # @param args [Array] the positional arguments passed to the hook
      # @param kwargs [Hash] the keyword arguments passed to the hook
      # @return [Boolean] true if no callable hook is registered under the
      #   name or if the hook returns a truthy value, false otherwise
      def [](hook, *args, **kwargs)
        callback = hooks[hook]
        # A missing (or non-callable) hook never blocks processing
        return true unless callback.respond_to?(:call)
        callback.call(*args, **kwargs) ? true : false
      end

      # Registers a callback hook
      # @param hook [Symbol] the name of the hook
      # @param proc [Proc, Method] the callback
      # @return [Proc, Method] the callback
      # @raise [ArgumentError] if the callback is neither a Proc nor a Method
      def []=(hook, proc)
        unless proc.is_a?(Proc) || proc.is_a?(Method)
          raise ArgumentError, 'Proc or Method expected'
        end
        hooks[hook] = proc
      end

      # Enumerates an array of JSON data structures
      # @param key [String] the property name
      # @param array [Array] the property value
      # @param index [Integer] the index of the property in its parent array, or
      #   nil if not part of an array
      # @return [void]
      def array(key, array, index = nil)
        return unless self[:before_array, key, array, index]
        # Every member is enumerated under the array's own key, with its
        # position in the array as the index
        array.each_with_index { |value, i| enumerate(key, value, i) }
        self[:after_array, key, array, index]
      end

      # Enumerates the property/value pairs of a JSON data structure
      # @param key [String] the property name
      # @param value [Object] the property value
      # @param index [Integer] the index of the property in its parent array, or
      #   nil if not part of an array
      # @return [void]
      def enumerate(key, value, index = nil)
        case value
        when Array then array(key, value, index)
        when Hash then hash(key, value, index)
        else property(key, value, index)
        end
      end

      # Returns an enumerator enumerating property/value pairs of JSON data.
      # The enumerator's yielder replaces this instance's yielder each time the
      # enumerator is run.
      # @param key [String] the initial key of the data
      # @param value [Object] the initial value of the data
      # @return [Enumerator] the enumerator
      def enumerator(key, value)
        ::Enumerator.new do |yielder|
          self.yielder = yielder
          enumerate(key, value)
        end
      end

      # Enumerates the property/value pairs of a JSON hash
      # @param key [String] the property name
      # @param hash [Hash] the hash to enumerate
      # @param index [Integer] the index of the property in its parent array, or
      #   nil if not part of an array
      # @return [void]
      def hash(key, hash, index = nil)
        return unless self[:before_hash, key, hash, index]
        # enumerate dispatches on the value type, so scalars reach #property
        hash.each { |k, v| enumerate(k, v) }
        self[:after_hash, key, hash, index]
      end

      # Yields a property/value pair
      # @param key [String] the property name
      # @param value [Object] the property value
      # @param index [Integer] the index of the property in its parent array, or
      #   nil if not part of an array
      # @return [void]
      def property(key, value, index = nil)
        return unless self[:before_yield, key, value, index]
        yielder << [key, value, index]
        # The hook receives the same (key, value, index) arguments as every
        # other hook
        self[:after_yield, key, value, index]
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'aspire/enumerator/json_enumerator'
2
+
3
module Aspire
  # Enumerator classes for Aspire reading list processing
  module Enumerator
    # Enumerates the URI properties of a linked data API object
    class LinkedDataURIEnumerator < JSONEnumerator
      # Initialises a new LinkedDataURIEnumerator instance.
      #
      # After the superclass has stored the supplied hooks, the :before_hash
      # and :before_yield hooks are replaced, so caller-supplied values for
      # those two hooks are overridden.
      # @param yielder [Enumerator::Yielder] the yielder from an Enumerator
      # @param hooks [Hash] the callback hooks
      # @return [void]
      def initialize(yielder = nil, **hooks)
        super(yielder, **hooks)
        # Hashes of the form { type: "uri", value: "..." } are pushed to the
        # yielder as [key, hash, index] and not descended into; any other
        # hash is descended into as usual.
        self[:before_hash] = proc do |key, hash, index|
          uri = hash['type'] == 'uri' && hash['value'] && !hash['value'].empty?
          self.yielder << [key, hash, index] if uri
          !uri
        end
        # Plain properties are never yielded
        self[:before_yield] = proc { |*_ignored| false }
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ require 'csv'
2
+
3
+ require 'aspire/enumerator/base'
4
+
5
module Aspire
  # Enumerator classes for Aspire reading list processing
  module Enumerator
    # Enumerates rows from an exported Aspire report CSV (All Lists, All User
    # Profiles etc.) with optional filtering
    class ReportEnumerator < Base
      # @!attribute [rw] file
      #   @return [String] the filename of the report
      attr_accessor :file

      # @!attribute [rw] filters
      #   @return [Array<Proc>] a list of filters to select rows for processing
      attr_accessor :filters

      # Initialises a new ReportEnumerator instance
      # @param file [String] the filename of the report
      # @param filters [Array<Proc>] a list of filters to select rows for
      #   processing. Each proc accepts a parsed row from the CSV file and
      #   returns true to accept it or false to reject it. All filters must
      #   return true for the row to be yielded.
      # @return [void]
      def initialize(file = nil, filters = nil)
        self.file = file
        self.filters = filters
      end

      # Enumerates the report rows, pushing each row accepted by the filters
      # to the yielder. The file is read as ISO-8859-1 with a header row, and
      # YYYY-MM-DD fields are converted to Date instances.
      # NOTE(review): yielder is not defined in this class - presumably it is
      # provided by Base; confirm.
      # @return [void]
      def enumerate(*_args, **_kwargs)
        csv_options = {
          converters: date_converter,
          headers: true,
          encoding: 'ISO-8859-1'
        }
        CSV.foreach(file, **csv_options) do |row|
          yielder << row if filter(row)
        end
      end

      private

      # Returns a YYYY-MM-DD date converter for the CSV processor
      # @return [Proc] the date converter
      def date_converter
        ->(field) { parse_date(field) }
      end

      # Returns true if the row passes all filters, false otherwise
      # @param row [CSV::Row] the parsed row from the report
      # @return [Boolean] true if no filters are defined or every filter
      #   returns a truthy value for the row, false otherwise
      def filter(row)
        return true if filters.nil? || filters.empty?
        # Stops at the first filter that rejects the row
        filters.all? { |accept| accept.call(row) }
      end

      # Converts a YYYY-MM-DD string to a Date
      # @param field [String] the field value from the report
      # @return [Date, String] the Date, or the field unchanged if it is not
      #   in YYYY-MM-DD format
      def parse_date(field)
        Date.strptime(field, '%Y-%m-%d')
      rescue ArgumentError
        field
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module Aspire
  # Exception classes raised by the Aspire API and caching code
  module Exceptions
    # The root of the exception hierarchy
    class Error < StandardError
    end

    # --- API failures ---

    # Raised when an Aspire API call fails
    class APIError < Error
    end

    # Raised when an Aspire API call times out
    class APITimeout < APIError
    end

    # --- Cache lookup failures ---

    # Raised when a requested URL is not present in the cache
    class CacheMiss < Error
    end

    # Raised when a URL is not cacheable
    class NotCacheable < Error
    end

    # --- Cache entry marking failures ---

    # Raised when a cache entry mark operation fails
    class MarkError < Error
    end

    # Raised when trying to mark an already-marked cache entry
    class MarkedError < Error
    end

    # Raised when a cache entry unmark operation fails
    class UnmarkError < Error
    end

    # --- Cache I/O failures ---

    # Raised when data cannot be read from the cache
    class ReadError < Error
    end

    # Raised when data cannot be removed from the cache
    class RemoveError < Error
    end

    # Raised when data cannot be written to the cache
    class WriteError < Error
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'aspire/object/digitisation'
2
+ require 'aspire/object/factory'
3
+ require 'aspire/object/list'
4
+ require 'aspire/object/module'
5
+ require 'aspire/object/resource'
6
+ require 'aspire/object/time_period'
7
+ require 'aspire/object/user'
@@ -0,0 +1,155 @@
1
+ require 'cgi'
2
+
3
+ require 'loofah'
4
+
5
+ require 'aspire/util'
6
+
7
module Aspire
  module Object
    # The base class for Aspire API objects
    class Base
      include Aspire::Util

      # Aspire properties containing HTML markup will have the markup stripped
      # if STRIP_HTML = true
      STRIP_HTML = true

      # @!attribute [rw] factory
      #   @return [Aspire::Object::Factory] the factory for creating
      #     Aspire::Object instances
      attr_accessor :factory

      # The writer is defined explicitly below (#uri=) because it strips any
      # format suffix (.html, .json etc.) from the URI.
      # @!attribute [rw] uri
      #   @return [String] the URI of the object
      attr_reader :uri

      # Initialises a new Aspire::Object instance
      # @param uri [String] the URI of the object
      # @param factory [Aspire::Object::Factory] the factory for creating
      #   Aspire::Object instances
      # @return [void]
      def initialize(uri, factory)
        self.factory = factory
        # Normalise the URL to the linked data form
        self.uri = factory ? factory.cache.linked_data_url(uri) : uri
      end

      # Returns a Boolean property value
      # @param property [String] the property name
      # @param data [Hash] the data hash containing the property
      # @param single [Boolean] if true, return a single value, otherwise return
      #   an array of values
      # @return [Boolean, Array<Boolean>] the property value(s); a missing
      #   property is false
      def get_boolean(property, data, single: true)
        get_property(property, data, single: single) do |value, _type|
          value ? true : false
        end
      end

      # Returns a DateTime instance for a timestamp property
      # @param property [String] the property name
      # @param data [Hash] the data hash containing the property
      # @param single [Boolean] if true, return a single value, otherwise return
      #   an array of values
      # @return [DateTime, Array<DateTime>, nil] the property value(s); a
      #   missing property is nil
      # @raise [ArgumentError] if a value is present but is not a valid date
      def get_date(property, data, single: true)
        get_property(property, data, single: single) do |value, _type|
          # A missing property reaches the block as nil, which DateTime.parse
          # rejects with a TypeError
          DateTime.parse(value) if value
        end
      end

      # Returns the value of a property
      # @param property [String] the property name
      # @param data [Hash] the data hash containing the property; if nil, the
      #   property is treated as missing
      # @param is_url [Boolean] if true, the property value is a URL
      # @param single [Boolean] if true, return a single value, otherwise return
      #   an array of values
      # @return [Object, Array<Object>] the property value(s)
      # @yield [value, type] passes the value and type to the block
      # @yieldparam value [Object] the property value
      # @yieldparam type [String] the type of the property value
      # @yieldreturn [Object] the transformed property value
      def get_property(property, data, is_url: false, single: true, &block)
        values = data ? data[property] : nil
        if values.is_a?(Array)
          values = values.map do |value|
            get_property_value(value, is_url: is_url, &block)
          end
          # Only the first of multiple values is returned when single is true
          single ? values[0] : values
        else
          value = get_property_value(values, is_url: is_url, &block)
          single ? value : [value]
        end
      end

      # Returns a string representation of the instance (the URI)
      # @return [String] the string representation of the instance
      def to_s
        uri.to_s
      end

      # Sets the URI of the object
      # @param u [String, nil] the URI of the object
      # @return [void]
      def uri=(u)
        if u.nil?
          @uri = nil
          return
        end
        # Remove any format extension (.json, .rdf etc.)
        ext = File.extname(u)
        @uri = ext.empty? ? u : u.rpartition(ext)[0]
      end

      protected

      # Retrieves and transforms the property value
      # @param value [Object] the property value from the Aspire API
      # @param is_url [Boolean] if true, the property value is a URL
      # @yield [value, type] Passes the property value and type URI to the block
      # @yieldparam value [Object] the property value
      # @yieldparam type [String] the property value's type URI
      # @yieldreturn [Object] the transformed property value
      # @return [Object] the property value, or the result of the block if a
      #   block is given
      def get_property_value(value, is_url: false)
        # Assume hash values are a type/value pair
        if value.is_a?(Hash)
          type = value['type']
          value = value['value']
        else
          type = nil
        end
        # Apply transformations to string properties
        value = transform(value, is_url: is_url) if value.is_a?(String)
        # If a block is present, return the result of the block
        return yield(value, type) if block_given?
        # Otherwise return the value
        value
      end

      # Removes HTML markup from property values
      # @param value [String] the property value from the Aspire API
      # @param is_url [Boolean] if true, the property value is a URL
      # @return [String] the property value
      def transform(value, is_url: false)
        if is_url
          # Remove HTML-escaped encodings from URLs without full HTML-stripping
          CGI.unescape_html(value)
        elsif STRIP_HTML
          # Strip HTML preserving block-level whitespace
          # - Loofah seems to preserve &amp; &quot; etc. so we remove these with
          #   CGI.unescape_html
          text = CGI.unescape_html(Loofah.fragment(value).to_text)
          # Collapse all runs of whitespace to a single space and remove
          # leading and trailing whitespace
          text.gsub(/\s+/, ' ').strip
        else
          # Return value as-is
          value
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'aspire/object/base'
2
+
3
module Aspire
  module Object
    # Represents a digitisation record in the Aspire API
    class Digitisation < Base
      # @!attribute [rw] bundle_id
      #   @return [String] the digitisation bundle ID
      attr_accessor :bundle_id

      # @!attribute [rw] request_id
      #   @return [String] the digitisation request ID
      attr_accessor :request_id

      # @!attribute [rw] request_status
      #   @return [String] the digitisation request status
      attr_accessor :request_status

      # Initialises a new Digitisation instance from the 'bundleId',
      # 'requestId' and 'requestStatus' properties of the JSON API data.
      # All three attributes are nil when no JSON data is given.
      # @param json [Hash] the parsed JSON data from the JSON API
      # @param ld [Hash] the parsed JSON data from the linked data API
      #   (accepted but not used by this method)
      # @return [void]
      def initialize(json: nil, ld: nil)
        # An empty hash stands in for missing JSON so every lookup is nil
        record = json || {}
        self.bundle_id = record['bundleId']
        self.request_id = record['requestId']
        self.request_status = record['requestStatus']
      end

      # Returns a string representation of the Digitisation instance (the
      # request ID)
      # @return [String] the string representation of the Digitisation instance
      def to_s
        request_id.to_s
      end
    end
  end
end