aspire 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +59 -0
  3. data/.rbenv-gemsets +1 -0
  4. data/.travis.yml +5 -0
  5. data/CODE_OF_CONDUCT.md +74 -0
  6. data/Dockerfile +20 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +851 -0
  10. data/Rakefile +10 -0
  11. data/aspire.gemspec +40 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/entrypoint.sh +11 -0
  15. data/exe/build-cache +13 -0
  16. data/lib/aspire.rb +11 -0
  17. data/lib/aspire/api.rb +2 -0
  18. data/lib/aspire/api/base.rb +198 -0
  19. data/lib/aspire/api/json.rb +195 -0
  20. data/lib/aspire/api/linked_data.rb +214 -0
  21. data/lib/aspire/caching.rb +4 -0
  22. data/lib/aspire/caching/builder.rb +356 -0
  23. data/lib/aspire/caching/cache.rb +365 -0
  24. data/lib/aspire/caching/cache_entry.rb +296 -0
  25. data/lib/aspire/caching/cache_logger.rb +63 -0
  26. data/lib/aspire/caching/util.rb +210 -0
  27. data/lib/aspire/cli/cache_builder.rb +123 -0
  28. data/lib/aspire/cli/command.rb +20 -0
  29. data/lib/aspire/enumerator/base.rb +29 -0
  30. data/lib/aspire/enumerator/json_enumerator.rb +130 -0
  31. data/lib/aspire/enumerator/linked_data_uri_enumerator.rb +32 -0
  32. data/lib/aspire/enumerator/report_enumerator.rb +64 -0
  33. data/lib/aspire/exceptions.rb +36 -0
  34. data/lib/aspire/object.rb +7 -0
  35. data/lib/aspire/object/base.rb +155 -0
  36. data/lib/aspire/object/digitisation.rb +43 -0
  37. data/lib/aspire/object/factory.rb +87 -0
  38. data/lib/aspire/object/list.rb +590 -0
  39. data/lib/aspire/object/module.rb +36 -0
  40. data/lib/aspire/object/resource.rb +371 -0
  41. data/lib/aspire/object/time_period.rb +47 -0
  42. data/lib/aspire/object/user.rb +46 -0
  43. data/lib/aspire/properties.rb +20 -0
  44. data/lib/aspire/user_lookup.rb +103 -0
  45. data/lib/aspire/util.rb +185 -0
  46. data/lib/aspire/version.rb +3 -0
  47. data/lib/retry.rb +197 -0
  48. metadata +274 -0
@@ -0,0 +1,63 @@
1
+ require 'logger'
2
+
3
+ require 'aspire/exceptions'
4
+
5
+ module Aspire
6
+ # Tools for building a cache from the Aspire APIs
7
+ module Caching
8
# A thin wrapper around a Logger which adds convenience logging methods.
# Any method not defined here is forwarded to the wrapped logger; when no
# logger is set, such calls are silently ignored and return nil.
class CacheLogger
  # @!attribute [rw] logger
  #   @return [Logger, nil] the wrapped logger
  attr_accessor :logger

  # Initialises a new CacheLogger instance
  # @param logger [Logger, nil] the logger to wrap
  def initialize(logger = nil)
    self.logger = logger
  end

  # Delegates missing methods to the wrapped logger
  # @param method [Symbol] the method name
  # @param args [Array] the method arguments
  # @param block [Proc] the method code block
  # @return [Object, nil] the result of the logger method, or nil if no
  #   logger is set
  # @raise [NoMethodError] if a logger is set but does not respond to method
  def method_missing(method, *args, &block)
    # Do not fail if logger is undefined
    return nil unless logger
    # Let Ruby raise the usual NoMethodError for anything the logger lacks
    return super unless logger.respond_to?(method)

    logger.public_send(method, *args, &block)
  end

  # Reports whether a missing method would be handled by #method_missing.
  # Ruby invokes this hook with two arguments (name, include_private), so
  # both must be accepted.
  # @param method [Symbol] the method name
  # @param include_private [Boolean] whether private methods are considered
  #   (delegation uses public_send, so only the logger's public methods count)
  # @return [Boolean] true if no logger is set (every call is accepted as a
  #   no-op) or if the wrapped logger responds to the method
  def respond_to_missing?(method, include_private = false)
    return true unless logger

    logger.respond_to?(method) || super
  end

  # Logs a message and then raises an exception with the same message
  # @param message [String] the error message
  # @param exception [Class, nil] the class of the exception to be raised
  #   (default: Aspire::Exceptions::Error)
  # @param level [Integer, nil] the logger severity (default: Logger::ERROR)
  # @raise [Aspire::Exceptions::Error] unless another class is given
  def log_exception(message, exception = nil, level: nil)
    log(level || Logger::ERROR, message)
    raise exception || Aspire::Exceptions::Error, message
  end

  # Logs an event and returns its first argument
  # - allows for compact code such as 'return log_return(result, msg,...)'
  # @param result [Object] the return value of the method
  # @param args [Array] the positional arguments passed on to #log
  # @param kwargs [Hash] the keyword arguments passed on to #log
  # @return [Object] the result argument
  def log_return(result, *args, **kwargs)
    log(*args, **kwargs)
    result
  end
end
62
+ end
63
+ end
@@ -0,0 +1,210 @@
1
+ require 'fileutils'
2
+ require 'json'
3
+ require 'uri'
4
+
5
+ require 'aspire/enumerator/linked_data_uri_enumerator'
6
+ require 'aspire/exceptions'
7
+ require 'aspire/util'
8
+
9
+ module Aspire
10
+ # Tools for building a cache from the Aspire APIs
11
+ module Caching
12
+ # Cache utility methods
13
+ module Util
14
+ include Aspire::Util
15
+
16
# Rules deciding whether an object URL may be cached.
# Every rule is a Proc taking the parsed URL (from #parse_url) and the
# object passed by #cacheable_url, and returning a truthy value when the
# object is cacheable. Rules run in the order listed and all of them must
# pass for an object to be cacheable.
CACHEABLE = [
  # The URL must be set and the host must match the canonical tenancy host
  proc do |parsed, entry|
    parsed && parsed[:tenancy_host] == entry.cache.tenancy_host
  end,
  # Catalog objects are never cached
  proc do |parsed, _entry|
    parsed[:type] != 'catalog'
  end,
  # User objects themselves are not cached, but their child objects
  # (e.g. notes) are
  proc do |parsed, _entry|
    !(parsed[:type] == 'users' && parsed[:child_type].nil?)
  end,
  # Importance URI values are never cached
  proc do |parsed, _entry|
    !(parsed[:type] == 'config' && parsed[:id].to_s.start_with?('importance'))
  end
].freeze
34
+
35
# Prepends a prefix to the basename of a file path, leaving any leading
# directories untouched
# e.g. add_filename_prefix('a/b/file.txt', 'pre-') == 'a/b/pre-file.txt'
# @param filename [String] the filename
# @param prefix [String] the prefix
# @return [String] the filename with prefix
def add_filename_prefix(filename, prefix)
  # Split around the last occurrence of the basename
  leading, base, trailing = filename.rpartition(File.basename(filename))
  "#{leading}#{prefix}#{base}#{trailing}"
end
44
+
45
# Appends a suffix to a filename, inserting it before any file extension
# e.g. add_filename_suffix('file.txt', '-suffix') == 'file-suffix.txt'
# @param filename [String] the filename
# @param suffix [String] the suffix
# @return [String] the filename with suffix
def add_filename_suffix(filename, suffix)
  parts = filename.split(File::SEPARATOR)
  # When the last component is '.' or '..' the suffix belongs on the parent
  # component, otherwise on the last component itself
  idx = ['.', '..'].include?(parts[-1]) ? -2 : -1
  target = parts[idx]
  unless target.nil?
    # Insert the suffix between the stem and the extension
    stem, ext, rest = target.rpartition(File.extname(target))
    parts[idx] = "#{stem}#{suffix}#{ext}#{rest}"
  end
  # String#split drops a trailing separator, so restore it if present
  parts << '' if filename.end_with?(File::SEPARATOR)
  File.join(parts)
end
66
+
67
# Parses a URL and verifies it against every CACHEABLE rule
# @param u [String] the URL of the API object
# @return [MatchData] the parsed URL as returned by #parse_url
# @raise [Aspire::Exceptions::NotCacheable] if any rule rejects the URL
def cacheable_url(u)
  parsed = parse_url(u)
  # Rules are evaluated in order; evaluation stops at the first failure
  cacheable = CACHEABLE.all? { |rule| rule.call(parsed, self) }
  raise Aspire::Exceptions::NotCacheable unless cacheable

  parsed
end
81
+
82
# Tests whether a directory path has no further parents to visit
# @param dir [String] the directory path
# @param root [String] the directory root - paths above this are ignored
# @return [Boolean] true if dir is nil, empty, '.' or equal to root,
#   false otherwise
def end_of_path?(dir, root = nil)
  return true if dir.nil? || dir.empty?

  dir == '.' || dir == root
end
89
+
90
# Creates a directory and any missing parents, logging the outcome.
# The permission mode is taken from the #mode method of the object this
# module is mixed into (it is not defined in this module).
# @param dir [String] the directory name
# @param logger [Aspire::Caching::CacheLogger] the logger for messages
# @param success [String] the text of the debug message logged on success
#   (nothing is logged if this or the logger is missing)
# @param failure [String] the error message on failure
# @return [void]
# @raise [ArgumentError] if the directory is not specified
# @raise [WriteError] if the operation fails
def mkdir(dir, logger = nil, success = nil, failure = nil)
  raise ArgumentError, 'Directory expected' if dir.nil? || dir.empty?

  FileUtils.mkdir_p(dir, mode: mode)
  quiet = logger.nil? || success.nil? || success.empty?
  logger.log(Logger::DEBUG, success) unless quiet
rescue SystemCallError => error
  message = "#{failure || "Create directory #{dir} failed"}: #{error}"
  # Without a logger raise directly, otherwise log and raise via the logger
  raise WriteError, message if logger.nil?

  logger.log_exception(message, WriteError)
end
108
+
109
# Collects the distinct URIs referenced by a linked data API object
# @param url [String] the URL of the API object
# @param data [Hash] the parsed JSON data for the object
# @return [Array<String>] the list of URIs referenced by the object, in
#   order of first appearance
def references(url, data = nil)
  return [] if data.nil? || data.empty?

  uri_enum = Aspire::Enumerator::LinkedDataURIEnumerator.new
                                                        .enumerator(url, data)
  # Hash keys give de-duplication while preserving insertion order
  seen = {}
  uri_enum.each { |_key, node, _index| seen[node['value']] = true }
  seen.keys
end
122
+
123
# Removes every file or directory matching a glob pattern
# @param glob [String] the file pattern to be removed
# @param logger [Aspire::Caching::CacheLogger] the logger for messages
# @param success [String] the text for success log messages
#   (nothing is logged if this or the logger is missing)
# @param failure [String] the text for failure exception/log messages
# @return [void]
# @raise [ArgumentError] if the pattern is not specified
# @raise [RemoveError] if the removal fails
def rm(glob, logger = nil, success = nil, failure = nil)
  raise ArgumentError, 'file path required' if glob.nil? || glob.empty?

  matches = Dir.glob(glob)
  FileUtils.rm_rf(matches, secure: true)
  quiet = logger.nil? || success.nil? || success.empty?
  logger.log(Logger::INFO, success) unless quiet
rescue SystemCallError => error
  message = "#{failure || "Remove #{glob} failed"}: #{error}"
  # Without a logger raise directly, otherwise log and raise via the logger
  raise RemoveError, message if logger.nil?

  logger.log_exception(message, RemoveError)
end
141
+
142
# Removes empty directories walking upwards from a starting path.
# Starting at the parent of path (or at path itself if it is a directory),
# each empty directory is removed in turn until a non-empty directory, the
# root, or the end of the path is reached.
# @param path [String] the starting file or directory
# @param root [String] the directory at which to stop; it is never removed
# @return [void]
# @raise [RemoveError] if a directory cannot be removed for a reason other
#   than being non-empty or not being a directory
def rmdir_empty(path, root)
  # The starting path is assumed to be a filename, so we append a dummy
  # filename if it's a directory
  path = File.directory?(path) ? File.join(path, '.') : path
  loop do
    # Get the parent of the current directory/file
    path = File.dirname(path)
    # Stop at the end of the directory path or a non-empty directory
    break if end_of_path?(path, root) || !Dir.empty?(path)

    # Remove the directory
    Dir.rmdir(path)
  end
rescue Errno::ENOTEMPTY, Errno::ENOTDIR
  # Stop without error if the directory is not empty or not a directory
  nil
rescue SystemCallError => e
  # path holds the directory being processed when the failure occurred
  raise RemoveError, "Rmdir #{path} failed: #{e}"
end
165
+
166
# Strips the file extension (as reported by File.extname) from a path
# @param path [String] the file path
# @return [String] the file path with any extension removed
def strip_ext(path)
  extension = File.extname(path)
  # Everything before the last occurrence of the extension
  path.rpartition(extension).first
end
172
+
173
# Strips a prefix from the basename of a file path, leaving any leading
# directories untouched
# @param filename [String] the filename
# @param prefix [String] the prefix
# @return [String] the filename without prefix
def strip_filename_prefix(filename, prefix)
  leading, base, trailing = filename.rpartition(File.basename(filename))
  [leading, strip_prefix(base, prefix), trailing].join
end
182
+
183
# Strips a suffix from a filename, where the suffix sits immediately before
# any file extension
# @param filename [String] the filename
# @param suffix [String] the suffix
# @return [String] the filename without suffix
def strip_filename_suffix(filename, suffix)
  stem, ext, rest = filename.rpartition(File.extname(filename))
  [strip_suffix(stem, suffix), ext, rest].join
end
192
+
193
# Strips a leading prefix from a string
# @param str [String] the string to remove the prefix from
# @param prefix [String] the prefix to remove
# @return [String] the string with the prefix removed, or the string
#   itself if it does not start with the prefix
def strip_prefix(str, prefix)
  return str unless str.start_with?(prefix)

  str.slice(prefix.length..-1)
end
200
+
201
# Strips a trailing suffix from a string
# @param str [String] the string to remove the suffix from
# @param suffix [String] the suffix to remove
# @return [String] the string with the suffix removed, or the string
#   itself if the suffix is nil, empty or not present
def strip_suffix(str, suffix)
  # An empty suffix must be handled explicitly: every string ends with '',
  # and the range 0...-0 is 0...0, which would return an empty string
  return str if suffix.nil? || suffix.empty?
  return str unless str.end_with?(suffix)

  str.slice(0...-suffix.length)
end
208
+ end
209
+ end
210
+ end
@@ -0,0 +1,123 @@
1
+ require 'aspire/cli/command'
2
+ require 'aspire/enumerator/report_enumerator'
3
+ require 'aspire/util'
4
+ require 'logglier'
5
+ require 'dotenv'
6
+
7
+ module Aspire
8
+ module CLI
9
+
10
+ class CacheBuilder < Command
11
+
12
# Runs the cache builder command.
# Loads environment variables from the --env-file option if given, builds
# the API clients, logger and cache from the environment, then caches
# either the single list given by --list-uri or every list in the list
# report matching the time period, status and privacy control options.
# @return [void]
# @raise [ArgumentError] if neither a list URI nor a privacy control is given
def execute
  Dotenv.load(env_file) unless env_file.nil? || env_file.empty?

  @json_api = json_api
  @linked_data_api = linked_data_api
  @logger = create_logger(log_to_file?)
  @cache_path = ENV['ASPIRE_CACHE_PATH']
  @list_report = ENV['ASPIRE_LIST_REPORT']
  # The cache mode is an octal string such as '700'; default to 0o700.
  # NOTE(review): @mode is assigned here but is not passed to Cache.new
  # in this method - confirm whether that is intended.
  cache_mode = ENV['ASPIRE_CACHE_MODE']
  @mode = cache_mode.nil? || cache_mode.empty? ? 0o700 : cache_mode.to_i(8)
  cache = Aspire::Caching::Cache.new(
    @linked_data_api, @json_api, @cache_path, logger: @logger
  )
  @builder = Aspire::Caching::Builder.new(cache)

  single_list = !(list_uri.nil? || list_uri.empty?)
  if single_list
    puts "Caching list #{list_uri}"
    @builder.write_list(list_uri)
    puts "Finished caching list"
  else
    # Building from the list report requires a privacy control
    raise ArgumentError if privacy_control.nil? || privacy_control.empty?

    puts "Caching all lists that match arguments"
    lists = list_enumerator(time_period_list, status, privacy_control)
    @builder.build(lists)
    puts "Finished caching all lists that match arguments"
  end
end
48
+
49
+ private
50
+
51
# Builds an enumerator over the rows of the list report (@list_report)
# which pass every filter derived from the arguments
# @param time_periods [Array<String>, nil] the time periods to accept; if
#   nil, empty or [''], only rows with a nil or empty 'Time Period' match
# @param status [String, nil] if given, rows must have a 'Status' starting
#   with this value
# @param privacy_control [String, nil] if given, rows must have a
#   'Privacy Control' equal to this value
# @return [Enumerator] the enumerator returned by
#   Aspire::Enumerator::ReportEnumerator#enumerator
def list_enumerator(time_periods = nil, status = nil, privacy_control = nil)
  filters = []

  if time_periods.nil? || time_periods.empty? || time_periods == ['']
    time_periods = [nil, '']
  end
  filters.push(proc { |row| time_periods.include?(row['Time Period']) })

  unless status.nil? || status.empty?
    filters.push(proc { |row| row['Status'].to_s.start_with?(status) })
  end

  # Guard on privacy_control itself: testing status here raised
  # NoMethodError when status was nil and filtered on '' when it was not
  unless privacy_control.nil? || privacy_control.empty?
    filters.push(proc { |row| row['Privacy Control'] == privacy_control })
  end

  Aspire::Enumerator::ReportEnumerator.new(@list_report, filters)
                                      .enumerator
end
72
+
73
# Creates the JSON API client from environment variables
# (ASPIRE_API_AVAILABLE, ASPIRE_API_CLIENT_ID, ASPIRE_API_SECRET,
# ASPIRE_TENANT), recording each value in an instance variable
# @return [Aspire::API::JSON] the JSON API client
def json_api
  @api_available = ENV['ASPIRE_API_AVAILABLE'] == 'true'
  @api_client_id, @api_secret, @tenant = ENV.values_at(
    'ASPIRE_API_CLIENT_ID', 'ASPIRE_API_SECRET', 'ASPIRE_TENANT'
  )
  options = api_opts
  Aspire::API::JSON.new(@api_client_id, @api_secret, @tenant, **options)
end
81
+
82
# Builds the SSL options shared by the API clients from the SSL_CA_FILE
# and SSL_CA_PATH environment variables
# @return [Hash] the options with keys :ssl_ca_file and :ssl_ca_path
def api_opts
  @ssl_ca_file, @ssl_ca_path = ENV.values_at('SSL_CA_FILE', 'SSL_CA_PATH')
  { ssl_ca_file: @ssl_ca_file, ssl_ca_path: @ssl_ca_path }
end
90
+
91
# Creates the linked data API client from environment variables
# (ASPIRE_API_AVAILABLE, ASPIRE_LINKED_DATA_ROOT, ASPIRE_TENANT,
# ASPIRE_TENANCY_HOST_ALIASES, ASPIRE_TENANCY_ROOT), recording each value
# in an instance variable
# @return [Aspire::API::LinkedData] the linked data API client
def linked_data_api
  @api_available = ENV['ASPIRE_API_AVAILABLE'] == 'true'
  @linked_data_root = ENV['ASPIRE_LINKED_DATA_ROOT']
  @tenant = ENV['ASPIRE_TENANT']
  # Host aliases are a ';'-separated list; unset yields an empty list
  @tenancy_host_aliases = ENV['ASPIRE_TENANCY_HOST_ALIASES'].to_s.split(';')
  @tenancy_root = ENV['ASPIRE_TENANCY_ROOT']
  options = {
    linked_data_root: @linked_data_root,
    tenancy_host_aliases: @tenancy_host_aliases,
    tenancy_root: @tenancy_root
  }.merge(api_opts)
  Aspire::API::LinkedData.new(@tenant, **options)
end
103
+
104
# Creates the logger used by the cache.
# When log_to_file is false a Logglier logger is returned, configured from
# the LOGGLIER_TOKEN and LOGGLIER_TAG environment variables. Otherwise a
# Logger is created on "| tee <ASPIRE_LOG>" with a timestamped format.
# @param log_to_file [Boolean] true to log via the ASPIRE_LOG file
# @return [Logger, Object] the logger
def create_logger(log_to_file)
  @log_file = ENV['ASPIRE_LOG']

  unless log_to_file
    return Logglier.new("https://logs-01.loggly.com/inputs/#{ENV['LOGGLIER_TOKEN']}/tag/#{ENV['LOGGLIER_TAG']}/", threaded: true, format: :json)
  end

  file_logger = Logger.new("| tee #{@log_file}") # @log_file || STDOUT)
  file_logger.datetime_format = '%Y-%m-%d %H:%M:%S'
  file_logger.formatter = proc do |severity, datetime, _program, msg|
    "#{datetime} [#{severity}]: #{msg}\n"
  end
  file_logger
end
119
+
120
+ end
121
+
122
+ end
123
+ end
@@ -0,0 +1,20 @@
1
+ require 'clamp'
2
+
3
+ module Aspire
4
+ module CLI
5
+
6
# Base Clamp command declaring the command-line options of the Aspire CLI
class Command < Clamp::Command
  # option ['-c', '--hierarchy-code'], 'HIERARCHY_CODE', 'the hierarchy code (module etc.)'
  option %w[-e --env-file], 'ENV_FILE',
         'file containing env variable key value pairs'
  option %w[-l --list-uri], 'LIST_URI', 'the list URI'
  option %w[-t --time-period], 'TIME_PERIOD',
         'the time period (2016-17 etc.)', multivalued: true
  option %w[-p --privacy-control], 'PRIVACY_CONTROL',
         'the list privacy control (Public etc)'
  option %w[-s --status], 'STATUS', 'the list status control (Published etc)'
  option %w[-c --clear-cache], :flag, 'clear cache before running',
         default: false
  option %w[-f --log-to-file], :flag, 'log output to file', default: false
end
18
+
19
+ end
20
+ end
@@ -0,0 +1,29 @@
1
+ module Aspire
2
+ # Enumerator classes for Aspire reading list processing
3
+ module Enumerator
4
# The abstract base class for enumerator classes
# @abstract Subclasses must implement #enumerate accepting the parameters
#   passed to #enumerator and yielding values to self.yielder
class Base
  # The Enumerator::Yielder instance of the enumerator currently running
  # @!attribute [rw] yielder
  #   @return [Enumerator::Yielder] the yielder instance from an Enumerator
  attr_accessor :yielder

  # Enumerates the data passed in its arguments
  # @abstract Subclasses must implement this method
  # @raise [NotImplementedError] always, in this base class
  def enumerate(*_args, **_kwargs)
    raise NotImplementedError
  end

  # Returns an enumerator which, when iterated, stores its yielder in
  # #yielder and calls #enumerate with the arguments given here
  # @return [Enumerator] the enumerator
  def enumerator(*args, **kwargs)
    ::Enumerator.new do |y|
      self.yielder = y
      enumerate(*args, **kwargs)
    end
  end
end
28
+ end
29
+ end