aspire 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +59 -0
  3. data/.rbenv-gemsets +1 -0
  4. data/.travis.yml +5 -0
  5. data/CODE_OF_CONDUCT.md +74 -0
  6. data/Dockerfile +20 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +851 -0
  10. data/Rakefile +10 -0
  11. data/aspire.gemspec +40 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/entrypoint.sh +11 -0
  15. data/exe/build-cache +13 -0
  16. data/lib/aspire.rb +11 -0
  17. data/lib/aspire/api.rb +2 -0
  18. data/lib/aspire/api/base.rb +198 -0
  19. data/lib/aspire/api/json.rb +195 -0
  20. data/lib/aspire/api/linked_data.rb +214 -0
  21. data/lib/aspire/caching.rb +4 -0
  22. data/lib/aspire/caching/builder.rb +356 -0
  23. data/lib/aspire/caching/cache.rb +365 -0
  24. data/lib/aspire/caching/cache_entry.rb +296 -0
  25. data/lib/aspire/caching/cache_logger.rb +63 -0
  26. data/lib/aspire/caching/util.rb +210 -0
  27. data/lib/aspire/cli/cache_builder.rb +123 -0
  28. data/lib/aspire/cli/command.rb +20 -0
  29. data/lib/aspire/enumerator/base.rb +29 -0
  30. data/lib/aspire/enumerator/json_enumerator.rb +130 -0
  31. data/lib/aspire/enumerator/linked_data_uri_enumerator.rb +32 -0
  32. data/lib/aspire/enumerator/report_enumerator.rb +64 -0
  33. data/lib/aspire/exceptions.rb +36 -0
  34. data/lib/aspire/object.rb +7 -0
  35. data/lib/aspire/object/base.rb +155 -0
  36. data/lib/aspire/object/digitisation.rb +43 -0
  37. data/lib/aspire/object/factory.rb +87 -0
  38. data/lib/aspire/object/list.rb +590 -0
  39. data/lib/aspire/object/module.rb +36 -0
  40. data/lib/aspire/object/resource.rb +371 -0
  41. data/lib/aspire/object/time_period.rb +47 -0
  42. data/lib/aspire/object/user.rb +46 -0
  43. data/lib/aspire/properties.rb +20 -0
  44. data/lib/aspire/user_lookup.rb +103 -0
  45. data/lib/aspire/util.rb +185 -0
  46. data/lib/aspire/version.rb +3 -0
  47. data/lib/retry.rb +197 -0
  48. metadata +274 -0
@@ -0,0 +1,63 @@
1
+ require 'logger'
2
+
3
+ require 'aspire/exceptions'
4
+
5
module Aspire
  # Tools for building a cache from the Aspire APIs
  module Caching
    # A wrapper class for Logger adding utility methods.
    # Any method not defined here (e.g. #log, #info) is forwarded to the
    # wrapped logger via #method_missing; when no logger is set, such calls are
    # silently ignored and return nil.
    class CacheLogger
      # @!attribute [rw] logger
      #   @return [Logger, nil] the wrapped logger (nil disables logging)
      attr_accessor :logger

      # Initialises a new CacheLogger instance
      # @param logger [Logger, nil] the logger to wrap
      def initialize(logger = nil)
        self.logger = logger
      end

      # Delegates missing methods to the logger
      # @param method [Symbol] the method name
      # @param args [Array] the method arguments
      # @param block [Proc] the method code block
      # @return [Object, nil] the method result, or nil if no logger is set
      # @raise [NoMethodError] if a logger is set but has no such public method
      def method_missing(method, *args, &block)
        # Do not fail if logger is undefined
        return nil unless logger
        # Fail if logger does not respond to this method
        return super unless logger.respond_to?(method)
        # Delegate to the logger method
        logger.public_send(method, *args, &block)
      end

      # Reports whether a missing method would be handled by #method_missing.
      # Ruby invokes this hook with two arguments (name, include_private), so
      # the second parameter must be accepted or #respond_to? raises
      # ArgumentError. Only public logger methods are delegated (public_send),
      # so include_private is deliberately not forwarded to the logger.
      # @param method [Symbol] the method name
      # @param _include_private [Boolean] ignored
      # @return [Boolean] true if the wrapped logger responds to the method,
      #   or true for every method when no logger is set
      def respond_to_missing?(method, _include_private = false)
        # If logger is undefined, all missing methods are accepted
        logger ? logger.respond_to?(method) : true
      end

      # Logs and raises an exception
      # @param message [String] the error message
      # @param exception [Class] the class of the exception to be raised
      #   (default: Aspire::Exceptions::Error)
      # @param level [Integer] the logger level (default: Logger::ERROR)
      # @raise [Aspire::Exceptions::Error] unless another class is given
      def log_exception(message, exception = nil, level: nil)
        # #log is not defined here; it reaches the logger via #method_missing
        log(level || Logger::ERROR, message)
        raise exception || Aspire::Exceptions::Error, message
      end

      # Logs an event and returns its first argument
      # - allows for compact code such as 'return log_return(result, msg,...)'
      # @param result [Object] the return value of the method
      # @param args [Array] positional arguments forwarded to #log
      # @param kwargs [Hash] keyword arguments forwarded to #log
      # @return [Object] the result argument
      def log_return(result, *args, **kwargs)
        log(*args, **kwargs)
        result
      end
    end
  end
end
@@ -0,0 +1,210 @@
1
+ require 'fileutils'
2
+ require 'json'
3
+ require 'uri'
4
+
5
+ require 'aspire/enumerator/linked_data_uri_enumerator'
6
+ require 'aspire/exceptions'
7
+ require 'aspire/util'
8
+
9
+ module Aspire
10
+ # Tools for building a cache from the Aspire APIs
11
+ module Caching
12
+ # Cache utility methods
13
+ module Util
14
+ include Aspire::Util
15
+
16
# Rules for determining whether an object URL is cacheable.
# Each rule is a Proc taking the parsed URL (as returned by #parse_url) and
# the CacheEntry instance; it returns a truthy value when the object may be
# cached. Rules are evaluated in the order listed and every rule must pass.
# The first rule guards against a nil URL, so later rules may index it freely.
CACHEABLE = [
  # The URL must be set and the host must match the canonical tenancy host
  proc { |url, entry| url && url[:tenancy_host] == entry.cache.tenancy_host },
  # Catalog objects are not cacheable
  proc { |url, _entry| url[:type] != 'catalog' },
  # User objects themselves are not cacheable, but their child objects
  # (e.g. notes) are
  proc { |url, _entry| !(url[:type] == 'users' && url[:child_type].nil?) },
  # Importance URI values are not cacheable
  proc do |url, _entry|
    !(url[:type] == 'config' && url[:id].to_s.start_with?('importance'))
  end
].freeze
34
+
35
# Prepends a prefix to the basename of a file path, leaving any directory
# part (and anything after the basename, e.g. a trailing separator) untouched
# @param filename [String] the filename
# @param prefix [String] the prefix
# @return [String] the filename with prefix
def add_filename_prefix(filename, prefix)
  # Split around the last occurrence of the basename
  leading, base, trailing = filename.rpartition(File.basename(filename))
  "#{leading}#{prefix}#{base}#{trailing}"
end
44
+
45
# Adds a suffix to a filename, inserting it before any file extension
# e.g. add_filename_suffix('file.txt', '-suffix') == 'file-suffix.txt'
# @param filename [String] the filename
# @param suffix [String] the suffix
# @return [String] the filename with suffix
def add_filename_suffix(filename, suffix)
  parts = filename.split(File::SEPARATOR)
  # When the last component is '.' or '..' the suffix goes on the parent
  # component, otherwise on the last component itself
  index = %w[. ..].include?(parts.last) ? -2 : -1
  component = parts[index]
  if component
    # Split the component around its extension and put the suffix in front
    # of the extension
    stem, ext, rest = component.rpartition(File.extname(component))
    parts[index] = "#{stem}#{suffix}#{ext}#{rest}"
  end
  # split drops a trailing separator; an empty final component restores it
  parts << '' if filename.end_with?(File::SEPARATOR)
  File.join(parts)
end
66
+
67
# Parses the URL and checks that it is cacheable
# @param u [String] the URL of the API object
# @return [MatchData] the parsed URL, as returned by #parse_url
# @raise [Aspire::Exceptions::NotCacheable] if the URL is not
#   cacheable
def cacheable_url(u)
  parsed = parse_url(u)
  # Every rule must accept the URL; evaluation stops at the first rejection.
  # self is handed to the rules as the cache entry.
  accepted = CACHEABLE.all? { |rule| rule.call(parsed, self) }
  raise Aspire::Exceptions::NotCacheable unless accepted
  parsed
end
81
+
82
# Tests whether a directory path has no further parents to visit
# @param dir [String] the directory path
# @param root [String] the directory root - paths above this are ignored
# @return [Boolean] true if dir is nil, empty, '.' or equal to root,
#   false otherwise
def end_of_path?(dir, root = nil)
  return true if dir.nil? || dir.empty?
  dir == '.' || dir == root
end
89
+
90
# Creates a directory and its parents, logging the outcome
# @param dir [String] the directory name
# @param logger [Aspire::Caching::CacheLogger] the logger for messages
# @param success [String] the text for the success log message (nothing is
#   logged when this or the logger is missing)
# @param failure [String] the error message on failure
# @return [void]
# @raise [ArgumentError] if the directory is not specified
# @raise [WriteError] if the operation fails with a system call error
def mkdir(dir, logger = nil, success = nil, failure = nil)
  raise ArgumentError, 'Directory expected' if dir.nil? || dir.empty?
  # NOTE(review): `mode` is not defined in this module - presumably provided
  # by the including class; confirm against callers
  FileUtils.mkdir_p(dir, mode: mode)
  logger.log(Logger::DEBUG, success) if !logger.nil? && !success.nil? && !success.empty?
rescue SystemCallError => e
  message = "#{failure || "Create directory #{dir} failed"}: #{e}"
  # Without a logger raise directly, otherwise let the logger log and raise
  raise WriteError, message if logger.nil?
  logger.log_exception(message, WriteError)
end
108
+
109
# Returns the list of URI references from a linked data API object
# @param url [String] the URL of the API object
# @param data [Hash] the parsed JSON data for the object
# @return [Array<String>] the de-duplicated URIs referenced by the object,
#   in order of first appearance
def references(url, data = nil)
  return [] if data.nil? || data.empty?
  uri_enum = Aspire::Enumerator::LinkedDataURIEnumerator.new.enumerator(url, data)
  # Collect each yielded hash's 'value' as a hash key so duplicates collapse
  seen = uri_enum.each_with_object({}) do |(_key, hash, _index), found|
    found[hash['value']] = true
  end
  seen.keys
end
122
+
123
# Removes the files matching a glob pattern
# @param glob [String] the file pattern to be removed
# @param logger [Aspire::Caching::CacheLogger] the logger for messages
# @param success [String] the text for success log messages
# @param failure [String] the text for failure exception/log messages
# @return [void]
# @raise [ArgumentError] if the pattern is not specified
# @raise [RemoveError] if the removal fails with a system call error
def rm(glob, logger = nil, success = nil, failure = nil)
  raise ArgumentError, 'file path required' if glob.nil? || glob.empty?
  FileUtils.rm_rf(Dir.glob(glob), secure: true)
  logger.log(Logger::INFO, success) if !logger.nil? && !success.nil? && !success.empty?
rescue SystemCallError => e
  message = "#{failure || "Remove #{glob} failed"}: #{e}"
  # Without a logger raise directly, otherwise let the logger log and raise
  raise RemoveError, message if logger.nil?
  logger.log_exception(message, RemoveError)
end
141
+
142
# Removes empty directories walking up a directory path
# @param path [String] the starting file or directory
# @param root [String] the directory at which to stop; it is never removed
#   (passed to #end_of_path?)
# @return [void]
# @raise [RemoveError] if removing a directory fails for a reason other than
#   it being non-empty or not a directory
def rmdir_empty(path, root)
  # The starting path is assumed to be a filename, so we append a dummy
  # filename if it's a directory
  path = File.directory?(path) ? File.join(path, '.') : path
  loop do
    # Get the parent of the current directory/file
    path = File.dirname(path)
    # Stop at the end of the directory path or a non-empty directory
    break if end_of_path?(path, root) || !Dir.empty?(path)
    # Remove the directory
    Dir.rmdir(path)
  end
rescue Errno::ENOTEMPTY, Errno::ENOTDIR
  # Stop without error if the directory is not empty or not a directory
  nil
rescue SystemCallError => e
  # path holds the directory being processed when the error occurred
  raise RemoveError, "Rmdir #{path} failed: #{e}"
end
165
+
166
# Strips the file extension from a path
# @param path [String] the file path
# @return [String] the part of the path preceding the last occurrence of its
#   extension (the whole path when there is no extension)
def strip_ext(path)
  stem, _ext, _rest = path.rpartition(File.extname(path))
  stem
end
172
+
173
# Removes a prefix from the basename of a file path
# @param filename [String] the filename
# @param prefix [String] the prefix
# @return [String] the filename without prefix
def strip_filename_prefix(filename, prefix)
  # Split around the last occurrence of the basename and strip only that part
  leading, base, trailing = filename.rpartition(File.basename(filename))
  "#{leading}#{strip_prefix(base, prefix)}#{trailing}"
end
182
+
183
# Removes a suffix from a filename, looking in front of any file extension
# @param filename [String] the filename
# @param suffix [String] the suffix
# @return [String] the filename without suffix
def strip_filename_suffix(filename, suffix)
  # Split around the extension; the suffix sits at the end of the stem
  stem, ext, rest = filename.rpartition(File.extname(filename))
  "#{strip_suffix(stem, suffix)}#{ext}#{rest}"
end
192
+
193
# Removes a leading prefix from a string
# @param str [String] the string to remove the prefix from
# @param prefix [String] the prefix to remove
# @return [String] the string with the prefix removed, or the string itself
#   when it does not start with the prefix
def strip_prefix(str, prefix)
  return str unless str.start_with?(prefix)
  str[prefix.length..-1]
end
200
+
201
# Removes a trailing suffix from a string
# @param str [String] the string to remove the suffix from
# @param suffix [String] the suffix to remove
# @return [String] the string with the suffix removed, or the string itself
#   when the suffix is empty or the string does not end with it
def strip_suffix(str, suffix)
  # An empty suffix must be a no-op: every string "ends with" '', and the
  # range 0...-0 is 0...0, which would otherwise return an empty string
  return str if suffix.empty? || !str.end_with?(suffix)
  str[0...-suffix.length]
end
208
+ end
209
+ end
210
+ end
@@ -0,0 +1,123 @@
1
+ require 'aspire/cli/command'
2
+ require 'aspire/enumerator/report_enumerator'
3
+ require 'aspire/util'
4
+ require 'logglier'
5
+ require 'dotenv'
6
+
7
+ module Aspire
8
+ module CLI
9
+
10
+ class CacheBuilder < Command
11
+
12
# Runs the cache-builder command: loads the optional env file, builds the API
# clients, logger and cache from environment variables, then caches either a
# single list (when a list URI is given) or every list in the list report
# matching the time period / status / privacy control options.
# @return [nil]
# @raise [ArgumentError] if neither a list URI nor a privacy control is given
def execute
  Dotenv.load(env_file) if !env_file.nil? && !env_file.empty?

  @json_api = json_api
  @linked_data_api = linked_data_api
  @logger = create_logger log_to_file?
  @cache_path = ENV['ASPIRE_CACHE_PATH']
  @list_report = ENV['ASPIRE_LIST_REPORT']
  # The cache mode is an octal string, defaulting to 0700.
  # NOTE(review): @mode is computed here but not passed to the cache in this
  # method, and the clear-cache flag is not referenced - confirm intent.
  raw_mode = ENV['ASPIRE_CACHE_MODE']
  @mode = raw_mode
  @mode = raw_mode.nil? || raw_mode.empty? ? 0o700 : raw_mode.to_i(8)
  cache = Aspire::Caching::Cache.new(@linked_data_api, @json_api, @cache_path,
                                     logger: @logger)
  @builder = Aspire::Caching::Builder.new(cache)

  # A specific list URI takes precedence over the report filters
  unless list_uri.nil? || list_uri.empty?
    puts "Caching list #{list_uri}"
    @builder.write_list(list_uri)
    return puts "Finished caching list"
  end

  raise ArgumentError if privacy_control.nil? || privacy_control.empty?

  puts "Caching all lists that match arguments"
  lists = list_enumerator time_period_list, status, privacy_control
  @builder.build(lists)
  puts "Finished caching all lists that match arguments"
end
48
+
49
+ private
50
+
51
# Builds an enumerator over the rows of the list report (@list_report) that
# match the given filters
# @param time_periods [Array<String>, nil] the accepted 'Time Period' values;
#   nil, empty or [''] selects rows with no time period (nil or '')
# @param status [String, nil] required prefix of the 'Status' column; no
#   status filter is applied when nil or empty
# @param privacy_control [String, nil] required 'Privacy Control' value; no
#   privacy filter is applied when nil or empty
# @return [Enumerator] the value returned by ReportEnumerator#enumerator
def list_enumerator(time_periods = nil, status = nil, privacy_control = nil)
  if time_periods.nil? || time_periods.empty? || time_periods == ['']
    time_periods = [nil, '']
  end

  filters = [proc { |row| time_periods.include?(row['Time Period']) }]

  unless status.nil? || status.empty?
    filters.push(proc { |row| row['Status'].to_s.start_with?(status) })
  end

  # Guard on privacy_control itself: checking status here raised
  # NoMethodError for a nil status and filtered on an empty privacy control
  unless privacy_control.nil? || privacy_control.empty?
    filters.push(proc { |row| row['Privacy Control'] == privacy_control })
  end

  Aspire::Enumerator::ReportEnumerator.new(@list_report, filters)
                                      .enumerator
end
72
+
73
# Builds the JSON API client from environment variables
# (ASPIRE_API_AVAILABLE, ASPIRE_API_CLIENT_ID, ASPIRE_API_SECRET,
# ASPIRE_TENANT), storing each value in an instance variable
# @return [Aspire::API::JSON] the API client
def json_api
  @api_available = ENV['ASPIRE_API_AVAILABLE'] == 'true'
  @api_client_id, @api_secret, @tenant = ENV.values_at(
    'ASPIRE_API_CLIENT_ID', 'ASPIRE_API_SECRET', 'ASPIRE_TENANT'
  )
  Aspire::API::JSON.new(@api_client_id, @api_secret, @tenant, **api_opts)
end
81
+
82
# Returns the SSL options shared by the API clients, read from the
# SSL_CA_FILE and SSL_CA_PATH environment variables (also stored in
# @ssl_ca_file and @ssl_ca_path)
# @return [Hash] a hash with keys :ssl_ca_file and :ssl_ca_path
def api_opts
  @ssl_ca_file, @ssl_ca_path = ENV.values_at('SSL_CA_FILE', 'SSL_CA_PATH')
  { ssl_ca_file: @ssl_ca_file, ssl_ca_path: @ssl_ca_path }
end
90
+
91
# Builds the linked data API client from environment variables
# (ASPIRE_API_AVAILABLE, ASPIRE_LINKED_DATA_ROOT, ASPIRE_TENANT,
# ASPIRE_TENANCY_HOST_ALIASES, ASPIRE_TENANCY_ROOT), storing each value in an
# instance variable
# @return [Aspire::API::LinkedData] the API client
def linked_data_api
  @api_available = ENV['ASPIRE_API_AVAILABLE'] == 'true'
  @linked_data_root = ENV['ASPIRE_LINKED_DATA_ROOT']
  @tenant = ENV['ASPIRE_TENANT']
  # Host aliases are a ';'-separated list; an unset variable gives []
  @tenancy_host_aliases = ENV['ASPIRE_TENANCY_HOST_ALIASES'].to_s.split(';')
  @tenancy_root = ENV['ASPIRE_TENANCY_ROOT']
  options = {
    linked_data_root: @linked_data_root,
    tenancy_host_aliases: @tenancy_host_aliases,
    tenancy_root: @tenancy_root
  }
  Aspire::API::LinkedData.new(@tenant, **options, **api_opts)
end
103
+
104
# Creates the logger used by the cache
# @param log_to_file [Boolean] if true, return a Logger built from the
#   ASPIRE_LOG path (stored in @log_file); otherwise return a Logglier logger
#   configured from LOGGLIER_TOKEN and LOGGLIER_TAG
# @return [Logger, Object] the logger instance
def create_logger(log_to_file)
  @log_file = ENV['ASPIRE_LOG']

  unless log_to_file
    return Logglier.new("https://logs-01.loggly.com/inputs/#{ENV['LOGGLIER_TOKEN']}/tag/#{ENV['LOGGLIER_TAG']}/",
                        threaded: true, format: :json)
  end

  # NOTE(review): the "| tee" device string presumably relies on the logger
  # opening it as a pipe so output reaches both stdout and the file - confirm
  # against the Logger version in use
  file_logger = Logger.new("| tee #{@log_file}")
  file_logger.datetime_format = '%Y-%m-%d %H:%M:%S'
  file_logger.formatter = proc do |severity, datetime, _program, msg|
    "#{datetime} [#{severity}]: #{msg}\n"
  end
  file_logger
end
119
+
120
+ end
121
+
122
+ end
123
+ end
@@ -0,0 +1,20 @@
1
+ require 'clamp'
2
+
3
module Aspire
  module CLI
    # Base Clamp command declaring the command-line options available to the
    # Aspire command-line tools
    class Command < Clamp::Command
      # option ['-c', '--hierarchy-code'], 'HIERARCHY_CODE', 'the hierarchy code (module etc.)'

      # Options taking a value
      option %w[-e --env-file], 'ENV_FILE',
             'file containing env variable key value pairs'
      option %w[-l --list-uri], 'LIST_URI', 'the list URI'
      option %w[-t --time-period], 'TIME_PERIOD',
             'the time period (2016-17 etc.)', multivalued: true
      option %w[-p --privacy-control], 'PRIVACY_CONTROL',
             'the list privacy control (Public etc)'
      option %w[-s --status], 'STATUS',
             'the list status control (Published etc)'

      # Boolean flags
      option %w[-c --clear-cache], :flag, 'clear cache before running',
             default: false
      option %w[-f --log-to-file], :flag, 'log output to file',
             default: false
    end
  end
end
@@ -0,0 +1,29 @@
1
module Aspire
  # Enumerator classes for Aspire reading list processing
  module Enumerator
    # Abstract base class for enumerator classes
    # @abstract Subclasses must implement #enumerate, accepting the arguments
    #   given to #enumerator and yielding values to self.yielder
    class Base
      # @!attribute [rw] yielder
      #   @return [Enumerator::Yielder] the yielder of the Enumerator most
      #     recently started by #enumerator
      attr_accessor :yielder

      # Enumerates the data passed in its arguments
      # @abstract Subclasses must implement this method
      # @raise [NotImplementedError] always, in this base class
      def enumerate(*_args, **_kwargs)
        raise NotImplementedError
      end

      # Returns an Enumerator which, when iterated, stores its yielder in
      # #yielder and calls #enumerate with the arguments given here
      # @return [Enumerator] the enumerator
      def enumerator(*args, **kwargs)
        ::Enumerator.new do |y|
          self.yielder = y
          enumerate(*args, **kwargs)
        end
      end
    end
  end
end