chronicle-etl 0.5.2 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,7 +46,7 @@ module Chronicle
46
46
  all_secrets.each do |namespace, secrets|
47
47
  rows += secrets.map do |key, value|
48
48
  # hidden_value = (value[0..5] + ("*" * [0, [value.length - 5, 30].min].max)).truncate(30)
49
- truncated_value = value.truncate(30)
49
+ truncated_value = value&.truncate(30)
50
50
  [namespace, key, truncated_value]
51
51
  end
52
52
  end
@@ -4,6 +4,7 @@ require 'chronicle/etl'
4
4
 
5
5
  require 'chronicle/etl/cli/cli_base'
6
6
  require 'chronicle/etl/cli/subcommand_base'
7
+ require 'chronicle/etl/cli/authorizations'
7
8
  require 'chronicle/etl/cli/connectors'
8
9
  require 'chronicle/etl/cli/jobs'
9
10
  require 'chronicle/etl/cli/plugins'
@@ -1,3 +1,4 @@
1
+ require "active_support/core_ext/hash/keys"
1
2
  require 'fileutils'
2
3
  require 'yaml'
3
4
 
@@ -21,6 +22,8 @@ module Chronicle
21
22
  def write(type, identifier, data)
22
23
  base = config_pathname_for_type(type)
23
24
  path = base.join("#{identifier}.yml")
25
+
26
+ data.deep_stringify_keys!
24
27
  FileUtils.mkdir_p(File.dirname(path))
25
28
  File.open(path, 'w', 0o600) do |f|
26
29
  # Ruby likes to add --- separators when writing yaml files
@@ -108,6 +108,10 @@ module Chronicle
108
108
  end
109
109
 
110
110
  def coerce_time(value)
111
+ # parsing yml files might result in us getting Date objects
112
+ # we convert to DateTime first to ensure UTC
113
+ return value.to_datetime.to_time if value.is_a?(Date)
114
+
111
115
  return value unless value.is_a?(String)
112
116
 
113
117
  # Hacky check for duration strings like "60m"
@@ -4,8 +4,13 @@ module Chronicle
4
4
 
5
5
  class SecretsError < Error; end
6
6
 
7
+ class AuthorizationError < Error; end
8
+
7
9
  class ConfigError < Error; end
8
10
 
11
+ class RunnerError < Error; end
12
+ class RunInterruptedError < RunnerError; end
13
+
9
14
  class RunnerTypeError < Error; end
10
15
 
11
16
  class JobDefinitionError < Error
@@ -34,7 +34,7 @@ module Chronicle
34
34
  def validate
35
35
  @errors = {}
36
36
 
37
- Chronicle::ETL::Registry::PHASES.each do |phase|
37
+ Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
38
38
  __send__("#{phase}_klass".to_sym)
39
39
  rescue Chronicle::ETL::PluginError => e
40
40
  @errors[:plugins] ||= []
@@ -66,7 +66,7 @@ module Chronicle
66
66
 
67
67
  # For each connector in this job, mix in secrets into the options
68
68
  def apply_default_secrets
69
- Chronicle::ETL::Registry::PHASES.each do |phase|
69
+ Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
70
70
  # If the options have a `secrets` key, we look up those secrets and
71
71
  # mix them in. If not, use the connector's plugin name and look up
72
72
  # secrets with the same namespace
@@ -124,11 +124,11 @@ module Chronicle
124
124
  private
125
125
 
126
126
  def load_klass(phase, identifier)
127
- Chronicle::ETL::Registry.find_by_phase_and_identifier(phase, identifier).klass
127
+ Chronicle::ETL::Registry::Connectors.find_by_phase_and_identifier(phase, identifier).klass
128
128
  end
129
129
 
130
130
  def load_credentials
131
- Chronicle::ETL::Registry::PHASES.each do |phase|
131
+ Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
132
132
  credentials_name = @definition[phase].dig(:options, :credentials)
133
133
  if credentials_name
134
134
  credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
@@ -17,8 +17,8 @@ module Chronicle
17
17
  def output message, level
18
18
  return unless level >= @log_level
19
19
 
20
- if @progress_bar
21
- @progress_bar.log(message)
20
+ if @ui_element
21
+ @ui_element.log(message)
22
22
  else
23
23
  $stderr.puts(message)
24
24
  end
@@ -40,12 +40,12 @@ module Chronicle
40
40
  output(message, DEBUG)
41
41
  end
42
42
 
43
- def attach_to_progress_bar(progress_bar)
44
- @progress_bar = progress_bar
43
+ def attach_to_ui(ui_element)
44
+ @ui_element = ui_element
45
45
  end
46
46
 
47
- def detach_from_progress_bar
48
- @progress_bar = nil
47
+ def detach_from_ui
48
+ @ui_element = nil
49
49
  end
50
50
  end
51
51
  end
@@ -9,7 +9,7 @@ module Chronicle
9
9
  # @todo Experiment with just mixing in ActiveModel instead of this
10
10
  # reimplementation
11
11
  class Base
12
- ATTRIBUTES = [:provider, :provider_id, :lat, :lng, :metadata].freeze
12
+ ATTRIBUTES = [:provider, :provider_id, :provider_namespace, :lat, :lng, :metadata].freeze
13
13
  ASSOCIATIONS = [].freeze
14
14
 
15
15
  attr_accessor(:id, :dedupe_on, *ATTRIBUTES)
@@ -10,12 +10,14 @@ module Chronicle
10
10
  # TODO: This desperately needs a validation system
11
11
  ASSOCIATIONS = [
12
12
  :involvements, # inverse of activity's `involved`
13
-
13
+ :analogous,
14
14
  :attachments,
15
15
  :abouts,
16
16
  :aboutables, # inverse of above
17
17
  :depicts,
18
18
  :consumers,
19
+ :creators,
20
+ :creations,
19
21
  :contains,
20
22
  :containers # inverse of above
21
23
  ].freeze # TODO: add these to reflect Chronicle Schema
@@ -0,0 +1,140 @@
1
+ require 'omniauth'
2
+ require 'tty-spinner'
3
+
4
module Chronicle
  module ETL
    # An authorization strategy that uses oauth2 (and omniauth under the hood).
    #
    # Subclasses configure themselves with the class-level macros
    # `omniauth_strategy`, `scope` and `pluck_secrets`. An instance then runs
    # the flow with `authorize!`.
    class OauthAuthorizer < Authorizer
      class << self
        attr_reader :strategy, :provider_name, :authorization_to_secret_map
        attr_accessor :client_id, :client_secret

        # Macro for specifying which omniauth strategy to use
        def omniauth_strategy(strategy)
          @strategy = strategy
        end

        # Macro for specifying which omniauth scopes to request
        def scope(value)
          options[:scope] = value
        end

        # Macro for specifying how to map the returned authorization hash to
        # a secrets hash. Each value is a list of keys handed to `dig`.
        def pluck_secrets(map)
          @authorization_to_secret_map = map
        end

        # Options to pass to omniauth (memoized per class)
        def options
          @options ||= {}
        end

        # Returns all subclasses of OauthAuthorizer
        # (Used by AuthorizationServer to build omniauth providers)
        def all
          ObjectSpace.each_object(::Class).select { |klass| klass < self }
        end
      end

      attr_reader :authorization

      # Create a new instance of OauthAuthorizer
      #
      # @param port [Integer] port the temporary authorization server listens on
      # @param credentials [Hash] read for `:client_id` and `:client_secret`
      def initialize(port:, credentials: {})
        @port = port
        @credentials = credentials
        # zero-arg super forwards the same keyword arguments to Authorizer
        super
      end

      # Start up an authorization server and handle the oauth flow
      #
      # @return [Hash] the authorization, or only the plucked secrets when the
      #   class declared a `pluck_secrets` map
      # @raise [Chronicle::ETL::AuthorizationError] if the server stopped
      #   without recording an authorization
      def authorize!
        associate_oauth_credentials
        @server = load_server
        spinner = TTY::Spinner.new(":spinner :title", format: :dots_2)
        spinner.auto_spin
        spinner.update(title: "Starting temporary authorization server on port #{@port}")

        server_thread = start_authorization_server(port: @port)
        start_oauth_flow

        spinner.update(title: "Waiting for authorization to complete in your browser")
        sleep 0.1 while authorization_pending?(server_thread)

        @server.quit!
        server_thread.join

        # Only report success once we know an authorization actually arrived.
        # TODO: properly handle failed authorizations
        latest = @server.latest_authorization
        unless latest
          spinner.error("(failed)")
          raise Chronicle::ETL::AuthorizationError
        end
        spinner.success("(#{'successful'.green})")

        @authorization = latest

        extract_secrets(authorization: @authorization, pluck_values: self.class.authorization_to_secret_map)
      end

      private

      # True while the server thread is alive and no authorization has arrived
      def authorization_pending?(server_thread)
        server_thread.status && !@server.latest_authorization
      end

      # Copy the instance's credentials onto the class-level accessors
      def associate_oauth_credentials
        self.class.client_id = @credentials[:client_id]
        self.class.client_secret = @credentials[:client_secret]
      end

      def load_server
        # Load at runtime so that we can set omniauth strategies based on
        # which chronicle plugin has been loaded.
        require_relative './authorization_server'
        Chronicle::ETL::AuthorizationServer
      end

      # Boot the authorization server in a background thread
      #
      # @return [Thread] the thread running the server
      def start_authorization_server(port:)
        @server.settings.port = port
        suppress_webrick_logging(@server)
        # NOTE: these are process-wide Thread settings, not scoped to the
        # server thread created below.
        Thread.abort_on_exception = true
        Thread.report_on_exception = false

        Thread.new do
          @server.run!({ port: @port }) do |s|
            s.silent = true if s.class.to_s == "Thin::Server"
          end
        end
      end

      # Open the browser at the strategy's auth endpoint, falling back to
      # logging the URL when no browser launcher is available
      def start_oauth_flow
        url = "http://localhost:#{@port}/auth/#{omniauth_strategy}"
        Launchy.open(url)
      rescue Launchy::CommandNotFoundError
        Chronicle::ETL::Logger.info("Please open #{url} in a browser to continue")
      end

      # Silence WEBrick's access and server logs, if WEBrick is available
      def suppress_webrick_logging(server)
        require 'webrick'
        server.set(
          :server_settings,
          {
            AccessLog: [],
            # File::NULL is the platform's null device ("/dev/null", "NUL", ...)
            Logger: WEBrick::Log.new(File::NULL)
          }
        )
      rescue LoadError
        # no worries if we're not using WEBrick
      end

      # Build the secrets hash by digging each configured path out of the
      # authorization; returns the authorization untouched if no map is set
      def extract_secrets(authorization:, pluck_values:)
        return authorization unless pluck_values&.any?

        pluck_values.each_with_object({}) do |(key, identifiers), secrets|
          secrets[key] = authorization.dig(*identifiers)
        end
      end

      def omniauth_strategy
        self.class.strategy
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require 'rubygems'
2
+
3
module Chronicle
  module ETL
    module Registry
      # A singleton module that acts as a registry of connector classes available for ETL jobs
      module Connectors
        PHASES = [:extractor, :transformer, :loader].freeze
        public_constant :PHASES

        class << self
          # Only the writer is generated; the reader below lazily initializes
          # the list (generating both would just redefine the reader).
          attr_writer :connectors

          # Add a connector registration to the registry
          def register(connector)
            connectors << connector
          end

          # All connector registrations currently known
          def connectors
            @connectors ||= []
          end

          # Find connector from amongst those currently loaded
          #
          # @return the matching registration, or nil if none is loaded
          def find_by_phase_and_identifier_local(phase, identifier)
            connectors.find { |c| c.phase == phase && c.identifier == identifier }
          end

          # Find connector and load relevant plugin to find it if necessary
          #
          # @param phase [Symbol] one of PHASES
          # @param identifier [String] `name`, `plugin` or `plugin:name`
          # @raise [Chronicle::ETL::PluginNotInstalledError] if the plugin
          #   implied by the identifier is not installed
          # @raise [ConnectorNotAvailableError] if the plugin loads but has no
          #   matching connector
          def find_by_phase_and_identifier(phase, identifier)
            connector = find_by_phase_and_identifier_local(phase, identifier)
            return connector if connector

            # if not available in built-in connectors, try to activate a
            # relevant plugin and try again
            if identifier.include?(":")
              plugin, name = identifier.split(":")
            else
              # The identifier is a shorthand (ie `imessage`) because there's
              # only one default connector, so it names the plugin itself.
              plugin = identifier
            end

            raise(Chronicle::ETL::PluginNotInstalledError.new(plugin)) unless Chronicle::ETL::Registry::Plugins.installed?(plugin)

            Chronicle::ETL::Registry::Plugins.activate(plugin)

            candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
            # if no name given, just use first connector with right phase/plugin
            # TODO: set up a property for connectors to specify that they're the
            # default connector for the plugin
            candidates = candidates.select { |c| c.identifier == name } if name

            candidates.first || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
          end
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Chronicle
  module ETL
    module Registry
      # Plain record describing a plugin: its name, the gem behind it (if
      # any), and whether it is installed.
      class PluginRegistration
        attr_accessor :name, :description, :gem, :version, :installed, :gemspec

        # @param name [String, nil] plugin name
        # @yield [self] optional block for setting further attributes
        def initialize(name = nil)
          @installed = false
          @name = name
          yield self if block_given?
        end

        # Whether the plugin is installed, always as a strict boolean
        # regardless of what was assigned to `installed`.
        def installed?
          @installed ? true : false
        end
      end
    end
  end
end
@@ -0,0 +1,163 @@
1
+ require 'rubygems'
2
+ require 'rubygems/command'
3
+ require 'rubygems/commands/install_command'
4
+ require 'rubygems/uninstaller'
5
+ require 'gems'
6
+ require 'active_support/core_ext/hash/deep_merge'
7
+
8
module Chronicle
  module ETL
    module Registry
      # Responsible for managing plugins available to chronicle-etl
      #
      # @todo Better validation for whether a gem is actually a plugin
      # @todo Add ways to load a plugin that don't require a gem on rubygems.org
      module Plugins
        KNOWN_PLUGINS = [
          'email',
          'foursquare',
          'github',
          'imessage',
          'pinboard',
          'safari',
          'shell',
          'spotify',
          'zulip'
        ].freeze
        public_constant :KNOWN_PLUGINS

        # Start of a system for having non-gem plugins. Right now, we just
        # make registry aware of existence of name of non-gem plugin
        def self.register_standalone(name:)
          plugin = Chronicle::ETL::Registry::PluginRegistration.new do |p|
            p.name = name
            p.installed = true
          end

          installed_standalone << plugin
        end

        # Plugins either installed as gems or manually loaded/registered
        def self.installed
          installed_standalone + installed_as_gem
        end

        # Check whether a given plugin is installed
        def self.installed?(name)
          installed.any? { |plugin| plugin.name == name }
        end

        # List of plugins installed as standalone
        def self.installed_standalone
          @standalones ||= []
        end

        # List of plugins installed as gems
        def self.installed_as_gem
          installed_gemspecs_latest.map do |gem|
            Chronicle::ETL::Registry::PluginRegistration.new do |p|
              # plugin name is the gem name without its `chronicle-` prefix
              p.name = gem.name.delete_prefix("chronicle-")
              p.gem = gem.name
              p.description = gem.description
              p.version = gem.version.to_s
              p.installed = true
            end
          end
        end

        # List of all plugins available to chronicle-etl
        def self.available
          available_as_gem
        end

        # List of plugins available through rubygems
        # TODO: make this concurrent
        def self.available_as_gem
          KNOWN_PLUGINS.map do |name|
            info = gem_info(name)
            Chronicle::ETL::Registry::PluginRegistration.new do |p|
              p.name = name
              p.gem = info['name']
              p.version = info['version']
              p.description = info['info']
            end
          end
        end

        # Load info about a gem plugin from rubygems API
        def self.gem_info(name)
          gem_name = "chronicle-#{name}"
          Gems.info(gem_name)
        end

        # Union of installed gems (latest version) + available gems.
        # When a plugin appears in both lists, the installed registration wins.
        def self.all
          (installed + available)
            .group_by(&:name)
            .values
            .map { |registrations| registrations.find(&:installed) || registrations.first }
        end

        # Does a plugin with a given name exist?
        def self.exists?(name)
          KNOWN_PLUGINS.include?(name)
        end

        # All versions of all plugins currently installed
        def self.installed_gemspecs
          # TODO: add check for chronicle-etl dependency
          Gem::Specification.select { |s| s.name.start_with?("chronicle-") && s.name != "chronicle-etl" }
        end

        # Latest version of each installed plugin
        def self.installed_gemspecs_latest
          installed_gemspecs
            .group_by(&:name)
            .values
            .map { |versions| versions.max_by(&:version) }
        end

        # Activate a plugin with given name by `require`ing it
        #
        # @raise [Chronicle::ETL::PluginConflictError] on a gem version conflict
        # @raise [Chronicle::ETL::PluginLoadError] on any other failure while loading
        def self.activate(name)
          # By default, activates the latest available version of a gem
          # so don't have to run Kernel#gem separately
          require "chronicle/#{name}"
        rescue Gem::ConflictError => e
          # TODO: figure out if there's more we can do here
          raise Chronicle::ETL::PluginConflictError.new(name), "Plugin '#{name}' couldn't be loaded. #{e.message}"
        rescue StandardError, LoadError
          # StandardError to catch random non-loading problems that might occur
          # when requiring the plugin (eg class macro invoked the wrong way)
          # TODO: decide if this should be separated
          raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{name}' couldn't be loaded"
        end

        # Install a plugin to local gems (no-op if it is already installed)
        #
        # @raise [Chronicle::ETL::PluginNotAvailableError] if the name is not a
        #   known plugin or its gem cannot be installed
        def self.install(name)
          return if installed?(name)
          raise(Chronicle::ETL::PluginNotAvailableError.new(name), "Plugin #{name} doesn't exist") unless exists?(name)

          gem_name = "chronicle-#{name}"

          Gem::DefaultUserInteraction.ui = Gem::SilentUI.new
          Gem.install(gem_name)

          activate(name)
        rescue Gem::UnsatisfiableDependencyError
          # TODO: we need to catch a lot more than this here
          raise Chronicle::ETL::PluginNotAvailableError.new(name), "Plugin #{name} could not be installed."
        end

        # Uninstall a plugin
        #
        # @raise [Chronicle::ETL::PluginError] if rubygems reports an install error
        def self.uninstall(name)
          gem_name = "chronicle-#{name}"
          Gem::DefaultUserInteraction.ui = Gem::SilentUI.new
          uninstaller = Gem::Uninstaller.new(gem_name)
          uninstaller.uninstall
        rescue Gem::InstallError
          # TODO: strengthen this exception handling
          raise(Chronicle::ETL::PluginError.new(name), "Plugin #{name} wasn't uninstalled")
        end
      end
    end
  end
end
@@ -1,61 +1,12 @@
1
- require 'rubygems'
2
-
3
1
  module Chronicle
4
2
  module ETL
5
- # A singleton class that acts as a registry of connector classes available for ETL jobs
6
3
  module Registry
7
- PHASES = [:extractor, :transformer, :loader]
8
-
9
- class << self
10
- attr_accessor :connectors
11
-
12
- def register(connector)
13
- connectors << connector
14
- end
15
-
16
- def connectors
17
- @connectors ||= []
18
- end
19
-
20
- # Find connector from amongst those currently loaded
21
- def find_by_phase_and_identifier_local(phase, identifier)
22
- connector = connectors.find { |c| c.phase == phase && c.identifier == identifier }
23
- end
24
-
25
- # Find connector and load relevant plugin to find it if necessary
26
- def find_by_phase_and_identifier(phase, identifier)
27
- connector = find_by_phase_and_identifier_local(phase, identifier)
28
- return connector if connector
29
-
30
- # if not available in built-in connectors, try to activate a
31
- # relevant plugin and try again
32
- if identifier.include?(":")
33
- plugin, name = identifier.split(":")
34
- else
35
- # This case handles the case where the identifier is a
36
- # shorthand (ie `imessage`) because there's only one default
37
- # connector.
38
- plugin = identifier
39
- end
40
-
41
- raise(Chronicle::ETL::PluginNotInstalledError.new(plugin)) unless PluginRegistry.installed?(plugin)
42
-
43
- PluginRegistry.activate(plugin)
44
-
45
- candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
46
- # if no name given, just use first connector with right phase/plugin
47
- # TODO: set up a property for connectors to specify that they're the
48
- # default connector for the plugin
49
- candidates = candidates.select { |c| c.identifier == name } if name
50
- connector = candidates.first
51
-
52
- connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
53
- end
54
- end
55
4
  end
56
5
  end
57
6
  end
58
7
 
59
8
  require_relative 'self_registering'
60
9
  require_relative 'connector_registration'
61
- require_relative 'plugin_registry'
10
+ require_relative 'connectors'
11
+ require_relative 'plugin_registration'
12
+ require_relative 'plugins'
@@ -17,7 +17,7 @@ module Chronicle
17
17
  def register_connector
18
18
  @connector_registration ||= ::Chronicle::ETL::Registry::ConnectorRegistration.new(self)
19
19
  yield @connector_registration if block_given?
20
- ::Chronicle::ETL::Registry.register(@connector_registration)
20
+ ::Chronicle::ETL::Registry::Connectors.register(@connector_registration)
21
21
  end
22
22
  end
23
23
  end