chronicle-etl 0.5.2 → 0.5.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -46,7 +46,7 @@ module Chronicle
46
46
  all_secrets.each do |namespace, secrets|
47
47
  rows += secrets.map do |key, value|
48
48
  # hidden_value = (value[0..5] + ("*" * [0, [value.length - 5, 30].min].max)).truncate(30)
49
- truncated_value = value.truncate(30)
49
+ truncated_value = value&.truncate(30)
50
50
  [namespace, key, truncated_value]
51
51
  end
52
52
  end
@@ -4,6 +4,7 @@ require 'chronicle/etl'
4
4
 
5
5
  require 'chronicle/etl/cli/cli_base'
6
6
  require 'chronicle/etl/cli/subcommand_base'
7
+ require 'chronicle/etl/cli/authorizations'
7
8
  require 'chronicle/etl/cli/connectors'
8
9
  require 'chronicle/etl/cli/jobs'
9
10
  require 'chronicle/etl/cli/plugins'
@@ -1,3 +1,4 @@
1
+ require "active_support/core_ext/hash/keys"
1
2
  require 'fileutils'
2
3
  require 'yaml'
3
4
 
@@ -21,6 +22,8 @@ module Chronicle
21
22
  def write(type, identifier, data)
22
23
  base = config_pathname_for_type(type)
23
24
  path = base.join("#{identifier}.yml")
25
+
26
+ data.deep_stringify_keys!
24
27
  FileUtils.mkdir_p(File.dirname(path))
25
28
  File.open(path, 'w', 0o600) do |f|
26
29
  # Ruby likes to add --- separators when writing yaml files
@@ -108,6 +108,10 @@ module Chronicle
108
108
  end
109
109
 
110
110
  def coerce_time(value)
111
+ # parsing yml files might result in us getting Date objects
112
+ # we convert to DateTime first to ensure UTC
113
+ return value.to_datetime.to_time if value.is_a?(Date)
114
+
111
115
  return value unless value.is_a?(String)
112
116
 
113
117
  # Hacky check for duration strings like "60m"
@@ -4,8 +4,13 @@ module Chronicle
4
4
 
5
5
  class SecretsError < Error; end
6
6
 
7
+ class AuthorizationError < Error; end
8
+
7
9
  class ConfigError < Error; end
8
10
 
11
+ class RunnerError < Error; end
12
+ class RunInterruptedError < RunnerError; end
13
+
9
14
  class RunnerTypeError < Error; end
10
15
 
11
16
  class JobDefinitionError < Error
@@ -34,7 +34,7 @@ module Chronicle
34
34
  def validate
35
35
  @errors = {}
36
36
 
37
- Chronicle::ETL::Registry::PHASES.each do |phase|
37
+ Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
38
38
  __send__("#{phase}_klass".to_sym)
39
39
  rescue Chronicle::ETL::PluginError => e
40
40
  @errors[:plugins] ||= []
@@ -66,7 +66,7 @@ module Chronicle
66
66
 
67
67
  # For each connector in this job, mix in secrets into the options
68
68
  def apply_default_secrets
69
- Chronicle::ETL::Registry::PHASES.each do |phase|
69
+ Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
70
70
  # If the options have a `secrets` key, we look up those secrets and
71
71
  # mix them in. If not, use the connector's plugin name and look up
72
72
  # secrets with the same namespace
@@ -124,11 +124,11 @@ module Chronicle
124
124
  private
125
125
 
126
126
  def load_klass(phase, identifier)
127
- Chronicle::ETL::Registry.find_by_phase_and_identifier(phase, identifier).klass
127
+ Chronicle::ETL::Registry::Connectors.find_by_phase_and_identifier(phase, identifier).klass
128
128
  end
129
129
 
130
130
  def load_credentials
131
- Chronicle::ETL::Registry::PHASES.each do |phase|
131
+ Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
132
132
  credentials_name = @definition[phase].dig(:options, :credentials)
133
133
  if credentials_name
134
134
  credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
@@ -17,8 +17,8 @@ module Chronicle
17
17
  def output message, level
18
18
  return unless level >= @log_level
19
19
 
20
- if @progress_bar
21
- @progress_bar.log(message)
20
+ if @ui_element
21
+ @ui_element.log(message)
22
22
  else
23
23
  $stderr.puts(message)
24
24
  end
@@ -40,12 +40,12 @@ module Chronicle
40
40
  output(message, DEBUG)
41
41
  end
42
42
 
43
- def attach_to_progress_bar(progress_bar)
44
- @progress_bar = progress_bar
43
+ def attach_to_ui(ui_element)
44
+ @ui_element = ui_element
45
45
  end
46
46
 
47
- def detach_from_progress_bar
48
- @progress_bar = nil
47
+ def detach_from_ui
48
+ @ui_element = nil
49
49
  end
50
50
  end
51
51
  end
@@ -9,7 +9,7 @@ module Chronicle
9
9
  # @todo Experiment with just mixing in ActiveModel instead of this
10
10
  # reimplementation
11
11
  class Base
12
- ATTRIBUTES = [:provider, :provider_id, :lat, :lng, :metadata].freeze
12
+ ATTRIBUTES = [:provider, :provider_id, :provider_namespace, :lat, :lng, :metadata].freeze
13
13
  ASSOCIATIONS = [].freeze
14
14
 
15
15
  attr_accessor(:id, :dedupe_on, *ATTRIBUTES)
@@ -10,12 +10,14 @@ module Chronicle
10
10
  # TODO: This desperately needs a validation system
11
11
  ASSOCIATIONS = [
12
12
  :involvements, # inverse of activity's `involved`
13
-
13
+ :analogous,
14
14
  :attachments,
15
15
  :abouts,
16
16
  :aboutables, # inverse of above
17
17
  :depicts,
18
18
  :consumers,
19
+ :creators,
20
+ :creations,
19
21
  :contains,
20
22
  :containers # inverse of above
21
23
  ].freeze # TODO: add these to reflect Chronicle Schema
@@ -0,0 +1,140 @@
1
+ require 'omniauth'
2
+ require 'tty-spinner'
3
+
4
+ module Chronicle
5
+ module ETL
6
+ # An authorization strategy that uses oauth2 (and omniauth under the hood)
7
+ class OauthAuthorizer < Authorizer
8
+ class << self
9
+ attr_reader :strategy, :provider_name, :authorization_to_secret_map
10
+ attr_accessor :client_id, :client_secret
11
+
12
+ # Macro for specifying which omniauth strategy to use
13
+ def omniauth_strategy(strategy)
14
+ @strategy = strategy
15
+ end
16
+
17
+ # Macro for specifying which omniauth scopes to request
18
+ def scope(value)
19
+ options[:scope] = value
20
+ end
21
+
22
+ # Macro for specifying how to map the returned authorization hash to a secrets hash
23
+ def pluck_secrets(map)
24
+ @authorization_to_secret_map = map
25
+ end
26
+
27
+ # Macro for specifying options to pass to omniauth
28
+ def options
29
+ @options ||= {}
30
+ end
31
+
32
+ # Returns all subclasses of OauthAuthorizer
33
+ # (Used by AuthorizationServer to build omniauth providers)
34
+ def all
35
+ ObjectSpace.each_object(::Class).select { |klass| klass < self }
36
+ end
37
+ end
38
+
39
+ attr_reader :authorization
40
+
41
+ # Create a new instance of OauthAuthorizer
42
+ def initialize(port:, credentials: {})
43
+ @port = port
44
+ @credentials = credentials
45
+ super
46
+ end
47
+
48
+ # Start up an authorization server and handle the oauth flow
49
+ def authorize!
50
+ associate_oauth_credentials
51
+ @server = load_server
52
+ spinner = TTY::Spinner.new(":spinner :title", format: :dots_2)
53
+ spinner.auto_spin
54
+ spinner.update(title: "Starting temporary authorization server on port #{@port}""")
55
+
56
+ server_thread = start_authorization_server(port: @port)
57
+ start_oauth_flow
58
+
59
+ spinner.update(title: "Waiting for authorization to complete in your browser")
60
+ sleep 0.1 while authorization_pending?(server_thread)
61
+
62
+ @server.quit!
63
+ server_thread.join
64
+ spinner.success("(#{'successful'.green})")
65
+
66
+ # TODO: properly handle failed authorizations
67
+ raise Chronicle::ETL::AuthorizationError unless @server.latest_authorization
68
+
69
+ @authorization = @server.latest_authorization
70
+
71
+ extract_secrets(authorization: @authorization, pluck_values: self.class.authorization_to_secret_map)
72
+ end
73
+
74
+ private
75
+
76
+ def authorization_pending?(server_thread)
77
+ server_thread.status && !@server.latest_authorization
78
+ end
79
+
80
+ def associate_oauth_credentials
81
+ self.class.client_id = @credentials[:client_id]
82
+ self.class.client_secret = @credentials[:client_secret]
83
+ end
84
+
85
+ def load_server
86
+ # Load at runtime so that we can set omniauth strategies based on
87
+ # which chronicle plugin has been loaded.
88
+ require_relative './authorization_server'
89
+ Chronicle::ETL::AuthorizationServer
90
+ end
91
+
92
+ def start_authorization_server(port:)
93
+ @server.settings.port = port
94
+ suppress_webrick_logging(@server)
95
+ Thread.abort_on_exception = true
96
+ Thread.report_on_exception = false
97
+
98
+ Thread.new do
99
+ @server.run!({ port: @port }) do |s|
100
+ s.silent = true if s.class.to_s == "Thin::Server"
101
+ end
102
+ end
103
+ end
104
+
105
+ def start_oauth_flow
106
+ url = "http://localhost:#{@port}/auth/#{omniauth_strategy}"
107
+ Launchy.open(url)
108
+ rescue Launchy::CommandNotFoundError
109
+ Chronicle::ETL::Logger.info("Please open #{url} in a browser to continue")
110
+ end
111
+
112
+ def suppress_webrick_logging(server)
113
+ require 'webrick'
114
+ server.set(
115
+ :server_settings,
116
+ {
117
+ AccessLog: [],
118
+ # TODO: make this windows friendly
119
+ # https://github.com/winton/stasis/commit/77da36f43285fda129300e382f18dfaff48571b0
120
+ Logger: WEBrick::Log::new("/dev/null")
121
+ }
122
+ )
123
+ rescue LoadError
124
+ # no worries if we're not using WEBrick
125
+ end
126
+
127
+ def extract_secrets(authorization:, pluck_values:)
128
+ return authorization unless pluck_values&.any?
129
+
130
+ pluck_values.each_with_object({}) do |(key, identifiers), secrets|
131
+ secrets[key] = authorization.dig(*identifiers)
132
+ end
133
+ end
134
+
135
+ def omniauth_strategy
136
+ self.class.strategy
137
+ end
138
+ end
139
+ end
140
+ end
@@ -0,0 +1,60 @@
1
+ require 'rubygems'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ module Registry
6
+ # A singleton class that acts as a registry of connector classes available for ETL jobs
7
+ module Connectors
8
+ PHASES = [:extractor, :transformer, :loader].freeze
9
+ public_constant :PHASES
10
+
11
+ class << self
12
+ attr_accessor :connectors
13
+
14
+ def register(connector)
15
+ connectors << connector
16
+ end
17
+
18
+ def connectors
19
+ @connectors ||= []
20
+ end
21
+
22
+ # Find connector from amongst those currently loaded
23
+ def find_by_phase_and_identifier_local(phase, identifier)
24
+ connector = connectors.find { |c| c.phase == phase && c.identifier == identifier }
25
+ end
26
+
27
+ # Find connector and load relevant plugin to find it if necessary
28
+ def find_by_phase_and_identifier(phase, identifier)
29
+ connector = find_by_phase_and_identifier_local(phase, identifier)
30
+ return connector if connector
31
+
32
+ # if not available in built-in connectors, try to activate a
33
+ # relevant plugin and try again
34
+ if identifier.include?(":")
35
+ plugin, name = identifier.split(":")
36
+ else
37
+ # This case handles the case where the identifier is a
38
+ # shorthand (ie `imessage`) because there's only one default
39
+ # connector.
40
+ plugin = identifier
41
+ end
42
+
43
+ raise(Chronicle::ETL::PluginNotInstalledError.new(plugin)) unless Chronicle::ETL::Registry::Plugins.installed?(plugin)
44
+
45
+ Chronicle::ETL::Registry::Plugins.activate(plugin)
46
+
47
+ candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
48
+ # if no name given, just use first connector with right phase/plugin
49
+ # TODO: set up a property for connectors to specify that they're the
50
+ # default connector for the plugin
51
+ candidates = candidates.select { |c| c.identifier == name } if name
52
+ connector = candidates.first
53
+
54
+ connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,19 @@
1
+ module Chronicle
2
+ module ETL
3
+ module Registry
4
+ class PluginRegistration
5
+ attr_accessor :name, :description, :gem, :version, :installed, :gemspec
6
+
7
+ def initialize(name=nil)
8
+ @installed = false
9
+ @name = name
10
+ yield self if block_given?
11
+ end
12
+
13
+ def installed?
14
+ @installed || false
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,163 @@
1
+ require 'rubygems'
2
+ require 'rubygems/command'
3
+ require 'rubygems/commands/install_command'
4
+ require 'rubygems/uninstaller'
5
+ require 'gems'
6
+ require 'active_support/core_ext/hash/deep_merge'
7
+
8
+ module Chronicle
9
+ module ETL
10
+ module Registry
11
+ # Responsible for managing plugins available to chronicle-etl
12
+ #
13
+ # @todo Better validation for whether a gem is actually a plugin
14
+ # @todo Add ways to load a plugin that don't require a gem on rubygems.org
15
+ module Plugins
16
+ KNOWN_PLUGINS = [
17
+ 'email',
18
+ 'foursquare',
19
+ 'github',
20
+ 'imessage',
21
+ 'pinboard',
22
+ 'safari',
23
+ 'shell',
24
+ 'spotify',
25
+ 'zulip'
26
+ ].freeze
27
+ public_constant :KNOWN_PLUGINS
28
+
29
+ # Start of a system for having non-gem plugins. Right now, we just
30
+ # make registry aware of existence of name of non-gem plugin
31
+ def self.register_standalone(name:)
32
+ plugin = Chronicle::ETL::Registry::PluginRegistration.new do |p|
33
+ p.name = name
34
+ p.installed = true
35
+ end
36
+
37
+ installed_standalone << plugin
38
+ end
39
+
40
+ # Plugins either installed as gems or manually loaded/registered
41
+ def self.installed
42
+ installed_standalone + installed_as_gem
43
+ end
44
+
45
+ # Check whether a given plugin is installed
46
+ def self.installed?(name)
47
+ installed.map(&:name).include?(name)
48
+ end
49
+
50
+ # List of plugins installed as standalone
51
+ def self.installed_standalone
52
+ @standalones ||= []
53
+ end
54
+
55
+ # List of plugins installed as gems
56
+ def self.installed_as_gem
57
+ installed_gemspecs_latest.map do |gem|
58
+ Chronicle::ETL::Registry::PluginRegistration.new do |p|
59
+ p.name = gem.name.sub("chronicle-", "")
60
+ p.gem = gem.name
61
+ p.description = gem.description
62
+ p.version = gem.version.to_s
63
+ p.installed = true
64
+ end
65
+ end
66
+ end
67
+
68
+ # List of all plugins available to chronicle-etl
69
+ def self.available
70
+ available_as_gem
71
+ end
72
+
73
+ # List of plugins available through rubygems
74
+ # TODO: make this concurrent
75
+ def self.available_as_gem
76
+ KNOWN_PLUGINS.map do |name|
77
+ info = gem_info(name)
78
+ Chronicle::ETL::Registry::PluginRegistration.new do |p|
79
+ p.name = name
80
+ p.gem = info['name']
81
+ p.version = info['version']
82
+ p.description = info['info']
83
+ end
84
+ end
85
+ end
86
+
87
+ # Load info about a gem plugin from rubygems API
88
+ def self.gem_info(name)
89
+ gem_name = "chronicle-#{name}"
90
+ Gems.info(gem_name)
91
+ end
92
+
93
+ # Union of installed gems (latest version) + available gems
94
+ def self.all
95
+ (installed + available)
96
+ .group_by(&:name)
97
+ .transform_values { |plugin| plugin.find(&:installed) || plugin.first }
98
+ .values
99
+ end
100
+
101
+ # Does a plugin with a given name exist?
102
+ def self.exists?(name)
103
+ KNOWN_PLUGINS.include?(name)
104
+ end
105
+
106
+ # All versions of all plugins currently installed
107
+ def self.installed_gemspecs
108
+ # TODO: add check for chronicle-etl dependency
109
+ Gem::Specification.filter { |s| s.name.match(/^chronicle-/) && s.name != "chronicle-etl" }
110
+ end
111
+
112
+ # Latest version of each installed plugin
113
+ def self.installed_gemspecs_latest
114
+ installed_gemspecs.group_by(&:name)
115
+ .transform_values { |versions| versions.sort_by(&:version).reverse.first }
116
+ .values
117
+ end
118
+
119
+ # Activate a plugin with given name by `require`ing it
120
+ def self.activate(name)
121
+ # By default, activates the latest available version of a gem
122
+ # so don't have to run Kernel#gem separately
123
+ require "chronicle/#{name}"
124
+ rescue Gem::ConflictError => e
125
+ # TODO: figure out if there's more we can do here
126
+ raise Chronicle::ETL::PluginConflictError.new(name), "Plugin '#{name}' couldn't be loaded. #{e.message}"
127
+ rescue StandardError, LoadError => e
128
+ # StandardError to catch random non-loading problems that might occur
129
+ # when requiring the plugin (eg class macro invoked the wrong way)
130
+ # TODO: decide if this should be separated
131
+ raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{name}' couldn't be loaded"
132
+ end
133
+
134
+ # Install a plugin to local gems
135
+ def self.install(name)
136
+ return if installed?(name)
137
+ raise(Chronicle::ETL::PluginNotAvailableError.new(name), "Plugin #{name} doesn't exist") unless exists?(name)
138
+
139
+ gem_name = "chronicle-#{name}"
140
+
141
+ Gem::DefaultUserInteraction.ui = Gem::SilentUI.new
142
+ Gem.install(gem_name)
143
+
144
+ activate(name)
145
+ rescue Gem::UnsatisfiableDependencyError
146
+ # TODO: we need to catch a lot more than this here
147
+ raise Chronicle::ETL::PluginNotAvailableError.new(name), "Plugin #{name} could not be installed."
148
+ end
149
+
150
+ # Uninstall a plugin
151
+ def self.uninstall(name)
152
+ gem_name = "chronicle-#{name}"
153
+ Gem::DefaultUserInteraction.ui = Gem::SilentUI.new
154
+ uninstaller = Gem::Uninstaller.new(gem_name)
155
+ uninstaller.uninstall
156
+ rescue Gem::InstallError
157
+ # TODO: strengthen this exception handling
158
+ raise(Chronicle::ETL::PluginError.new(name), "Plugin #{name} wasn't uninstalled")
159
+ end
160
+ end
161
+ end
162
+ end
163
+ end
@@ -1,61 +1,12 @@
1
- require 'rubygems'
2
-
3
1
  module Chronicle
4
2
  module ETL
5
- # A singleton class that acts as a registry of connector classes available for ETL jobs
6
3
  module Registry
7
- PHASES = [:extractor, :transformer, :loader]
8
-
9
- class << self
10
- attr_accessor :connectors
11
-
12
- def register(connector)
13
- connectors << connector
14
- end
15
-
16
- def connectors
17
- @connectors ||= []
18
- end
19
-
20
- # Find connector from amongst those currently loaded
21
- def find_by_phase_and_identifier_local(phase, identifier)
22
- connector = connectors.find { |c| c.phase == phase && c.identifier == identifier }
23
- end
24
-
25
- # Find connector and load relevant plugin to find it if necessary
26
- def find_by_phase_and_identifier(phase, identifier)
27
- connector = find_by_phase_and_identifier_local(phase, identifier)
28
- return connector if connector
29
-
30
- # if not available in built-in connectors, try to activate a
31
- # relevant plugin and try again
32
- if identifier.include?(":")
33
- plugin, name = identifier.split(":")
34
- else
35
- # This case handles the case where the identifier is a
36
- # shorthand (ie `imessage`) because there's only one default
37
- # connector.
38
- plugin = identifier
39
- end
40
-
41
- raise(Chronicle::ETL::PluginNotInstalledError.new(plugin)) unless PluginRegistry.installed?(plugin)
42
-
43
- PluginRegistry.activate(plugin)
44
-
45
- candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
46
- # if no name given, just use first connector with right phase/plugin
47
- # TODO: set up a property for connectors to specify that they're the
48
- # default connector for the plugin
49
- candidates = candidates.select { |c| c.identifier == name } if name
50
- connector = candidates.first
51
-
52
- connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
53
- end
54
- end
55
4
  end
56
5
  end
57
6
  end
58
7
 
59
8
  require_relative 'self_registering'
60
9
  require_relative 'connector_registration'
61
- require_relative 'plugin_registry'
10
+ require_relative 'connectors'
11
+ require_relative 'plugin_registration'
12
+ require_relative 'plugins'
@@ -17,7 +17,7 @@ module Chronicle
17
17
  def register_connector
18
18
  @connector_registration ||= ::Chronicle::ETL::Registry::ConnectorRegistration.new(self)
19
19
  yield @connector_registration if block_given?
20
- ::Chronicle::ETL::Registry.register(@connector_registration)
20
+ ::Chronicle::ETL::Registry::Connectors.register(@connector_registration)
21
21
  end
22
22
  end
23
23
  end