chronicle-etl 0.5.5 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +75 -68
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +51 -49
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +8 -6
- data/lib/chronicle/etl/cli/connectors.rb +7 -7
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +14 -15
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +18 -8
- data/lib/chronicle/etl/configurable.rb +20 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +38 -26
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +2 -2
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +93 -36
- data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
- data/lib/chronicle/etl/registry/plugins.rb +27 -19
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +4 -4
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +49 -47
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -5,54 +7,109 @@ module Chronicle
|
|
5
7
|
module Registry
|
6
8
|
# A singleton class that acts as a registry of connector classes available for ETL jobs
|
7
9
|
module Connectors
|
8
|
-
PHASES = [
|
10
|
+
PHASES = %i[extractor transformer loader].freeze
|
9
11
|
public_constant :PHASES
|
10
12
|
|
11
13
|
class << self
|
12
14
|
attr_accessor :connectors
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.register(connector)
|
18
|
+
connectors << connector
|
19
|
+
end
|
13
20
|
|
14
|
-
|
15
|
-
|
21
|
+
def self.connectors
|
22
|
+
@connectors ||= []
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.ancestor_for_phase(phase)
|
26
|
+
case phase
|
27
|
+
when :extractor
|
28
|
+
Chronicle::ETL::Extractor
|
29
|
+
when :transformer
|
30
|
+
Chronicle::ETL::Transformer
|
31
|
+
when :loader
|
32
|
+
Chronicle::ETL::Loader
|
16
33
|
end
|
34
|
+
end
|
17
35
|
|
18
|
-
|
19
|
-
|
36
|
+
def self.find_converter_for_source(source:, type: nil, strategy: nil, target: nil)
|
37
|
+
# FIXME: we're assuming extractor plugin has been loaded already
|
38
|
+
# This may not be the case if the schema converter is running
|
39
|
+
# off a JSON dump of extraction data.
|
40
|
+
# plugin = source_klass.connector_registration.source
|
41
|
+
# type = source_klass.connector_registration.type
|
42
|
+
# strategy = source_klass.connector_registration.strategy
|
43
|
+
|
44
|
+
connectors.find do |c|
|
45
|
+
c.phase == :transformer &&
|
46
|
+
c.source == source &&
|
47
|
+
(type.nil? || c.type == type) &&
|
48
|
+
(strategy.nil? || c.strategy == strategy || c.strategy.nil?) &&
|
49
|
+
(target.nil? || c.to_schema == target)
|
20
50
|
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# Find connector from amongst those currently loaded
|
54
|
+
def self.find_by_phase_and_identifier_built_in(phase, identifier)
|
55
|
+
connectors.find { |c| c.phase == phase.to_sym && c.identifier == identifier.to_sym }
|
56
|
+
end
|
57
|
+
|
58
|
+
# Find connector and load relevant plugin to find it if necessary
|
59
|
+
def self.find_by_phase_and_identifier(phase, identifier)
|
60
|
+
connector = find_by_phase_and_identifier_built_in(phase, identifier)
|
61
|
+
return connector if connector
|
62
|
+
|
63
|
+
# determine if we need to try to load a local file. if it has a dot in the identifier, we treat it as a file
|
64
|
+
return find_by_phase_and_identifier_local(phase, identifier) if identifier.to_s.include?('.')
|
65
|
+
|
66
|
+
# Example identifier: lastfm:listens:api
|
67
|
+
plugin, type, strategy = identifier.split(':')
|
68
|
+
.map { |part| part.gsub('-', '_') }
|
69
|
+
.map(&:to_sym)
|
21
70
|
|
22
|
-
|
23
|
-
|
24
|
-
|
71
|
+
plugin_identifier = plugin.to_s.gsub('_', '-')
|
72
|
+
|
73
|
+
unless Chronicle::ETL::Registry::Plugins.installed?(plugin_identifier)
|
74
|
+
raise Chronicle::ETL::PluginNotInstalledError, plugin_identifier
|
25
75
|
end
|
26
76
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
# if not available in built-in connectors, try to activate a
|
33
|
-
# relevant plugin and try again
|
34
|
-
if identifier.include?(":")
|
35
|
-
plugin, name = identifier.split(":")
|
36
|
-
else
|
37
|
-
# This case handles the case where the identifier is a
|
38
|
-
# shorthand (ie `imessage`) because there's only one default
|
39
|
-
# connector.
|
40
|
-
plugin = identifier
|
41
|
-
end
|
42
|
-
|
43
|
-
raise(Chronicle::ETL::PluginNotInstalledError.new(plugin)) unless Chronicle::ETL::Registry::Plugins.installed?(plugin)
|
44
|
-
|
45
|
-
Chronicle::ETL::Registry::Plugins.activate(plugin)
|
46
|
-
|
47
|
-
candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
|
48
|
-
# if no name given, just use first connector with right phase/plugin
|
49
|
-
# TODO: set up a property for connectors to specify that they're the
|
50
|
-
# default connector for the plugin
|
51
|
-
candidates = candidates.select { |c| c.identifier == name } if name
|
52
|
-
connector = candidates.first
|
53
|
-
|
54
|
-
connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
|
77
|
+
Chronicle::ETL::Registry::Plugins.activate(plugin_identifier)
|
78
|
+
|
79
|
+
# find most specific connector that matches the identifier
|
80
|
+
connector = connectors.find do |c|
|
81
|
+
c.plugin == plugin && (type.nil? || c.type == type) && (strategy.nil? || c.strategy == strategy)
|
55
82
|
end
|
83
|
+
|
84
|
+
connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
|
85
|
+
end
|
86
|
+
|
87
|
+
# Load a plugin from local file system
|
88
|
+
def self.find_by_phase_and_identifier_local(phase, identifier)
|
89
|
+
script = File.read(identifier)
|
90
|
+
raise ConnectorNotAvailableError, "Connector '#{identifier}' not found" if script.nil?
|
91
|
+
|
92
|
+
# load the file by evaluating the contents
|
93
|
+
eval(script, TOPLEVEL_BINDING, __FILE__, __LINE__) # rubocop:disable Security/Eval
|
94
|
+
|
95
|
+
# read the file and look for all class definitions in the ruby script.
|
96
|
+
class_names = script.scan(/class (\w+)/).flatten
|
97
|
+
|
98
|
+
class_names.each do |class_name|
|
99
|
+
klass = Object.const_get(class_name)
|
100
|
+
|
101
|
+
next unless klass.ancestors.include?(ancestor_for_phase(phase))
|
102
|
+
|
103
|
+
registration = ::Chronicle::ETL::Registry::ConnectorRegistration.new(klass)
|
104
|
+
|
105
|
+
klass.connector_registration = registration
|
106
|
+
return registration
|
107
|
+
# return klass
|
108
|
+
rescue NameError
|
109
|
+
# ignore
|
110
|
+
end
|
111
|
+
|
112
|
+
raise ConnectorNotAvailableError, "Connector '#{identifier}' not found"
|
56
113
|
end
|
57
114
|
end
|
58
115
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
require 'rubygems/command'
|
3
5
|
require 'rubygems/commands/install_command'
|
@@ -13,16 +15,17 @@ module Chronicle
|
|
13
15
|
# @todo Better validation for whether a gem is actually a plugin
|
14
16
|
# @todo Add ways to load a plugin that don't require a gem on rubygems.org
|
15
17
|
module Plugins
|
16
|
-
KNOWN_PLUGINS = [
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
18
|
+
KNOWN_PLUGINS = %w[
|
19
|
+
apple-podcasts
|
20
|
+
email
|
21
|
+
foursquare
|
22
|
+
github
|
23
|
+
imessage
|
24
|
+
pinboard
|
25
|
+
safari
|
26
|
+
shell
|
27
|
+
spotify
|
28
|
+
zulip
|
26
29
|
].freeze
|
27
30
|
public_constant :KNOWN_PLUGINS
|
28
31
|
|
@@ -30,7 +33,7 @@ module Chronicle
|
|
30
33
|
# make registry aware of existence of name of non-gem plugin
|
31
34
|
def self.register_standalone(name:)
|
32
35
|
plugin = Chronicle::ETL::Registry::PluginRegistration.new do |p|
|
33
|
-
p.name = name
|
36
|
+
p.name = name.to_sym
|
34
37
|
p.installed = true
|
35
38
|
end
|
36
39
|
|
@@ -44,19 +47,19 @@ module Chronicle
|
|
44
47
|
|
45
48
|
# Check whether a given plugin is installed
|
46
49
|
def self.installed?(name)
|
47
|
-
installed.map(&:name).include?(name)
|
50
|
+
installed.map(&:name).include?(name.to_sym)
|
48
51
|
end
|
49
52
|
|
50
53
|
# List of plugins installed as standalone
|
51
54
|
def self.installed_standalone
|
52
|
-
@
|
55
|
+
@installed_standalone ||= []
|
53
56
|
end
|
54
57
|
|
55
58
|
# List of plugins installed as gems
|
56
59
|
def self.installed_as_gem
|
57
60
|
installed_gemspecs_latest.map do |gem|
|
58
61
|
Chronicle::ETL::Registry::PluginRegistration.new do |p|
|
59
|
-
p.name = gem.name.sub(
|
62
|
+
p.name = gem.name.sub('chronicle-', '').to_sym
|
60
63
|
p.gem = gem.name
|
61
64
|
p.description = gem.description
|
62
65
|
p.version = gem.version.to_s
|
@@ -106,7 +109,9 @@ module Chronicle
|
|
106
109
|
# All versions of all plugins currently installed
|
107
110
|
def self.installed_gemspecs
|
108
111
|
# TODO: add check for chronicle-etl dependency
|
109
|
-
Gem::Specification.filter
|
112
|
+
Gem::Specification.filter do |s|
|
113
|
+
s.name.match(/^chronicle-/) && s.name != 'chronicle-etl' && s.name != 'chronicle-core'
|
114
|
+
end
|
110
115
|
end
|
111
116
|
|
112
117
|
# Latest version of each installed plugin
|
@@ -120,15 +125,18 @@ module Chronicle
|
|
120
125
|
def self.activate(name)
|
121
126
|
# By default, activates the latest available version of a gem
|
122
127
|
# so don't have to run Kernel#gem separately
|
123
|
-
|
128
|
+
|
129
|
+
plugin_require_name = name.to_s.gsub('-', '_')
|
130
|
+
require "chronicle/#{plugin_require_name}"
|
124
131
|
rescue Gem::ConflictError => e
|
125
132
|
# TODO: figure out if there's more we can do here
|
126
|
-
raise Chronicle::ETL::PluginConflictError.new(name),
|
127
|
-
|
133
|
+
raise Chronicle::ETL::PluginConflictError.new(name),
|
134
|
+
"Plugin '#{plugin_require_name}' couldn't be loaded. #{e.message}"
|
135
|
+
rescue StandardError, LoadError
|
128
136
|
# StandardError to catch random non-loading problems that might occur
|
129
137
|
# when requiring the plugin (eg class macro invoked the wrong way)
|
130
138
|
# TODO: decide if this should be separated
|
131
|
-
raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{
|
139
|
+
raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{plugin_require_name}' couldn't be loaded"
|
132
140
|
end
|
133
141
|
|
134
142
|
# Install a plugin to local gems
|
data/lib/chronicle/etl/runner.rb
CHANGED
@@ -1,133 +1,163 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'colorize'
|
2
4
|
require 'chronic_duration'
|
3
|
-
require
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
5
|
+
require 'tty-spinner'
|
6
|
+
|
7
|
+
module Chronicle
|
8
|
+
module ETL
|
9
|
+
class Runner
|
10
|
+
def initialize(job)
|
11
|
+
@job = job
|
12
|
+
@job_logger = Chronicle::ETL::JobLogger.new(@job)
|
13
|
+
end
|
14
|
+
|
15
|
+
def run!
|
16
|
+
begin_job
|
17
|
+
validate_job
|
18
|
+
instantiate_connectors
|
19
|
+
prepare_job
|
20
|
+
prepare_ui
|
21
|
+
run_extraction
|
22
|
+
rescue Chronicle::ETL::ExtractionError => e
|
23
|
+
@job_logger&.error
|
24
|
+
raise(Chronicle::ETL::RunnerError, "Extraction failed. #{e.message}")
|
25
|
+
rescue Interrupt
|
26
|
+
@job_logger&.error
|
27
|
+
raise(Chronicle::ETL::RunInterruptedError, 'Job interrupted.')
|
28
|
+
# rescue StandardError => e
|
29
|
+
# # Just throwing this in here until we have better exception handling in
|
30
|
+
# # loaders, etc
|
31
|
+
# @job_logger&.error
|
32
|
+
# raise(Chronicle::ETL::RunnerError, "Error running job. #{e.message}")
|
33
|
+
ensure
|
34
|
+
finish_job
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def begin_job
|
40
|
+
Chronicle::ETL::Logger.info(tty_log_job_initialize)
|
41
|
+
@initialization_spinner = TTY::Spinner.new(':spinner :title', format: :dots_2)
|
42
|
+
end
|
43
|
+
|
44
|
+
def validate_job
|
45
|
+
@initialization_spinner.update(title: 'Validating job')
|
46
|
+
@job.job_definition.validate!
|
47
|
+
end
|
48
|
+
|
49
|
+
def instantiate_connectors
|
50
|
+
@initialization_spinner.update(title: 'Initializing connectors')
|
51
|
+
@extractor = @job.instantiate_extractor
|
52
|
+
@transformers = @job.instantiate_transformers
|
53
|
+
@loader = @job.instantiate_loader
|
54
|
+
end
|
55
|
+
|
56
|
+
def prepare_job
|
57
|
+
@initialization_spinner.update(title: 'Preparing job')
|
58
|
+
@job_logger.start
|
59
|
+
@loader.start
|
60
|
+
|
61
|
+
@initialization_spinner.update(title: 'Preparing extraction')
|
62
|
+
@initialization_spinner.auto_spin
|
63
|
+
@extractor.prepare
|
64
|
+
@initialization_spinner.success("(#{'successful'.green})")
|
65
|
+
Chronicle::ETL::Logger.info("\n")
|
66
|
+
end
|
67
|
+
|
68
|
+
def prepare_ui
|
69
|
+
total = @extractor.results_count
|
70
|
+
@progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
|
71
|
+
Chronicle::ETL::Logger.attach_to_ui(@progress_bar)
|
72
|
+
end
|
73
|
+
|
74
|
+
def run_extraction
|
75
|
+
# Pattern based on Kiba's StreamingRunner
|
76
|
+
# https://github.com/thbar/kiba/blob/master/lib/kiba/streaming_runner.rb
|
77
|
+
stream = extractor_stream
|
78
|
+
recurser = ->(s, t) { transform_stream(s, t) }
|
79
|
+
@transformers.reduce(stream, &recurser).each do |record|
|
80
|
+
Chronicle::ETL::Logger.debug(tty_log_transformation(record))
|
81
|
+
@job_logger.log_transformation(record)
|
82
|
+
@progress_bar.increment
|
83
|
+
load_record(record)
|
84
|
+
end
|
85
|
+
|
86
|
+
@progress_bar.finish
|
87
|
+
|
88
|
+
# This is typically a slow method (writing to stdout, writing a big file, etc)
|
89
|
+
# TODO: consider adding a spinner?
|
90
|
+
@loader.finish
|
91
|
+
@job_logger.finish
|
92
|
+
end
|
93
|
+
|
94
|
+
# Initial stream of extracted data, wrapped in a Record class
|
95
|
+
def extractor_stream
|
96
|
+
Enumerator.new do |y|
|
97
|
+
@extractor.extract do |extraction|
|
98
|
+
record = Chronicle::ETL::Record.new(data: extraction.data, extraction: extraction)
|
99
|
+
y << record
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# For a given stream of records and a given transformer,
|
105
|
+
# returns a new stream of transformed records and finally
|
106
|
+
# calls the finish method on the transformer
|
107
|
+
def transform_stream(stream, transformer)
|
108
|
+
Enumerator.new do |y|
|
109
|
+
stream.each do |record|
|
110
|
+
transformer.call(record) do |transformed_record|
|
111
|
+
y << transformed_record
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
transformer.call_finish do |transformed_record|
|
116
|
+
y << transformed_record
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
def load_record(record)
|
122
|
+
@loader.load(record.data) unless @job.dry_run?
|
123
|
+
end
|
124
|
+
|
125
|
+
def finish_job
|
126
|
+
@job_logger.save
|
127
|
+
@progress_bar&.finish
|
128
|
+
Chronicle::ETL::Logger.detach_from_ui
|
129
|
+
Chronicle::ETL::Logger.info(tty_log_completion)
|
130
|
+
end
|
131
|
+
|
132
|
+
def tty_log_job_initialize
|
133
|
+
output = 'Beginning job '
|
134
|
+
output += "'#{@job.name}'".bold if @job.name
|
135
|
+
output
|
136
|
+
end
|
137
|
+
|
138
|
+
def tty_log_transformation(record)
|
139
|
+
output = ' ✓'.green
|
140
|
+
output + " #{record}"
|
141
|
+
end
|
142
|
+
|
143
|
+
def tty_log_transformation_failure(exception, transformer)
|
144
|
+
output = ' ✖'.red
|
145
|
+
output + " Failed to transform #{transformer}. #{exception.message}"
|
146
|
+
end
|
147
|
+
|
148
|
+
def tty_log_completion
|
149
|
+
status = @job_logger.success ? 'Success' : 'Failed'
|
150
|
+
job_completion = @job_logger.success ? 'Completed' : 'Partially completed'
|
151
|
+
output = "\n#{job_completion} job"
|
152
|
+
output += " '#{@job.name}'".bold if @job.name
|
153
|
+
output += " in #{ChronicDuration.output(@job_logger.duration)}" if @job_logger.duration
|
154
|
+
output += "\n Status:\t".light_black + status
|
155
|
+
output += "\n Completed:\t".light_black + @job_logger.job_log.num_records_processed.to_s
|
156
|
+
if @job_logger.job_log.highest_timestamp
|
157
|
+
output += "\n Latest:\t".light_black + @job_logger.job_log.highest_timestamp.iso8601.to_s
|
158
|
+
end
|
159
|
+
output
|
160
|
+
end
|
73
161
|
end
|
74
|
-
|
75
|
-
@progress_bar.finish
|
76
|
-
|
77
|
-
# This is typically a slow method (writing to stdout, writing a big file, etc)
|
78
|
-
# TODO: consider adding a spinner?
|
79
|
-
@loader.finish
|
80
|
-
@job_logger.finish
|
81
|
-
end
|
82
|
-
|
83
|
-
def process_extraction(extraction)
|
84
|
-
# For each extraction from our extractor, we create a new tarnsformer
|
85
|
-
transformer = @job.instantiate_transformer(extraction)
|
86
|
-
|
87
|
-
# And then transform that record, logging it if we're in debug log level
|
88
|
-
record = transformer.transform
|
89
|
-
Chronicle::ETL::Logger.debug(tty_log_transformation(transformer))
|
90
|
-
@job_logger.log_transformation(transformer)
|
91
|
-
|
92
|
-
# Then send the results to the loader
|
93
|
-
@loader.load(record) unless @job.dry_run?
|
94
|
-
rescue Chronicle::ETL::TransformationError => e
|
95
|
-
# TODO: have an option to cancel job if we encounter an error
|
96
|
-
Chronicle::ETL::Logger.error(tty_log_transformation_failure(e, transformer))
|
97
|
-
end
|
98
|
-
|
99
|
-
def finish_job
|
100
|
-
@job_logger.save
|
101
|
-
@progress_bar&.finish
|
102
|
-
Chronicle::ETL::Logger.detach_from_ui
|
103
|
-
Chronicle::ETL::Logger.info(tty_log_completion)
|
104
|
-
end
|
105
|
-
|
106
|
-
def tty_log_job_initialize
|
107
|
-
output = "Beginning job "
|
108
|
-
output += "'#{@job.name}'".bold if @job.name
|
109
|
-
output
|
110
|
-
end
|
111
|
-
|
112
|
-
def tty_log_transformation(transformer)
|
113
|
-
output = " ✓".green
|
114
|
-
output += " #{transformer}"
|
115
|
-
end
|
116
|
-
|
117
|
-
def tty_log_transformation_failure(exception, transformer)
|
118
|
-
output = " ✖".red
|
119
|
-
output += " Failed to build #{transformer}. #{exception.message}"
|
120
|
-
end
|
121
|
-
|
122
|
-
def tty_log_completion
|
123
|
-
status = @job_logger.success ? 'Success' : 'Failed'
|
124
|
-
job_completion = @job_logger.success ? 'Completed' : 'Partially completed'
|
125
|
-
output = "\n#{job_completion} job"
|
126
|
-
output += " '#{@job.name}'".bold if @job.name
|
127
|
-
output += " in #{ChronicDuration.output(@job_logger.duration)}" if @job_logger.duration
|
128
|
-
output += "\n Status:\t".light_black + status
|
129
|
-
output += "\n Completed:\t".light_black + "#{@job_logger.job_log.num_records_processed}"
|
130
|
-
output += "\n Latest:\t".light_black + "#{@job_logger.job_log.highest_timestamp.iso8601}" if @job_logger.job_log.highest_timestamp
|
131
|
-
output
|
132
162
|
end
|
133
163
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require 'active_support/core_ext/hash/keys'
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
@@ -8,7 +8,7 @@ module Chronicle
|
|
8
8
|
|
9
9
|
# Whether a given namespace exists
|
10
10
|
def exists?(namespace)
|
11
|
-
Chronicle::ETL::Config.exists?(
|
11
|
+
Chronicle::ETL::Config.exists?('secrets', namespace)
|
12
12
|
end
|
13
13
|
|
14
14
|
# Save a setting to a namespaced config file
|
@@ -47,7 +47,7 @@ module Chronicle
|
|
47
47
|
|
48
48
|
# Read secrets from a config file
|
49
49
|
def read(namespace)
|
50
|
-
definition = Chronicle::ETL::Config.load(
|
50
|
+
definition = Chronicle::ETL::Config.load('secrets', namespace)
|
51
51
|
definition[:secrets] || {}
|
52
52
|
end
|
53
53
|
|
@@ -57,7 +57,7 @@ module Chronicle
|
|
57
57
|
secrets: (secrets || {}).transform_keys(&:to_s),
|
58
58
|
chronicle_etl_version: Chronicle::ETL::VERSION
|
59
59
|
}
|
60
|
-
Chronicle::ETL::Config.write(
|
60
|
+
Chronicle::ETL::Config.write('secrets', namespace, data)
|
61
61
|
end
|
62
62
|
|
63
63
|
# Which config files are available in ~/.config/chronicle/etl/secrets
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
class BufferTransformer < Chronicle::ETL::Transformer
|
6
|
+
register_connector do |r|
|
7
|
+
r.identifier = :buffer
|
8
|
+
r.description = 'by buffering'
|
9
|
+
end
|
10
|
+
|
11
|
+
setting :size, default: 10, description: 'The size of the buffer'
|
12
|
+
|
13
|
+
def transform(record)
|
14
|
+
stash_record(record)
|
15
|
+
|
16
|
+
# FIXME: this doesn't seem to be working with the runner
|
17
|
+
return if @stashed_records.size < @config.size
|
18
|
+
|
19
|
+
# FIXME: this will result in the wrong extraction being associated with
|
20
|
+
# the batch of flushed records
|
21
|
+
flush_stashed_records.map(&:data)
|
22
|
+
end
|
23
|
+
|
24
|
+
def finish
|
25
|
+
flush_stashed_records
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
class ChronicleTransformer < Chronicle::ETL::Transformer
|
6
|
+
register_connector do |r|
|
7
|
+
r.identifier = :chronicle
|
8
|
+
r.description = 'records to Chronicle schema'
|
9
|
+
end
|
10
|
+
|
11
|
+
def transform(record)
|
12
|
+
converter_klass = find_converter(record.extraction)
|
13
|
+
# TODO: handle missing converter
|
14
|
+
|
15
|
+
converter_klass.new.call(record) do |transformed_record|
|
16
|
+
yield transformed_record.data
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def find_converter(extraction)
|
23
|
+
Chronicle::ETL::Registry::Connectors.find_converter_for_source(
|
24
|
+
source: extraction.source,
|
25
|
+
type: extraction.type,
|
26
|
+
strategy: extraction.strategy,
|
27
|
+
target: :chronicle
|
28
|
+
)&.klass
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|