chronicle-etl 0.5.5 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +75 -68
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +51 -49
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +8 -6
- data/lib/chronicle/etl/cli/connectors.rb +7 -7
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +14 -15
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +18 -8
- data/lib/chronicle/etl/configurable.rb +20 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +38 -26
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +2 -2
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +93 -36
- data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
- data/lib/chronicle/etl/registry/plugins.rb +27 -19
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +4 -4
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +49 -47
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -5,54 +7,109 @@ module Chronicle
|
|
5
7
|
module Registry
|
6
8
|
# A singleton module that acts as a registry of connector classes available for ETL jobs
|
7
9
|
module Connectors
|
8
|
-
PHASES = [
|
10
|
+
PHASES = %i[extractor transformer loader].freeze
|
9
11
|
public_constant :PHASES
|
10
12
|
|
11
13
|
class << self
|
12
14
|
attr_accessor :connectors
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.register(connector)
|
18
|
+
connectors << connector
|
19
|
+
end
|
13
20
|
|
14
|
-
|
15
|
-
|
21
|
+
def self.connectors
|
22
|
+
@connectors ||= []
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.ancestor_for_phase(phase)
|
26
|
+
case phase
|
27
|
+
when :extractor
|
28
|
+
Chronicle::ETL::Extractor
|
29
|
+
when :transformer
|
30
|
+
Chronicle::ETL::Transformer
|
31
|
+
when :loader
|
32
|
+
Chronicle::ETL::Loader
|
16
33
|
end
|
34
|
+
end
|
17
35
|
|
18
|
-
|
19
|
-
|
36
|
+
def self.find_converter_for_source(source:, type: nil, strategy: nil, target: nil)
|
37
|
+
# FIXME: we're assuming extractor plugin has been loaded already
|
38
|
+
# This may not be the case if the schema converter is running
|
39
|
+
# off a JSON dump of extraction data.
|
40
|
+
# plugin = source_klass.connector_registration.source
|
41
|
+
# type = source_klass.connector_registration.type
|
42
|
+
# strategy = source_klass.connector_registration.strategy
|
43
|
+
|
44
|
+
connectors.find do |c|
|
45
|
+
c.phase == :transformer &&
|
46
|
+
c.source == source &&
|
47
|
+
(type.nil? || c.type == type) &&
|
48
|
+
(strategy.nil? || c.strategy == strategy || c.strategy.nil?) &&
|
49
|
+
(target.nil? || c.to_schema == target)
|
20
50
|
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# Find connector from amongst those currently loaded
|
54
|
+
def self.find_by_phase_and_identifier_built_in(phase, identifier)
|
55
|
+
connectors.find { |c| c.phase == phase.to_sym && c.identifier == identifier.to_sym }
|
56
|
+
end
|
57
|
+
|
58
|
+
# Find connector and load relevant plugin to find it if necessary
|
59
|
+
def self.find_by_phase_and_identifier(phase, identifier)
|
60
|
+
connector = find_by_phase_and_identifier_built_in(phase, identifier)
|
61
|
+
return connector if connector
|
62
|
+
|
63
|
+
# determine if we need to try to load a local file. if it has a dot in the identifier, we treat it as a file
|
64
|
+
return find_by_phase_and_identifier_local(phase, identifier) if identifier.to_s.include?('.')
|
65
|
+
|
66
|
+
# Example identifier: lastfm:listens:api
|
67
|
+
plugin, type, strategy = identifier.split(':')
|
68
|
+
.map { |part| part.gsub('-', '_') }
|
69
|
+
.map(&:to_sym)
|
21
70
|
|
22
|
-
|
23
|
-
|
24
|
-
|
71
|
+
plugin_identifier = plugin.to_s.gsub('_', '-')
|
72
|
+
|
73
|
+
unless Chronicle::ETL::Registry::Plugins.installed?(plugin_identifier)
|
74
|
+
raise Chronicle::ETL::PluginNotInstalledError, plugin_identifier
|
25
75
|
end
|
26
76
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
# if not available in built-in connectors, try to activate a
|
33
|
-
# relevant plugin and try again
|
34
|
-
if identifier.include?(":")
|
35
|
-
plugin, name = identifier.split(":")
|
36
|
-
else
|
37
|
-
# This case handles the case where the identifier is a
|
38
|
-
# shorthand (ie `imessage`) because there's only one default
|
39
|
-
# connector.
|
40
|
-
plugin = identifier
|
41
|
-
end
|
42
|
-
|
43
|
-
raise(Chronicle::ETL::PluginNotInstalledError.new(plugin)) unless Chronicle::ETL::Registry::Plugins.installed?(plugin)
|
44
|
-
|
45
|
-
Chronicle::ETL::Registry::Plugins.activate(plugin)
|
46
|
-
|
47
|
-
candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
|
48
|
-
# if no name given, just use first connector with right phase/plugin
|
49
|
-
# TODO: set up a property for connectors to specify that they're the
|
50
|
-
# default connector for the plugin
|
51
|
-
candidates = candidates.select { |c| c.identifier == name } if name
|
52
|
-
connector = candidates.first
|
53
|
-
|
54
|
-
connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
|
77
|
+
Chronicle::ETL::Registry::Plugins.activate(plugin_identifier)
|
78
|
+
|
79
|
+
# find most specific connector that matches the identifier
|
80
|
+
connector = connectors.find do |c|
|
81
|
+
c.plugin == plugin && (type.nil? || c.type == type) && (strategy.nil? || c.strategy == strategy)
|
55
82
|
end
|
83
|
+
|
84
|
+
connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
|
85
|
+
end
|
86
|
+
|
87
|
+
# Load a connector from a Ruby script on the local file system
|
88
|
+
def self.find_by_phase_and_identifier_local(phase, identifier)
|
89
|
+
script = File.read(identifier)
|
90
|
+
raise ConnectorNotAvailableError, "Connector '#{identifier}' not found" if script.nil?
|
91
|
+
|
92
|
+
# load the file by evaluating the contents
|
93
|
+
eval(script, TOPLEVEL_BINDING, __FILE__, __LINE__) # rubocop:disable Security/Eval
|
94
|
+
|
95
|
+
# read the file and look for all class definitions in the ruby script.
|
96
|
+
class_names = script.scan(/class (\w+)/).flatten
|
97
|
+
|
98
|
+
class_names.each do |class_name|
|
99
|
+
klass = Object.const_get(class_name)
|
100
|
+
|
101
|
+
next unless klass.ancestors.include?(ancestor_for_phase(phase))
|
102
|
+
|
103
|
+
registration = ::Chronicle::ETL::Registry::ConnectorRegistration.new(klass)
|
104
|
+
|
105
|
+
klass.connector_registration = registration
|
106
|
+
return registration
|
107
|
+
# return klass
|
108
|
+
rescue NameError
|
109
|
+
# ignore
|
110
|
+
end
|
111
|
+
|
112
|
+
raise ConnectorNotAvailableError, "Connector '#{identifier}' not found"
|
56
113
|
end
|
57
114
|
end
|
58
115
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
require 'rubygems/command'
|
3
5
|
require 'rubygems/commands/install_command'
|
@@ -13,16 +15,17 @@ module Chronicle
|
|
13
15
|
# @todo Better validation for whether a gem is actually a plugin
|
14
16
|
# @todo Add ways to load a plugin that don't require a gem on rubygems.org
|
15
17
|
module Plugins
|
16
|
-
KNOWN_PLUGINS = [
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
18
|
+
KNOWN_PLUGINS = %w[
|
19
|
+
apple-podcasts
|
20
|
+
email
|
21
|
+
foursquare
|
22
|
+
github
|
23
|
+
imessage
|
24
|
+
pinboard
|
25
|
+
safari
|
26
|
+
shell
|
27
|
+
spotify
|
28
|
+
zulip
|
26
29
|
].freeze
|
27
30
|
public_constant :KNOWN_PLUGINS
|
28
31
|
|
@@ -30,7 +33,7 @@ module Chronicle
|
|
30
33
|
# make registry aware of existence of name of non-gem plugin
|
31
34
|
def self.register_standalone(name:)
|
32
35
|
plugin = Chronicle::ETL::Registry::PluginRegistration.new do |p|
|
33
|
-
p.name = name
|
36
|
+
p.name = name.to_sym
|
34
37
|
p.installed = true
|
35
38
|
end
|
36
39
|
|
@@ -44,19 +47,19 @@ module Chronicle
|
|
44
47
|
|
45
48
|
# Check whether a given plugin is installed
|
46
49
|
def self.installed?(name)
|
47
|
-
installed.map(&:name).include?(name)
|
50
|
+
installed.map(&:name).include?(name.to_sym)
|
48
51
|
end
|
49
52
|
|
50
53
|
# List of plugins installed as standalone
|
51
54
|
def self.installed_standalone
|
52
|
-
@
|
55
|
+
@installed_standalone ||= []
|
53
56
|
end
|
54
57
|
|
55
58
|
# List of plugins installed as gems
|
56
59
|
def self.installed_as_gem
|
57
60
|
installed_gemspecs_latest.map do |gem|
|
58
61
|
Chronicle::ETL::Registry::PluginRegistration.new do |p|
|
59
|
-
p.name = gem.name.sub(
|
62
|
+
p.name = gem.name.sub('chronicle-', '').to_sym
|
60
63
|
p.gem = gem.name
|
61
64
|
p.description = gem.description
|
62
65
|
p.version = gem.version.to_s
|
@@ -106,7 +109,9 @@ module Chronicle
|
|
106
109
|
# All versions of all plugins currently installed
|
107
110
|
def self.installed_gemspecs
|
108
111
|
# TODO: add check for chronicle-etl dependency
|
109
|
-
Gem::Specification.filter
|
112
|
+
Gem::Specification.filter do |s|
|
113
|
+
s.name.match(/^chronicle-/) && s.name != 'chronicle-etl' && s.name != 'chronicle-core'
|
114
|
+
end
|
110
115
|
end
|
111
116
|
|
112
117
|
# Latest version of each installed plugin
|
@@ -120,15 +125,18 @@ module Chronicle
|
|
120
125
|
def self.activate(name)
|
121
126
|
# By default, activates the latest available version of a gem
|
122
127
|
# so we don't have to run Kernel#gem separately
|
123
|
-
|
128
|
+
|
129
|
+
plugin_require_name = name.to_s.gsub('-', '_')
|
130
|
+
require "chronicle/#{plugin_require_name}"
|
124
131
|
rescue Gem::ConflictError => e
|
125
132
|
# TODO: figure out if there's more we can do here
|
126
|
-
raise Chronicle::ETL::PluginConflictError.new(name),
|
127
|
-
|
133
|
+
raise Chronicle::ETL::PluginConflictError.new(name),
|
134
|
+
"Plugin '#{plugin_require_name}' couldn't be loaded. #{e.message}"
|
135
|
+
rescue StandardError, LoadError
|
128
136
|
# StandardError to catch random non-loading problems that might occur
|
129
137
|
# when requiring the plugin (eg class macro invoked the wrong way)
|
130
138
|
# TODO: decide if this should be separated
|
131
|
-
raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{
|
139
|
+
raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{plugin_require_name}' couldn't be loaded"
|
132
140
|
end
|
133
141
|
|
134
142
|
# Install a plugin to local gems
|
data/lib/chronicle/etl/runner.rb
CHANGED
@@ -1,133 +1,163 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'colorize'
|
2
4
|
require 'chronic_duration'
|
3
|
-
require
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
5
|
+
require 'tty-spinner'
|
6
|
+
|
7
|
+
module Chronicle
|
8
|
+
module ETL
|
9
|
+
class Runner
|
10
|
+
def initialize(job)
|
11
|
+
@job = job
|
12
|
+
@job_logger = Chronicle::ETL::JobLogger.new(@job)
|
13
|
+
end
|
14
|
+
|
15
|
+
def run!
|
16
|
+
begin_job
|
17
|
+
validate_job
|
18
|
+
instantiate_connectors
|
19
|
+
prepare_job
|
20
|
+
prepare_ui
|
21
|
+
run_extraction
|
22
|
+
rescue Chronicle::ETL::ExtractionError => e
|
23
|
+
@job_logger&.error
|
24
|
+
raise(Chronicle::ETL::RunnerError, "Extraction failed. #{e.message}")
|
25
|
+
rescue Interrupt
|
26
|
+
@job_logger&.error
|
27
|
+
raise(Chronicle::ETL::RunInterruptedError, 'Job interrupted.')
|
28
|
+
# rescue StandardError => e
|
29
|
+
# # Just throwing this in here until we have better exception handling in
|
30
|
+
# # loaders, etc
|
31
|
+
# @job_logger&.error
|
32
|
+
# raise(Chronicle::ETL::RunnerError, "Error running job. #{e.message}")
|
33
|
+
ensure
|
34
|
+
finish_job
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def begin_job
|
40
|
+
Chronicle::ETL::Logger.info(tty_log_job_initialize)
|
41
|
+
@initialization_spinner = TTY::Spinner.new(':spinner :title', format: :dots_2)
|
42
|
+
end
|
43
|
+
|
44
|
+
def validate_job
|
45
|
+
@initialization_spinner.update(title: 'Validating job')
|
46
|
+
@job.job_definition.validate!
|
47
|
+
end
|
48
|
+
|
49
|
+
def instantiate_connectors
|
50
|
+
@initialization_spinner.update(title: 'Initializing connectors')
|
51
|
+
@extractor = @job.instantiate_extractor
|
52
|
+
@transformers = @job.instantiate_transformers
|
53
|
+
@loader = @job.instantiate_loader
|
54
|
+
end
|
55
|
+
|
56
|
+
def prepare_job
|
57
|
+
@initialization_spinner.update(title: 'Preparing job')
|
58
|
+
@job_logger.start
|
59
|
+
@loader.start
|
60
|
+
|
61
|
+
@initialization_spinner.update(title: 'Preparing extraction')
|
62
|
+
@initialization_spinner.auto_spin
|
63
|
+
@extractor.prepare
|
64
|
+
@initialization_spinner.success("(#{'successful'.green})")
|
65
|
+
Chronicle::ETL::Logger.info("\n")
|
66
|
+
end
|
67
|
+
|
68
|
+
def prepare_ui
|
69
|
+
total = @extractor.results_count
|
70
|
+
@progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
|
71
|
+
Chronicle::ETL::Logger.attach_to_ui(@progress_bar)
|
72
|
+
end
|
73
|
+
|
74
|
+
def run_extraction
|
75
|
+
# Pattern based on Kiba's StreamingRunner
|
76
|
+
# https://github.com/thbar/kiba/blob/master/lib/kiba/streaming_runner.rb
|
77
|
+
stream = extractor_stream
|
78
|
+
recurser = ->(s, t) { transform_stream(s, t) }
|
79
|
+
@transformers.reduce(stream, &recurser).each do |record|
|
80
|
+
Chronicle::ETL::Logger.debug(tty_log_transformation(record))
|
81
|
+
@job_logger.log_transformation(record)
|
82
|
+
@progress_bar.increment
|
83
|
+
load_record(record)
|
84
|
+
end
|
85
|
+
|
86
|
+
@progress_bar.finish
|
87
|
+
|
88
|
+
# This is typically a slow method (writing to stdout, writing a big file, etc)
|
89
|
+
# TODO: consider adding a spinner?
|
90
|
+
@loader.finish
|
91
|
+
@job_logger.finish
|
92
|
+
end
|
93
|
+
|
94
|
+
# Initial stream of extracted data, wrapped in a Record class
|
95
|
+
def extractor_stream
|
96
|
+
Enumerator.new do |y|
|
97
|
+
@extractor.extract do |extraction|
|
98
|
+
record = Chronicle::ETL::Record.new(data: extraction.data, extraction: extraction)
|
99
|
+
y << record
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# For a given stream of records and a given transformer,
|
105
|
+
# returns a new stream of transformed records and finally
|
106
|
+
# calls the finish method on the transformer
|
107
|
+
def transform_stream(stream, transformer)
|
108
|
+
Enumerator.new do |y|
|
109
|
+
stream.each do |record|
|
110
|
+
transformer.call(record) do |transformed_record|
|
111
|
+
y << transformed_record
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
transformer.call_finish do |transformed_record|
|
116
|
+
y << transformed_record
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
def load_record(record)
|
122
|
+
@loader.load(record.data) unless @job.dry_run?
|
123
|
+
end
|
124
|
+
|
125
|
+
def finish_job
|
126
|
+
@job_logger.save
|
127
|
+
@progress_bar&.finish
|
128
|
+
Chronicle::ETL::Logger.detach_from_ui
|
129
|
+
Chronicle::ETL::Logger.info(tty_log_completion)
|
130
|
+
end
|
131
|
+
|
132
|
+
def tty_log_job_initialize
|
133
|
+
output = 'Beginning job '
|
134
|
+
output += "'#{@job.name}'".bold if @job.name
|
135
|
+
output
|
136
|
+
end
|
137
|
+
|
138
|
+
def tty_log_transformation(record)
|
139
|
+
output = ' ✓'.green
|
140
|
+
output + " #{record}"
|
141
|
+
end
|
142
|
+
|
143
|
+
def tty_log_transformation_failure(exception, transformer)
|
144
|
+
output = ' ✖'.red
|
145
|
+
output + " Failed to transform #{transformer}. #{exception.message}"
|
146
|
+
end
|
147
|
+
|
148
|
+
def tty_log_completion
|
149
|
+
status = @job_logger.success ? 'Success' : 'Failed'
|
150
|
+
job_completion = @job_logger.success ? 'Completed' : 'Partially completed'
|
151
|
+
output = "\n#{job_completion} job"
|
152
|
+
output += " '#{@job.name}'".bold if @job.name
|
153
|
+
output += " in #{ChronicDuration.output(@job_logger.duration)}" if @job_logger.duration
|
154
|
+
output += "\n Status:\t".light_black + status
|
155
|
+
output += "\n Completed:\t".light_black + @job_logger.job_log.num_records_processed.to_s
|
156
|
+
if @job_logger.job_log.highest_timestamp
|
157
|
+
output += "\n Latest:\t".light_black + @job_logger.job_log.highest_timestamp.iso8601.to_s
|
158
|
+
end
|
159
|
+
output
|
160
|
+
end
|
73
161
|
end
|
74
|
-
|
75
|
-
@progress_bar.finish
|
76
|
-
|
77
|
-
# This is typically a slow method (writing to stdout, writing a big file, etc)
|
78
|
-
# TODO: consider adding a spinner?
|
79
|
-
@loader.finish
|
80
|
-
@job_logger.finish
|
81
|
-
end
|
82
|
-
|
83
|
-
def process_extraction(extraction)
|
84
|
-
# For each extraction from our extractor, we create a new transformer
|
85
|
-
transformer = @job.instantiate_transformer(extraction)
|
86
|
-
|
87
|
-
# And then transform that record, logging it if we're in debug log level
|
88
|
-
record = transformer.transform
|
89
|
-
Chronicle::ETL::Logger.debug(tty_log_transformation(transformer))
|
90
|
-
@job_logger.log_transformation(transformer)
|
91
|
-
|
92
|
-
# Then send the results to the loader
|
93
|
-
@loader.load(record) unless @job.dry_run?
|
94
|
-
rescue Chronicle::ETL::TransformationError => e
|
95
|
-
# TODO: have an option to cancel job if we encounter an error
|
96
|
-
Chronicle::ETL::Logger.error(tty_log_transformation_failure(e, transformer))
|
97
|
-
end
|
98
|
-
|
99
|
-
def finish_job
|
100
|
-
@job_logger.save
|
101
|
-
@progress_bar&.finish
|
102
|
-
Chronicle::ETL::Logger.detach_from_ui
|
103
|
-
Chronicle::ETL::Logger.info(tty_log_completion)
|
104
|
-
end
|
105
|
-
|
106
|
-
def tty_log_job_initialize
|
107
|
-
output = "Beginning job "
|
108
|
-
output += "'#{@job.name}'".bold if @job.name
|
109
|
-
output
|
110
|
-
end
|
111
|
-
|
112
|
-
def tty_log_transformation(transformer)
|
113
|
-
output = " ✓".green
|
114
|
-
output += " #{transformer}"
|
115
|
-
end
|
116
|
-
|
117
|
-
def tty_log_transformation_failure(exception, transformer)
|
118
|
-
output = " ✖".red
|
119
|
-
output += " Failed to build #{transformer}. #{exception.message}"
|
120
|
-
end
|
121
|
-
|
122
|
-
def tty_log_completion
|
123
|
-
status = @job_logger.success ? 'Success' : 'Failed'
|
124
|
-
job_completion = @job_logger.success ? 'Completed' : 'Partially completed'
|
125
|
-
output = "\n#{job_completion} job"
|
126
|
-
output += " '#{@job.name}'".bold if @job.name
|
127
|
-
output += " in #{ChronicDuration.output(@job_logger.duration)}" if @job_logger.duration
|
128
|
-
output += "\n Status:\t".light_black + status
|
129
|
-
output += "\n Completed:\t".light_black + "#{@job_logger.job_log.num_records_processed}"
|
130
|
-
output += "\n Latest:\t".light_black + "#{@job_logger.job_log.highest_timestamp.iso8601}" if @job_logger.job_log.highest_timestamp
|
131
|
-
output
|
132
162
|
end
|
133
163
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require 'active_support/core_ext/hash/keys'
|
2
2
|
|
3
3
|
module Chronicle
|
4
4
|
module ETL
|
@@ -8,7 +8,7 @@ module Chronicle
|
|
8
8
|
|
9
9
|
# Whether a given namespace exists
|
10
10
|
def exists?(namespace)
|
11
|
-
Chronicle::ETL::Config.exists?(
|
11
|
+
Chronicle::ETL::Config.exists?('secrets', namespace)
|
12
12
|
end
|
13
13
|
|
14
14
|
# Save a setting to a namespaced config file
|
@@ -47,7 +47,7 @@ module Chronicle
|
|
47
47
|
|
48
48
|
# Read secrets from a config file
|
49
49
|
def read(namespace)
|
50
|
-
definition = Chronicle::ETL::Config.load(
|
50
|
+
definition = Chronicle::ETL::Config.load('secrets', namespace)
|
51
51
|
definition[:secrets] || {}
|
52
52
|
end
|
53
53
|
|
@@ -57,7 +57,7 @@ module Chronicle
|
|
57
57
|
secrets: (secrets || {}).transform_keys(&:to_s),
|
58
58
|
chronicle_etl_version: Chronicle::ETL::VERSION
|
59
59
|
}
|
60
|
-
Chronicle::ETL::Config.write(
|
60
|
+
Chronicle::ETL::Config.write('secrets', namespace, data)
|
61
61
|
end
|
62
62
|
|
63
63
|
# Which config files are available in ~/.config/chronicle/etl/secrets
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
class BufferTransformer < Chronicle::ETL::Transformer
|
6
|
+
register_connector do |r|
|
7
|
+
r.identifier = :buffer
|
8
|
+
r.description = 'by buffering'
|
9
|
+
end
|
10
|
+
|
11
|
+
setting :size, default: 10, description: 'The size of the buffer'
|
12
|
+
|
13
|
+
def transform(record)
|
14
|
+
stash_record(record)
|
15
|
+
|
16
|
+
# FIXME: this doesn't seem to be working with the runner
|
17
|
+
return if @stashed_records.size < @config.size
|
18
|
+
|
19
|
+
# FIXME: this will result in the wrong extraction being associated with
|
20
|
+
# the batch of flushed records
|
21
|
+
flush_stashed_records.map(&:data)
|
22
|
+
end
|
23
|
+
|
24
|
+
def finish
|
25
|
+
flush_stashed_records
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
class ChronicleTransformer < Chronicle::ETL::Transformer
|
6
|
+
register_connector do |r|
|
7
|
+
r.identifier = :chronicle
|
8
|
+
r.description = 'records to Chronicle schema'
|
9
|
+
end
|
10
|
+
|
11
|
+
def transform(record)
|
12
|
+
converter_klass = find_converter(record.extraction)
|
13
|
+
# TODO: handle missing converter
|
14
|
+
|
15
|
+
converter_klass.new.call(record) do |transformed_record|
|
16
|
+
yield transformed_record.data
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def find_converter(extraction)
|
23
|
+
Chronicle::ETL::Registry::Connectors.find_converter_for_source(
|
24
|
+
source: extraction.source,
|
25
|
+
type: extraction.type,
|
26
|
+
strategy: extraction.strategy,
|
27
|
+
target: :chronicle
|
28
|
+
)&.klass
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|