chronicle-etl 0.5.5 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +75 -68
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +51 -49
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +8 -6
- data/lib/chronicle/etl/cli/connectors.rb +7 -7
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +14 -15
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +18 -8
- data/lib/chronicle/etl/configurable.rb +20 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +38 -26
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +2 -2
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +93 -36
- data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
- data/lib/chronicle/etl/registry/plugins.rb +27 -19
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +4 -4
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +49 -47
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require 'tty-prompt'
|
4
4
|
|
5
5
|
module Chronicle
|
6
6
|
module ETL
|
@@ -10,8 +10,8 @@ module Chronicle
|
|
10
10
|
default_task 'list'
|
11
11
|
namespace :secrets
|
12
12
|
|
13
|
-
desc
|
14
|
-
def set(namespace, key, value=nil)
|
13
|
+
desc 'set NAMESPACE KEY [VALUE]', 'Add a secret. VALUE can be set as argument or from stdin'
|
14
|
+
def set(namespace, key, value = nil)
|
15
15
|
validate_namespace(namespace)
|
16
16
|
|
17
17
|
if value
|
@@ -24,23 +24,23 @@ module Chronicle
|
|
24
24
|
end
|
25
25
|
|
26
26
|
Chronicle::ETL::Secrets.set(namespace, key, value.strip)
|
27
|
-
cli_exit(message:
|
27
|
+
cli_exit(message: 'Secret set')
|
28
28
|
rescue TTY::Reader::InputInterrupt
|
29
29
|
cli_fail(message: "\nSecret not set")
|
30
30
|
end
|
31
31
|
|
32
|
-
desc
|
32
|
+
desc 'unset NAMESPACE KEY', 'Remove a secret'
|
33
33
|
def unset(namespace, key)
|
34
34
|
validate_namespace(namespace)
|
35
35
|
|
36
36
|
Chronicle::ETL::Secrets.unset(namespace, key)
|
37
|
-
cli_exit(message:
|
37
|
+
cli_exit(message: 'Secret unset')
|
38
38
|
end
|
39
39
|
|
40
|
-
desc
|
41
|
-
def list(namespace=nil)
|
40
|
+
desc 'list', 'List available secrets'
|
41
|
+
def list(namespace = nil)
|
42
42
|
all_secrets = Chronicle::ETL::Secrets.all(namespace)
|
43
|
-
cli_exit(message:
|
43
|
+
cli_exit(message: 'No secrets are stored') unless all_secrets.any?
|
44
44
|
|
45
45
|
rows = []
|
46
46
|
all_secrets.each do |namespace, secrets|
|
@@ -51,9 +51,9 @@ module Chronicle
|
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
|
-
headers = [
|
54
|
+
headers = %w[namespace key value].map { |h| h.upcase.bold }
|
55
55
|
|
56
|
-
puts
|
56
|
+
puts 'Available secrets:'
|
57
57
|
table = TTY::Table.new(headers, rows)
|
58
58
|
puts table.render(indent: 0, padding: [0, 2])
|
59
59
|
end
|
@@ -61,7 +61,9 @@ module Chronicle
|
|
61
61
|
private
|
62
62
|
|
63
63
|
def validate_namespace(namespace)
|
64
|
-
|
64
|
+
return if Chronicle::ETL::Secrets.valid_namespace_name?(namespace)
|
65
|
+
|
66
|
+
cli_fail(message: "'#{namespace}' is not a valid namespace")
|
65
67
|
end
|
66
68
|
end
|
67
69
|
end
|
@@ -11,7 +11,7 @@ module Chronicle
|
|
11
11
|
end
|
12
12
|
list.sort! { |a, b| a[0] <=> b[0] }
|
13
13
|
|
14
|
-
shell.say
|
14
|
+
shell.say 'COMMANDS'.bold
|
15
15
|
shell.print_table(list, indent: 2, truncate: true)
|
16
16
|
shell.say
|
17
17
|
class_options_help(shell)
|
@@ -19,7 +19,7 @@ module Chronicle
|
|
19
19
|
|
20
20
|
# Show docs with command:subcommand pattern.
|
21
21
|
# For `help` command, don't use colon
|
22
|
-
def self.banner(command,
|
22
|
+
def self.banner(command, _namespace = nil, _subcommand = false)
|
23
23
|
if command.name == 'help'
|
24
24
|
"#{subcommand_prefix} #{command.usage}"
|
25
25
|
else
|
@@ -29,7 +29,9 @@ module Chronicle
|
|
29
29
|
|
30
30
|
# Use subcommand classname to derive display name for subcommand
|
31
31
|
def self.subcommand_prefix
|
32
|
-
|
32
|
+
name.gsub(/.*::/, '').gsub(/^[A-Z]/) do |match|
|
33
|
+
match[0].downcase
|
34
|
+
end.gsub(/[A-Z]/) { |match| "-#{match[0].downcase}" }
|
33
35
|
end
|
34
36
|
end
|
35
37
|
end
|
data/lib/chronicle/etl/config.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require 'active_support/core_ext/hash/keys'
|
2
2
|
require 'fileutils'
|
3
3
|
require 'yaml'
|
4
4
|
|
@@ -15,7 +15,7 @@ module Chronicle
|
|
15
15
|
path = base.join("#{identifier}.yml")
|
16
16
|
return {} unless path.exist?
|
17
17
|
|
18
|
-
YAML.
|
18
|
+
YAML.safe_load_file(path, symbolize_names: true, permitted_classes: [Symbol, Date, Time])
|
19
19
|
end
|
20
20
|
|
21
21
|
# Writes a hash as a yml config file
|
@@ -31,28 +31,38 @@ module Chronicle
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
# Returns path for a given config type and identifier
|
35
|
+
def path(type, identifier)
|
36
|
+
base = config_pathname_for_type(type)
|
37
|
+
base.join("#{identifier}.yml")
|
38
|
+
end
|
39
|
+
|
40
|
+
# Whether a config exists for a given type and identifier
|
34
41
|
def exists?(type, identifier)
|
35
42
|
base = config_pathname_for_type(type)
|
36
43
|
path = base.join("#{identifier}.yml")
|
37
|
-
|
44
|
+
path.exist?
|
38
45
|
end
|
39
46
|
|
40
47
|
# Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
|
41
48
|
def available_jobs
|
42
|
-
Dir.glob(File.join(config_pathname_for_type(
|
43
|
-
File.basename(filename,
|
49
|
+
Dir.glob(File.join(config_pathname_for_type('jobs'), '*.yml')).map do |filename|
|
50
|
+
File.basename(filename, '.*')
|
44
51
|
end
|
45
52
|
end
|
46
53
|
|
54
|
+
# Returns all configs available for a given type
|
47
55
|
def available_configs(type)
|
48
|
-
Dir.glob(File.join(config_pathname_for_type(type),
|
49
|
-
File.basename(filename,
|
56
|
+
Dir.glob(File.join(config_pathname_for_type(type), '*.yml')).map do |filename|
|
57
|
+
File.basename(filename, '.*')
|
50
58
|
end
|
51
59
|
end
|
52
60
|
|
53
61
|
# Load a job definition from job config directory
|
54
62
|
def read_job(job_name)
|
55
|
-
load('jobs', job_name)
|
63
|
+
definition = load('jobs', job_name)
|
64
|
+
definition[:name] ||= job_name
|
65
|
+
definition
|
56
66
|
end
|
57
67
|
|
58
68
|
def config_pathname
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'ostruct'
|
4
|
+
require 'chronic_duration'
|
5
5
|
|
6
6
|
module Chronicle
|
7
7
|
module ETL
|
@@ -19,7 +19,7 @@ module Chronicle
|
|
19
19
|
# t.config.when
|
20
20
|
module Configurable
|
21
21
|
# An individual setting for this Configurable
|
22
|
-
Setting = Struct.new(:default, :required, :type)
|
22
|
+
Setting = Struct.new(:default, :required, :type, :description)
|
23
23
|
private_constant :Setting
|
24
24
|
|
25
25
|
# Collection of user-supplied options for this Configurable
|
@@ -62,7 +62,7 @@ module Chronicle
|
|
62
62
|
# Do nothing with a given option if it's not a connector setting
|
63
63
|
next unless setting
|
64
64
|
|
65
|
-
@config[name] = coerced_value(setting, value)
|
65
|
+
@config[name] = coerced_value(setting, name, value)
|
66
66
|
end
|
67
67
|
validate_config
|
68
68
|
options
|
@@ -84,10 +84,17 @@ module Chronicle
|
|
84
84
|
raise Chronicle::ETL::ConnectorConfigurationError, "Missing options: #{missing}" if missing.count.positive?
|
85
85
|
end
|
86
86
|
|
87
|
-
def coerced_value(setting, value)
|
87
|
+
def coerced_value(setting, name, value)
|
88
88
|
setting.type ? __send__("coerce_#{setting.type}", value) : value
|
89
89
|
rescue StandardError
|
90
|
-
raise(
|
90
|
+
raise(
|
91
|
+
Chronicle::ETL::ConnectorConfigurationError,
|
92
|
+
"Could not convert value '#{value}' into a #{setting.type} for setting '#{name}'"
|
93
|
+
)
|
94
|
+
end
|
95
|
+
|
96
|
+
def coerce_hash(value)
|
97
|
+
value.is_a?(Hash) ? value : {}
|
91
98
|
end
|
92
99
|
|
93
100
|
def coerce_string(value)
|
@@ -101,12 +108,16 @@ module Chronicle
|
|
101
108
|
|
102
109
|
def coerce_boolean(value)
|
103
110
|
if value.is_a?(String)
|
104
|
-
value.downcase ==
|
111
|
+
value.downcase == 'true'
|
105
112
|
else
|
106
113
|
value
|
107
114
|
end
|
108
115
|
end
|
109
116
|
|
117
|
+
def coerce_array(value)
|
118
|
+
value.is_a?(Array) ? value : [value]
|
119
|
+
end
|
120
|
+
|
110
121
|
def coerce_time(value)
|
111
122
|
# parsing yml files might result in us getting Date objects
|
112
123
|
# we convert to DateTime first to ensure UTC
|
@@ -138,8 +149,8 @@ module Chronicle
|
|
138
149
|
# setting :when, type: :date, required: true
|
139
150
|
#
|
140
151
|
# @see ::Chronicle::ETL::Configurable
|
141
|
-
def setting(name, default: nil, required: false, type: nil)
|
142
|
-
s = Setting.new(default, required, type)
|
152
|
+
def setting(name, default: nil, required: false, type: nil, description: nil)
|
153
|
+
s = Setting.new(default, required, type, description)
|
143
154
|
settings[name] = s
|
144
155
|
end
|
145
156
|
|
@@ -26,6 +26,7 @@ module Chronicle
|
|
26
26
|
attr_reader :name
|
27
27
|
|
28
28
|
def initialize(name)
|
29
|
+
super
|
29
30
|
@name = name
|
30
31
|
end
|
31
32
|
end
|
@@ -51,10 +52,9 @@ module Chronicle
|
|
51
52
|
|
52
53
|
class ExtractionError < Error; end
|
53
54
|
|
54
|
-
class SerializationError < Error; end
|
55
|
-
|
56
55
|
class TransformationError < Error; end
|
57
|
-
|
58
56
|
class UntransformableRecordError < TransformationError; end
|
57
|
+
|
58
|
+
class LoaderError < Error; end
|
59
59
|
end
|
60
60
|
end
|
@@ -1,11 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
class Extraction
|
4
|
-
attr_accessor :data, :meta
|
6
|
+
attr_accessor :data, :meta, :source, :type, :strategy, :extractor
|
5
7
|
|
6
|
-
def initialize(data: {}, meta: {})
|
8
|
+
def initialize(data: {}, meta: {}, source: nil, type: nil, strategy: nil, extractor: nil)
|
7
9
|
@data = data
|
8
10
|
@meta = meta
|
11
|
+
@source = source
|
12
|
+
@type = type
|
13
|
+
@strategy = strategy
|
14
|
+
@extractor = extractor
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_h
|
18
|
+
{ data: @data, meta: @meta, source: @source }
|
9
19
|
end
|
10
20
|
end
|
11
21
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'csv'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -6,6 +8,7 @@ module Chronicle
|
|
6
8
|
include Extractors::Helpers::InputReader
|
7
9
|
|
8
10
|
register_connector do |r|
|
11
|
+
r.identifier = :csv
|
9
12
|
r.description = 'CSV'
|
10
13
|
end
|
11
14
|
|
@@ -33,6 +36,12 @@ module Chronicle
|
|
33
36
|
|
34
37
|
private
|
35
38
|
|
39
|
+
def all_rows
|
40
|
+
@csvs.reduce([]) do |all_rows, csv|
|
41
|
+
all_rows + csv.to_a.map(&:to_h)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
36
45
|
def prepare_sources
|
37
46
|
@csvs = []
|
38
47
|
read_input do |csv_data|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'chronicle/etl'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -21,7 +23,7 @@ module Chronicle
|
|
21
23
|
apply_options(options)
|
22
24
|
end
|
23
25
|
|
24
|
-
# Hook called before #extract. Useful for gathering data,
|
26
|
+
# Hook called before #extract. Useful for gathering data, initializing proxies, etc
|
25
27
|
def prepare; end
|
26
28
|
|
27
29
|
# An optional method to calculate how many records there are to extract. Used primarily for
|
@@ -33,7 +35,18 @@ module Chronicle
|
|
33
35
|
raise NotImplementedError
|
34
36
|
end
|
35
37
|
|
36
|
-
|
38
|
+
protected
|
39
|
+
|
40
|
+
def build_extraction(data:, meta: nil, source: nil, type: nil, strategy: nil)
|
41
|
+
Extraction.new(
|
42
|
+
extractor: self.class,
|
43
|
+
data: data,
|
44
|
+
meta: meta,
|
45
|
+
source: source || self.class.connector_registration.source,
|
46
|
+
type: type || self.class.connector_registration.type,
|
47
|
+
strategy: strategy || self.class.connector_registration.strategy
|
48
|
+
)
|
49
|
+
end
|
37
50
|
|
38
51
|
# TODO: reimplement this
|
39
52
|
# def handle_continuation
|
@@ -1,16 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'pathname'
|
2
4
|
|
3
5
|
module Chronicle
|
4
6
|
module ETL
|
5
7
|
# Return filenames that match a pattern in a directory
|
6
8
|
class FileExtractor < Chronicle::ETL::Extractor
|
7
|
-
|
8
9
|
register_connector do |r|
|
10
|
+
r.identifier = :file
|
9
11
|
r.description = 'file or directory of files'
|
10
12
|
end
|
11
13
|
|
12
14
|
setting :input, default: ['.']
|
13
|
-
setting :dir_glob_pattern, default:
|
15
|
+
setting :dir_glob_pattern, default: '**/*'
|
14
16
|
setting :larger_than
|
15
17
|
setting :smaller_than
|
16
18
|
|
@@ -32,7 +34,7 @@ module Chronicle
|
|
32
34
|
|
33
35
|
def gather_files
|
34
36
|
roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
|
35
|
-
raise(ExtractionError,
|
37
|
+
raise(ExtractionError, 'Input must exist') unless roots.all?(&:exist?)
|
36
38
|
|
37
39
|
directories, files = roots.partition(&:directory?)
|
38
40
|
|
@@ -37,7 +37,7 @@ module Chronicle
|
|
37
37
|
elsif read_from_stdin?
|
38
38
|
yield $stdin.read, $stdin
|
39
39
|
else
|
40
|
-
raise ExtractionError,
|
40
|
+
raise ExtractionError, 'No input files or stdin provided'
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
@@ -48,7 +48,7 @@ module Chronicle
|
|
48
48
|
elsif read_from_stdin?
|
49
49
|
lines_from_stdin(&block)
|
50
50
|
else
|
51
|
-
raise ExtractionError,
|
51
|
+
raise ExtractionError, 'No input files or stdin provided'
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
@@ -1,18 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
class JSONExtractor < Chronicle::ETL::Extractor
|
4
6
|
include Extractors::Helpers::InputReader
|
5
7
|
|
6
8
|
register_connector do |r|
|
9
|
+
r.identifier = :json
|
7
10
|
r.description = 'JSON'
|
8
11
|
end
|
9
12
|
|
10
13
|
setting :jsonl, default: true, type: :boolean
|
14
|
+
setting :path, default: nil, type: :string
|
11
15
|
|
12
16
|
def prepare
|
13
17
|
@jsons = []
|
14
18
|
load_input do |input|
|
15
|
-
|
19
|
+
data = parse_data(input)
|
20
|
+
@jsons += [data].flatten
|
16
21
|
end
|
17
22
|
end
|
18
23
|
|
@@ -28,10 +33,15 @@ module Chronicle
|
|
28
33
|
|
29
34
|
private
|
30
35
|
|
31
|
-
def parse_data
|
32
|
-
JSON.parse(data)
|
36
|
+
def parse_data(data)
|
37
|
+
parsed_data = JSON.parse(data)
|
38
|
+
if @config.path
|
39
|
+
parsed_data.dig(*@config.path.split('.'))
|
40
|
+
else
|
41
|
+
parsed_data
|
42
|
+
end
|
33
43
|
rescue JSON::ParserError
|
34
|
-
raise Chronicle::ETL::ExtractionError,
|
44
|
+
raise Chronicle::ETL::ExtractionError, 'Could not parse JSON'
|
35
45
|
end
|
36
46
|
|
37
47
|
def load_input(&block)
|
data/lib/chronicle/etl/job.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'forwardable'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -12,13 +14,13 @@ module Chronicle
|
|
12
14
|
def_delegators :@job_definition, :dry_run?
|
13
15
|
|
14
16
|
attr_accessor :name,
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
17
|
+
:extractor_klass,
|
18
|
+
:extractor_options,
|
19
|
+
:transformer_klasses,
|
20
|
+
:transformer_options,
|
21
|
+
:loader_klass,
|
22
|
+
:loader_options,
|
23
|
+
:job_definition
|
22
24
|
|
23
25
|
# TODO: build a proper id system
|
24
26
|
alias id name
|
@@ -39,9 +41,10 @@ module Chronicle
|
|
39
41
|
@extractor_klass.new(@extractor_options)
|
40
42
|
end
|
41
43
|
|
42
|
-
def
|
43
|
-
@
|
44
|
-
|
44
|
+
def instantiate_transformers
|
45
|
+
@job_definition.transformer_klasses.each_with_index.map do |klass, i|
|
46
|
+
klass.new(@transformer_options[i] || {})
|
47
|
+
end
|
45
48
|
end
|
46
49
|
|
47
50
|
def instantiate_loader
|
@@ -51,20 +54,35 @@ module Chronicle
|
|
51
54
|
|
52
55
|
def save_log?
|
53
56
|
# TODO: this needs more nuance
|
54
|
-
|
57
|
+
!id.nil?
|
55
58
|
end
|
56
59
|
|
57
60
|
def to_s
|
58
|
-
output = "Job"
|
59
|
-
output
|
60
|
-
output += "
|
61
|
-
output += "
|
62
|
-
output +=
|
63
|
-
|
61
|
+
output = "Job summary\n".upcase.bold
|
62
|
+
# output = ""
|
63
|
+
output += "#{name}:\n" if name
|
64
|
+
output += "→ Extracting from #{@job_definition.extractor_klass.description}\n"
|
65
|
+
output += options_to_s(@extractor_options)
|
66
|
+
|
67
|
+
@job_definition.transformer_klasses.each do |klass|
|
68
|
+
output += "→ Transforming #{klass.description}\n"
|
69
|
+
end
|
70
|
+
# TODO: transformer options
|
71
|
+
output += "→ Loading to #{@job_definition.loader_klass.description}\n"
|
72
|
+
output += options_to_s(@loader_options)
|
73
|
+
output
|
64
74
|
end
|
65
75
|
|
66
76
|
private
|
67
77
|
|
78
|
+
def options_to_s(options, indent: 4)
|
79
|
+
output = ''
|
80
|
+
options.each do |k, v|
|
81
|
+
output += "#{' ' * indent}#{k.to_s.light_blue}: #{v}\n"
|
82
|
+
end
|
83
|
+
output
|
84
|
+
end
|
85
|
+
|
68
86
|
def set_continuation
|
69
87
|
continuation = Chronicle::ETL::JobLogger.load_latest(@id)
|
70
88
|
@extractor_options[:continuation] = continuation
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'active_support/core_ext/hash/deep_merge'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -9,12 +11,14 @@ module Chronicle
|
|
9
11
|
name: 'stdin',
|
10
12
|
options: {}
|
11
13
|
},
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
transformers: [
|
15
|
+
{
|
16
|
+
name: 'null',
|
17
|
+
options: {}
|
18
|
+
}
|
19
|
+
],
|
16
20
|
loader: {
|
17
|
-
name: '
|
21
|
+
name: 'json',
|
18
22
|
options: {}
|
19
23
|
}
|
20
24
|
}.freeze
|
@@ -22,7 +26,7 @@ module Chronicle
|
|
22
26
|
attr_reader :errors
|
23
27
|
attr_accessor :definition
|
24
28
|
|
25
|
-
def initialize
|
29
|
+
def initialize
|
26
30
|
@definition = SKELETON_DEFINITION
|
27
31
|
end
|
28
32
|
|
@@ -34,12 +38,12 @@ module Chronicle
|
|
34
38
|
def validate
|
35
39
|
@errors = {}
|
36
40
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
41
|
+
extractor_klass
|
42
|
+
transformer_klasses
|
43
|
+
loader_klass
|
44
|
+
rescue Chronicle::ETL::PluginError => e
|
45
|
+
@errors[:plugins] ||= []
|
46
|
+
@errors[:plugins] << e
|
43
47
|
end
|
44
48
|
|
45
49
|
def plugins_missing?
|
@@ -48,12 +52,11 @@ module Chronicle
|
|
48
52
|
return false unless @errors[:plugins]&.any?
|
49
53
|
|
50
54
|
@errors[:plugins]
|
51
|
-
.
|
52
|
-
.any?
|
55
|
+
.any? { |e| e.instance_of?(Chronicle::ETL::PluginNotInstalledError) }
|
53
56
|
end
|
54
57
|
|
55
58
|
def validate!
|
56
|
-
raise(Chronicle::ETL::JobDefinitionError.new(self),
|
59
|
+
raise(Chronicle::ETL::JobDefinitionError.new(self), 'Job definition is invalid') unless valid?
|
57
60
|
|
58
61
|
true
|
59
62
|
end
|
@@ -66,19 +69,20 @@ module Chronicle
|
|
66
69
|
|
67
70
|
# For each connector in this job, mix in secrets into the options
|
68
71
|
def apply_default_secrets
|
69
|
-
|
72
|
+
# FIXME: handle transformer secrets
|
73
|
+
%i[extractor loader].each do |phase|
|
70
74
|
# If the options have a `secrets` key, we look up those secrets and
|
71
|
-
# mix them in. If not, use the connector's plugin name and look up
|
75
|
+
# mix them in. If not, use the connector's plugin name and look up
|
72
76
|
# secrets with the same namespace
|
73
77
|
if @definition[phase][:options][:secrets]
|
74
78
|
namespace = @definition[phase][:options][:secrets]
|
75
79
|
else
|
76
80
|
# We don't want to do this lookup for built-in connectors
|
77
|
-
next if __send__("#{phase}_klass"
|
81
|
+
next if __send__(:"#{phase}_klass").connector_registration.built_in?
|
78
82
|
|
79
83
|
# infer plugin name from connector name and use it for secrets
|
80
84
|
# namespace
|
81
|
-
namespace = @definition[phase][:name].split(
|
85
|
+
namespace = @definition[phase][:name].split(':').first
|
82
86
|
end
|
83
87
|
|
84
88
|
# Reverse merge secrets into connector's options (we want to preserve
|
@@ -98,15 +102,17 @@ module Chronicle
|
|
98
102
|
end
|
99
103
|
|
100
104
|
def extractor_klass
|
101
|
-
|
105
|
+
find_connector_klass(:extractor, @definition[:extractor][:name])
|
102
106
|
end
|
103
107
|
|
104
|
-
def
|
105
|
-
|
108
|
+
def transformer_klasses
|
109
|
+
@definition[:transformers].map do |transformer|
|
110
|
+
find_connector_klass(:transformer, transformer[:name])
|
111
|
+
end
|
106
112
|
end
|
107
113
|
|
108
114
|
def loader_klass
|
109
|
-
|
115
|
+
find_connector_klass(:loader, @definition[:loader][:name])
|
110
116
|
end
|
111
117
|
|
112
118
|
def extractor_options
|
@@ -114,7 +120,9 @@ module Chronicle
|
|
114
120
|
end
|
115
121
|
|
116
122
|
def transformer_options
|
117
|
-
@definition[:transformer
|
123
|
+
@definition[:transformers].map do |transformer|
|
124
|
+
transformer[:options]
|
125
|
+
end
|
118
126
|
end
|
119
127
|
|
120
128
|
def loader_options
|
@@ -123,12 +131,16 @@ module Chronicle
|
|
123
131
|
|
124
132
|
private
|
125
133
|
|
126
|
-
def
|
134
|
+
def find_schema_transformer_klass(source_klass, target)
|
135
|
+
Chronicle::ETL::Registry::Connectors.find_converter_for_source(source_klass, target).klass
|
136
|
+
end
|
137
|
+
|
138
|
+
def find_connector_klass(phase, identifier)
|
127
139
|
Chronicle::ETL::Registry::Connectors.find_by_phase_and_identifier(phase, identifier).klass
|
128
140
|
end
|
129
141
|
|
130
142
|
def load_credentials
|
131
|
-
|
143
|
+
%i[extractor loader].each do |phase|
|
132
144
|
credentials_name = @definition[phase].dig(:options, :credentials)
|
133
145
|
if credentials_name
|
134
146
|
credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
|