chronicle-etl 0.5.5 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +75 -68
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +51 -49
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +8 -6
- data/lib/chronicle/etl/cli/connectors.rb +7 -7
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +14 -15
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +18 -8
- data/lib/chronicle/etl/configurable.rb +20 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +38 -26
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +2 -2
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +93 -36
- data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
- data/lib/chronicle/etl/registry/plugins.rb +27 -19
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +4 -4
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +49 -47
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require 'tty-prompt'
|
4
4
|
|
5
5
|
module Chronicle
|
6
6
|
module ETL
|
@@ -10,8 +10,8 @@ module Chronicle
|
|
10
10
|
default_task 'list'
|
11
11
|
namespace :secrets
|
12
12
|
|
13
|
-
desc
|
14
|
-
def set(namespace, key, value=nil)
|
13
|
+
desc 'set NAMESPACE KEY [VALUE]', 'Add a secret. VALUE can be set as argument or from stdin'
|
14
|
+
def set(namespace, key, value = nil)
|
15
15
|
validate_namespace(namespace)
|
16
16
|
|
17
17
|
if value
|
@@ -24,23 +24,23 @@ module Chronicle
|
|
24
24
|
end
|
25
25
|
|
26
26
|
Chronicle::ETL::Secrets.set(namespace, key, value.strip)
|
27
|
-
cli_exit(message:
|
27
|
+
cli_exit(message: 'Secret set')
|
28
28
|
rescue TTY::Reader::InputInterrupt
|
29
29
|
cli_fail(message: "\nSecret not set")
|
30
30
|
end
|
31
31
|
|
32
|
-
desc
|
32
|
+
desc 'unset NAMESPACE KEY', 'Remove a secret'
|
33
33
|
def unset(namespace, key)
|
34
34
|
validate_namespace(namespace)
|
35
35
|
|
36
36
|
Chronicle::ETL::Secrets.unset(namespace, key)
|
37
|
-
cli_exit(message:
|
37
|
+
cli_exit(message: 'Secret unset')
|
38
38
|
end
|
39
39
|
|
40
|
-
desc
|
41
|
-
def list(namespace=nil)
|
40
|
+
desc 'list', 'List available secrets'
|
41
|
+
def list(namespace = nil)
|
42
42
|
all_secrets = Chronicle::ETL::Secrets.all(namespace)
|
43
|
-
cli_exit(message:
|
43
|
+
cli_exit(message: 'No secrets are stored') unless all_secrets.any?
|
44
44
|
|
45
45
|
rows = []
|
46
46
|
all_secrets.each do |namespace, secrets|
|
@@ -51,9 +51,9 @@ module Chronicle
|
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
|
-
headers = [
|
54
|
+
headers = %w[namespace key value].map { |h| h.upcase.bold }
|
55
55
|
|
56
|
-
puts
|
56
|
+
puts 'Available secrets:'
|
57
57
|
table = TTY::Table.new(headers, rows)
|
58
58
|
puts table.render(indent: 0, padding: [0, 2])
|
59
59
|
end
|
@@ -61,7 +61,9 @@ module Chronicle
|
|
61
61
|
private
|
62
62
|
|
63
63
|
def validate_namespace(namespace)
|
64
|
-
|
64
|
+
return if Chronicle::ETL::Secrets.valid_namespace_name?(namespace)
|
65
|
+
|
66
|
+
cli_fail(message: "'#{namespace}' is not a valid namespace")
|
65
67
|
end
|
66
68
|
end
|
67
69
|
end
|
@@ -11,7 +11,7 @@ module Chronicle
|
|
11
11
|
end
|
12
12
|
list.sort! { |a, b| a[0] <=> b[0] }
|
13
13
|
|
14
|
-
shell.say
|
14
|
+
shell.say 'COMMANDS'.bold
|
15
15
|
shell.print_table(list, indent: 2, truncate: true)
|
16
16
|
shell.say
|
17
17
|
class_options_help(shell)
|
@@ -19,7 +19,7 @@ module Chronicle
|
|
19
19
|
|
20
20
|
# Show docs with command:subcommand pattern.
|
21
21
|
# For `help` command, don't use colon
|
22
|
-
def self.banner(command,
|
22
|
+
def self.banner(command, _namespace = nil, _subcommand = false)
|
23
23
|
if command.name == 'help'
|
24
24
|
"#{subcommand_prefix} #{command.usage}"
|
25
25
|
else
|
@@ -29,7 +29,9 @@ module Chronicle
|
|
29
29
|
|
30
30
|
# Use subcommand classname to derive display name for subcommand
|
31
31
|
def self.subcommand_prefix
|
32
|
-
|
32
|
+
name.gsub(/.*::/, '').gsub(/^[A-Z]/) do |match|
|
33
|
+
match[0].downcase
|
34
|
+
end.gsub(/[A-Z]/) { |match| "-#{match[0].downcase}" }
|
33
35
|
end
|
34
36
|
end
|
35
37
|
end
|
data/lib/chronicle/etl/config.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require 'active_support/core_ext/hash/keys'
|
2
2
|
require 'fileutils'
|
3
3
|
require 'yaml'
|
4
4
|
|
@@ -15,7 +15,7 @@ module Chronicle
|
|
15
15
|
path = base.join("#{identifier}.yml")
|
16
16
|
return {} unless path.exist?
|
17
17
|
|
18
|
-
YAML.
|
18
|
+
YAML.safe_load_file(path, symbolize_names: true, permitted_classes: [Symbol, Date, Time])
|
19
19
|
end
|
20
20
|
|
21
21
|
# Writes a hash as a yml config file
|
@@ -31,28 +31,38 @@ module Chronicle
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
# Returns path for a given config type and identifier
|
35
|
+
def path(type, identifier)
|
36
|
+
base = config_pathname_for_type(type)
|
37
|
+
base.join("#{identifier}.yml")
|
38
|
+
end
|
39
|
+
|
40
|
+
# Whether a config exists for a given type and identifier
|
34
41
|
def exists?(type, identifier)
|
35
42
|
base = config_pathname_for_type(type)
|
36
43
|
path = base.join("#{identifier}.yml")
|
37
|
-
|
44
|
+
path.exist?
|
38
45
|
end
|
39
46
|
|
40
47
|
# Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
|
41
48
|
def available_jobs
|
42
|
-
Dir.glob(File.join(config_pathname_for_type(
|
43
|
-
File.basename(filename,
|
49
|
+
Dir.glob(File.join(config_pathname_for_type('jobs'), '*.yml')).map do |filename|
|
50
|
+
File.basename(filename, '.*')
|
44
51
|
end
|
45
52
|
end
|
46
53
|
|
54
|
+
# Returns all configs available for a given type
|
47
55
|
def available_configs(type)
|
48
|
-
Dir.glob(File.join(config_pathname_for_type(type),
|
49
|
-
File.basename(filename,
|
56
|
+
Dir.glob(File.join(config_pathname_for_type(type), '*.yml')).map do |filename|
|
57
|
+
File.basename(filename, '.*')
|
50
58
|
end
|
51
59
|
end
|
52
60
|
|
53
61
|
# Load a job definition from job config directory
|
54
62
|
def read_job(job_name)
|
55
|
-
load('jobs', job_name)
|
63
|
+
definition = load('jobs', job_name)
|
64
|
+
definition[:name] ||= job_name
|
65
|
+
definition
|
56
66
|
end
|
57
67
|
|
58
68
|
def config_pathname
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'ostruct'
|
4
|
+
require 'chronic_duration'
|
5
5
|
|
6
6
|
module Chronicle
|
7
7
|
module ETL
|
@@ -19,7 +19,7 @@ module Chronicle
|
|
19
19
|
# t.config.when
|
20
20
|
module Configurable
|
21
21
|
# An individual setting for this Configurable
|
22
|
-
Setting = Struct.new(:default, :required, :type)
|
22
|
+
Setting = Struct.new(:default, :required, :type, :description)
|
23
23
|
private_constant :Setting
|
24
24
|
|
25
25
|
# Collection of user-supplied options for this Configurable
|
@@ -62,7 +62,7 @@ module Chronicle
|
|
62
62
|
# Do nothing with a given option if it's not a connector setting
|
63
63
|
next unless setting
|
64
64
|
|
65
|
-
@config[name] = coerced_value(setting, value)
|
65
|
+
@config[name] = coerced_value(setting, name, value)
|
66
66
|
end
|
67
67
|
validate_config
|
68
68
|
options
|
@@ -84,10 +84,17 @@ module Chronicle
|
|
84
84
|
raise Chronicle::ETL::ConnectorConfigurationError, "Missing options: #{missing}" if missing.count.positive?
|
85
85
|
end
|
86
86
|
|
87
|
-
def coerced_value(setting, value)
|
87
|
+
def coerced_value(setting, name, value)
|
88
88
|
setting.type ? __send__("coerce_#{setting.type}", value) : value
|
89
89
|
rescue StandardError
|
90
|
-
raise(
|
90
|
+
raise(
|
91
|
+
Chronicle::ETL::ConnectorConfigurationError,
|
92
|
+
"Could not convert value '#{value}' into a #{setting.type} for setting '#{name}'"
|
93
|
+
)
|
94
|
+
end
|
95
|
+
|
96
|
+
def coerce_hash(value)
|
97
|
+
value.is_a?(Hash) ? value : {}
|
91
98
|
end
|
92
99
|
|
93
100
|
def coerce_string(value)
|
@@ -101,12 +108,16 @@ module Chronicle
|
|
101
108
|
|
102
109
|
def coerce_boolean(value)
|
103
110
|
if value.is_a?(String)
|
104
|
-
value.downcase ==
|
111
|
+
value.downcase == 'true'
|
105
112
|
else
|
106
113
|
value
|
107
114
|
end
|
108
115
|
end
|
109
116
|
|
117
|
+
def coerce_array(value)
|
118
|
+
value.is_a?(Array) ? value : [value]
|
119
|
+
end
|
120
|
+
|
110
121
|
def coerce_time(value)
|
111
122
|
# parsing yml files might result in us getting Date objects
|
112
123
|
# we convert to DateTime first to ensure UTC
|
@@ -138,8 +149,8 @@ module Chronicle
|
|
138
149
|
# setting :when, type: :date, required: true
|
139
150
|
#
|
140
151
|
# @see ::Chronicle::ETL::Configurable
|
141
|
-
def setting(name, default: nil, required: false, type: nil)
|
142
|
-
s = Setting.new(default, required, type)
|
152
|
+
def setting(name, default: nil, required: false, type: nil, description: nil)
|
153
|
+
s = Setting.new(default, required, type, description)
|
143
154
|
settings[name] = s
|
144
155
|
end
|
145
156
|
|
@@ -26,6 +26,7 @@ module Chronicle
|
|
26
26
|
attr_reader :name
|
27
27
|
|
28
28
|
def initialize(name)
|
29
|
+
super
|
29
30
|
@name = name
|
30
31
|
end
|
31
32
|
end
|
@@ -51,10 +52,9 @@ module Chronicle
|
|
51
52
|
|
52
53
|
class ExtractionError < Error; end
|
53
54
|
|
54
|
-
class SerializationError < Error; end
|
55
|
-
|
56
55
|
class TransformationError < Error; end
|
57
|
-
|
58
56
|
class UntransformableRecordError < TransformationError; end
|
57
|
+
|
58
|
+
class LoaderError < Error; end
|
59
59
|
end
|
60
60
|
end
|
@@ -1,11 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
class Extraction
|
4
|
-
attr_accessor :data, :meta
|
6
|
+
attr_accessor :data, :meta, :source, :type, :strategy, :extractor
|
5
7
|
|
6
|
-
def initialize(data: {}, meta: {})
|
8
|
+
def initialize(data: {}, meta: {}, source: nil, type: nil, strategy: nil, extractor: nil)
|
7
9
|
@data = data
|
8
10
|
@meta = meta
|
11
|
+
@source = source
|
12
|
+
@type = type
|
13
|
+
@strategy = strategy
|
14
|
+
@extractor = extractor
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_h
|
18
|
+
{ data: @data, meta: @meta, source: @source }
|
9
19
|
end
|
10
20
|
end
|
11
21
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'csv'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -6,6 +8,7 @@ module Chronicle
|
|
6
8
|
include Extractors::Helpers::InputReader
|
7
9
|
|
8
10
|
register_connector do |r|
|
11
|
+
r.identifier = :csv
|
9
12
|
r.description = 'CSV'
|
10
13
|
end
|
11
14
|
|
@@ -33,6 +36,12 @@ module Chronicle
|
|
33
36
|
|
34
37
|
private
|
35
38
|
|
39
|
+
def all_rows
|
40
|
+
@csvs.reduce([]) do |all_rows, csv|
|
41
|
+
all_rows + csv.to_a.map(&:to_h)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
36
45
|
def prepare_sources
|
37
46
|
@csvs = []
|
38
47
|
read_input do |csv_data|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'chronicle/etl'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -21,7 +23,7 @@ module Chronicle
|
|
21
23
|
apply_options(options)
|
22
24
|
end
|
23
25
|
|
24
|
-
# Hook called before #extract. Useful for gathering data,
|
26
|
+
# Hook called before #extract. Useful for gathering data, initializing proxies, etc
|
25
27
|
def prepare; end
|
26
28
|
|
27
29
|
# An optional method to calculate how many records there are to extract. Used primarily for
|
@@ -33,7 +35,18 @@ module Chronicle
|
|
33
35
|
raise NotImplementedError
|
34
36
|
end
|
35
37
|
|
36
|
-
|
38
|
+
protected
|
39
|
+
|
40
|
+
def build_extraction(data:, meta: nil, source: nil, type: nil, strategy: nil)
|
41
|
+
Extraction.new(
|
42
|
+
extractor: self.class,
|
43
|
+
data: data,
|
44
|
+
meta: meta,
|
45
|
+
source: source || self.class.connector_registration.source,
|
46
|
+
type: type || self.class.connector_registration.type,
|
47
|
+
strategy: strategy || self.class.connector_registration.strategy
|
48
|
+
)
|
49
|
+
end
|
37
50
|
|
38
51
|
# TODO: reimplement this
|
39
52
|
# def handle_continuation
|
@@ -1,16 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'pathname'
|
2
4
|
|
3
5
|
module Chronicle
|
4
6
|
module ETL
|
5
7
|
# Return filenames that match a pattern in a directory
|
6
8
|
class FileExtractor < Chronicle::ETL::Extractor
|
7
|
-
|
8
9
|
register_connector do |r|
|
10
|
+
r.identifier = :file
|
9
11
|
r.description = 'file or directory of files'
|
10
12
|
end
|
11
13
|
|
12
14
|
setting :input, default: ['.']
|
13
|
-
setting :dir_glob_pattern, default:
|
15
|
+
setting :dir_glob_pattern, default: '**/*'
|
14
16
|
setting :larger_than
|
15
17
|
setting :smaller_than
|
16
18
|
|
@@ -32,7 +34,7 @@ module Chronicle
|
|
32
34
|
|
33
35
|
def gather_files
|
34
36
|
roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
|
35
|
-
raise(ExtractionError,
|
37
|
+
raise(ExtractionError, 'Input must exist') unless roots.all?(&:exist?)
|
36
38
|
|
37
39
|
directories, files = roots.partition(&:directory?)
|
38
40
|
|
@@ -37,7 +37,7 @@ module Chronicle
|
|
37
37
|
elsif read_from_stdin?
|
38
38
|
yield $stdin.read, $stdin
|
39
39
|
else
|
40
|
-
raise ExtractionError,
|
40
|
+
raise ExtractionError, 'No input files or stdin provided'
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
@@ -48,7 +48,7 @@ module Chronicle
|
|
48
48
|
elsif read_from_stdin?
|
49
49
|
lines_from_stdin(&block)
|
50
50
|
else
|
51
|
-
raise ExtractionError,
|
51
|
+
raise ExtractionError, 'No input files or stdin provided'
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
@@ -1,18 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
class JSONExtractor < Chronicle::ETL::Extractor
|
4
6
|
include Extractors::Helpers::InputReader
|
5
7
|
|
6
8
|
register_connector do |r|
|
9
|
+
r.identifier = :json
|
7
10
|
r.description = 'JSON'
|
8
11
|
end
|
9
12
|
|
10
13
|
setting :jsonl, default: true, type: :boolean
|
14
|
+
setting :path, default: nil, type: :string
|
11
15
|
|
12
16
|
def prepare
|
13
17
|
@jsons = []
|
14
18
|
load_input do |input|
|
15
|
-
|
19
|
+
data = parse_data(input)
|
20
|
+
@jsons += [data].flatten
|
16
21
|
end
|
17
22
|
end
|
18
23
|
|
@@ -28,10 +33,15 @@ module Chronicle
|
|
28
33
|
|
29
34
|
private
|
30
35
|
|
31
|
-
def parse_data
|
32
|
-
JSON.parse(data)
|
36
|
+
def parse_data(data)
|
37
|
+
parsed_data = JSON.parse(data)
|
38
|
+
if @config.path
|
39
|
+
parsed_data.dig(*@config.path.split('.'))
|
40
|
+
else
|
41
|
+
parsed_data
|
42
|
+
end
|
33
43
|
rescue JSON::ParserError
|
34
|
-
raise Chronicle::ETL::ExtractionError,
|
44
|
+
raise Chronicle::ETL::ExtractionError, 'Could not parse JSON'
|
35
45
|
end
|
36
46
|
|
37
47
|
def load_input(&block)
|
data/lib/chronicle/etl/job.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'forwardable'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -12,13 +14,13 @@ module Chronicle
|
|
12
14
|
def_delegators :@job_definition, :dry_run?
|
13
15
|
|
14
16
|
attr_accessor :name,
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
17
|
+
:extractor_klass,
|
18
|
+
:extractor_options,
|
19
|
+
:transformer_klasses,
|
20
|
+
:transformer_options,
|
21
|
+
:loader_klass,
|
22
|
+
:loader_options,
|
23
|
+
:job_definition
|
22
24
|
|
23
25
|
# TODO: build a proper id system
|
24
26
|
alias id name
|
@@ -39,9 +41,10 @@ module Chronicle
|
|
39
41
|
@extractor_klass.new(@extractor_options)
|
40
42
|
end
|
41
43
|
|
42
|
-
def
|
43
|
-
@
|
44
|
-
|
44
|
+
def instantiate_transformers
|
45
|
+
@job_definition.transformer_klasses.each_with_index.map do |klass, i|
|
46
|
+
klass.new(@transformer_options[i] || {})
|
47
|
+
end
|
45
48
|
end
|
46
49
|
|
47
50
|
def instantiate_loader
|
@@ -51,20 +54,35 @@ module Chronicle
|
|
51
54
|
|
52
55
|
def save_log?
|
53
56
|
# TODO: this needs more nuance
|
54
|
-
|
57
|
+
!id.nil?
|
55
58
|
end
|
56
59
|
|
57
60
|
def to_s
|
58
|
-
output = "Job"
|
59
|
-
output
|
60
|
-
output += "
|
61
|
-
output += "
|
62
|
-
output +=
|
63
|
-
|
61
|
+
output = "Job summary\n".upcase.bold
|
62
|
+
# output = ""
|
63
|
+
output += "#{name}:\n" if name
|
64
|
+
output += "→ Extracting from #{@job_definition.extractor_klass.description}\n"
|
65
|
+
output += options_to_s(@extractor_options)
|
66
|
+
|
67
|
+
@job_definition.transformer_klasses.each do |klass|
|
68
|
+
output += "→ Transforming #{klass.description}\n"
|
69
|
+
end
|
70
|
+
# TODO: transformer options
|
71
|
+
output += "→ Loading to #{@job_definition.loader_klass.description}\n"
|
72
|
+
output += options_to_s(@loader_options)
|
73
|
+
output
|
64
74
|
end
|
65
75
|
|
66
76
|
private
|
67
77
|
|
78
|
+
def options_to_s(options, indent: 4)
|
79
|
+
output = ''
|
80
|
+
options.each do |k, v|
|
81
|
+
output += "#{' ' * indent}#{k.to_s.light_blue}: #{v}\n"
|
82
|
+
end
|
83
|
+
output
|
84
|
+
end
|
85
|
+
|
68
86
|
def set_continuation
|
69
87
|
continuation = Chronicle::ETL::JobLogger.load_latest(@id)
|
70
88
|
@extractor_options[:continuation] = continuation
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'active_support/core_ext/hash/deep_merge'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -9,12 +11,14 @@ module Chronicle
|
|
9
11
|
name: 'stdin',
|
10
12
|
options: {}
|
11
13
|
},
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
transformers: [
|
15
|
+
{
|
16
|
+
name: 'null',
|
17
|
+
options: {}
|
18
|
+
}
|
19
|
+
],
|
16
20
|
loader: {
|
17
|
-
name: '
|
21
|
+
name: 'json',
|
18
22
|
options: {}
|
19
23
|
}
|
20
24
|
}.freeze
|
@@ -22,7 +26,7 @@ module Chronicle
|
|
22
26
|
attr_reader :errors
|
23
27
|
attr_accessor :definition
|
24
28
|
|
25
|
-
def initialize
|
29
|
+
def initialize
|
26
30
|
@definition = SKELETON_DEFINITION
|
27
31
|
end
|
28
32
|
|
@@ -34,12 +38,12 @@ module Chronicle
|
|
34
38
|
def validate
|
35
39
|
@errors = {}
|
36
40
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
41
|
+
extractor_klass
|
42
|
+
transformer_klasses
|
43
|
+
loader_klass
|
44
|
+
rescue Chronicle::ETL::PluginError => e
|
45
|
+
@errors[:plugins] ||= []
|
46
|
+
@errors[:plugins] << e
|
43
47
|
end
|
44
48
|
|
45
49
|
def plugins_missing?
|
@@ -48,12 +52,11 @@ module Chronicle
|
|
48
52
|
return false unless @errors[:plugins]&.any?
|
49
53
|
|
50
54
|
@errors[:plugins]
|
51
|
-
.
|
52
|
-
.any?
|
55
|
+
.any? { |e| e.instance_of?(Chronicle::ETL::PluginNotInstalledError) }
|
53
56
|
end
|
54
57
|
|
55
58
|
def validate!
|
56
|
-
raise(Chronicle::ETL::JobDefinitionError.new(self),
|
59
|
+
raise(Chronicle::ETL::JobDefinitionError.new(self), 'Job definition is invalid') unless valid?
|
57
60
|
|
58
61
|
true
|
59
62
|
end
|
@@ -66,19 +69,20 @@ module Chronicle
|
|
66
69
|
|
67
70
|
# For each connector in this job, mix in secrets into the options
|
68
71
|
def apply_default_secrets
|
69
|
-
|
72
|
+
# FIXME: handle transformer secrets
|
73
|
+
%i[extractor loader].each do |phase|
|
70
74
|
# If the options have a `secrets` key, we look up those secrets and
|
71
|
-
# mix them in. If not, use the connector's plugin name and look up
|
75
|
+
# mix them in. If not, use the connector's plugin name and look up
|
72
76
|
# secrets with the same namespace
|
73
77
|
if @definition[phase][:options][:secrets]
|
74
78
|
namespace = @definition[phase][:options][:secrets]
|
75
79
|
else
|
76
80
|
# We don't want to do this lookup for built-in connectors
|
77
|
-
next if __send__("#{phase}_klass"
|
81
|
+
next if __send__(:"#{phase}_klass").connector_registration.built_in?
|
78
82
|
|
79
83
|
# infer plugin name from connector name and use it for secrets
|
80
84
|
# namespace
|
81
|
-
namespace = @definition[phase][:name].split(
|
85
|
+
namespace = @definition[phase][:name].split(':').first
|
82
86
|
end
|
83
87
|
|
84
88
|
# Reverse merge secrets into connector's options (we want to preserve
|
@@ -98,15 +102,17 @@ module Chronicle
|
|
98
102
|
end
|
99
103
|
|
100
104
|
def extractor_klass
|
101
|
-
|
105
|
+
find_connector_klass(:extractor, @definition[:extractor][:name])
|
102
106
|
end
|
103
107
|
|
104
|
-
def
|
105
|
-
|
108
|
+
def transformer_klasses
|
109
|
+
@definition[:transformers].map do |transformer|
|
110
|
+
find_connector_klass(:transformer, transformer[:name])
|
111
|
+
end
|
106
112
|
end
|
107
113
|
|
108
114
|
def loader_klass
|
109
|
-
|
115
|
+
find_connector_klass(:loader, @definition[:loader][:name])
|
110
116
|
end
|
111
117
|
|
112
118
|
def extractor_options
|
@@ -114,7 +120,9 @@ module Chronicle
|
|
114
120
|
end
|
115
121
|
|
116
122
|
def transformer_options
|
117
|
-
@definition[:transformer
|
123
|
+
@definition[:transformers].map do |transformer|
|
124
|
+
transformer[:options]
|
125
|
+
end
|
118
126
|
end
|
119
127
|
|
120
128
|
def loader_options
|
@@ -123,12 +131,16 @@ module Chronicle
|
|
123
131
|
|
124
132
|
private
|
125
133
|
|
126
|
-
def
|
134
|
+
def find_schema_transformer_klass(source_klass, target)
|
135
|
+
Chronicle::ETL::Registry::Connectors.find_converter_for_source(source_klass, target).klass
|
136
|
+
end
|
137
|
+
|
138
|
+
def find_connector_klass(phase, identifier)
|
127
139
|
Chronicle::ETL::Registry::Connectors.find_by_phase_and_identifier(phase, identifier).klass
|
128
140
|
end
|
129
141
|
|
130
142
|
def load_credentials
|
131
|
-
|
143
|
+
%i[extractor loader].each do |phase|
|
132
144
|
credentials_name = @definition[phase].dig(:options, :credentials)
|
133
145
|
if credentials_name
|
134
146
|
credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
|