chronicle-etl 0.5.4 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +98 -73
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +50 -45
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +10 -8
- data/lib/chronicle/etl/cli/connectors.rb +9 -9
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +29 -26
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +20 -7
- data/lib/chronicle/etl/configurable.rb +24 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +39 -27
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +3 -3
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -10
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +117 -0
- data/lib/chronicle/etl/registry/plugin_registration.rb +19 -0
- data/lib/chronicle/etl/registry/plugins.rb +171 -0
- data/lib/chronicle/etl/registry/registry.rb +3 -52
- data/lib/chronicle/etl/registry/self_registering.rb +1 -1
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +5 -5
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +91 -45
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/registry/plugin_registry.rb +0 -95
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require 'tty-prompt'
|
4
4
|
|
5
5
|
module Chronicle
|
6
6
|
module ETL
|
@@ -10,8 +10,8 @@ module Chronicle
|
|
10
10
|
default_task 'list'
|
11
11
|
namespace :secrets
|
12
12
|
|
13
|
-
desc
|
14
|
-
def set(namespace, key, value=nil)
|
13
|
+
desc 'set NAMESPACE KEY [VALUE]', 'Add a secret. VALUE can be set as argument or from stdin'
|
14
|
+
def set(namespace, key, value = nil)
|
15
15
|
validate_namespace(namespace)
|
16
16
|
|
17
17
|
if value
|
@@ -24,23 +24,23 @@ module Chronicle
|
|
24
24
|
end
|
25
25
|
|
26
26
|
Chronicle::ETL::Secrets.set(namespace, key, value.strip)
|
27
|
-
cli_exit(message:
|
27
|
+
cli_exit(message: 'Secret set')
|
28
28
|
rescue TTY::Reader::InputInterrupt
|
29
29
|
cli_fail(message: "\nSecret not set")
|
30
30
|
end
|
31
31
|
|
32
|
-
desc
|
32
|
+
desc 'unset NAMESPACE KEY', 'Remove a secret'
|
33
33
|
def unset(namespace, key)
|
34
34
|
validate_namespace(namespace)
|
35
35
|
|
36
36
|
Chronicle::ETL::Secrets.unset(namespace, key)
|
37
|
-
cli_exit(message:
|
37
|
+
cli_exit(message: 'Secret unset')
|
38
38
|
end
|
39
39
|
|
40
|
-
desc
|
41
|
-
def list(namespace=nil)
|
40
|
+
desc 'list', 'List available secrets'
|
41
|
+
def list(namespace = nil)
|
42
42
|
all_secrets = Chronicle::ETL::Secrets.all(namespace)
|
43
|
-
cli_exit(message:
|
43
|
+
cli_exit(message: 'No secrets are stored') unless all_secrets.any?
|
44
44
|
|
45
45
|
rows = []
|
46
46
|
all_secrets.each do |namespace, secrets|
|
@@ -51,9 +51,9 @@ module Chronicle
|
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
|
-
headers = [
|
54
|
+
headers = %w[namespace key value].map { |h| h.upcase.bold }
|
55
55
|
|
56
|
-
puts
|
56
|
+
puts 'Available secrets:'
|
57
57
|
table = TTY::Table.new(headers, rows)
|
58
58
|
puts table.render(indent: 0, padding: [0, 2])
|
59
59
|
end
|
@@ -61,7 +61,9 @@ module Chronicle
|
|
61
61
|
private
|
62
62
|
|
63
63
|
def validate_namespace(namespace)
|
64
|
-
|
64
|
+
return if Chronicle::ETL::Secrets.valid_namespace_name?(namespace)
|
65
|
+
|
66
|
+
cli_fail(message: "'#{namespace}' is not a valid namespace")
|
65
67
|
end
|
66
68
|
end
|
67
69
|
end
|
@@ -11,7 +11,7 @@ module Chronicle
|
|
11
11
|
end
|
12
12
|
list.sort! { |a, b| a[0] <=> b[0] }
|
13
13
|
|
14
|
-
shell.say
|
14
|
+
shell.say 'COMMANDS'.bold
|
15
15
|
shell.print_table(list, indent: 2, truncate: true)
|
16
16
|
shell.say
|
17
17
|
class_options_help(shell)
|
@@ -19,7 +19,7 @@ module Chronicle
|
|
19
19
|
|
20
20
|
# Show docs with command:subcommand pattern.
|
21
21
|
# For `help` command, don't use colon
|
22
|
-
def self.banner(command,
|
22
|
+
def self.banner(command, _namespace = nil, _subcommand = false)
|
23
23
|
if command.name == 'help'
|
24
24
|
"#{subcommand_prefix} #{command.usage}"
|
25
25
|
else
|
@@ -29,7 +29,9 @@ module Chronicle
|
|
29
29
|
|
30
30
|
# Use subcommand classname to derive display name for subcommand
|
31
31
|
def self.subcommand_prefix
|
32
|
-
|
32
|
+
name.gsub(/.*::/, '').gsub(/^[A-Z]/) do |match|
|
33
|
+
match[0].downcase
|
34
|
+
end.gsub(/[A-Z]/) { |match| "-#{match[0].downcase}" }
|
33
35
|
end
|
34
36
|
end
|
35
37
|
end
|
data/lib/chronicle/etl/config.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'active_support/core_ext/hash/keys'
|
1
2
|
require 'fileutils'
|
2
3
|
require 'yaml'
|
3
4
|
|
@@ -14,13 +15,15 @@ module Chronicle
|
|
14
15
|
path = base.join("#{identifier}.yml")
|
15
16
|
return {} unless path.exist?
|
16
17
|
|
17
|
-
YAML.
|
18
|
+
YAML.safe_load_file(path, symbolize_names: true, permitted_classes: [Symbol, Date, Time])
|
18
19
|
end
|
19
20
|
|
20
21
|
# Writes a hash as a yml config file
|
21
22
|
def write(type, identifier, data)
|
22
23
|
base = config_pathname_for_type(type)
|
23
24
|
path = base.join("#{identifier}.yml")
|
25
|
+
|
26
|
+
data.deep_stringify_keys!
|
24
27
|
FileUtils.mkdir_p(File.dirname(path))
|
25
28
|
File.open(path, 'w', 0o600) do |f|
|
26
29
|
# Ruby likes to add --- separators when writing yaml files
|
@@ -28,28 +31,38 @@ module Chronicle
|
|
28
31
|
end
|
29
32
|
end
|
30
33
|
|
34
|
+
# Returns path for a given config type and identifier
|
35
|
+
def path(type, identifier)
|
36
|
+
base = config_pathname_for_type(type)
|
37
|
+
base.join("#{identifier}.yml")
|
38
|
+
end
|
39
|
+
|
40
|
+
# Whether a config exists for a given type and identifier
|
31
41
|
def exists?(type, identifier)
|
32
42
|
base = config_pathname_for_type(type)
|
33
43
|
path = base.join("#{identifier}.yml")
|
34
|
-
|
44
|
+
path.exist?
|
35
45
|
end
|
36
46
|
|
37
47
|
# Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
|
38
48
|
def available_jobs
|
39
|
-
Dir.glob(File.join(config_pathname_for_type(
|
40
|
-
File.basename(filename,
|
49
|
+
Dir.glob(File.join(config_pathname_for_type('jobs'), '*.yml')).map do |filename|
|
50
|
+
File.basename(filename, '.*')
|
41
51
|
end
|
42
52
|
end
|
43
53
|
|
54
|
+
# Returns all configs available for a given type
|
44
55
|
def available_configs(type)
|
45
|
-
Dir.glob(File.join(config_pathname_for_type(type),
|
46
|
-
File.basename(filename,
|
56
|
+
Dir.glob(File.join(config_pathname_for_type(type), '*.yml')).map do |filename|
|
57
|
+
File.basename(filename, '.*')
|
47
58
|
end
|
48
59
|
end
|
49
60
|
|
50
61
|
# Load a job definition from job config directory
|
51
62
|
def read_job(job_name)
|
52
|
-
load('jobs', job_name)
|
63
|
+
definition = load('jobs', job_name)
|
64
|
+
definition[:name] ||= job_name
|
65
|
+
definition
|
53
66
|
end
|
54
67
|
|
55
68
|
def config_pathname
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'ostruct'
|
4
|
+
require 'chronic_duration'
|
5
5
|
|
6
6
|
module Chronicle
|
7
7
|
module ETL
|
@@ -19,7 +19,7 @@ module Chronicle
|
|
19
19
|
# t.config.when
|
20
20
|
module Configurable
|
21
21
|
# An individual setting for this Configurable
|
22
|
-
Setting = Struct.new(:default, :required, :type)
|
22
|
+
Setting = Struct.new(:default, :required, :type, :description)
|
23
23
|
private_constant :Setting
|
24
24
|
|
25
25
|
# Collection of user-supplied options for this Configurable
|
@@ -62,7 +62,7 @@ module Chronicle
|
|
62
62
|
# Do nothing with a given option if it's not a connector setting
|
63
63
|
next unless setting
|
64
64
|
|
65
|
-
@config[name] = coerced_value(setting, value)
|
65
|
+
@config[name] = coerced_value(setting, name, value)
|
66
66
|
end
|
67
67
|
validate_config
|
68
68
|
options
|
@@ -84,10 +84,17 @@ module Chronicle
|
|
84
84
|
raise Chronicle::ETL::ConnectorConfigurationError, "Missing options: #{missing}" if missing.count.positive?
|
85
85
|
end
|
86
86
|
|
87
|
-
def coerced_value(setting, value)
|
87
|
+
def coerced_value(setting, name, value)
|
88
88
|
setting.type ? __send__("coerce_#{setting.type}", value) : value
|
89
89
|
rescue StandardError
|
90
|
-
raise(
|
90
|
+
raise(
|
91
|
+
Chronicle::ETL::ConnectorConfigurationError,
|
92
|
+
"Could not convert value '#{value}' into a #{setting.type} for setting '#{name}'"
|
93
|
+
)
|
94
|
+
end
|
95
|
+
|
96
|
+
def coerce_hash(value)
|
97
|
+
value.is_a?(Hash) ? value : {}
|
91
98
|
end
|
92
99
|
|
93
100
|
def coerce_string(value)
|
@@ -101,13 +108,21 @@ module Chronicle
|
|
101
108
|
|
102
109
|
def coerce_boolean(value)
|
103
110
|
if value.is_a?(String)
|
104
|
-
value.downcase ==
|
111
|
+
value.downcase == 'true'
|
105
112
|
else
|
106
113
|
value
|
107
114
|
end
|
108
115
|
end
|
109
116
|
|
117
|
+
def coerce_array(value)
|
118
|
+
value.is_a?(Array) ? value : [value]
|
119
|
+
end
|
120
|
+
|
110
121
|
def coerce_time(value)
|
122
|
+
# parsing yml files might result in us getting Date objects
|
123
|
+
# we convert to DateTime first to ensure UTC
|
124
|
+
return value.to_datetime.to_time if value.is_a?(Date)
|
125
|
+
|
111
126
|
return value unless value.is_a?(String)
|
112
127
|
|
113
128
|
# Hacky check for duration strings like "60m"
|
@@ -134,8 +149,8 @@ module Chronicle
|
|
134
149
|
# setting :when, type: :date, required: true
|
135
150
|
#
|
136
151
|
# @see ::Chronicle::ETL::Configurable
|
137
|
-
def setting(name, default: nil, required: false, type: nil)
|
138
|
-
s = Setting.new(default, required, type)
|
152
|
+
def setting(name, default: nil, required: false, type: nil, description: nil)
|
153
|
+
s = Setting.new(default, required, type, description)
|
139
154
|
settings[name] = s
|
140
155
|
end
|
141
156
|
|
@@ -26,6 +26,7 @@ module Chronicle
|
|
26
26
|
attr_reader :name
|
27
27
|
|
28
28
|
def initialize(name)
|
29
|
+
super
|
29
30
|
@name = name
|
30
31
|
end
|
31
32
|
end
|
@@ -51,10 +52,9 @@ module Chronicle
|
|
51
52
|
|
52
53
|
class ExtractionError < Error; end
|
53
54
|
|
54
|
-
class SerializationError < Error; end
|
55
|
-
|
56
55
|
class TransformationError < Error; end
|
57
|
-
|
58
56
|
class UntransformableRecordError < TransformationError; end
|
57
|
+
|
58
|
+
class LoaderError < Error; end
|
59
59
|
end
|
60
60
|
end
|
@@ -1,11 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
class Extraction
|
4
|
-
attr_accessor :data, :meta
|
6
|
+
attr_accessor :data, :meta, :source, :type, :strategy, :extractor
|
5
7
|
|
6
|
-
def initialize(data: {}, meta: {})
|
8
|
+
def initialize(data: {}, meta: {}, source: nil, type: nil, strategy: nil, extractor: nil)
|
7
9
|
@data = data
|
8
10
|
@meta = meta
|
11
|
+
@source = source
|
12
|
+
@type = type
|
13
|
+
@strategy = strategy
|
14
|
+
@extractor = extractor
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_h
|
18
|
+
{ data: @data, meta: @meta, source: @source }
|
9
19
|
end
|
10
20
|
end
|
11
21
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'csv'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -6,6 +8,7 @@ module Chronicle
|
|
6
8
|
include Extractors::Helpers::InputReader
|
7
9
|
|
8
10
|
register_connector do |r|
|
11
|
+
r.identifier = :csv
|
9
12
|
r.description = 'CSV'
|
10
13
|
end
|
11
14
|
|
@@ -33,6 +36,12 @@ module Chronicle
|
|
33
36
|
|
34
37
|
private
|
35
38
|
|
39
|
+
def all_rows
|
40
|
+
@csvs.reduce([]) do |all_rows, csv|
|
41
|
+
all_rows + csv.to_a.map(&:to_h)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
36
45
|
def prepare_sources
|
37
46
|
@csvs = []
|
38
47
|
read_input do |csv_data|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'chronicle/etl'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -21,7 +23,7 @@ module Chronicle
|
|
21
23
|
apply_options(options)
|
22
24
|
end
|
23
25
|
|
24
|
-
# Hook called before #extract. Useful for gathering data,
|
26
|
+
# Hook called before #extract. Useful for gathering data, initializing proxies, etc
|
25
27
|
def prepare; end
|
26
28
|
|
27
29
|
# An optional method to calculate how many records there are to extract. Used primarily for
|
@@ -33,7 +35,18 @@ module Chronicle
|
|
33
35
|
raise NotImplementedError
|
34
36
|
end
|
35
37
|
|
36
|
-
|
38
|
+
protected
|
39
|
+
|
40
|
+
def build_extraction(data:, meta: nil, source: nil, type: nil, strategy: nil)
|
41
|
+
Extraction.new(
|
42
|
+
extractor: self.class,
|
43
|
+
data: data,
|
44
|
+
meta: meta,
|
45
|
+
source: source || self.class.connector_registration.source,
|
46
|
+
type: type || self.class.connector_registration.type,
|
47
|
+
strategy: strategy || self.class.connector_registration.strategy
|
48
|
+
)
|
49
|
+
end
|
37
50
|
|
38
51
|
# TODO: reimplement this
|
39
52
|
# def handle_continuation
|
@@ -1,16 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'pathname'
|
2
4
|
|
3
5
|
module Chronicle
|
4
6
|
module ETL
|
5
7
|
# Return filenames that match a pattern in a directory
|
6
8
|
class FileExtractor < Chronicle::ETL::Extractor
|
7
|
-
|
8
9
|
register_connector do |r|
|
10
|
+
r.identifier = :file
|
9
11
|
r.description = 'file or directory of files'
|
10
12
|
end
|
11
13
|
|
12
14
|
setting :input, default: ['.']
|
13
|
-
setting :dir_glob_pattern, default:
|
15
|
+
setting :dir_glob_pattern, default: '**/*'
|
14
16
|
setting :larger_than
|
15
17
|
setting :smaller_than
|
16
18
|
|
@@ -32,7 +34,7 @@ module Chronicle
|
|
32
34
|
|
33
35
|
def gather_files
|
34
36
|
roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
|
35
|
-
raise(ExtractionError,
|
37
|
+
raise(ExtractionError, 'Input must exist') unless roots.all?(&:exist?)
|
36
38
|
|
37
39
|
directories, files = roots.partition(&:directory?)
|
38
40
|
|
@@ -37,7 +37,7 @@ module Chronicle
|
|
37
37
|
elsif read_from_stdin?
|
38
38
|
yield $stdin.read, $stdin
|
39
39
|
else
|
40
|
-
raise ExtractionError,
|
40
|
+
raise ExtractionError, 'No input files or stdin provided'
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
@@ -48,7 +48,7 @@ module Chronicle
|
|
48
48
|
elsif read_from_stdin?
|
49
49
|
lines_from_stdin(&block)
|
50
50
|
else
|
51
|
-
raise ExtractionError,
|
51
|
+
raise ExtractionError, 'No input files or stdin provided'
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
@@ -1,18 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
class JSONExtractor < Chronicle::ETL::Extractor
|
4
6
|
include Extractors::Helpers::InputReader
|
5
7
|
|
6
8
|
register_connector do |r|
|
9
|
+
r.identifier = :json
|
7
10
|
r.description = 'JSON'
|
8
11
|
end
|
9
12
|
|
10
13
|
setting :jsonl, default: true, type: :boolean
|
14
|
+
setting :path, default: nil, type: :string
|
11
15
|
|
12
16
|
def prepare
|
13
17
|
@jsons = []
|
14
18
|
load_input do |input|
|
15
|
-
|
19
|
+
data = parse_data(input)
|
20
|
+
@jsons += [data].flatten
|
16
21
|
end
|
17
22
|
end
|
18
23
|
|
@@ -28,10 +33,15 @@ module Chronicle
|
|
28
33
|
|
29
34
|
private
|
30
35
|
|
31
|
-
def parse_data
|
32
|
-
JSON.parse(data)
|
36
|
+
def parse_data(data)
|
37
|
+
parsed_data = JSON.parse(data)
|
38
|
+
if @config.path
|
39
|
+
parsed_data.dig(*@config.path.split('.'))
|
40
|
+
else
|
41
|
+
parsed_data
|
42
|
+
end
|
33
43
|
rescue JSON::ParserError
|
34
|
-
raise Chronicle::ETL::ExtractionError,
|
44
|
+
raise Chronicle::ETL::ExtractionError, 'Could not parse JSON'
|
35
45
|
end
|
36
46
|
|
37
47
|
def load_input(&block)
|
data/lib/chronicle/etl/job.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'forwardable'
|
2
4
|
|
3
5
|
module Chronicle
|
@@ -12,13 +14,13 @@ module Chronicle
|
|
12
14
|
def_delegators :@job_definition, :dry_run?
|
13
15
|
|
14
16
|
attr_accessor :name,
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
17
|
+
:extractor_klass,
|
18
|
+
:extractor_options,
|
19
|
+
:transformer_klasses,
|
20
|
+
:transformer_options,
|
21
|
+
:loader_klass,
|
22
|
+
:loader_options,
|
23
|
+
:job_definition
|
22
24
|
|
23
25
|
# TODO: build a proper id system
|
24
26
|
alias id name
|
@@ -39,9 +41,10 @@ module Chronicle
|
|
39
41
|
@extractor_klass.new(@extractor_options)
|
40
42
|
end
|
41
43
|
|
42
|
-
def
|
43
|
-
@
|
44
|
-
|
44
|
+
def instantiate_transformers
|
45
|
+
@job_definition.transformer_klasses.each_with_index.map do |klass, i|
|
46
|
+
klass.new(@transformer_options[i] || {})
|
47
|
+
end
|
45
48
|
end
|
46
49
|
|
47
50
|
def instantiate_loader
|
@@ -51,20 +54,35 @@ module Chronicle
|
|
51
54
|
|
52
55
|
def save_log?
|
53
56
|
# TODO: this needs more nuance
|
54
|
-
|
57
|
+
!id.nil?
|
55
58
|
end
|
56
59
|
|
57
60
|
def to_s
|
58
|
-
output = "Job"
|
59
|
-
output
|
60
|
-
output += "
|
61
|
-
output += "
|
62
|
-
output +=
|
63
|
-
|
61
|
+
output = "Job summary\n".upcase.bold
|
62
|
+
# output = ""
|
63
|
+
output += "#{name}:\n" if name
|
64
|
+
output += "→ Extracting from #{@job_definition.extractor_klass.description}\n"
|
65
|
+
output += options_to_s(@extractor_options)
|
66
|
+
|
67
|
+
@job_definition.transformer_klasses.each do |klass|
|
68
|
+
output += "→ Transforming #{klass.description}\n"
|
69
|
+
end
|
70
|
+
# TODO: transformer options
|
71
|
+
output += "→ Loading to #{@job_definition.loader_klass.description}\n"
|
72
|
+
output += options_to_s(@loader_options)
|
73
|
+
output
|
64
74
|
end
|
65
75
|
|
66
76
|
private
|
67
77
|
|
78
|
+
def options_to_s(options, indent: 4)
|
79
|
+
output = ''
|
80
|
+
options.each do |k, v|
|
81
|
+
output += "#{' ' * indent}#{k.to_s.light_blue}: #{v}\n"
|
82
|
+
end
|
83
|
+
output
|
84
|
+
end
|
85
|
+
|
68
86
|
def set_continuation
|
69
87
|
continuation = Chronicle::ETL::JobLogger.load_latest(@id)
|
70
88
|
@extractor_options[:continuation] = continuation
|