chronicle-etl 0.5.5 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +15 -25
  3. data/.rubocop.yml +2 -44
  4. data/Gemfile +2 -2
  5. data/Guardfile +3 -3
  6. data/README.md +75 -68
  7. data/Rakefile +2 -2
  8. data/bin/console +4 -5
  9. data/chronicle-etl.gemspec +51 -49
  10. data/exe/chronicle-etl +1 -1
  11. data/lib/chronicle/etl/authorizer.rb +3 -4
  12. data/lib/chronicle/etl/cli/authorizations.rb +8 -6
  13. data/lib/chronicle/etl/cli/connectors.rb +7 -7
  14. data/lib/chronicle/etl/cli/jobs.rb +130 -53
  15. data/lib/chronicle/etl/cli/main.rb +29 -29
  16. data/lib/chronicle/etl/cli/plugins.rb +14 -15
  17. data/lib/chronicle/etl/cli/secrets.rb +14 -12
  18. data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
  19. data/lib/chronicle/etl/config.rb +18 -8
  20. data/lib/chronicle/etl/configurable.rb +20 -9
  21. data/lib/chronicle/etl/exceptions.rb +3 -3
  22. data/lib/chronicle/etl/extraction.rb +12 -2
  23. data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
  24. data/lib/chronicle/etl/extractors/extractor.rb +15 -2
  25. data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
  26. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
  27. data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
  28. data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
  29. data/lib/chronicle/etl/job.rb +35 -17
  30. data/lib/chronicle/etl/job_definition.rb +38 -26
  31. data/lib/chronicle/etl/job_log.rb +14 -16
  32. data/lib/chronicle/etl/job_logger.rb +4 -4
  33. data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
  34. data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
  35. data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
  36. data/lib/chronicle/etl/loaders/loader.rb +0 -17
  37. data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
  38. data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
  39. data/lib/chronicle/etl/logger.rb +2 -2
  40. data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
  41. data/lib/chronicle/etl/record.rb +15 -0
  42. data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
  43. data/lib/chronicle/etl/registry/connectors.rb +93 -36
  44. data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
  45. data/lib/chronicle/etl/registry/plugins.rb +27 -19
  46. data/lib/chronicle/etl/runner.rb +158 -128
  47. data/lib/chronicle/etl/secrets.rb +4 -4
  48. data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
  49. data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
  50. data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
  51. data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
  52. data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
  53. data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
  54. data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
  55. data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
  56. data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
  57. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
  58. data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
  59. data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
  60. data/lib/chronicle/etl/transformers/transformer.rb +63 -41
  61. data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
  62. data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
  63. data/lib/chronicle/etl/version.rb +1 -1
  64. data/lib/chronicle/etl.rb +6 -8
  65. metadata +49 -47
  66. data/lib/chronicle/etl/models/activity.rb +0 -15
  67. data/lib/chronicle/etl/models/attachment.rb +0 -14
  68. data/lib/chronicle/etl/models/base.rb +0 -122
  69. data/lib/chronicle/etl/models/entity.rb +0 -29
  70. data/lib/chronicle/etl/models/raw.rb +0 -26
  71. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
  72. data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
  73. data/lib/chronicle/etl/serializers/serializer.rb +0 -28
  74. data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
  75. data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
  76. data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "tty-prompt"
3
+ require 'tty-prompt'
4
4
 
5
5
  module Chronicle
6
6
  module ETL
@@ -10,8 +10,8 @@ module Chronicle
10
10
  default_task 'list'
11
11
  namespace :secrets
12
12
 
13
- desc "set NAMESPACE KEY [VALUE]", "Add a secret. VALUE can be set as argument or from stdin"
14
- def set(namespace, key, value=nil)
13
+ desc 'set NAMESPACE KEY [VALUE]', 'Add a secret. VALUE can be set as argument or from stdin'
14
+ def set(namespace, key, value = nil)
15
15
  validate_namespace(namespace)
16
16
 
17
17
  if value
@@ -24,23 +24,23 @@ module Chronicle
24
24
  end
25
25
 
26
26
  Chronicle::ETL::Secrets.set(namespace, key, value.strip)
27
- cli_exit(message: "Secret set")
27
+ cli_exit(message: 'Secret set')
28
28
  rescue TTY::Reader::InputInterrupt
29
29
  cli_fail(message: "\nSecret not set")
30
30
  end
31
31
 
32
- desc "unset NAMESPACE KEY", "Remove a secret"
32
+ desc 'unset NAMESPACE KEY', 'Remove a secret'
33
33
  def unset(namespace, key)
34
34
  validate_namespace(namespace)
35
35
 
36
36
  Chronicle::ETL::Secrets.unset(namespace, key)
37
- cli_exit(message: "Secret unset")
37
+ cli_exit(message: 'Secret unset')
38
38
  end
39
39
 
40
- desc "list", "List available secrets"
41
- def list(namespace=nil)
40
+ desc 'list', 'List available secrets'
41
+ def list(namespace = nil)
42
42
  all_secrets = Chronicle::ETL::Secrets.all(namespace)
43
- cli_exit(message: "No secrets are stored") unless all_secrets.any?
43
+ cli_exit(message: 'No secrets are stored') unless all_secrets.any?
44
44
 
45
45
  rows = []
46
46
  all_secrets.each do |namespace, secrets|
@@ -51,9 +51,9 @@ module Chronicle
51
51
  end
52
52
  end
53
53
 
54
- headers = ['namespace', 'key', 'value'].map { |h| h.upcase.bold }
54
+ headers = %w[namespace key value].map { |h| h.upcase.bold }
55
55
 
56
- puts "Available secrets:"
56
+ puts 'Available secrets:'
57
57
  table = TTY::Table.new(headers, rows)
58
58
  puts table.render(indent: 0, padding: [0, 2])
59
59
  end
@@ -61,7 +61,9 @@ module Chronicle
61
61
  private
62
62
 
63
63
  def validate_namespace(namespace)
64
- cli_fail(message: "'#{namespace}' is not a valid namespace") unless Chronicle::ETL::Secrets.valid_namespace_name?(namespace)
64
+ return if Chronicle::ETL::Secrets.valid_namespace_name?(namespace)
65
+
66
+ cli_fail(message: "'#{namespace}' is not a valid namespace")
65
67
  end
66
68
  end
67
69
  end
@@ -11,7 +11,7 @@ module Chronicle
11
11
  end
12
12
  list.sort! { |a, b| a[0] <=> b[0] }
13
13
 
14
- shell.say "COMMANDS".bold
14
+ shell.say 'COMMANDS'.bold
15
15
  shell.print_table(list, indent: 2, truncate: true)
16
16
  shell.say
17
17
  class_options_help(shell)
@@ -19,7 +19,7 @@ module Chronicle
19
19
 
20
20
  # Show docs with command:subcommand pattern.
21
21
  # For `help` command, don't use colon
22
- def self.banner(command, namespace = nil, subcommand = false)
22
+ def self.banner(command, _namespace = nil, _subcommand = false)
23
23
  if command.name == 'help'
24
24
  "#{subcommand_prefix} #{command.usage}"
25
25
  else
@@ -29,7 +29,9 @@ module Chronicle
29
29
 
30
30
  # Use subcommand classname to derive display name for subcommand
31
31
  def self.subcommand_prefix
32
- self.name.gsub(%r{.*::}, '').gsub(%r{^[A-Z]}) { |match| match[0].downcase }.gsub(%r{[A-Z]}) { |match| "-#{match[0].downcase}" }
32
+ name.gsub(/.*::/, '').gsub(/^[A-Z]/) do |match|
33
+ match[0].downcase
34
+ end.gsub(/[A-Z]/) { |match| "-#{match[0].downcase}" }
33
35
  end
34
36
  end
35
37
  end
@@ -1,4 +1,4 @@
1
- require "active_support/core_ext/hash/keys"
1
+ require 'active_support/core_ext/hash/keys'
2
2
  require 'fileutils'
3
3
  require 'yaml'
4
4
 
@@ -15,7 +15,7 @@ module Chronicle
15
15
  path = base.join("#{identifier}.yml")
16
16
  return {} unless path.exist?
17
17
 
18
- YAML.safe_load(File.read(path), symbolize_names: true, permitted_classes: [Symbol, Date, Time])
18
+ YAML.safe_load_file(path, symbolize_names: true, permitted_classes: [Symbol, Date, Time])
19
19
  end
20
20
 
21
21
  # Writes a hash as a yml config file
@@ -31,28 +31,38 @@ module Chronicle
31
31
  end
32
32
  end
33
33
 
34
+ # Returns path for a given config type and identifier
35
+ def path(type, identifier)
36
+ base = config_pathname_for_type(type)
37
+ base.join("#{identifier}.yml")
38
+ end
39
+
40
+ # Whether a config exists for a given type and identifier
34
41
  def exists?(type, identifier)
35
42
  base = config_pathname_for_type(type)
36
43
  path = base.join("#{identifier}.yml")
37
- return path.exist?
44
+ path.exist?
38
45
  end
39
46
 
40
47
  # Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
41
48
  def available_jobs
42
- Dir.glob(File.join(config_pathname_for_type("jobs"), "*.yml")).map do |filename|
43
- File.basename(filename, ".*")
49
+ Dir.glob(File.join(config_pathname_for_type('jobs'), '*.yml')).map do |filename|
50
+ File.basename(filename, '.*')
44
51
  end
45
52
  end
46
53
 
54
+ # Returns all configs available for a given type
47
55
  def available_configs(type)
48
- Dir.glob(File.join(config_pathname_for_type(type), "*.yml")).map do |filename|
49
- File.basename(filename, ".*")
56
+ Dir.glob(File.join(config_pathname_for_type(type), '*.yml')).map do |filename|
57
+ File.basename(filename, '.*')
50
58
  end
51
59
  end
52
60
 
53
61
  # Load a job definition from job config directory
54
62
  def read_job(job_name)
55
- load('jobs', job_name)
63
+ definition = load('jobs', job_name)
64
+ definition[:name] ||= job_name
65
+ definition
56
66
  end
57
67
 
58
68
  def config_pathname
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "ostruct"
4
- require "chronic_duration"
3
+ require 'ostruct'
4
+ require 'chronic_duration'
5
5
 
6
6
  module Chronicle
7
7
  module ETL
@@ -19,7 +19,7 @@ module Chronicle
19
19
  # t.config.when
20
20
  module Configurable
21
21
  # An individual setting for this Configurable
22
- Setting = Struct.new(:default, :required, :type)
22
+ Setting = Struct.new(:default, :required, :type, :description)
23
23
  private_constant :Setting
24
24
 
25
25
  # Collection of user-supplied options for this Configurable
@@ -62,7 +62,7 @@ module Chronicle
62
62
  # Do nothing with a given option if it's not a connector setting
63
63
  next unless setting
64
64
 
65
- @config[name] = coerced_value(setting, value)
65
+ @config[name] = coerced_value(setting, name, value)
66
66
  end
67
67
  validate_config
68
68
  options
@@ -84,10 +84,17 @@ module Chronicle
84
84
  raise Chronicle::ETL::ConnectorConfigurationError, "Missing options: #{missing}" if missing.count.positive?
85
85
  end
86
86
 
87
- def coerced_value(setting, value)
87
+ def coerced_value(setting, name, value)
88
88
  setting.type ? __send__("coerce_#{setting.type}", value) : value
89
89
  rescue StandardError
90
- raise(Chronicle::ETL::ConnectorConfigurationError, "Could not coerce #{value} into a #{setting.type}")
90
+ raise(
91
+ Chronicle::ETL::ConnectorConfigurationError,
92
+ "Could not convert value '#{value}' into a #{setting.type} for setting '#{name}'"
93
+ )
94
+ end
95
+
96
+ def coerce_hash(value)
97
+ value.is_a?(Hash) ? value : {}
91
98
  end
92
99
 
93
100
  def coerce_string(value)
@@ -101,12 +108,16 @@ module Chronicle
101
108
 
102
109
  def coerce_boolean(value)
103
110
  if value.is_a?(String)
104
- value.downcase == "true"
111
+ value.downcase == 'true'
105
112
  else
106
113
  value
107
114
  end
108
115
  end
109
116
 
117
+ def coerce_array(value)
118
+ value.is_a?(Array) ? value : [value]
119
+ end
120
+
110
121
  def coerce_time(value)
111
122
  # parsing yml files might result in us getting Date objects
112
123
  # we convert to DateTime first to ensure UTC
@@ -138,8 +149,8 @@ module Chronicle
138
149
  # setting :when, type: :date, required: true
139
150
  #
140
151
  # @see ::Chronicle::ETL::Configurable
141
- def setting(name, default: nil, required: false, type: nil)
142
- s = Setting.new(default, required, type)
152
+ def setting(name, default: nil, required: false, type: nil, description: nil)
153
+ s = Setting.new(default, required, type, description)
143
154
  settings[name] = s
144
155
  end
145
156
 
@@ -26,6 +26,7 @@ module Chronicle
26
26
  attr_reader :name
27
27
 
28
28
  def initialize(name)
29
+ super
29
30
  @name = name
30
31
  end
31
32
  end
@@ -51,10 +52,9 @@ module Chronicle
51
52
 
52
53
  class ExtractionError < Error; end
53
54
 
54
- class SerializationError < Error; end
55
-
56
55
  class TransformationError < Error; end
57
-
58
56
  class UntransformableRecordError < TransformationError; end
57
+
58
+ class LoaderError < Error; end
59
59
  end
60
60
  end
@@ -1,11 +1,21 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  class Extraction
4
- attr_accessor :data, :meta
6
+ attr_accessor :data, :meta, :source, :type, :strategy, :extractor
5
7
 
6
- def initialize(data: {}, meta: {})
8
+ def initialize(data: {}, meta: {}, source: nil, type: nil, strategy: nil, extractor: nil)
7
9
  @data = data
8
10
  @meta = meta
11
+ @source = source
12
+ @type = type
13
+ @strategy = strategy
14
+ @extractor = extractor
15
+ end
16
+
17
+ def to_h
18
+ { data: @data, meta: @meta, source: @source }
9
19
  end
10
20
  end
11
21
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'csv'
2
4
 
3
5
  module Chronicle
@@ -6,6 +8,7 @@ module Chronicle
6
8
  include Extractors::Helpers::InputReader
7
9
 
8
10
  register_connector do |r|
11
+ r.identifier = :csv
9
12
  r.description = 'CSV'
10
13
  end
11
14
 
@@ -33,6 +36,12 @@ module Chronicle
33
36
 
34
37
  private
35
38
 
39
+ def all_rows
40
+ @csvs.reduce([]) do |all_rows, csv|
41
+ all_rows + csv.to_a.map(&:to_h)
42
+ end
43
+ end
44
+
36
45
  def prepare_sources
37
46
  @csvs = []
38
47
  read_input do |csv_data|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'chronicle/etl'
2
4
 
3
5
  module Chronicle
@@ -21,7 +23,7 @@ module Chronicle
21
23
  apply_options(options)
22
24
  end
23
25
 
24
- # Hook called before #extract. Useful for gathering data, initailizing proxies, etc
26
+ # Hook called before #extract. Useful for gathering data, initializing proxies, etc
25
27
  def prepare; end
26
28
 
27
29
  # An optional method to calculate how many records there are to extract. Used primarily for
@@ -33,7 +35,18 @@ module Chronicle
33
35
  raise NotImplementedError
34
36
  end
35
37
 
36
- private
38
+ protected
39
+
40
+ def build_extraction(data:, meta: nil, source: nil, type: nil, strategy: nil)
41
+ Extraction.new(
42
+ extractor: self.class,
43
+ data: data,
44
+ meta: meta,
45
+ source: source || self.class.connector_registration.source,
46
+ type: type || self.class.connector_registration.type,
47
+ strategy: strategy || self.class.connector_registration.strategy
48
+ )
49
+ end
37
50
 
38
51
  # TODO: reimplement this
39
52
  # def handle_continuation
@@ -1,16 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'pathname'
2
4
 
3
5
  module Chronicle
4
6
  module ETL
5
7
  # Return filenames that match a pattern in a directory
6
8
  class FileExtractor < Chronicle::ETL::Extractor
7
-
8
9
  register_connector do |r|
10
+ r.identifier = :file
9
11
  r.description = 'file or directory of files'
10
12
  end
11
13
 
12
14
  setting :input, default: ['.']
13
- setting :dir_glob_pattern, default: "**/*"
15
+ setting :dir_glob_pattern, default: '**/*'
14
16
  setting :larger_than
15
17
  setting :smaller_than
16
18
 
@@ -32,7 +34,7 @@ module Chronicle
32
34
 
33
35
  def gather_files
34
36
  roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
35
- raise(ExtractionError, "Input must exist") unless roots.all?(&:exist?)
37
+ raise(ExtractionError, 'Input must exist') unless roots.all?(&:exist?)
36
38
 
37
39
  directories, files = roots.partition(&:directory?)
38
40
 
@@ -37,7 +37,7 @@ module Chronicle
37
37
  elsif read_from_stdin?
38
38
  yield $stdin.read, $stdin
39
39
  else
40
- raise ExtractionError, "No input files or stdin provided"
40
+ raise ExtractionError, 'No input files or stdin provided'
41
41
  end
42
42
  end
43
43
 
@@ -48,7 +48,7 @@ module Chronicle
48
48
  elsif read_from_stdin?
49
49
  lines_from_stdin(&block)
50
50
  else
51
- raise ExtractionError, "No input files or stdin provided"
51
+ raise ExtractionError, 'No input files or stdin provided'
52
52
  end
53
53
  end
54
54
 
@@ -1,18 +1,23 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  class JSONExtractor < Chronicle::ETL::Extractor
4
6
  include Extractors::Helpers::InputReader
5
7
 
6
8
  register_connector do |r|
9
+ r.identifier = :json
7
10
  r.description = 'JSON'
8
11
  end
9
12
 
10
13
  setting :jsonl, default: true, type: :boolean
14
+ setting :path, default: nil, type: :string
11
15
 
12
16
  def prepare
13
17
  @jsons = []
14
18
  load_input do |input|
15
- @jsons << parse_data(input)
19
+ data = parse_data(input)
20
+ @jsons += [data].flatten
16
21
  end
17
22
  end
18
23
 
@@ -28,10 +33,15 @@ module Chronicle
28
33
 
29
34
  private
30
35
 
31
- def parse_data data
32
- JSON.parse(data)
36
+ def parse_data(data)
37
+ parsed_data = JSON.parse(data)
38
+ if @config.path
39
+ parsed_data.dig(*@config.path.split('.'))
40
+ else
41
+ parsed_data
42
+ end
33
43
  rescue JSON::ParserError
34
- raise Chronicle::ETL::ExtractionError, "Could not parse JSON"
44
+ raise Chronicle::ETL::ExtractionError, 'Could not parse JSON'
35
45
  end
36
46
 
37
47
  def load_input(&block)
@@ -1,7 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  class StdinExtractor < Chronicle::ETL::Extractor
4
6
  register_connector do |r|
7
+ r.identifier = :stdin
5
8
  r.description = 'stdin'
6
9
  end
7
10
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'forwardable'
2
4
 
3
5
  module Chronicle
@@ -12,13 +14,13 @@ module Chronicle
12
14
  def_delegators :@job_definition, :dry_run?
13
15
 
14
16
  attr_accessor :name,
15
- :extractor_klass,
16
- :extractor_options,
17
- :transformer_klass,
18
- :transformer_options,
19
- :loader_klass,
20
- :loader_options,
21
- :job_definition
17
+ :extractor_klass,
18
+ :extractor_options,
19
+ :transformer_klasses,
20
+ :transformer_options,
21
+ :loader_klass,
22
+ :loader_options,
23
+ :job_definition
22
24
 
23
25
  # TODO: build a proper id system
24
26
  alias id name
@@ -39,9 +41,10 @@ module Chronicle
39
41
  @extractor_klass.new(@extractor_options)
40
42
  end
41
43
 
42
- def instantiate_transformer(extraction)
43
- @transformer_klass = @job_definition.transformer_klass
44
- @transformer_klass.new(extraction, @transformer_options)
44
+ def instantiate_transformers
45
+ @job_definition.transformer_klasses.each_with_index.map do |klass, i|
46
+ klass.new(@transformer_options[i] || {})
47
+ end
45
48
  end
46
49
 
47
50
  def instantiate_loader
@@ -51,20 +54,35 @@ module Chronicle
51
54
 
52
55
  def save_log?
53
56
  # TODO: this needs more nuance
54
- return !id.nil?
57
+ !id.nil?
55
58
  end
56
59
 
57
60
  def to_s
58
- output = "Job"
59
- output += " '#{name}'".bold if name
60
- output += "\n"
61
- output += " → Extracting from #{@job_definition.extractor_klass.description}\n"
62
- output += " → Transforming #{@job_definition.transformer_klass.description}\n"
63
- output += " → Loading to #{@job_definition.loader_klass.description}\n"
61
+ output = "Job summary\n".upcase.bold
62
+ # output = ""
63
+ output += "#{name}:\n" if name
64
+ output += "→ Extracting from #{@job_definition.extractor_klass.description}\n"
65
+ output += options_to_s(@extractor_options)
66
+
67
+ @job_definition.transformer_klasses.each do |klass|
68
+ output += "→ Transforming #{klass.description}\n"
69
+ end
70
+ # TODO: transformer options
71
+ output += "→ Loading to #{@job_definition.loader_klass.description}\n"
72
+ output += options_to_s(@loader_options)
73
+ output
64
74
  end
65
75
 
66
76
  private
67
77
 
78
+ def options_to_s(options, indent: 4)
79
+ output = ''
80
+ options.each do |k, v|
81
+ output += "#{' ' * indent}#{k.to_s.light_blue}: #{v}\n"
82
+ end
83
+ output
84
+ end
85
+
68
86
  def set_continuation
69
87
  continuation = Chronicle::ETL::JobLogger.load_latest(@id)
70
88
  @extractor_options[:continuation] = continuation
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'active_support/core_ext/hash/deep_merge'
2
4
 
3
5
  module Chronicle
@@ -9,12 +11,14 @@ module Chronicle
9
11
  name: 'stdin',
10
12
  options: {}
11
13
  },
12
- transformer: {
13
- name: 'null',
14
- options: {}
15
- },
14
+ transformers: [
15
+ {
16
+ name: 'null',
17
+ options: {}
18
+ }
19
+ ],
16
20
  loader: {
17
- name: 'table',
21
+ name: 'json',
18
22
  options: {}
19
23
  }
20
24
  }.freeze
@@ -22,7 +26,7 @@ module Chronicle
22
26
  attr_reader :errors
23
27
  attr_accessor :definition
24
28
 
25
- def initialize()
29
+ def initialize
26
30
  @definition = SKELETON_DEFINITION
27
31
  end
28
32
 
@@ -34,12 +38,12 @@ module Chronicle
34
38
  def validate
35
39
  @errors = {}
36
40
 
37
- Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
38
- __send__("#{phase}_klass".to_sym)
39
- rescue Chronicle::ETL::PluginError => e
40
- @errors[:plugins] ||= []
41
- @errors[:plugins] << e
42
- end
41
+ extractor_klass
42
+ transformer_klasses
43
+ loader_klass
44
+ rescue Chronicle::ETL::PluginError => e
45
+ @errors[:plugins] ||= []
46
+ @errors[:plugins] << e
43
47
  end
44
48
 
45
49
  def plugins_missing?
@@ -48,12 +52,11 @@ module Chronicle
48
52
  return false unless @errors[:plugins]&.any?
49
53
 
50
54
  @errors[:plugins]
51
- .filter { |e| e.instance_of?(Chronicle::ETL::PluginNotInstalledError) }
52
- .any?
55
+ .any? { |e| e.instance_of?(Chronicle::ETL::PluginNotInstalledError) }
53
56
  end
54
57
 
55
58
  def validate!
56
- raise(Chronicle::ETL::JobDefinitionError.new(self), "Job definition is invalid") unless valid?
59
+ raise(Chronicle::ETL::JobDefinitionError.new(self), 'Job definition is invalid') unless valid?
57
60
 
58
61
  true
59
62
  end
@@ -66,19 +69,20 @@ module Chronicle
66
69
 
67
70
  # For each connector in this job, mix in secrets into the options
68
71
  def apply_default_secrets
69
- Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
72
+ # FIXME: handle transformer secrets
73
+ %i[extractor loader].each do |phase|
70
74
  # If the options have a `secrets` key, we look up those secrets and
71
- # mix them in. If not, use the connector's plugin name and look up
75
+ # mix them in. If not, use the connector's plugin name and look up
72
76
  # secrets with the same namespace
73
77
  if @definition[phase][:options][:secrets]
74
78
  namespace = @definition[phase][:options][:secrets]
75
79
  else
76
80
  # We don't want to do this lookup for built-in connectors
77
- next if __send__("#{phase}_klass".to_sym).connector_registration.built_in?
81
+ next if __send__(:"#{phase}_klass").connector_registration.built_in?
78
82
 
79
83
  # infer plugin name from connector name and use it for secrets
80
84
  # namesepace
81
- namespace = @definition[phase][:name].split(":").first
85
+ namespace = @definition[phase][:name].split(':').first
82
86
  end
83
87
 
84
88
  # Reverse merge secrets into connector's options (we want to preserve
@@ -98,15 +102,17 @@ module Chronicle
98
102
  end
99
103
 
100
104
  def extractor_klass
101
- load_klass(:extractor, @definition[:extractor][:name])
105
+ find_connector_klass(:extractor, @definition[:extractor][:name])
102
106
  end
103
107
 
104
- def transformer_klass
105
- load_klass(:transformer, @definition[:transformer][:name])
108
+ def transformer_klasses
109
+ @definition[:transformers].map do |transformer|
110
+ find_connector_klass(:transformer, transformer[:name])
111
+ end
106
112
  end
107
113
 
108
114
  def loader_klass
109
- load_klass(:loader, @definition[:loader][:name])
115
+ find_connector_klass(:loader, @definition[:loader][:name])
110
116
  end
111
117
 
112
118
  def extractor_options
@@ -114,7 +120,9 @@ module Chronicle
114
120
  end
115
121
 
116
122
  def transformer_options
117
- @definition[:transformer][:options]
123
+ @definition[:transformers].map do |transformer|
124
+ transformer[:options]
125
+ end
118
126
  end
119
127
 
120
128
  def loader_options
@@ -123,12 +131,16 @@ module Chronicle
123
131
 
124
132
  private
125
133
 
126
- def load_klass(phase, identifier)
134
+ def find_schema_transformer_klass(source_klass, target)
135
+ Chronicle::ETL::Registry::Connectors.find_converter_for_source(source_klass, target).klass
136
+ end
137
+
138
+ def find_connector_klass(phase, identifier)
127
139
  Chronicle::ETL::Registry::Connectors.find_by_phase_and_identifier(phase, identifier).klass
128
140
  end
129
141
 
130
142
  def load_credentials
131
- Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
143
+ %i[extractor loader].each do |phase|
132
144
  credentials_name = @definition[phase].dig(:options, :credentials)
133
145
  if credentials_name
134
146
  credentials = Chronicle::ETL::Config.load_credentials(credentials_name)