chronicle-etl 0.5.5 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +15 -25
  3. data/.rubocop.yml +2 -44
  4. data/Gemfile +2 -2
  5. data/Guardfile +3 -3
  6. data/README.md +75 -68
  7. data/Rakefile +2 -2
  8. data/bin/console +4 -5
  9. data/chronicle-etl.gemspec +51 -49
  10. data/exe/chronicle-etl +1 -1
  11. data/lib/chronicle/etl/authorizer.rb +3 -4
  12. data/lib/chronicle/etl/cli/authorizations.rb +8 -6
  13. data/lib/chronicle/etl/cli/connectors.rb +7 -7
  14. data/lib/chronicle/etl/cli/jobs.rb +130 -53
  15. data/lib/chronicle/etl/cli/main.rb +29 -29
  16. data/lib/chronicle/etl/cli/plugins.rb +14 -15
  17. data/lib/chronicle/etl/cli/secrets.rb +14 -12
  18. data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
  19. data/lib/chronicle/etl/config.rb +18 -8
  20. data/lib/chronicle/etl/configurable.rb +20 -9
  21. data/lib/chronicle/etl/exceptions.rb +3 -3
  22. data/lib/chronicle/etl/extraction.rb +12 -2
  23. data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
  24. data/lib/chronicle/etl/extractors/extractor.rb +15 -2
  25. data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
  26. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
  27. data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
  28. data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
  29. data/lib/chronicle/etl/job.rb +35 -17
  30. data/lib/chronicle/etl/job_definition.rb +38 -26
  31. data/lib/chronicle/etl/job_log.rb +14 -16
  32. data/lib/chronicle/etl/job_logger.rb +4 -4
  33. data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
  34. data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
  35. data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
  36. data/lib/chronicle/etl/loaders/loader.rb +0 -17
  37. data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
  38. data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
  39. data/lib/chronicle/etl/logger.rb +2 -2
  40. data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
  41. data/lib/chronicle/etl/record.rb +15 -0
  42. data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
  43. data/lib/chronicle/etl/registry/connectors.rb +93 -36
  44. data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
  45. data/lib/chronicle/etl/registry/plugins.rb +27 -19
  46. data/lib/chronicle/etl/runner.rb +158 -128
  47. data/lib/chronicle/etl/secrets.rb +4 -4
  48. data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
  49. data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
  50. data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
  51. data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
  52. data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
  53. data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
  54. data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
  55. data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
  56. data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
  57. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
  58. data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
  59. data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
  60. data/lib/chronicle/etl/transformers/transformer.rb +63 -41
  61. data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
  62. data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
  63. data/lib/chronicle/etl/version.rb +1 -1
  64. data/lib/chronicle/etl.rb +6 -8
  65. metadata +49 -47
  66. data/lib/chronicle/etl/models/activity.rb +0 -15
  67. data/lib/chronicle/etl/models/attachment.rb +0 -14
  68. data/lib/chronicle/etl/models/base.rb +0 -122
  69. data/lib/chronicle/etl/models/entity.rb +0 -29
  70. data/lib/chronicle/etl/models/raw.rb +0 -26
  71. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
  72. data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
  73. data/lib/chronicle/etl/serializers/serializer.rb +0 -28
  74. data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
  75. data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
  76. data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "tty-prompt"
3
+ require 'tty-prompt'
4
4
 
5
5
  module Chronicle
6
6
  module ETL
@@ -10,8 +10,8 @@ module Chronicle
10
10
  default_task 'list'
11
11
  namespace :secrets
12
12
 
13
- desc "set NAMESPACE KEY [VALUE]", "Add a secret. VALUE can be set as argument or from stdin"
14
- def set(namespace, key, value=nil)
13
+ desc 'set NAMESPACE KEY [VALUE]', 'Add a secret. VALUE can be set as argument or from stdin'
14
+ def set(namespace, key, value = nil)
15
15
  validate_namespace(namespace)
16
16
 
17
17
  if value
@@ -24,23 +24,23 @@ module Chronicle
24
24
  end
25
25
 
26
26
  Chronicle::ETL::Secrets.set(namespace, key, value.strip)
27
- cli_exit(message: "Secret set")
27
+ cli_exit(message: 'Secret set')
28
28
  rescue TTY::Reader::InputInterrupt
29
29
  cli_fail(message: "\nSecret not set")
30
30
  end
31
31
 
32
- desc "unset NAMESPACE KEY", "Remove a secret"
32
+ desc 'unset NAMESPACE KEY', 'Remove a secret'
33
33
  def unset(namespace, key)
34
34
  validate_namespace(namespace)
35
35
 
36
36
  Chronicle::ETL::Secrets.unset(namespace, key)
37
- cli_exit(message: "Secret unset")
37
+ cli_exit(message: 'Secret unset')
38
38
  end
39
39
 
40
- desc "list", "List available secrets"
41
- def list(namespace=nil)
40
+ desc 'list', 'List available secrets'
41
+ def list(namespace = nil)
42
42
  all_secrets = Chronicle::ETL::Secrets.all(namespace)
43
- cli_exit(message: "No secrets are stored") unless all_secrets.any?
43
+ cli_exit(message: 'No secrets are stored') unless all_secrets.any?
44
44
 
45
45
  rows = []
46
46
  all_secrets.each do |namespace, secrets|
@@ -51,9 +51,9 @@ module Chronicle
51
51
  end
52
52
  end
53
53
 
54
- headers = ['namespace', 'key', 'value'].map { |h| h.upcase.bold }
54
+ headers = %w[namespace key value].map { |h| h.upcase.bold }
55
55
 
56
- puts "Available secrets:"
56
+ puts 'Available secrets:'
57
57
  table = TTY::Table.new(headers, rows)
58
58
  puts table.render(indent: 0, padding: [0, 2])
59
59
  end
@@ -61,7 +61,9 @@ module Chronicle
61
61
  private
62
62
 
63
63
  def validate_namespace(namespace)
64
- cli_fail(message: "'#{namespace}' is not a valid namespace") unless Chronicle::ETL::Secrets.valid_namespace_name?(namespace)
64
+ return if Chronicle::ETL::Secrets.valid_namespace_name?(namespace)
65
+
66
+ cli_fail(message: "'#{namespace}' is not a valid namespace")
65
67
  end
66
68
  end
67
69
  end
@@ -11,7 +11,7 @@ module Chronicle
11
11
  end
12
12
  list.sort! { |a, b| a[0] <=> b[0] }
13
13
 
14
- shell.say "COMMANDS".bold
14
+ shell.say 'COMMANDS'.bold
15
15
  shell.print_table(list, indent: 2, truncate: true)
16
16
  shell.say
17
17
  class_options_help(shell)
@@ -19,7 +19,7 @@ module Chronicle
19
19
 
20
20
  # Show docs with command:subcommand pattern.
21
21
  # For `help` command, don't use colon
22
- def self.banner(command, namespace = nil, subcommand = false)
22
+ def self.banner(command, _namespace = nil, _subcommand = false)
23
23
  if command.name == 'help'
24
24
  "#{subcommand_prefix} #{command.usage}"
25
25
  else
@@ -29,7 +29,9 @@ module Chronicle
29
29
 
30
30
  # Use subcommand classname to derive display name for subcommand
31
31
  def self.subcommand_prefix
32
- self.name.gsub(%r{.*::}, '').gsub(%r{^[A-Z]}) { |match| match[0].downcase }.gsub(%r{[A-Z]}) { |match| "-#{match[0].downcase}" }
32
+ name.gsub(/.*::/, '').gsub(/^[A-Z]/) do |match|
33
+ match[0].downcase
34
+ end.gsub(/[A-Z]/) { |match| "-#{match[0].downcase}" }
33
35
  end
34
36
  end
35
37
  end
@@ -1,4 +1,4 @@
1
- require "active_support/core_ext/hash/keys"
1
+ require 'active_support/core_ext/hash/keys'
2
2
  require 'fileutils'
3
3
  require 'yaml'
4
4
 
@@ -15,7 +15,7 @@ module Chronicle
15
15
  path = base.join("#{identifier}.yml")
16
16
  return {} unless path.exist?
17
17
 
18
- YAML.safe_load(File.read(path), symbolize_names: true, permitted_classes: [Symbol, Date, Time])
18
+ YAML.safe_load_file(path, symbolize_names: true, permitted_classes: [Symbol, Date, Time])
19
19
  end
20
20
 
21
21
  # Writes a hash as a yml config file
@@ -31,28 +31,38 @@ module Chronicle
31
31
  end
32
32
  end
33
33
 
34
+ # Returns path for a given config type and identifier
35
+ def path(type, identifier)
36
+ base = config_pathname_for_type(type)
37
+ base.join("#{identifier}.yml")
38
+ end
39
+
40
+ # Whether a config exists for a given type and identifier
34
41
  def exists?(type, identifier)
35
42
  base = config_pathname_for_type(type)
36
43
  path = base.join("#{identifier}.yml")
37
- return path.exist?
44
+ path.exist?
38
45
  end
39
46
 
40
47
  # Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
41
48
  def available_jobs
42
- Dir.glob(File.join(config_pathname_for_type("jobs"), "*.yml")).map do |filename|
43
- File.basename(filename, ".*")
49
+ Dir.glob(File.join(config_pathname_for_type('jobs'), '*.yml')).map do |filename|
50
+ File.basename(filename, '.*')
44
51
  end
45
52
  end
46
53
 
54
+ # Returns all configs available for a given type
47
55
  def available_configs(type)
48
- Dir.glob(File.join(config_pathname_for_type(type), "*.yml")).map do |filename|
49
- File.basename(filename, ".*")
56
+ Dir.glob(File.join(config_pathname_for_type(type), '*.yml')).map do |filename|
57
+ File.basename(filename, '.*')
50
58
  end
51
59
  end
52
60
 
53
61
  # Load a job definition from job config directory
54
62
  def read_job(job_name)
55
- load('jobs', job_name)
63
+ definition = load('jobs', job_name)
64
+ definition[:name] ||= job_name
65
+ definition
56
66
  end
57
67
 
58
68
  def config_pathname
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "ostruct"
4
- require "chronic_duration"
3
+ require 'ostruct'
4
+ require 'chronic_duration'
5
5
 
6
6
  module Chronicle
7
7
  module ETL
@@ -19,7 +19,7 @@ module Chronicle
19
19
  # t.config.when
20
20
  module Configurable
21
21
  # An individual setting for this Configurable
22
- Setting = Struct.new(:default, :required, :type)
22
+ Setting = Struct.new(:default, :required, :type, :description)
23
23
  private_constant :Setting
24
24
 
25
25
  # Collection of user-supplied options for this Configurable
@@ -62,7 +62,7 @@ module Chronicle
62
62
  # Do nothing with a given option if it's not a connector setting
63
63
  next unless setting
64
64
 
65
- @config[name] = coerced_value(setting, value)
65
+ @config[name] = coerced_value(setting, name, value)
66
66
  end
67
67
  validate_config
68
68
  options
@@ -84,10 +84,17 @@ module Chronicle
84
84
  raise Chronicle::ETL::ConnectorConfigurationError, "Missing options: #{missing}" if missing.count.positive?
85
85
  end
86
86
 
87
- def coerced_value(setting, value)
87
+ def coerced_value(setting, name, value)
88
88
  setting.type ? __send__("coerce_#{setting.type}", value) : value
89
89
  rescue StandardError
90
- raise(Chronicle::ETL::ConnectorConfigurationError, "Could not coerce #{value} into a #{setting.type}")
90
+ raise(
91
+ Chronicle::ETL::ConnectorConfigurationError,
92
+ "Could not convert value '#{value}' into a #{setting.type} for setting '#{name}'"
93
+ )
94
+ end
95
+
96
+ def coerce_hash(value)
97
+ value.is_a?(Hash) ? value : {}
91
98
  end
92
99
 
93
100
  def coerce_string(value)
@@ -101,12 +108,16 @@ module Chronicle
101
108
 
102
109
  def coerce_boolean(value)
103
110
  if value.is_a?(String)
104
- value.downcase == "true"
111
+ value.downcase == 'true'
105
112
  else
106
113
  value
107
114
  end
108
115
  end
109
116
 
117
+ def coerce_array(value)
118
+ value.is_a?(Array) ? value : [value]
119
+ end
120
+
110
121
  def coerce_time(value)
111
122
  # parsing yml files might result in us getting Date objects
112
123
  # we convert to DateTime first to to ensure UTC
@@ -138,8 +149,8 @@ module Chronicle
138
149
  # setting :when, type: :date, required: true
139
150
  #
140
151
  # @see ::Chronicle::ETL::Configurable
141
- def setting(name, default: nil, required: false, type: nil)
142
- s = Setting.new(default, required, type)
152
+ def setting(name, default: nil, required: false, type: nil, description: nil)
153
+ s = Setting.new(default, required, type, description)
143
154
  settings[name] = s
144
155
  end
145
156
 
@@ -26,6 +26,7 @@ module Chronicle
26
26
  attr_reader :name
27
27
 
28
28
  def initialize(name)
29
+ super
29
30
  @name = name
30
31
  end
31
32
  end
@@ -51,10 +52,9 @@ module Chronicle
51
52
 
52
53
  class ExtractionError < Error; end
53
54
 
54
- class SerializationError < Error; end
55
-
56
55
  class TransformationError < Error; end
57
-
58
56
  class UntransformableRecordError < TransformationError; end
57
+
58
+ class LoaderError < Error; end
59
59
  end
60
60
  end
@@ -1,11 +1,21 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  class Extraction
4
- attr_accessor :data, :meta
6
+ attr_accessor :data, :meta, :source, :type, :strategy, :extractor
5
7
 
6
- def initialize(data: {}, meta: {})
8
+ def initialize(data: {}, meta: {}, source: nil, type: nil, strategy: nil, extractor: nil)
7
9
  @data = data
8
10
  @meta = meta
11
+ @source = source
12
+ @type = type
13
+ @strategy = strategy
14
+ @extractor = extractor
15
+ end
16
+
17
+ def to_h
18
+ { data: @data, meta: @meta, source: @source }
9
19
  end
10
20
  end
11
21
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'csv'
2
4
 
3
5
  module Chronicle
@@ -6,6 +8,7 @@ module Chronicle
6
8
  include Extractors::Helpers::InputReader
7
9
 
8
10
  register_connector do |r|
11
+ r.identifier = :csv
9
12
  r.description = 'CSV'
10
13
  end
11
14
 
@@ -33,6 +36,12 @@ module Chronicle
33
36
 
34
37
  private
35
38
 
39
+ def all_rows
40
+ @csvs.reduce([]) do |all_rows, csv|
41
+ all_rows + csv.to_a.map(&:to_h)
42
+ end
43
+ end
44
+
36
45
  def prepare_sources
37
46
  @csvs = []
38
47
  read_input do |csv_data|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'chronicle/etl'
2
4
 
3
5
  module Chronicle
@@ -21,7 +23,7 @@ module Chronicle
21
23
  apply_options(options)
22
24
  end
23
25
 
24
- # Hook called before #extract. Useful for gathering data, initailizing proxies, etc
26
+ # Hook called before #extract. Useful for gathering data, initializing proxies, etc
25
27
  def prepare; end
26
28
 
27
29
  # An optional method to calculate how many records there are to extract. Used primarily for
@@ -33,7 +35,18 @@ module Chronicle
33
35
  raise NotImplementedError
34
36
  end
35
37
 
36
- private
38
+ protected
39
+
40
+ def build_extraction(data:, meta: nil, source: nil, type: nil, strategy: nil)
41
+ Extraction.new(
42
+ extractor: self.class,
43
+ data: data,
44
+ meta: meta,
45
+ source: source || self.class.connector_registration.source,
46
+ type: type || self.class.connector_registration.type,
47
+ strategy: strategy || self.class.connector_registration.strategy
48
+ )
49
+ end
37
50
 
38
51
  # TODO: reimplemenet this
39
52
  # def handle_continuation
@@ -1,16 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'pathname'
2
4
 
3
5
  module Chronicle
4
6
  module ETL
5
7
  # Return filenames that match a pattern in a directory
6
8
  class FileExtractor < Chronicle::ETL::Extractor
7
-
8
9
  register_connector do |r|
10
+ r.identifier = :file
9
11
  r.description = 'file or directory of files'
10
12
  end
11
13
 
12
14
  setting :input, default: ['.']
13
- setting :dir_glob_pattern, default: "**/*"
15
+ setting :dir_glob_pattern, default: '**/*'
14
16
  setting :larger_than
15
17
  setting :smaller_than
16
18
 
@@ -32,7 +34,7 @@ module Chronicle
32
34
 
33
35
  def gather_files
34
36
  roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
35
- raise(ExtractionError, "Input must exist") unless roots.all?(&:exist?)
37
+ raise(ExtractionError, 'Input must exist') unless roots.all?(&:exist?)
36
38
 
37
39
  directories, files = roots.partition(&:directory?)
38
40
 
@@ -37,7 +37,7 @@ module Chronicle
37
37
  elsif read_from_stdin?
38
38
  yield $stdin.read, $stdin
39
39
  else
40
- raise ExtractionError, "No input files or stdin provided"
40
+ raise ExtractionError, 'No input files or stdin provided'
41
41
  end
42
42
  end
43
43
 
@@ -48,7 +48,7 @@ module Chronicle
48
48
  elsif read_from_stdin?
49
49
  lines_from_stdin(&block)
50
50
  else
51
- raise ExtractionError, "No input files or stdin provided"
51
+ raise ExtractionError, 'No input files or stdin provided'
52
52
  end
53
53
  end
54
54
 
@@ -1,18 +1,23 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  class JSONExtractor < Chronicle::ETL::Extractor
4
6
  include Extractors::Helpers::InputReader
5
7
 
6
8
  register_connector do |r|
9
+ r.identifier = :json
7
10
  r.description = 'JSON'
8
11
  end
9
12
 
10
13
  setting :jsonl, default: true, type: :boolean
14
+ setting :path, default: nil, type: :string
11
15
 
12
16
  def prepare
13
17
  @jsons = []
14
18
  load_input do |input|
15
- @jsons << parse_data(input)
19
+ data = parse_data(input)
20
+ @jsons += [data].flatten
16
21
  end
17
22
  end
18
23
 
@@ -28,10 +33,15 @@ module Chronicle
28
33
 
29
34
  private
30
35
 
31
- def parse_data data
32
- JSON.parse(data)
36
+ def parse_data(data)
37
+ parsed_data = JSON.parse(data)
38
+ if @config.path
39
+ parsed_data.dig(*@config.path.split('.'))
40
+ else
41
+ parsed_data
42
+ end
33
43
  rescue JSON::ParserError
34
- raise Chronicle::ETL::ExtractionError, "Could not parse JSON"
44
+ raise Chronicle::ETL::ExtractionError, 'Could not parse JSON'
35
45
  end
36
46
 
37
47
  def load_input(&block)
@@ -1,7 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  class StdinExtractor < Chronicle::ETL::Extractor
4
6
  register_connector do |r|
7
+ r.identifier = :stdin
5
8
  r.description = 'stdin'
6
9
  end
7
10
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'forwardable'
2
4
 
3
5
  module Chronicle
@@ -12,13 +14,13 @@ module Chronicle
12
14
  def_delegators :@job_definition, :dry_run?
13
15
 
14
16
  attr_accessor :name,
15
- :extractor_klass,
16
- :extractor_options,
17
- :transformer_klass,
18
- :transformer_options,
19
- :loader_klass,
20
- :loader_options,
21
- :job_definition
17
+ :extractor_klass,
18
+ :extractor_options,
19
+ :transformer_klasses,
20
+ :transformer_options,
21
+ :loader_klass,
22
+ :loader_options,
23
+ :job_definition
22
24
 
23
25
  # TODO: build a proper id system
24
26
  alias id name
@@ -39,9 +41,10 @@ module Chronicle
39
41
  @extractor_klass.new(@extractor_options)
40
42
  end
41
43
 
42
- def instantiate_transformer(extraction)
43
- @transformer_klass = @job_definition.transformer_klass
44
- @transformer_klass.new(extraction, @transformer_options)
44
+ def instantiate_transformers
45
+ @job_definition.transformer_klasses.each_with_index.map do |klass, i|
46
+ klass.new(@transformer_options[i] || {})
47
+ end
45
48
  end
46
49
 
47
50
  def instantiate_loader
@@ -51,20 +54,35 @@ module Chronicle
51
54
 
52
55
  def save_log?
53
56
  # TODO: this needs more nuance
54
- return !id.nil?
57
+ !id.nil?
55
58
  end
56
59
 
57
60
  def to_s
58
- output = "Job"
59
- output += " '#{name}'".bold if name
60
- output += "\n"
61
- output += " → Extracting from #{@job_definition.extractor_klass.description}\n"
62
- output += " → Transforming #{@job_definition.transformer_klass.description}\n"
63
- output += " → Loading to #{@job_definition.loader_klass.description}\n"
61
+ output = "Job summary\n".upcase.bold
62
+ # output = ""
63
+ output += "#{name}:\n" if name
64
+ output += "→ Extracting from #{@job_definition.extractor_klass.description}\n"
65
+ output += options_to_s(@extractor_options)
66
+
67
+ @job_definition.transformer_klasses.each do |klass|
68
+ output += "→ Transforming #{klass.description}\n"
69
+ end
70
+ # TODO: transformer options
71
+ output += "→ Loading to #{@job_definition.loader_klass.description}\n"
72
+ output += options_to_s(@loader_options)
73
+ output
64
74
  end
65
75
 
66
76
  private
67
77
 
78
+ def options_to_s(options, indent: 4)
79
+ output = ''
80
+ options.each do |k, v|
81
+ output += "#{' ' * indent}#{k.to_s.light_blue}: #{v}\n"
82
+ end
83
+ output
84
+ end
85
+
68
86
  def set_continuation
69
87
  continuation = Chronicle::ETL::JobLogger.load_latest(@id)
70
88
  @extractor_options[:continuation] = continuation
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'active_support/core_ext/hash/deep_merge'
2
4
 
3
5
  module Chronicle
@@ -9,12 +11,14 @@ module Chronicle
9
11
  name: 'stdin',
10
12
  options: {}
11
13
  },
12
- transformer: {
13
- name: 'null',
14
- options: {}
15
- },
14
+ transformers: [
15
+ {
16
+ name: 'null',
17
+ options: {}
18
+ }
19
+ ],
16
20
  loader: {
17
- name: 'table',
21
+ name: 'json',
18
22
  options: {}
19
23
  }
20
24
  }.freeze
@@ -22,7 +26,7 @@ module Chronicle
22
26
  attr_reader :errors
23
27
  attr_accessor :definition
24
28
 
25
- def initialize()
29
+ def initialize
26
30
  @definition = SKELETON_DEFINITION
27
31
  end
28
32
 
@@ -34,12 +38,12 @@ module Chronicle
34
38
  def validate
35
39
  @errors = {}
36
40
 
37
- Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
38
- __send__("#{phase}_klass".to_sym)
39
- rescue Chronicle::ETL::PluginError => e
40
- @errors[:plugins] ||= []
41
- @errors[:plugins] << e
42
- end
41
+ extractor_klass
42
+ transformer_klasses
43
+ loader_klass
44
+ rescue Chronicle::ETL::PluginError => e
45
+ @errors[:plugins] ||= []
46
+ @errors[:plugins] << e
43
47
  end
44
48
 
45
49
  def plugins_missing?
@@ -48,12 +52,11 @@ module Chronicle
48
52
  return false unless @errors[:plugins]&.any?
49
53
 
50
54
  @errors[:plugins]
51
- .filter { |e| e.instance_of?(Chronicle::ETL::PluginNotInstalledError) }
52
- .any?
55
+ .any? { |e| e.instance_of?(Chronicle::ETL::PluginNotInstalledError) }
53
56
  end
54
57
 
55
58
  def validate!
56
- raise(Chronicle::ETL::JobDefinitionError.new(self), "Job definition is invalid") unless valid?
59
+ raise(Chronicle::ETL::JobDefinitionError.new(self), 'Job definition is invalid') unless valid?
57
60
 
58
61
  true
59
62
  end
@@ -66,19 +69,20 @@ module Chronicle
66
69
 
67
70
  # For each connector in this job, mix in secrets into the options
68
71
  def apply_default_secrets
69
- Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
72
+ # FIXME: handle transformer secrets
73
+ %i[extractor loader].each do |phase|
70
74
  # If the option have a `secrets` key, we look up those secrets and
71
- # mix them in. If not, use the connector's plugin name and look up
75
+ # mix them in. If not, use the connector's plugin name and look up
72
76
  # secrets with the same namespace
73
77
  if @definition[phase][:options][:secrets]
74
78
  namespace = @definition[phase][:options][:secrets]
75
79
  else
76
80
  # We don't want to do this lookup for built-in connectors
77
- next if __send__("#{phase}_klass".to_sym).connector_registration.built_in?
81
+ next if __send__(:"#{phase}_klass").connector_registration.built_in?
78
82
 
79
83
  # infer plugin name from connector name and use it for secrets
80
84
  # namesepace
81
- namespace = @definition[phase][:name].split(":").first
85
+ namespace = @definition[phase][:name].split(':').first
82
86
  end
83
87
 
84
88
  # Reverse merge secrets into connector's options (we want to preserve
@@ -98,15 +102,17 @@ module Chronicle
98
102
  end
99
103
 
100
104
  def extractor_klass
101
- load_klass(:extractor, @definition[:extractor][:name])
105
+ find_connector_klass(:extractor, @definition[:extractor][:name])
102
106
  end
103
107
 
104
- def transformer_klass
105
- load_klass(:transformer, @definition[:transformer][:name])
108
+ def transformer_klasses
109
+ @definition[:transformers].map do |transformer|
110
+ find_connector_klass(:transformer, transformer[:name])
111
+ end
106
112
  end
107
113
 
108
114
  def loader_klass
109
- load_klass(:loader, @definition[:loader][:name])
115
+ find_connector_klass(:loader, @definition[:loader][:name])
110
116
  end
111
117
 
112
118
  def extractor_options
@@ -114,7 +120,9 @@ module Chronicle
114
120
  end
115
121
 
116
122
  def transformer_options
117
- @definition[:transformer][:options]
123
+ @definition[:transformers].map do |transformer|
124
+ transformer[:options]
125
+ end
118
126
  end
119
127
 
120
128
  def loader_options
@@ -123,12 +131,16 @@ module Chronicle
123
131
 
124
132
  private
125
133
 
126
- def load_klass(phase, identifier)
134
+ def find_schema_transformer_klass(source_klass, target)
135
+ Chronicle::ETL::Registry::Connectors.find_converter_for_source(source_klass, target).klass
136
+ end
137
+
138
+ def find_connector_klass(phase, identifier)
127
139
  Chronicle::ETL::Registry::Connectors.find_by_phase_and_identifier(phase, identifier).klass
128
140
  end
129
141
 
130
142
  def load_credentials
131
- Chronicle::ETL::Registry::Connectors::PHASES.each do |phase|
143
+ %i[extractor loader].each do |phase|
132
144
  credentials_name = @definition[phase].dig(:options, :credentials)
133
145
  if credentials_name
134
146
  credentials = Chronicle::ETL::Config.load_credentials(credentials_name)