chronicle-etl 0.5.5 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +15 -25
  3. data/.rubocop.yml +2 -44
  4. data/Gemfile +2 -2
  5. data/Guardfile +3 -3
  6. data/README.md +75 -68
  7. data/Rakefile +2 -2
  8. data/bin/console +4 -5
  9. data/chronicle-etl.gemspec +51 -49
  10. data/exe/chronicle-etl +1 -1
  11. data/lib/chronicle/etl/authorizer.rb +3 -4
  12. data/lib/chronicle/etl/cli/authorizations.rb +8 -6
  13. data/lib/chronicle/etl/cli/connectors.rb +7 -7
  14. data/lib/chronicle/etl/cli/jobs.rb +130 -53
  15. data/lib/chronicle/etl/cli/main.rb +29 -29
  16. data/lib/chronicle/etl/cli/plugins.rb +14 -15
  17. data/lib/chronicle/etl/cli/secrets.rb +14 -12
  18. data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
  19. data/lib/chronicle/etl/config.rb +18 -8
  20. data/lib/chronicle/etl/configurable.rb +20 -9
  21. data/lib/chronicle/etl/exceptions.rb +3 -3
  22. data/lib/chronicle/etl/extraction.rb +12 -2
  23. data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
  24. data/lib/chronicle/etl/extractors/extractor.rb +15 -2
  25. data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
  26. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
  27. data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
  28. data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
  29. data/lib/chronicle/etl/job.rb +35 -17
  30. data/lib/chronicle/etl/job_definition.rb +38 -26
  31. data/lib/chronicle/etl/job_log.rb +14 -16
  32. data/lib/chronicle/etl/job_logger.rb +4 -4
  33. data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
  34. data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
  35. data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
  36. data/lib/chronicle/etl/loaders/loader.rb +0 -17
  37. data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
  38. data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
  39. data/lib/chronicle/etl/logger.rb +2 -2
  40. data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
  41. data/lib/chronicle/etl/record.rb +15 -0
  42. data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
  43. data/lib/chronicle/etl/registry/connectors.rb +93 -36
  44. data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
  45. data/lib/chronicle/etl/registry/plugins.rb +27 -19
  46. data/lib/chronicle/etl/runner.rb +158 -128
  47. data/lib/chronicle/etl/secrets.rb +4 -4
  48. data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
  49. data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
  50. data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
  51. data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
  52. data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
  53. data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
  54. data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
  55. data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
  56. data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
  57. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
  58. data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
  59. data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
  60. data/lib/chronicle/etl/transformers/transformer.rb +63 -41
  61. data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
  62. data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
  63. data/lib/chronicle/etl/version.rb +1 -1
  64. data/lib/chronicle/etl.rb +6 -8
  65. metadata +49 -47
  66. data/lib/chronicle/etl/models/activity.rb +0 -15
  67. data/lib/chronicle/etl/models/attachment.rb +0 -14
  68. data/lib/chronicle/etl/models/base.rb +0 -122
  69. data/lib/chronicle/etl/models/entity.rb +0 -29
  70. data/lib/chronicle/etl/models/raw.rb +0 -26
  71. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
  72. data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
  73. data/lib/chronicle/etl/serializers/serializer.rb +0 -28
  74. data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
  75. data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
  76. data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
 
3
5
  module Chronicle
@@ -5,54 +7,109 @@ module Chronicle
5
7
  module Registry
6
8
  # A singleton class that acts as a registry of connector classes available for ETL jobs
7
9
  module Connectors
8
- PHASES = [:extractor, :transformer, :loader].freeze
10
+ PHASES = %i[extractor transformer loader].freeze
9
11
  public_constant :PHASES
10
12
 
11
13
  class << self
12
14
  attr_accessor :connectors
15
+ end
16
+
17
+ def self.register(connector)
18
+ connectors << connector
19
+ end
13
20
 
14
- def register(connector)
15
- connectors << connector
21
+ def self.connectors
22
+ @connectors ||= []
23
+ end
24
+
25
+ def self.ancestor_for_phase(phase)
26
+ case phase
27
+ when :extractor
28
+ Chronicle::ETL::Extractor
29
+ when :transformer
30
+ Chronicle::ETL::Transformer
31
+ when :loader
32
+ Chronicle::ETL::Loader
16
33
  end
34
+ end
17
35
 
18
- def connectors
19
- @connectors ||= []
36
+ def self.find_converter_for_source(source:, type: nil, strategy: nil, target: nil)
37
+ # FIXME: we're assuming extractor plugin has been loaded already
38
+ # This may not be the case if the schema converter is running
39
+ # off a JSON dump of extraction data.
40
+ # plugin = source_klass.connector_registration.source
41
+ # type = source_klass.connector_registration.type
42
+ # strategy = source_klass.connector_registration.strategy
43
+
44
+ connectors.find do |c|
45
+ c.phase == :transformer &&
46
+ c.source == source &&
47
+ (type.nil? || c.type == type) &&
48
+ (strategy.nil? || c.strategy == strategy || c.strategy.nil?) &&
49
+ (target.nil? || c.to_schema == target)
20
50
  end
51
+ end
52
+
53
+ # Find connector from amongst those currently loaded
54
+ def self.find_by_phase_and_identifier_built_in(phase, identifier)
55
+ connectors.find { |c| c.phase == phase.to_sym && c.identifier == identifier.to_sym }
56
+ end
57
+
58
+ # Find connector and load relevant plugin to find it if necessary
59
+ def self.find_by_phase_and_identifier(phase, identifier)
60
+ connector = find_by_phase_and_identifier_built_in(phase, identifier)
61
+ return connector if connector
62
+
63
+ # determine if we need to try to load a local file. if it has a dot in the identifier, we treat it as a file
64
+ return find_by_phase_and_identifier_local(phase, identifier) if identifier.to_s.include?('.')
65
+
66
+ # Example identifier: lastfm:listens:api
67
+ plugin, type, strategy = identifier.split(':')
68
+ .map { |part| part.gsub('-', '_') }
69
+ .map(&:to_sym)
21
70
 
22
- # Find connector from amongst those currently loaded
23
- def find_by_phase_and_identifier_local(phase, identifier)
24
- connector = connectors.find { |c| c.phase == phase && c.identifier == identifier }
71
+ plugin_identifier = plugin.to_s.gsub('_', '-')
72
+
73
+ unless Chronicle::ETL::Registry::Plugins.installed?(plugin_identifier)
74
+ raise Chronicle::ETL::PluginNotInstalledError, plugin_identifier
25
75
  end
26
76
 
27
- # Find connector and load relevant plugin to find it if necessary
28
- def find_by_phase_and_identifier(phase, identifier)
29
- connector = find_by_phase_and_identifier_local(phase, identifier)
30
- return connector if connector
31
-
32
- # if not available in built-in connectors, try to activate a
33
- # relevant plugin and try again
34
- if identifier.include?(":")
35
- plugin, name = identifier.split(":")
36
- else
37
- # This case handles the case where the identifier is a
38
- # shorthand (ie `imessage`) because there's only one default
39
- # connector.
40
- plugin = identifier
41
- end
42
-
43
- raise(Chronicle::ETL::PluginNotInstalledError.new(plugin)) unless Chronicle::ETL::Registry::Plugins.installed?(plugin)
44
-
45
- Chronicle::ETL::Registry::Plugins.activate(plugin)
46
-
47
- candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
48
- # if no name given, just use first connector with right phase/plugin
49
- # TODO: set up a property for connectors to specify that they're the
50
- # default connector for the plugin
51
- candidates = candidates.select { |c| c.identifier == name } if name
52
- connector = candidates.first
53
-
54
- connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
77
+ Chronicle::ETL::Registry::Plugins.activate(plugin_identifier)
78
+
79
+ # find most specific connector that matches the identifier
80
+ connector = connectors.find do |c|
81
+ c.plugin == plugin && (type.nil? || c.type == type) && (strategy.nil? || c.strategy == strategy)
55
82
  end
83
+
84
+ connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
85
+ end
86
+
87
+ # Load a plugin from local file system
88
+ def self.find_by_phase_and_identifier_local(phase, identifier)
89
+ script = File.read(identifier)
90
+ raise ConnectorNotAvailableError, "Connector '#{identifier}' not found" if script.nil?
91
+
92
+ # load the file by evaluating the contents
93
+ eval(script, TOPLEVEL_BINDING, __FILE__, __LINE__) # rubocop:disable Security/Eval
94
+
95
+ # read the file and look for all class definitions in the ruby script.
96
+ class_names = script.scan(/class (\w+)/).flatten
97
+
98
+ class_names.each do |class_name|
99
+ klass = Object.const_get(class_name)
100
+
101
+ next unless klass.ancestors.include?(ancestor_for_phase(phase))
102
+
103
+ registration = ::Chronicle::ETL::Registry::ConnectorRegistration.new(klass)
104
+
105
+ klass.connector_registration = registration
106
+ return registration
107
+ # return klass
108
+ rescue NameError
109
+ # ignore
110
+ end
111
+
112
+ raise ConnectorNotAvailableError, "Connector '#{identifier}' not found"
56
113
  end
57
114
  end
58
115
  end
@@ -4,7 +4,7 @@ module Chronicle
4
4
  class PluginRegistration
5
5
  attr_accessor :name, :description, :gem, :version, :installed, :gemspec
6
6
 
7
- def initialize(name=nil)
7
+ def initialize(name = nil)
8
8
  @installed = false
9
9
  @name = name
10
10
  yield self if block_given?
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'rubygems/command'
3
5
  require 'rubygems/commands/install_command'
@@ -13,16 +15,17 @@ module Chronicle
13
15
  # @todo Better validation for whether a gem is actually a plugin
14
16
  # @todo Add ways to load a plugin that don't require a gem on rubygems.org
15
17
  module Plugins
16
- KNOWN_PLUGINS = [
17
- 'email',
18
- 'foursquare',
19
- 'github',
20
- 'imessage',
21
- 'pinboard',
22
- 'safari',
23
- 'shell',
24
- 'spotify',
25
- 'zulip'
18
+ KNOWN_PLUGINS = %w[
19
+ apple-podcasts
20
+ email
21
+ foursquare
22
+ github
23
+ imessage
24
+ pinboard
25
+ safari
26
+ shell
27
+ spotify
28
+ zulip
26
29
  ].freeze
27
30
  public_constant :KNOWN_PLUGINS
28
31
 
@@ -30,7 +33,7 @@ module Chronicle
30
33
  # make registry aware of existence of name of non-gem plugin
31
34
  def self.register_standalone(name:)
32
35
  plugin = Chronicle::ETL::Registry::PluginRegistration.new do |p|
33
- p.name = name
36
+ p.name = name.to_sym
34
37
  p.installed = true
35
38
  end
36
39
 
@@ -44,19 +47,19 @@ module Chronicle
44
47
 
45
48
  # Check whether a given plugin is installed
46
49
  def self.installed?(name)
47
- installed.map(&:name).include?(name)
50
+ installed.map(&:name).include?(name.to_sym)
48
51
  end
49
52
 
50
53
  # List of plugins installed as standalone
51
54
  def self.installed_standalone
52
- @standalones ||= []
55
+ @installed_standalone ||= []
53
56
  end
54
57
 
55
58
  # List of plugins installed as gems
56
59
  def self.installed_as_gem
57
60
  installed_gemspecs_latest.map do |gem|
58
61
  Chronicle::ETL::Registry::PluginRegistration.new do |p|
59
- p.name = gem.name.sub("chronicle-", "")
62
+ p.name = gem.name.sub('chronicle-', '').to_sym
60
63
  p.gem = gem.name
61
64
  p.description = gem.description
62
65
  p.version = gem.version.to_s
@@ -106,7 +109,9 @@ module Chronicle
106
109
  # All versions of all plugins currently installed
107
110
  def self.installed_gemspecs
108
111
  # TODO: add check for chronicle-etl dependency
109
- Gem::Specification.filter { |s| s.name.match(/^chronicle-/) && s.name != "chronicle-etl" }
112
+ Gem::Specification.filter do |s|
113
+ s.name.match(/^chronicle-/) && s.name != 'chronicle-etl' && s.name != 'chronicle-core'
114
+ end
110
115
  end
111
116
 
112
117
  # Latest version of each installed plugin
@@ -120,15 +125,18 @@ module Chronicle
120
125
  def self.activate(name)
121
126
  # By default, activates the latest available version of a gem
122
127
  # so don't have to run Kernel#gem separately
123
- require "chronicle/#{name}"
128
+
129
+ plugin_require_name = name.to_s.gsub('-', '_')
130
+ require "chronicle/#{plugin_require_name}"
124
131
  rescue Gem::ConflictError => e
125
132
  # TODO: figure out if there's more we can do here
126
- raise Chronicle::ETL::PluginConflictError.new(name), "Plugin '#{name}' couldn't be loaded. #{e.message}"
127
- rescue StandardError, LoadError => e
133
+ raise Chronicle::ETL::PluginConflictError.new(name),
134
+ "Plugin '#{plugin_require_name}' couldn't be loaded. #{e.message}"
135
+ rescue StandardError, LoadError
128
136
  # StandardError to catch random non-loading problems that might occur
129
137
  # when requiring the plugin (eg class macro invoked the wrong way)
130
138
  # TODO: decide if this should be separated
131
- raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{name}' couldn't be loaded"
139
+ raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{plugin_require_name}' couldn't be loaded"
132
140
  end
133
141
 
134
142
  # Install a plugin to local gems
@@ -1,133 +1,163 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'colorize'
2
4
  require 'chronic_duration'
3
- require "tty-spinner"
4
-
5
- class Chronicle::ETL::Runner
6
- def initialize(job)
7
- @job = job
8
- @job_logger = Chronicle::ETL::JobLogger.new(@job)
9
- end
10
-
11
- def run!
12
- begin_job
13
- validate_job
14
- instantiate_connectors
15
- prepare_job
16
- prepare_ui
17
- run_extraction
18
- rescue Chronicle::ETL::ExtractionError => e
19
- @job_logger&.error
20
- raise(Chronicle::ETL::RunnerError, "Extraction failed. #{e.message}")
21
- rescue Interrupt
22
- @job_logger&.error
23
- raise(Chronicle::ETL::RunInterruptedError, "Job interrupted.")
24
- rescue StandardError => e
25
- # Just throwing this in here until we have better exception handling in
26
- # loaders, etc
27
- @job_logger&.error
28
- raise(Chronicle::ETL::RunnerError, "Error running job. #{e.message}")
29
- ensure
30
- finish_job
31
- end
32
-
33
- private
34
-
35
- def begin_job
36
- Chronicle::ETL::Logger.info(tty_log_job_initialize)
37
- @initialization_spinner = TTY::Spinner.new(":spinner :title", format: :dots_2)
38
- end
39
-
40
- def validate_job
41
- @initialization_spinner.update(title: "Validating job")
42
- @job.job_definition.validate!
43
- end
44
-
45
- def instantiate_connectors
46
- @initialization_spinner.update(title: "Initializing connectors")
47
- @extractor = @job.instantiate_extractor
48
- @loader = @job.instantiate_loader
49
- end
50
-
51
- def prepare_job
52
- @initialization_spinner.update(title: "Preparing job")
53
- @job_logger.start
54
- @loader.start
55
-
56
- @initialization_spinner.update(title: "Preparing extraction")
57
- @initialization_spinner.auto_spin
58
- @extractor.prepare
59
- @initialization_spinner.success("(#{'successful'.green})")
60
- Chronicle::ETL::Logger.info("\n")
61
- end
62
-
63
- def prepare_ui
64
- total = @extractor.results_count
65
- @progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
66
- Chronicle::ETL::Logger.attach_to_ui(@progress_bar)
67
- end
68
-
69
- def run_extraction
70
- @extractor.extract do |extraction|
71
- process_extraction(extraction)
72
- @progress_bar.increment
5
+ require 'tty-spinner'
6
+
7
+ module Chronicle
8
+ module ETL
9
+ class Runner
10
+ def initialize(job)
11
+ @job = job
12
+ @job_logger = Chronicle::ETL::JobLogger.new(@job)
13
+ end
14
+
15
+ def run!
16
+ begin_job
17
+ validate_job
18
+ instantiate_connectors
19
+ prepare_job
20
+ prepare_ui
21
+ run_extraction
22
+ rescue Chronicle::ETL::ExtractionError => e
23
+ @job_logger&.error
24
+ raise(Chronicle::ETL::RunnerError, "Extraction failed. #{e.message}")
25
+ rescue Interrupt
26
+ @job_logger&.error
27
+ raise(Chronicle::ETL::RunInterruptedError, 'Job interrupted.')
28
+ # rescue StandardError => e
29
+ # # Just throwing this in here until we have better exception handling in
30
+ # # loaders, etc
31
+ # @job_logger&.error
32
+ # raise(Chronicle::ETL::RunnerError, "Error running job. #{e.message}")
33
+ ensure
34
+ finish_job
35
+ end
36
+
37
+ private
38
+
39
+ def begin_job
40
+ Chronicle::ETL::Logger.info(tty_log_job_initialize)
41
+ @initialization_spinner = TTY::Spinner.new(':spinner :title', format: :dots_2)
42
+ end
43
+
44
+ def validate_job
45
+ @initialization_spinner.update(title: 'Validating job')
46
+ @job.job_definition.validate!
47
+ end
48
+
49
+ def instantiate_connectors
50
+ @initialization_spinner.update(title: 'Initializing connectors')
51
+ @extractor = @job.instantiate_extractor
52
+ @transformers = @job.instantiate_transformers
53
+ @loader = @job.instantiate_loader
54
+ end
55
+
56
+ def prepare_job
57
+ @initialization_spinner.update(title: 'Preparing job')
58
+ @job_logger.start
59
+ @loader.start
60
+
61
+ @initialization_spinner.update(title: 'Preparing extraction')
62
+ @initialization_spinner.auto_spin
63
+ @extractor.prepare
64
+ @initialization_spinner.success("(#{'successful'.green})")
65
+ Chronicle::ETL::Logger.info("\n")
66
+ end
67
+
68
+ def prepare_ui
69
+ total = @extractor.results_count
70
+ @progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
71
+ Chronicle::ETL::Logger.attach_to_ui(@progress_bar)
72
+ end
73
+
74
+ def run_extraction
75
+ # Pattern based on Kiba's StreamingRunner
76
+ # https://github.com/thbar/kiba/blob/master/lib/kiba/streaming_runner.rb
77
+ stream = extractor_stream
78
+ recurser = ->(s, t) { transform_stream(s, t) }
79
+ @transformers.reduce(stream, &recurser).each do |record|
80
+ Chronicle::ETL::Logger.debug(tty_log_transformation(record))
81
+ @job_logger.log_transformation(record)
82
+ @progress_bar.increment
83
+ load_record(record)
84
+ end
85
+
86
+ @progress_bar.finish
87
+
88
+ # This is typically a slow method (writing to stdout, writing a big file, etc)
89
+ # TODO: consider adding a spinner?
90
+ @loader.finish
91
+ @job_logger.finish
92
+ end
93
+
94
+ # Initial stream of extracted data, wrapped in a Record class
95
+ def extractor_stream
96
+ Enumerator.new do |y|
97
+ @extractor.extract do |extraction|
98
+ record = Chronicle::ETL::Record.new(data: extraction.data, extraction: extraction)
99
+ y << record
100
+ end
101
+ end
102
+ end
103
+
104
+ # For a given stream of records and a given transformer,
105
+ # returns a new stream of transformed records and finally
106
+ # calls the finish method on the transformer
107
+ def transform_stream(stream, transformer)
108
+ Enumerator.new do |y|
109
+ stream.each do |record|
110
+ transformer.call(record) do |transformed_record|
111
+ y << transformed_record
112
+ end
113
+ end
114
+
115
+ transformer.call_finish do |transformed_record|
116
+ y << transformed_record
117
+ end
118
+ end
119
+ end
120
+
121
+ def load_record(record)
122
+ @loader.load(record.data) unless @job.dry_run?
123
+ end
124
+
125
+ def finish_job
126
+ @job_logger.save
127
+ @progress_bar&.finish
128
+ Chronicle::ETL::Logger.detach_from_ui
129
+ Chronicle::ETL::Logger.info(tty_log_completion)
130
+ end
131
+
132
+ def tty_log_job_initialize
133
+ output = 'Beginning job '
134
+ output += "'#{@job.name}'".bold if @job.name
135
+ output
136
+ end
137
+
138
+ def tty_log_transformation(record)
139
+ output = ' ✓'.green
140
+ output + " #{record}"
141
+ end
142
+
143
+ def tty_log_transformation_failure(exception, transformer)
144
+ output = ' ✖'.red
145
+ output + " Failed to transform #{transformer}. #{exception.message}"
146
+ end
147
+
148
+ def tty_log_completion
149
+ status = @job_logger.success ? 'Success' : 'Failed'
150
+ job_completion = @job_logger.success ? 'Completed' : 'Partially completed'
151
+ output = "\n#{job_completion} job"
152
+ output += " '#{@job.name}'".bold if @job.name
153
+ output += " in #{ChronicDuration.output(@job_logger.duration)}" if @job_logger.duration
154
+ output += "\n Status:\t".light_black + status
155
+ output += "\n Completed:\t".light_black + @job_logger.job_log.num_records_processed.to_s
156
+ if @job_logger.job_log.highest_timestamp
157
+ output += "\n Latest:\t".light_black + @job_logger.job_log.highest_timestamp.iso8601.to_s
158
+ end
159
+ output
160
+ end
73
161
  end
74
-
75
- @progress_bar.finish
76
-
77
- # This is typically a slow method (writing to stdout, writing a big file, etc)
78
- # TODO: consider adding a spinner?
79
- @loader.finish
80
- @job_logger.finish
81
- end
82
-
83
- def process_extraction(extraction)
84
- # For each extraction from our extractor, we create a new tarnsformer
85
- transformer = @job.instantiate_transformer(extraction)
86
-
87
- # And then transform that record, logging it if we're in debug log level
88
- record = transformer.transform
89
- Chronicle::ETL::Logger.debug(tty_log_transformation(transformer))
90
- @job_logger.log_transformation(transformer)
91
-
92
- # Then send the results to the loader
93
- @loader.load(record) unless @job.dry_run?
94
- rescue Chronicle::ETL::TransformationError => e
95
- # TODO: have an option to cancel job if we encounter an error
96
- Chronicle::ETL::Logger.error(tty_log_transformation_failure(e, transformer))
97
- end
98
-
99
- def finish_job
100
- @job_logger.save
101
- @progress_bar&.finish
102
- Chronicle::ETL::Logger.detach_from_ui
103
- Chronicle::ETL::Logger.info(tty_log_completion)
104
- end
105
-
106
- def tty_log_job_initialize
107
- output = "Beginning job "
108
- output += "'#{@job.name}'".bold if @job.name
109
- output
110
- end
111
-
112
- def tty_log_transformation(transformer)
113
- output = " ✓".green
114
- output += " #{transformer}"
115
- end
116
-
117
- def tty_log_transformation_failure(exception, transformer)
118
- output = " ✖".red
119
- output += " Failed to build #{transformer}. #{exception.message}"
120
- end
121
-
122
- def tty_log_completion
123
- status = @job_logger.success ? 'Success' : 'Failed'
124
- job_completion = @job_logger.success ? 'Completed' : 'Partially completed'
125
- output = "\n#{job_completion} job"
126
- output += " '#{@job.name}'".bold if @job.name
127
- output += " in #{ChronicDuration.output(@job_logger.duration)}" if @job_logger.duration
128
- output += "\n Status:\t".light_black + status
129
- output += "\n Completed:\t".light_black + "#{@job_logger.job_log.num_records_processed}"
130
- output += "\n Latest:\t".light_black + "#{@job_logger.job_log.highest_timestamp.iso8601}" if @job_logger.job_log.highest_timestamp
131
- output
132
162
  end
133
163
  end
@@ -1,4 +1,4 @@
1
- require "active_support/core_ext/hash/keys"
1
+ require 'active_support/core_ext/hash/keys'
2
2
 
3
3
  module Chronicle
4
4
  module ETL
@@ -8,7 +8,7 @@ module Chronicle
8
8
 
9
9
  # Whether a given namespace exists
10
10
  def exists?(namespace)
11
- Chronicle::ETL::Config.exists?("secrets", namespace)
11
+ Chronicle::ETL::Config.exists?('secrets', namespace)
12
12
  end
13
13
 
14
14
  # Save a setting to a namespaced config file
@@ -47,7 +47,7 @@ module Chronicle
47
47
 
48
48
  # Read secrets from a config file
49
49
  def read(namespace)
50
- definition = Chronicle::ETL::Config.load("secrets", namespace)
50
+ definition = Chronicle::ETL::Config.load('secrets', namespace)
51
51
  definition[:secrets] || {}
52
52
  end
53
53
 
@@ -57,7 +57,7 @@ module Chronicle
57
57
  secrets: (secrets || {}).transform_keys(&:to_s),
58
58
  chronicle_etl_version: Chronicle::ETL::VERSION
59
59
  }
60
- Chronicle::ETL::Config.write("secrets", namespace, data)
60
+ Chronicle::ETL::Config.write('secrets', namespace, data)
61
61
  end
62
62
 
63
63
  # Which config files are available in ~/.config/chronicle/etl/secrets
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chronicle
4
+ module ETL
5
+ class BufferTransformer < Chronicle::ETL::Transformer
6
+ register_connector do |r|
7
+ r.identifier = :buffer
8
+ r.description = 'by buffering'
9
+ end
10
+
11
+ setting :size, default: 10, description: 'The size of the buffer'
12
+
13
+ def transform(record)
14
+ stash_record(record)
15
+
16
+ # FIXME: this doesn't seem to be working with the runner
17
+ return if @stashed_records.size < @config.size
18
+
19
+ # FIXME: this will result in the wrong extraction being associated with
20
+ # the batch of flushed records
21
+ flush_stashed_records.map(&:data)
22
+ end
23
+
24
+ def finish
25
+ flush_stashed_records
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chronicle
4
+ module ETL
5
+ class ChronicleTransformer < Chronicle::ETL::Transformer
6
+ register_connector do |r|
7
+ r.identifier = :chronicle
8
+ r.description = 'records to Chronicle schema'
9
+ end
10
+
11
+ def transform(record)
12
+ converter_klass = find_converter(record.extraction)
13
+ # TODO: handle missing converter
14
+
15
+ converter_klass.new.call(record) do |transformed_record|
16
+ yield transformed_record.data
17
+ end
18
+ end
19
+
20
+ private
21
+
22
+ def find_converter(extraction)
23
+ Chronicle::ETL::Registry::Connectors.find_converter_for_source(
24
+ source: extraction.source,
25
+ type: extraction.type,
26
+ strategy: extraction.strategy,
27
+ target: :chronicle
28
+ )&.klass
29
+ end
30
+ end
31
+ end
32
+ end