chronicle-etl 0.5.5 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +15 -25
  3. data/.rubocop.yml +2 -44
  4. data/Gemfile +2 -2
  5. data/Guardfile +3 -3
  6. data/README.md +75 -68
  7. data/Rakefile +2 -2
  8. data/bin/console +4 -5
  9. data/chronicle-etl.gemspec +51 -49
  10. data/exe/chronicle-etl +1 -1
  11. data/lib/chronicle/etl/authorizer.rb +3 -4
  12. data/lib/chronicle/etl/cli/authorizations.rb +8 -6
  13. data/lib/chronicle/etl/cli/connectors.rb +7 -7
  14. data/lib/chronicle/etl/cli/jobs.rb +130 -53
  15. data/lib/chronicle/etl/cli/main.rb +29 -29
  16. data/lib/chronicle/etl/cli/plugins.rb +14 -15
  17. data/lib/chronicle/etl/cli/secrets.rb +14 -12
  18. data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
  19. data/lib/chronicle/etl/config.rb +18 -8
  20. data/lib/chronicle/etl/configurable.rb +20 -9
  21. data/lib/chronicle/etl/exceptions.rb +3 -3
  22. data/lib/chronicle/etl/extraction.rb +12 -2
  23. data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
  24. data/lib/chronicle/etl/extractors/extractor.rb +15 -2
  25. data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
  26. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
  27. data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
  28. data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
  29. data/lib/chronicle/etl/job.rb +35 -17
  30. data/lib/chronicle/etl/job_definition.rb +38 -26
  31. data/lib/chronicle/etl/job_log.rb +14 -16
  32. data/lib/chronicle/etl/job_logger.rb +4 -4
  33. data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
  34. data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
  35. data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
  36. data/lib/chronicle/etl/loaders/loader.rb +0 -17
  37. data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
  38. data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
  39. data/lib/chronicle/etl/logger.rb +2 -2
  40. data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
  41. data/lib/chronicle/etl/record.rb +15 -0
  42. data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
  43. data/lib/chronicle/etl/registry/connectors.rb +93 -36
  44. data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
  45. data/lib/chronicle/etl/registry/plugins.rb +27 -19
  46. data/lib/chronicle/etl/runner.rb +158 -128
  47. data/lib/chronicle/etl/secrets.rb +4 -4
  48. data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
  49. data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
  50. data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
  51. data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
  52. data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
  53. data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
  54. data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
  55. data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
  56. data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
  57. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
  58. data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
  59. data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
  60. data/lib/chronicle/etl/transformers/transformer.rb +63 -41
  61. data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
  62. data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
  63. data/lib/chronicle/etl/version.rb +1 -1
  64. data/lib/chronicle/etl.rb +6 -8
  65. metadata +49 -47
  66. data/lib/chronicle/etl/models/activity.rb +0 -15
  67. data/lib/chronicle/etl/models/attachment.rb +0 -14
  68. data/lib/chronicle/etl/models/base.rb +0 -122
  69. data/lib/chronicle/etl/models/entity.rb +0 -29
  70. data/lib/chronicle/etl/models/raw.rb +0 -26
  71. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
  72. data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
  73. data/lib/chronicle/etl/serializers/serializer.rb +0 -28
  74. data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
  75. data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
  76. data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
 
3
5
  module Chronicle
@@ -5,54 +7,109 @@ module Chronicle
5
7
  module Registry
6
8
  # A singleton class that acts as a registry of connector classes available for ETL jobs
7
9
  module Connectors
8
- PHASES = [:extractor, :transformer, :loader].freeze
10
+ PHASES = %i[extractor transformer loader].freeze
9
11
  public_constant :PHASES
10
12
 
11
13
  class << self
12
14
  attr_accessor :connectors
15
+ end
16
+
17
+ def self.register(connector)
18
+ connectors << connector
19
+ end
13
20
 
14
- def register(connector)
15
- connectors << connector
21
+ def self.connectors
22
+ @connectors ||= []
23
+ end
24
+
25
+ def self.ancestor_for_phase(phase)
26
+ case phase
27
+ when :extractor
28
+ Chronicle::ETL::Extractor
29
+ when :transformer
30
+ Chronicle::ETL::Transformer
31
+ when :loader
32
+ Chronicle::ETL::Loader
16
33
  end
34
+ end
17
35
 
18
- def connectors
19
- @connectors ||= []
36
+ def self.find_converter_for_source(source:, type: nil, strategy: nil, target: nil)
37
+ # FIXME: we're assuming extractor plugin has been loaded already
38
+ # This may not be the case if the schema converter is running
39
+ # off a json dump of extraction data.
40
+ # plugin = source_klass.connector_registration.source
41
+ # type = source_klass.connector_registration.type
42
+ # strategy = source_klass.connector_registration.strategy
43
+
44
+ connectors.find do |c|
45
+ c.phase == :transformer &&
46
+ c.source == source &&
47
+ (type.nil? || c.type == type) &&
48
+ (strategy.nil? || c.strategy == strategy || c.strategy.nil?) &&
49
+ (target.nil? || c.to_schema == target)
20
50
  end
51
+ end
52
+
53
+ # Find connector from amongst those currently loaded
54
+ def self.find_by_phase_and_identifier_built_in(phase, identifier)
55
+ connectors.find { |c| c.phase == phase.to_sym && c.identifier == identifier.to_sym }
56
+ end
57
+
58
+ # Find connector and load relevant plugin to find it if necessary
59
+ def self.find_by_phase_and_identifier(phase, identifier)
60
+ connector = find_by_phase_and_identifier_built_in(phase, identifier)
61
+ return connector if connector
62
+
63
+ # determine if we need to try to load a local file. if it has a dot in the identifier, we treat it as a file
64
+ return find_by_phase_and_identifier_local(phase, identifier) if identifier.to_s.include?('.')
65
+
66
+ # Example identifier: lastfm:listens:api
67
+ plugin, type, strategy = identifier.split(':')
68
+ .map { |part| part.gsub('-', '_') }
69
+ .map(&:to_sym)
21
70
 
22
- # Find connector from amongst those currently loaded
23
- def find_by_phase_and_identifier_local(phase, identifier)
24
- connector = connectors.find { |c| c.phase == phase && c.identifier == identifier }
71
+ plugin_identifier = plugin.to_s.gsub('_', '-')
72
+
73
+ unless Chronicle::ETL::Registry::Plugins.installed?(plugin_identifier)
74
+ raise Chronicle::ETL::PluginNotInstalledError, plugin_identifier
25
75
  end
26
76
 
27
- # Find connector and load relevant plugin to find it if necessary
28
- def find_by_phase_and_identifier(phase, identifier)
29
- connector = find_by_phase_and_identifier_local(phase, identifier)
30
- return connector if connector
31
-
32
- # if not available in built-in connectors, try to activate a
33
- # relevant plugin and try again
34
- if identifier.include?(":")
35
- plugin, name = identifier.split(":")
36
- else
37
- # This case handles the case where the identifier is a
38
- # shorthand (ie `imessage`) because there's only one default
39
- # connector.
40
- plugin = identifier
41
- end
42
-
43
- raise(Chronicle::ETL::PluginNotInstalledError.new(plugin)) unless Chronicle::ETL::Registry::Plugins.installed?(plugin)
44
-
45
- Chronicle::ETL::Registry::Plugins.activate(plugin)
46
-
47
- candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
48
- # if no name given, just use first connector with right phase/plugin
49
- # TODO: set up a property for connectors to specify that they're the
50
- # default connector for the plugin
51
- candidates = candidates.select { |c| c.identifier == name } if name
52
- connector = candidates.first
53
-
54
- connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
77
+ Chronicle::ETL::Registry::Plugins.activate(plugin_identifier)
78
+
79
+ # find most specific connector that matches the identifier
80
+ connector = connectors.find do |c|
81
+ c.plugin == plugin && (type.nil? || c.type == type) && (strategy.nil? || c.strategy == strategy)
55
82
  end
83
+
84
+ connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
85
+ end
86
+
87
+ # Load a plugin from local file system
88
+ def self.find_by_phase_and_identifier_local(phase, identifier)
89
+ script = File.read(identifier)
90
+ raise ConnectorNotAvailableError, "Connector '#{identifier}' not found" if script.nil?
91
+
92
+ # load the file by evaluating the contents
93
+ eval(script, TOPLEVEL_BINDING, __FILE__, __LINE__) # rubocop:disable Security/Eval
94
+
95
+ # read the file and look for all class definitions in the ruby script.
96
+ class_names = script.scan(/class (\w+)/).flatten
97
+
98
+ class_names.each do |class_name|
99
+ klass = Object.const_get(class_name)
100
+
101
+ next unless klass.ancestors.include?(ancestor_for_phase(phase))
102
+
103
+ registration = ::Chronicle::ETL::Registry::ConnectorRegistration.new(klass)
104
+
105
+ klass.connector_registration = registration
106
+ return registration
107
+ # return klass
108
+ rescue NameError
109
+ # ignore
110
+ end
111
+
112
+ raise ConnectorNotAvailableError, "Connector '#{identifier}' not found"
56
113
  end
57
114
  end
58
115
  end
@@ -4,7 +4,7 @@ module Chronicle
4
4
  class PluginRegistration
5
5
  attr_accessor :name, :description, :gem, :version, :installed, :gemspec
6
6
 
7
- def initialize(name=nil)
7
+ def initialize(name = nil)
8
8
  @installed = false
9
9
  @name = name
10
10
  yield self if block_given?
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'rubygems/command'
3
5
  require 'rubygems/commands/install_command'
@@ -13,16 +15,17 @@ module Chronicle
13
15
  # @todo Better validation for whether a gem is actually a plugin
14
16
  # @todo Add ways to load a plugin that don't require a gem on rubygems.org
15
17
  module Plugins
16
- KNOWN_PLUGINS = [
17
- 'email',
18
- 'foursquare',
19
- 'github',
20
- 'imessage',
21
- 'pinboard',
22
- 'safari',
23
- 'shell',
24
- 'spotify',
25
- 'zulip'
18
+ KNOWN_PLUGINS = %w[
19
+ apple-podcasts
20
+ email
21
+ foursquare
22
+ github
23
+ imessage
24
+ pinboard
25
+ safari
26
+ shell
27
+ spotify
28
+ zulip
26
29
  ].freeze
27
30
  public_constant :KNOWN_PLUGINS
28
31
 
@@ -30,7 +33,7 @@ module Chronicle
30
33
  # make registry aware of existence of name of non-gem plugin
31
34
  def self.register_standalone(name:)
32
35
  plugin = Chronicle::ETL::Registry::PluginRegistration.new do |p|
33
- p.name = name
36
+ p.name = name.to_sym
34
37
  p.installed = true
35
38
  end
36
39
 
@@ -44,19 +47,19 @@ module Chronicle
44
47
 
45
48
  # Check whether a given plugin is installed
46
49
  def self.installed?(name)
47
- installed.map(&:name).include?(name)
50
+ installed.map(&:name).include?(name.to_sym)
48
51
  end
49
52
 
50
53
  # List of plugins installed as standalone
51
54
  def self.installed_standalone
52
- @standalones ||= []
55
+ @installed_standalone ||= []
53
56
  end
54
57
 
55
58
  # List of plugins installed as gems
56
59
  def self.installed_as_gem
57
60
  installed_gemspecs_latest.map do |gem|
58
61
  Chronicle::ETL::Registry::PluginRegistration.new do |p|
59
- p.name = gem.name.sub("chronicle-", "")
62
+ p.name = gem.name.sub('chronicle-', '').to_sym
60
63
  p.gem = gem.name
61
64
  p.description = gem.description
62
65
  p.version = gem.version.to_s
@@ -106,7 +109,9 @@ module Chronicle
106
109
  # All versions of all plugins currently installed
107
110
  def self.installed_gemspecs
108
111
  # TODO: add check for chronicle-etl dependency
109
- Gem::Specification.filter { |s| s.name.match(/^chronicle-/) && s.name != "chronicle-etl" }
112
+ Gem::Specification.filter do |s|
113
+ s.name.match(/^chronicle-/) && s.name != 'chronicle-etl' && s.name != 'chronicle-core'
114
+ end
110
115
  end
111
116
 
112
117
  # Latest version of each installed plugin
@@ -120,15 +125,18 @@ module Chronicle
120
125
  def self.activate(name)
121
126
  # By default, activates the latest available version of a gem
122
127
  # so don't have to run Kernel#gem separately
123
- require "chronicle/#{name}"
128
+
129
+ plugin_require_name = name.to_s.gsub('-', '_')
130
+ require "chronicle/#{plugin_require_name}"
124
131
  rescue Gem::ConflictError => e
125
132
  # TODO: figure out if there's more we can do here
126
- raise Chronicle::ETL::PluginConflictError.new(name), "Plugin '#{name}' couldn't be loaded. #{e.message}"
127
- rescue StandardError, LoadError => e
133
+ raise Chronicle::ETL::PluginConflictError.new(name),
134
+ "Plugin '#{plugin_require_name}' couldn't be loaded. #{e.message}"
135
+ rescue StandardError, LoadError
128
136
  # StandardError to catch random non-loading problems that might occur
129
137
  # when requiring the plugin (eg class macro invoked the wrong way)
130
138
  # TODO: decide if this should be separated
131
- raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{name}' couldn't be loaded"
139
+ raise Chronicle::ETL::PluginLoadError.new(name), "Plugin '#{plugin_require_name}' couldn't be loaded"
132
140
  end
133
141
 
134
142
  # Install a plugin to local gems
@@ -1,133 +1,163 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'colorize'
2
4
  require 'chronic_duration'
3
- require "tty-spinner"
4
-
5
- class Chronicle::ETL::Runner
6
- def initialize(job)
7
- @job = job
8
- @job_logger = Chronicle::ETL::JobLogger.new(@job)
9
- end
10
-
11
- def run!
12
- begin_job
13
- validate_job
14
- instantiate_connectors
15
- prepare_job
16
- prepare_ui
17
- run_extraction
18
- rescue Chronicle::ETL::ExtractionError => e
19
- @job_logger&.error
20
- raise(Chronicle::ETL::RunnerError, "Extraction failed. #{e.message}")
21
- rescue Interrupt
22
- @job_logger&.error
23
- raise(Chronicle::ETL::RunInterruptedError, "Job interrupted.")
24
- rescue StandardError => e
25
- # Just throwing this in here until we have better exception handling in
26
- # loaders, etc
27
- @job_logger&.error
28
- raise(Chronicle::ETL::RunnerError, "Error running job. #{e.message}")
29
- ensure
30
- finish_job
31
- end
32
-
33
- private
34
-
35
- def begin_job
36
- Chronicle::ETL::Logger.info(tty_log_job_initialize)
37
- @initialization_spinner = TTY::Spinner.new(":spinner :title", format: :dots_2)
38
- end
39
-
40
- def validate_job
41
- @initialization_spinner.update(title: "Validating job")
42
- @job.job_definition.validate!
43
- end
44
-
45
- def instantiate_connectors
46
- @initialization_spinner.update(title: "Initializing connectors")
47
- @extractor = @job.instantiate_extractor
48
- @loader = @job.instantiate_loader
49
- end
50
-
51
- def prepare_job
52
- @initialization_spinner.update(title: "Preparing job")
53
- @job_logger.start
54
- @loader.start
55
-
56
- @initialization_spinner.update(title: "Preparing extraction")
57
- @initialization_spinner.auto_spin
58
- @extractor.prepare
59
- @initialization_spinner.success("(#{'successful'.green})")
60
- Chronicle::ETL::Logger.info("\n")
61
- end
62
-
63
- def prepare_ui
64
- total = @extractor.results_count
65
- @progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
66
- Chronicle::ETL::Logger.attach_to_ui(@progress_bar)
67
- end
68
-
69
- def run_extraction
70
- @extractor.extract do |extraction|
71
- process_extraction(extraction)
72
- @progress_bar.increment
5
+ require 'tty-spinner'
6
+
7
+ module Chronicle
8
+ module ETL
9
+ class Runner
10
+ def initialize(job)
11
+ @job = job
12
+ @job_logger = Chronicle::ETL::JobLogger.new(@job)
13
+ end
14
+
15
+ def run!
16
+ begin_job
17
+ validate_job
18
+ instantiate_connectors
19
+ prepare_job
20
+ prepare_ui
21
+ run_extraction
22
+ rescue Chronicle::ETL::ExtractionError => e
23
+ @job_logger&.error
24
+ raise(Chronicle::ETL::RunnerError, "Extraction failed. #{e.message}")
25
+ rescue Interrupt
26
+ @job_logger&.error
27
+ raise(Chronicle::ETL::RunInterruptedError, 'Job interrupted.')
28
+ # rescue StandardError => e
29
+ # # Just throwing this in here until we have better exception handling in
30
+ # # loaders, etc
31
+ # @job_logger&.error
32
+ # raise(Chronicle::ETL::RunnerError, "Error running job. #{e.message}")
33
+ ensure
34
+ finish_job
35
+ end
36
+
37
+ private
38
+
39
+ def begin_job
40
+ Chronicle::ETL::Logger.info(tty_log_job_initialize)
41
+ @initialization_spinner = TTY::Spinner.new(':spinner :title', format: :dots_2)
42
+ end
43
+
44
+ def validate_job
45
+ @initialization_spinner.update(title: 'Validating job')
46
+ @job.job_definition.validate!
47
+ end
48
+
49
+ def instantiate_connectors
50
+ @initialization_spinner.update(title: 'Initializing connectors')
51
+ @extractor = @job.instantiate_extractor
52
+ @transformers = @job.instantiate_transformers
53
+ @loader = @job.instantiate_loader
54
+ end
55
+
56
+ def prepare_job
57
+ @initialization_spinner.update(title: 'Preparing job')
58
+ @job_logger.start
59
+ @loader.start
60
+
61
+ @initialization_spinner.update(title: 'Preparing extraction')
62
+ @initialization_spinner.auto_spin
63
+ @extractor.prepare
64
+ @initialization_spinner.success("(#{'successful'.green})")
65
+ Chronicle::ETL::Logger.info("\n")
66
+ end
67
+
68
+ def prepare_ui
69
+ total = @extractor.results_count
70
+ @progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
71
+ Chronicle::ETL::Logger.attach_to_ui(@progress_bar)
72
+ end
73
+
74
+ def run_extraction
75
+ # Pattern based on Kiba's StreamingRunner
76
+ # https://github.com/thbar/kiba/blob/master/lib/kiba/streaming_runner.rb
77
+ stream = extractor_stream
78
+ recurser = ->(s, t) { transform_stream(s, t) }
79
+ @transformers.reduce(stream, &recurser).each do |record|
80
+ Chronicle::ETL::Logger.debug(tty_log_transformation(record))
81
+ @job_logger.log_transformation(record)
82
+ @progress_bar.increment
83
+ load_record(record)
84
+ end
85
+
86
+ @progress_bar.finish
87
+
88
+ # This is typically a slow method (writing to stdout, writing a big file, etc)
89
+ # TODO: consider adding a spinner?
90
+ @loader.finish
91
+ @job_logger.finish
92
+ end
93
+
94
+ # Initial stream of extracted data, wrapped in a Record class
95
+ def extractor_stream
96
+ Enumerator.new do |y|
97
+ @extractor.extract do |extraction|
98
+ record = Chronicle::ETL::Record.new(data: extraction.data, extraction: extraction)
99
+ y << record
100
+ end
101
+ end
102
+ end
103
+
104
+ # For a given stream of records and a given transformer,
105
+ # returns a new stream of transformed records and finally
106
+ # calls the finish method on the transformer
107
+ def transform_stream(stream, transformer)
108
+ Enumerator.new do |y|
109
+ stream.each do |record|
110
+ transformer.call(record) do |transformed_record|
111
+ y << transformed_record
112
+ end
113
+ end
114
+
115
+ transformer.call_finish do |transformed_record|
116
+ y << transformed_record
117
+ end
118
+ end
119
+ end
120
+
121
+ def load_record(record)
122
+ @loader.load(record.data) unless @job.dry_run?
123
+ end
124
+
125
+ def finish_job
126
+ @job_logger.save
127
+ @progress_bar&.finish
128
+ Chronicle::ETL::Logger.detach_from_ui
129
+ Chronicle::ETL::Logger.info(tty_log_completion)
130
+ end
131
+
132
+ def tty_log_job_initialize
133
+ output = 'Beginning job '
134
+ output += "'#{@job.name}'".bold if @job.name
135
+ output
136
+ end
137
+
138
+ def tty_log_transformation(record)
139
+ output = ' ✓'.green
140
+ output + " #{record}"
141
+ end
142
+
143
+ def tty_log_transformation_failure(exception, transformer)
144
+ output = ' ✖'.red
145
+ output + " Failed to transform #{transformer}. #{exception.message}"
146
+ end
147
+
148
+ def tty_log_completion
149
+ status = @job_logger.success ? 'Success' : 'Failed'
150
+ job_completion = @job_logger.success ? 'Completed' : 'Partially completed'
151
+ output = "\n#{job_completion} job"
152
+ output += " '#{@job.name}'".bold if @job.name
153
+ output += " in #{ChronicDuration.output(@job_logger.duration)}" if @job_logger.duration
154
+ output += "\n Status:\t".light_black + status
155
+ output += "\n Completed:\t".light_black + @job_logger.job_log.num_records_processed.to_s
156
+ if @job_logger.job_log.highest_timestamp
157
+ output += "\n Latest:\t".light_black + @job_logger.job_log.highest_timestamp.iso8601.to_s
158
+ end
159
+ output
160
+ end
73
161
  end
74
-
75
- @progress_bar.finish
76
-
77
- # This is typically a slow method (writing to stdout, writing a big file, etc)
78
- # TODO: consider adding a spinner?
79
- @loader.finish
80
- @job_logger.finish
81
- end
82
-
83
- def process_extraction(extraction)
84
- # For each extraction from our extractor, we create a new tarnsformer
85
- transformer = @job.instantiate_transformer(extraction)
86
-
87
- # And then transform that record, logging it if we're in debug log level
88
- record = transformer.transform
89
- Chronicle::ETL::Logger.debug(tty_log_transformation(transformer))
90
- @job_logger.log_transformation(transformer)
91
-
92
- # Then send the results to the loader
93
- @loader.load(record) unless @job.dry_run?
94
- rescue Chronicle::ETL::TransformationError => e
95
- # TODO: have an option to cancel job if we encounter an error
96
- Chronicle::ETL::Logger.error(tty_log_transformation_failure(e, transformer))
97
- end
98
-
99
- def finish_job
100
- @job_logger.save
101
- @progress_bar&.finish
102
- Chronicle::ETL::Logger.detach_from_ui
103
- Chronicle::ETL::Logger.info(tty_log_completion)
104
- end
105
-
106
- def tty_log_job_initialize
107
- output = "Beginning job "
108
- output += "'#{@job.name}'".bold if @job.name
109
- output
110
- end
111
-
112
- def tty_log_transformation(transformer)
113
- output = " ✓".green
114
- output += " #{transformer}"
115
- end
116
-
117
- def tty_log_transformation_failure(exception, transformer)
118
- output = " ✖".red
119
- output += " Failed to build #{transformer}. #{exception.message}"
120
- end
121
-
122
- def tty_log_completion
123
- status = @job_logger.success ? 'Success' : 'Failed'
124
- job_completion = @job_logger.success ? 'Completed' : 'Partially completed'
125
- output = "\n#{job_completion} job"
126
- output += " '#{@job.name}'".bold if @job.name
127
- output += " in #{ChronicDuration.output(@job_logger.duration)}" if @job_logger.duration
128
- output += "\n Status:\t".light_black + status
129
- output += "\n Completed:\t".light_black + "#{@job_logger.job_log.num_records_processed}"
130
- output += "\n Latest:\t".light_black + "#{@job_logger.job_log.highest_timestamp.iso8601}" if @job_logger.job_log.highest_timestamp
131
- output
132
162
  end
133
163
  end
@@ -1,4 +1,4 @@
1
- require "active_support/core_ext/hash/keys"
1
+ require 'active_support/core_ext/hash/keys'
2
2
 
3
3
  module Chronicle
4
4
  module ETL
@@ -8,7 +8,7 @@ module Chronicle
8
8
 
9
9
  # Whether a given namespace exists
10
10
  def exists?(namespace)
11
- Chronicle::ETL::Config.exists?("secrets", namespace)
11
+ Chronicle::ETL::Config.exists?('secrets', namespace)
12
12
  end
13
13
 
14
14
  # Save a setting to a namespaced config file
@@ -47,7 +47,7 @@ module Chronicle
47
47
 
48
48
  # Read secrets from a config file
49
49
  def read(namespace)
50
- definition = Chronicle::ETL::Config.load("secrets", namespace)
50
+ definition = Chronicle::ETL::Config.load('secrets', namespace)
51
51
  definition[:secrets] || {}
52
52
  end
53
53
 
@@ -57,7 +57,7 @@ module Chronicle
57
57
  secrets: (secrets || {}).transform_keys(&:to_s),
58
58
  chronicle_etl_version: Chronicle::ETL::VERSION
59
59
  }
60
- Chronicle::ETL::Config.write("secrets", namespace, data)
60
+ Chronicle::ETL::Config.write('secrets', namespace, data)
61
61
  end
62
62
 
63
63
  # Which config files are available in ~/.config/chronicle/etl/secrets
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chronicle
4
+ module ETL
5
+ class BufferTransformer < Chronicle::ETL::Transformer
6
+ register_connector do |r|
7
+ r.identifier = :buffer
8
+ r.description = 'by buffering'
9
+ end
10
+
11
+ setting :size, default: 10, description: 'The size of the buffer'
12
+
13
+ def transform(record)
14
+ stash_record(record)
15
+
16
+ # FIXME: this doesn't seem to be working with the runner
17
+ return if @stashed_records.size < @config.size
18
+
19
+ # FIXME: this will result in the wrong extraction being associated with
20
+ # the batch of flushed records
21
+ flush_stashed_records.map(&:data)
22
+ end
23
+
24
+ def finish
25
+ flush_stashed_records
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chronicle
4
+ module ETL
5
+ class ChronicleTransformer < Chronicle::ETL::Transformer
6
+ register_connector do |r|
7
+ r.identifier = :chronicle
8
+ r.description = 'records to Chronicle schema'
9
+ end
10
+
11
+ def transform(record)
12
+ converter_klass = find_converter(record.extraction)
13
+ # TODO: handle missing converter
14
+
15
+ converter_klass.new.call(record) do |transformed_record|
16
+ yield transformed_record.data
17
+ end
18
+ end
19
+
20
+ private
21
+
22
+ def find_converter(extraction)
23
+ Chronicle::ETL::Registry::Connectors.find_converter_for_source(
24
+ source: extraction.source,
25
+ type: extraction.type,
26
+ strategy: extraction.strategy,
27
+ target: :chronicle
28
+ )&.klass
29
+ end
30
+ end
31
+ end
32
+ end