chronicle-etl 0.5.4 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +15 -25
  3. data/.rubocop.yml +2 -44
  4. data/Gemfile +2 -2
  5. data/Guardfile +3 -3
  6. data/README.md +98 -73
  7. data/Rakefile +2 -2
  8. data/bin/console +4 -5
  9. data/chronicle-etl.gemspec +50 -45
  10. data/exe/chronicle-etl +1 -1
  11. data/lib/chronicle/etl/authorizer.rb +3 -4
  12. data/lib/chronicle/etl/cli/authorizations.rb +10 -8
  13. data/lib/chronicle/etl/cli/connectors.rb +9 -9
  14. data/lib/chronicle/etl/cli/jobs.rb +130 -53
  15. data/lib/chronicle/etl/cli/main.rb +29 -29
  16. data/lib/chronicle/etl/cli/plugins.rb +29 -26
  17. data/lib/chronicle/etl/cli/secrets.rb +14 -12
  18. data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
  19. data/lib/chronicle/etl/config.rb +20 -7
  20. data/lib/chronicle/etl/configurable.rb +24 -9
  21. data/lib/chronicle/etl/exceptions.rb +3 -3
  22. data/lib/chronicle/etl/extraction.rb +12 -2
  23. data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
  24. data/lib/chronicle/etl/extractors/extractor.rb +15 -2
  25. data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
  26. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
  27. data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
  28. data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
  29. data/lib/chronicle/etl/job.rb +35 -17
  30. data/lib/chronicle/etl/job_definition.rb +39 -27
  31. data/lib/chronicle/etl/job_log.rb +14 -16
  32. data/lib/chronicle/etl/job_logger.rb +4 -4
  33. data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
  34. data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
  35. data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
  36. data/lib/chronicle/etl/loaders/loader.rb +0 -17
  37. data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
  38. data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
  39. data/lib/chronicle/etl/logger.rb +3 -3
  40. data/lib/chronicle/etl/oauth_authorizer.rb +8 -10
  41. data/lib/chronicle/etl/record.rb +15 -0
  42. data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
  43. data/lib/chronicle/etl/registry/connectors.rb +117 -0
  44. data/lib/chronicle/etl/registry/plugin_registration.rb +19 -0
  45. data/lib/chronicle/etl/registry/plugins.rb +171 -0
  46. data/lib/chronicle/etl/registry/registry.rb +3 -52
  47. data/lib/chronicle/etl/registry/self_registering.rb +1 -1
  48. data/lib/chronicle/etl/runner.rb +158 -128
  49. data/lib/chronicle/etl/secrets.rb +5 -5
  50. data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
  51. data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
  52. data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
  53. data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
  54. data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
  55. data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
  56. data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
  57. data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
  58. data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
  59. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
  60. data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
  61. data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
  62. data/lib/chronicle/etl/transformers/transformer.rb +63 -41
  63. data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
  64. data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
  65. data/lib/chronicle/etl/version.rb +1 -1
  66. data/lib/chronicle/etl.rb +6 -8
  67. metadata +91 -45
  68. data/lib/chronicle/etl/models/activity.rb +0 -15
  69. data/lib/chronicle/etl/models/attachment.rb +0 -14
  70. data/lib/chronicle/etl/models/base.rb +0 -122
  71. data/lib/chronicle/etl/models/entity.rb +0 -29
  72. data/lib/chronicle/etl/models/raw.rb +0 -26
  73. data/lib/chronicle/etl/registry/plugin_registry.rb +0 -95
  74. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
  75. data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
  76. data/lib/chronicle/etl/serializers/serializer.rb +0 -28
  77. data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
  78. data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
  79. data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -7,7 +7,7 @@ module Chronicle
7
7
 
8
8
  # Macro for setting provider on an Authorizer
9
9
  def provider(provider_name)
10
- @provider_name = provider_name
10
+ @provider_name = provider_name.to_sym
11
11
  end
12
12
 
13
13
  # From all loaded Authorizers, return the first one that matches
@@ -16,15 +16,14 @@ module Chronicle
16
16
  # @todo Have a proper identifier system for authorizers
17
17
  # (to have more than one per plugin)
18
18
  def find_by_provider(provider)
19
- ObjectSpace.each_object(::Class).select {|klass| klass < self }.find do |authorizer|
19
+ ObjectSpace.each_object(::Class).select { |klass| klass < self }.find do |authorizer|
20
20
  authorizer.provider_name == provider
21
21
  end
22
22
  end
23
23
  end
24
24
 
25
25
  # Construct a new authorizer
26
- def initialize(args)
27
- end
26
+ def initialize(args); end
28
27
 
29
28
  # Main entry-point for authorization flows. Implemented by subclass
30
29
  def authorize!
@@ -2,7 +2,6 @@
2
2
 
3
3
  require 'sinatra'
4
4
  require 'launchy'
5
- require 'pp'
6
5
 
7
6
  module Chronicle
8
7
  module ETL
@@ -12,11 +11,14 @@ module Chronicle
12
11
  default_task 'new'
13
12
  namespace :authorizations
14
13
 
15
- desc "authorize", "Authorize with a third-party provider"
14
+ desc 'authorize', 'Authorize with a third-party provider'
16
15
  option :port, desc: 'Port to run authorization server on', type: :numeric, default: 4567
17
- option :credentials, desc: 'Secrets namespace for where to read credentials from (default: PROVIDER)', type: :string, banner: 'NAMESPACE'
18
- option :secrets, desc: 'Secrets namespace for where authorization should be saved to (default: PROVIDER)', type: :string, banner: 'NAMESPACE'
19
- option :print, desc: 'Show authorization results (instead of just saving secrets)', type: :boolean, default: false
16
+ option :credentials, desc: 'Secrets namespace for where to read credentials from (default: PROVIDER)',
17
+ type: :string, banner: 'NAMESPACE'
18
+ option :secrets, desc: 'Secrets namespace for where authorization should be saved to (default: PROVIDER)',
19
+ type: :string, banner: 'NAMESPACE'
20
+ option :print, desc: 'Show authorization results (instead of just saving secrets)', type: :boolean,
21
+ default: false
20
22
  def new(provider)
21
23
  authorizer_klass = find_authorizer_klass(provider)
22
24
  credentials = load_credentials(provider: provider, credentials_source: options[:credentials])
@@ -30,19 +32,19 @@ module Chronicle
30
32
 
31
33
  cli_exit(message: "Authorization saved to '#{secrets_namespace}' secrets")
32
34
  rescue StandardError => e
33
- cli_fail(message: "Authorization not successful.\n" + e.message, exception: e)
35
+ cli_fail(message: "Authorization not successful.\n#{e.message}", exception: e)
34
36
  end
35
37
 
36
38
  private
37
39
 
38
40
  def find_authorizer_klass(provider)
39
41
  # TODO: this assumes provider:plugin one-to-one
40
- unless Chronicle::ETL::Registry::PluginRegistry.installed?(provider)
42
+ unless Chronicle::ETL::Registry::Plugins.installed?(provider)
41
43
  cli_fail(message: "Plugin for #{provider} is not installed.")
42
44
  end
43
45
 
44
46
  begin
45
- Chronicle::ETL::Registry::PluginRegistry.activate(provider)
47
+ Chronicle::ETL::Registry::Plugins.activate(provider)
46
48
  rescue PluginError => e
47
49
  cli_fail(message: "Could not load plugin '#{provider}'.\n" + e.message, exception: e)
48
50
  end
@@ -10,15 +10,15 @@ module Chronicle
10
10
  default_task 'list'
11
11
  namespace :connectors
12
12
 
13
- desc "list", "Lists available connectors"
13
+ desc 'list', 'Lists available connectors'
14
14
  # Display all available connectors that chronicle-etl has access to
15
15
  def list
16
- connector_info = Chronicle::ETL::Registry.connectors.map do |connector_registration|
16
+ connector_info = Chronicle::ETL::Registry::Connectors.connectors.map do |connector_registration|
17
17
  {
18
18
  identifier: connector_registration.identifier,
19
19
  phase: connector_registration.phase,
20
20
  description: connector_registration.descriptive_phrase,
21
- provider: connector_registration.provider,
21
+ source: connector_registration.source,
22
22
  core: connector_registration.built_in? ? '✓' : '',
23
23
  class: connector_registration.klass_name
24
24
  }
@@ -36,14 +36,14 @@ module Chronicle
36
36
  puts table.render(indent: 0, padding: [0, 2])
37
37
  end
38
38
 
39
- desc "show PHASE IDENTIFIER", "Show information about a connector"
39
+ desc 'show PHASE IDENTIFIER', 'Show information about a connector'
40
40
  def show(phase, identifier)
41
- unless ['extractor', 'transformer', 'loader'].include?(phase)
42
- cli_fail(message: "Phase argument must be one of: [extractor, transformer, loader]")
41
+ unless %w[extractor transformer loader].include?(phase)
42
+ cli_fail(message: 'Phase argument must be one of: [extractor, transformer, loader]')
43
43
  end
44
44
 
45
45
  begin
46
- connector = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase.to_sym, identifier)
46
+ connector = Chronicle::ETL::Registry::Connectors.find_by_phase_and_identifier(phase.to_sym, identifier)
47
47
  rescue Chronicle::ETL::ConnectorNotAvailableError, Chronicle::ETL::PluginError => e
48
48
  cli_fail(message: "Could not find #{phase} #{identifier}", exception: e)
49
49
  end
@@ -51,9 +51,9 @@ module Chronicle
51
51
  puts connector.klass.to_s.bold
52
52
  puts " #{connector.descriptive_phrase}"
53
53
  puts
54
- puts "Settings:"
54
+ puts 'Settings:'
55
55
 
56
- headers = ['name', 'default', 'required'].map{ |h| h.to_s.upcase.bold }
56
+ headers = %w[name default required].map { |h| h.to_s.upcase.bold }
57
57
 
58
58
  settings = connector.klass.settings.map do |name, setting|
59
59
  [
@@ -1,4 +1,5 @@
1
- require 'pp'
1
+ # frozen_string_literal: true
2
+
2
3
  require 'tty-prompt'
3
4
 
4
5
  module Chronicle
@@ -6,43 +7,67 @@ module Chronicle
6
7
  module CLI
7
8
  # CLI commands for working with ETL jobs
8
9
  class Jobs < SubcommandBase
9
- default_task "start"
10
+ default_task 'start'
10
11
  namespace :jobs
11
12
 
12
- class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'NAME'
13
+ class_option :extractor, aliases: '-e', desc: 'Extractor class. Default: stdin', banner: 'NAME'
13
14
  class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
14
- class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'NAME'
15
- class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
15
+ class_option :transformer,
16
+ aliases: '-t',
17
+ desc: 'Transformer identifier. Default: null',
18
+ banner: 'NAME',
19
+ type: 'array',
20
+ repeatable: true
16
21
  class_option :loader, aliases: '-l', desc: 'Loader class. Default: table', banner: 'NAME'
17
22
  class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
18
23
 
19
24
  # This is an array to deal with shell globbing
20
- class_option :input, aliases: '-i', desc: 'Input filename or directory', default: [], type: 'array', banner: 'FILENAME'
21
- class_option :since, desc: "Load records SINCE this date (or fuzzy time duration)", banner: 'DATE'
22
- class_option :until, desc: "Load records UNTIL this date (or fuzzy time duration)", banner: 'DATE'
23
- class_option :limit, desc: "Only extract the first LIMIT records", banner: 'N'
25
+ class_option :input,
26
+ aliases: '-i',
27
+ desc: 'Input filename or directory',
28
+ default: [],
29
+ type: 'array',
30
+ banner: 'FILENAME'
31
+ class_option :since, desc: 'Load records SINCE this date (or fuzzy time duration)', banner: 'DATE'
32
+ class_option :until, desc: 'Load records UNTIL this date (or fuzzy time duration)', banner: 'DATE'
33
+ class_option :limit, desc: 'Only extract the first LIMIT records', banner: 'N'
34
+
35
+ class_option :schema,
36
+ desc: 'Which Schema to transform',
37
+ banner: 'SCHEMA_NAME',
38
+ type: 'string',
39
+ enum: %w[chronicle activitystream schemaorg chronobase]
40
+ class_option :format,
41
+ desc: 'How to serialize results',
42
+ banner: 'SCHEMA_NAME',
43
+ type: 'string',
44
+ enum: %w[jsonapi jsonld]
24
45
 
25
46
  class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
26
47
  class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
48
+ class_option :'fields-limit', desc: 'Output first N fields', type: :numeric
49
+ class_option :filter, desc: 'Filter records', type: 'array', banner: 'field=value'
27
50
  class_option :header_row, desc: 'Output the header row of tabular output', type: 'boolean'
28
51
 
29
52
  # Thor doesn't like `run` as a command name
30
53
  map run: :start
31
- desc "run", "Start a job"
54
+ desc 'run', 'Start a job'
32
55
  option :dry_run, desc: 'Only run the extraction and transform steps, not the loading', type: :boolean
33
56
  long_desc <<-LONG_DESC
34
57
  This will run an ETL job. Each job needs three parts:
35
58
 
36
59
  1. #{'Extractor'.underline}: pulls data from an external source. By default, this is stdout. Other common options including pulling data from an API or reading JSON from a file.
37
60
 
38
- 2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
61
+ 2. #{'Transformers'.underline}: transform data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
39
62
 
40
63
  3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout. With the --dry-run option, this step won't be run.
41
64
 
42
65
  If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
43
- LONG_DESC
66
+ LONG_DESC
44
67
  # Run an ETL job
45
- def start(name = nil)
68
+ def start(*args)
69
+ name = args.first
70
+
46
71
  # If someone runs `$ chronicle-etl` with no arguments, show help menu.
47
72
  # TODO: decide if we should check that there's nothing in stdin pipe
48
73
  # in case user wants to actually run this sort of job stdin->null->stdout
@@ -52,7 +77,7 @@ LONG_DESC
52
77
  cli_exit
53
78
  end
54
79
 
55
- cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?("jobs", name)
80
+ cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?('jobs', name)
56
81
 
57
82
  job_definition = build_job_definition(name, options)
58
83
 
@@ -66,7 +91,7 @@ LONG_DESC
66
91
 
67
92
  run_job(job_definition)
68
93
  rescue Chronicle::ETL::JobDefinitionError => e
69
- message = ""
94
+ message = ''
70
95
  job_definition.errors.each_pair do |category, errors|
71
96
  message << "Problem with #{category}:\n - #{errors.map(&:to_s).join("\n - ")}"
72
97
  end
@@ -74,14 +99,14 @@ LONG_DESC
74
99
  end
75
100
 
76
101
  option :'skip-confirmation', aliases: '-y', type: :boolean
77
- desc "save", "Save a job"
102
+ desc 'save', 'Save a job'
78
103
  # Create an ETL job
79
104
  def save(name)
80
105
  write_config = true
81
106
  job_definition = build_job_definition(name, options)
82
107
  job_definition.validate!
83
108
 
84
- if Chronicle::ETL::Config.exists?("jobs", name) && !options[:'skip-confirmation']
109
+ if Chronicle::ETL::Config.exists?('jobs', name) && !options[:'skip-confirmation']
85
110
  prompt = TTY::Prompt.new
86
111
  write_config = false
87
112
  message = "Job '#{name}' exists already. Ovewrite it?"
@@ -92,34 +117,50 @@ LONG_DESC
92
117
  end
93
118
 
94
119
  if write_config
95
- Chronicle::ETL::Config.write("jobs", name, job_definition.definition)
120
+ Chronicle::ETL::Config.write('jobs', name, job_definition.definition)
96
121
  cli_exit(message: "Job saved. Run it with `$ chronicle-etl jobs:run #{name}`")
97
122
  else
98
123
  cli_fail(message: "\nJob not saved")
99
124
  end
100
125
  rescue Chronicle::ETL::JobDefinitionError => e
101
- cli_fail(message: "Job definition error", exception: e)
126
+ cli_fail(message: 'Job definition error', exception: e)
102
127
  end
103
128
 
104
- desc "show", "Show details about a job"
129
+ desc 'show', 'Show details about a job'
105
130
  # Show an ETL job
106
131
  def show(name = nil)
107
- cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?("jobs", name)
132
+ cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?('jobs', name)
108
133
 
109
134
  job_definition = build_job_definition(name, options)
110
135
  job_definition.validate!
111
136
  puts Chronicle::ETL::Job.new(job_definition)
112
137
  rescue Chronicle::ETL::JobDefinitionError => e
113
- cli_fail(message: "Job definition error", exception: e)
138
+ cli_fail(message: 'Job definition error', exception: e)
139
+ end
140
+
141
+ desc 'edit', 'Edit a job in default editor ($EDITOR)'
142
+ def edit(name = nil)
143
+ cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?('jobs', name)
144
+
145
+ filename = Chronicle::ETL::Config.path('jobs', name)
146
+ system "${VISUAL:-${EDITOR:-vi}} \"#{filename}\""
147
+
148
+ definition = Chronicle::ETL::JobDefinition.new
149
+ definition.add_config(load_job_config(name))
150
+ definition.validate!
151
+
152
+ cli_exit(message: "Job '#{name}' saved")
153
+ rescue Chronicle::ETL::JobDefinitionError => e
154
+ cli_fail(message: 'Job definition error', exception: e)
114
155
  end
115
156
 
116
- desc "list", "List all available jobs"
157
+ desc 'list', 'List all available jobs'
117
158
  # List available ETL jobs
118
159
  def list
119
160
  jobs = Chronicle::ETL::Config.available_jobs
120
161
 
121
162
  job_details = jobs.map do |job|
122
- r = Chronicle::ETL::Config.load("jobs", job)
163
+ r = Chronicle::ETL::Config.load('jobs', job)
123
164
 
124
165
  extractor = r[:extractor][:name] if r[:extractor]
125
166
  transformer = r[:transformer][:name] if r[:transformer]
@@ -128,9 +169,9 @@ LONG_DESC
128
169
  [job, extractor, transformer, loader]
129
170
  end
130
171
 
131
- headers = ['name', 'extractor', 'transformer', 'loader'].map { |h| h.upcase.bold }
172
+ headers = %w[name extractor transformer loader].map { |h| h.upcase.bold }
132
173
 
133
- puts "Available jobs:"
174
+ puts 'Available jobs:'
134
175
  table = TTY::Table.new(headers, job_details)
135
176
  puts table.render(indent: 0, padding: [0, 2])
136
177
  rescue Chronicle::ETL::ConfigError => e
@@ -148,18 +189,19 @@ LONG_DESC
148
189
  runner = Chronicle::ETL::Runner.new(job)
149
190
  runner.run!
150
191
  rescue RunnerError => e
151
- cli_fail(message: "#{e.message}", exception: e)
192
+ cli_fail(message: e.message.to_s, exception: e)
152
193
  end
153
194
 
154
195
  # TODO: probably could merge this with something in cli/plugin
155
196
  def install_missing_plugins(missing_plugins)
156
197
  prompt = TTY::Prompt.new
157
198
  message = "Plugin#{'s' if missing_plugins.count > 1} specified by job not installed.\n"
158
- message += "Do you want to install "
159
- message += missing_plugins.map { |name| "chronicle-#{name}".bold}.join(", ")
160
- message += " and start the job?"
199
+ message += 'Do you want to install '
200
+ message += missing_plugins.map { |name| "chronicle-#{name}".bold }
201
+ .join(', ')
202
+ message += ' and start the job?'
161
203
  will_install = prompt.yes?(message)
162
- cli_fail(message: "Must install #{missing_plugins.join(", ")} plugin to run job") unless will_install
204
+ cli_fail(message: "Must install #{missing_plugins.join(', ')} plugin to run job") unless will_install
163
205
 
164
206
  Chronicle::ETL::CLI::Plugins.new.install(*missing_plugins)
165
207
  end
@@ -172,43 +214,78 @@ LONG_DESC
172
214
  definition
173
215
  end
174
216
 
175
- def load_job_config name
217
+ def load_job_config(name)
176
218
  Chronicle::ETL::Config.read_job(name)
177
219
  end
178
220
 
179
221
  # Takes flag options and turns them into a runner config
180
222
  # TODO: this needs a lot of refactoring
181
- def process_flag_options options
182
- extractor_options = options[:'extractor-opts'].transform_keys(&:to_sym).merge({
183
- input: (options[:input] if options[:input].any?),
184
- since: options[:since],
185
- until: options[:until],
186
- limit: options[:limit]
187
- }.compact)
188
-
189
- transformer_options = options[:'transformer-opts'].transform_keys(&:to_sym)
190
-
191
- loader_options = options[:'loader-opts'].transform_keys(&:to_sym).merge({
192
- output: options[:output],
193
- header_row: options[:header_row],
194
- fields: options[:fields]
195
- }.compact)
196
-
197
- {
223
+ def process_flag_options(options)
224
+ extractor_options = options[:'extractor-opts'].transform_keys(&:to_sym).merge(
225
+ {
226
+ input: (options[:input] if options[:input].any?),
227
+ since: options[:since],
228
+ until: options[:until],
229
+ limit: options[:limit]
230
+ }.compact
231
+ )
232
+
233
+ loader_options = options[:'loader-opts'].transform_keys(&:to_sym).merge(
234
+ {
235
+ output: options[:output],
236
+ header_row: options[:header_row]
237
+ }.compact
238
+ )
239
+
240
+ processed_options = {
198
241
  dry_run: options[:dry_run],
199
242
  extractor: {
200
243
  name: options[:extractor],
201
244
  options: extractor_options
202
245
  }.compact,
203
- transformer: {
204
- name: options[:transformer],
205
- options: transformer_options
206
- }.compact,
207
246
  loader: {
208
247
  name: options[:loader],
209
248
  options: loader_options
210
249
  }.compact
211
250
  }
251
+
252
+ add_transformer(processed_options, 'chronicle') if options[:schema]
253
+ add_transformer(processed_options, options[:schema]) if options[:schema] && options[:schema] != 'chronicle'
254
+ add_transformers_from_option(processed_options, options[:transformer]) if options[:transformer]&.any?
255
+ if options[:filter]
256
+ add_transformer(processed_options, :filter, { filters: options[:filter].to_h do |f|
257
+ f.split('=')
258
+ end })
259
+ end
260
+ add_transformer(processed_options, :format, { format: options[:format] }) if options[:format]
261
+ add_transformer(processed_options, :filter_fields, { fields: options[:fields] }) if options[:fields]
262
+ if options[:'fields-limit']
263
+ add_transformer(processed_options, :fields_limit,
264
+ { limit: options[:'fields-limit'] })
265
+ end
266
+
267
+ processed_options
268
+ end
269
+
270
+ def add_transformer(processed_options, name, options = {})
271
+ processed_options[:transformers] ||= []
272
+ processed_options[:transformers] << { name:, options: }
273
+ end
274
+
275
+ def add_transformers_from_option(processed_options, transformer_option)
276
+ processed_options[:transformers] ||= []
277
+ processed_options[:transformers] += transformer_option.map do |transformer_args|
278
+ transformer_name, *transformer_options = transformer_args
279
+ transformer_options = transformer_options.filter { |opt| opt.include?('=') }
280
+
281
+ {
282
+ name: transformer_name,
283
+ options: transformer_options.to_h do |opt|
284
+ key, value = opt.split('=')
285
+ [key.to_sym, value]
286
+ end
287
+ }
288
+ end
212
289
  end
213
290
  end
214
291
  end
@@ -13,7 +13,7 @@ module Chronicle
13
13
  class_option :silent, desc: 'Silence all output', type: :boolean
14
14
  class_option :'no-color', desc: 'Disable colour output', type: :boolean
15
15
 
16
- default_task "jobs"
16
+ default_task 'jobs'
17
17
 
18
18
  desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
19
19
  subcommand 'connectors', Connectors
@@ -45,49 +45,49 @@ module Chronicle
45
45
  true
46
46
  end
47
47
 
48
- desc "version", "Show version"
49
- map %w(--version -v) => :version
48
+ desc 'version', 'Show version'
49
+ map %w[--version -v] => :version
50
50
  def version
51
51
  shell.say "chronicle-etl #{Chronicle::ETL::VERSION}"
52
52
  end
53
53
 
54
54
  # Displays help options for chronicle-etl
55
- def help(meth = nil, subcommand = false)
55
+ def help(meth = nil, _subcommand = false)
56
56
  if meth && !respond_to?(meth)
57
57
  klass, task = ::Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
58
- klass.start(['-h', task].compact, shell: shell)
58
+ klass.start(['-h', task].compact, shell:)
59
59
  else
60
- shell.say "ABOUT:".bold
60
+ shell.say 'ABOUT:'.bold
61
61
  shell.say " #{'chronicle-etl'.italic} is a toolkit for extracting and working with your digital"
62
- shell.say " history. 📜"
62
+ shell.say ' history. 📜'
63
63
  shell.say
64
64
  shell.say " A job #{'extracts'.underline} personal data from a source, #{'transforms'.underline} it (Chronicle"
65
65
  shell.say " Schema or preserves raw data), and then #{'loads'.underline} it to a destination. Use"
66
- shell.say " built-in extractors (json, csv, stdin) and loaders (csv, json, table,"
67
- shell.say " rest) or use plugins to connect to third-party services."
66
+ shell.say ' built-in extractors (json, csv, stdin) and loaders (csv, json, table,'
67
+ shell.say ' rest) or use plugins to connect to third-party services.'
68
68
  shell.say
69
- shell.say " Plugins: https://github.com/chronicle-app/chronicle-etl#currently-available"
69
+ shell.say ' Plugins: https://github.com/chronicle-app/chronicle-etl#currently-available'
70
70
  shell.say
71
- shell.say "USAGE:".bold
72
- shell.say " # Basic job usage:".italic.light_black
73
- shell.say " $ chronicle-etl --extractor NAME --transformer NAME --loader NAME"
71
+ shell.say 'USAGE:'.bold
72
+ shell.say ' # Basic job usage:'.italic.light_black
73
+ shell.say ' $ chronicle-etl --extractor NAME --transformer NAME --loader NAME'
74
74
  shell.say
75
- shell.say " # Read test.csv and display it to stdout as a table:".italic.light_black
76
- shell.say " $ chronicle-etl --extractor csv --input data.csv --loader table"
75
+ shell.say ' # Read test.csv and display it to stdout as a table:'.italic.light_black
76
+ shell.say ' $ chronicle-etl --extractor csv --input data.csv --loader table'
77
77
  shell.say
78
- shell.say " # Show available plugins:".italic.light_black
79
- shell.say " $ chronicle-etl plugins:list"
78
+ shell.say ' # Show available plugins:'.italic.light_black
79
+ shell.say ' $ chronicle-etl plugins:list'
80
80
  shell.say
81
- shell.say " # Save an access token as a secret and use it in a job:".italic.light_black
82
- shell.say " $ chronicle-etl secrets:set pinboard access_token username:foo123"
83
- shell.say " $ chronicle-etl secrets:list"
84
- shell.say " $ chronicle-etl -e pinboard --since 1mo"
81
+ shell.say ' # Save an access token as a secret and use it in a job:'.italic.light_black
82
+ shell.say ' $ chronicle-etl secrets:set pinboard access_token username:foo123'
83
+ shell.say ' $ chronicle-etl secrets:list'
84
+ shell.say ' $ chronicle-etl -e pinboard --since 1mo'
85
85
  shell.say
86
- shell.say " # Show full job options:".italic.light_black
87
- shell.say " $ chronicle-etl jobs help run"
86
+ shell.say ' # Show full job options:'.italic.light_black
87
+ shell.say ' $ chronicle-etl jobs help run'
88
88
  shell.say
89
- shell.say "FULL DOCUMENTATION:".bold
90
- shell.say " https://github.com/chronicle-app/chronicle-etl".blue
89
+ shell.say 'FULL DOCUMENTATION:'.bold
90
+ shell.say ' https://github.com/chronicle-app/chronicle-etl'.blue
91
91
  shell.say
92
92
 
93
93
  list = []
@@ -95,17 +95,17 @@ module Chronicle
95
95
  list += thor_class.printable_tasks(false)
96
96
  end
97
97
  list.sort! { |a, b| a[0] <=> b[0] }
98
- list.unshift ["help", "# This help menu"]
98
+ list.unshift ['help', '# This help menu']
99
99
 
100
100
  shell.say
101
101
  shell.say 'ALL COMMANDS:'.bold
102
102
  shell.print_table(list, indent: 2, truncate: true)
103
103
  shell.say
104
- shell.say "VERSION:".bold
104
+ shell.say 'VERSION:'.bold
105
105
  shell.say " #{Chronicle::ETL::VERSION}"
106
106
  shell.say
107
- shell.say " Display current version:".italic.light_black
108
- shell.say " $ chronicle-etl --version"
107
+ shell.say ' Display current version:'.italic.light_black
108
+ shell.say ' $ chronicle-etl --version'
109
109
  end
110
110
  end
111
111
 
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "tty-prompt"
4
- require "tty-spinner"
3
+ require 'tty-prompt'
4
+ require 'tty-spinner'
5
5
 
6
6
  module Chronicle
7
7
  module ETL
@@ -11,60 +11,63 @@ module Chronicle
11
11
  default_task 'list'
12
12
  namespace :plugins
13
13
 
14
- desc "install", "Install a plugin"
14
+ desc 'install', 'Install a plugin'
15
15
  def install(*plugins)
16
- cli_fail(message: "Please specify a plugin to install") unless plugins.any?
16
+ cli_fail(message: 'Please specify a plugin to install') unless plugins.any?
17
17
 
18
18
  installed, not_installed = plugins.partition do |plugin|
19
- Chronicle::ETL::Registry::PluginRegistry.installed?(plugin)
19
+ Chronicle::ETL::Registry::Plugins.installed?(plugin)
20
20
  end
21
21
 
22
- puts "Already installed: #{installed.join(", ")}" if installed.any?
22
+ puts "Already installed: #{installed.join(', ')}" if installed.any?
23
23
  cli_exit unless not_installed.any?
24
24
 
25
- spinner = TTY::Spinner.new("[:spinner] Installing #{not_installed.join(", ")}...", format: :dots_2)
25
+ spinner = TTY::Spinner.new("[:spinner] Installing #{not_installed.join(', ')}...", format: :dots_2)
26
26
  spinner.auto_spin
27
27
 
28
28
  not_installed.each do |plugin|
29
29
  spinner.update(title: "Installing #{plugin}")
30
- Chronicle::ETL::Registry::PluginRegistry.install(plugin)
31
-
30
+ Chronicle::ETL::Registry::Plugins.install(plugin)
32
31
  rescue Chronicle::ETL::PluginError => e
33
- spinner.error("Error".red)
32
+ spinner.error('Error'.red)
34
33
  cli_fail(message: "Plugin '#{plugin}' could not be installed", exception: e)
35
34
  end
36
35
 
37
36
  spinner.success("(#{'successful'.green})")
38
37
  end
39
38
 
40
- desc "uninstall", "Unintall a plugin"
39
+ desc 'uninstall', 'Unintall a plugin'
41
40
  def uninstall(name)
42
41
  spinner = TTY::Spinner.new("[:spinner] Uninstalling plugin #{name}...", format: :dots_2)
43
42
  spinner.auto_spin
44
- Chronicle::ETL::Registry::PluginRegistry.uninstall(name)
43
+ Chronicle::ETL::Registry::Plugins.uninstall(name)
45
44
  spinner.success("(#{'successful'.green})")
46
45
  rescue Chronicle::ETL::PluginError => e
47
- spinner.error("Error".red)
46
+ spinner.error('Error'.red)
48
47
  cli_fail(message: "Plugin '#{name}' could not be uninstalled (was it installed?)", exception: e)
49
48
  end
50
49
 
51
- desc "list", "Lists available plugins"
50
+ desc 'list', 'Lists available plugins'
52
51
  # Display all available plugins that chronicle-etl has access to
53
52
  def list
54
- plugins = Chronicle::ETL::Registry::PluginRegistry.all_installed_latest
55
-
56
- info = plugins.map do |plugin|
57
- {
58
- name: plugin.name.sub("chronicle-", ""),
59
- description: plugin.description,
60
- version: plugin.version
61
- }
53
+ values = Chronicle::ETL::Registry::Plugins.all
54
+ .map do |plugin|
55
+ [
56
+ plugin.name,
57
+ plugin.description,
58
+ plugin.installed ? '✓' : '',
59
+ plugin.version
60
+ ]
62
61
  end
63
62
 
64
- headers = ['name', 'description', 'version'].map{ |h| h.to_s.upcase.bold }
65
- table = TTY::Table.new(headers, info.map(&:values))
66
- puts "Installed plugins:"
67
- puts table.render(indent: 2, padding: [0, 0])
63
+ headers = %w[name description installed version].map { |h| h.to_s.upcase.bold }
64
+ table = TTY::Table.new(headers, values)
65
+ puts 'Available plugins:'
66
+ puts table.render(
67
+ indent: 2,
68
+ padding: [0, 0],
69
+ alignments: %i[left left center left]
70
+ )
68
71
  end
69
72
  end
70
73
  end