chronicle-etl 0.5.4 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +98 -73
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +50 -45
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +10 -8
- data/lib/chronicle/etl/cli/connectors.rb +9 -9
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +29 -26
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +20 -7
- data/lib/chronicle/etl/configurable.rb +24 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +39 -27
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +3 -3
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -10
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +117 -0
- data/lib/chronicle/etl/registry/plugin_registration.rb +19 -0
- data/lib/chronicle/etl/registry/plugins.rb +171 -0
- data/lib/chronicle/etl/registry/registry.rb +3 -52
- data/lib/chronicle/etl/registry/self_registering.rb +1 -1
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +5 -5
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +91 -45
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/registry/plugin_registry.rb +0 -95
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -7,7 +7,7 @@ module Chronicle
|
|
7
7
|
|
8
8
|
# Macro for setting provider on an Authorizer
|
9
9
|
def provider(provider_name)
|
10
|
-
@provider_name = provider_name
|
10
|
+
@provider_name = provider_name.to_sym
|
11
11
|
end
|
12
12
|
|
13
13
|
# From all loaded Authorizers, return the first one that matches
|
@@ -16,15 +16,14 @@ module Chronicle
|
|
16
16
|
# @todo Have a proper identifier system for authorizers
|
17
17
|
# (to have more than one per plugin)
|
18
18
|
def find_by_provider(provider)
|
19
|
-
ObjectSpace.each_object(::Class).select {|klass| klass < self }.find do |authorizer|
|
19
|
+
ObjectSpace.each_object(::Class).select { |klass| klass < self }.find do |authorizer|
|
20
20
|
authorizer.provider_name == provider
|
21
21
|
end
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
25
|
# Construct a new authorizer
|
26
|
-
def initialize(args)
|
27
|
-
end
|
26
|
+
def initialize(args); end
|
28
27
|
|
29
28
|
# Main entry-point for authorization flows. Implemented by subclass
|
30
29
|
def authorize!
|
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
require 'sinatra'
|
4
4
|
require 'launchy'
|
5
|
-
require 'pp'
|
6
5
|
|
7
6
|
module Chronicle
|
8
7
|
module ETL
|
@@ -12,11 +11,14 @@ module Chronicle
|
|
12
11
|
default_task 'new'
|
13
12
|
namespace :authorizations
|
14
13
|
|
15
|
-
desc
|
14
|
+
desc 'authorize', 'Authorize with a third-party provider'
|
16
15
|
option :port, desc: 'Port to run authorization server on', type: :numeric, default: 4567
|
17
|
-
option :credentials, desc: 'Secrets namespace for where to read credentials from (default: PROVIDER)',
|
18
|
-
|
19
|
-
option :
|
16
|
+
option :credentials, desc: 'Secrets namespace for where to read credentials from (default: PROVIDER)',
|
17
|
+
type: :string, banner: 'NAMESPACE'
|
18
|
+
option :secrets, desc: 'Secrets namespace for where authorization should be saved to (default: PROVIDER)',
|
19
|
+
type: :string, banner: 'NAMESPACE'
|
20
|
+
option :print, desc: 'Show authorization results (instead of just saving secrets)', type: :boolean,
|
21
|
+
default: false
|
20
22
|
def new(provider)
|
21
23
|
authorizer_klass = find_authorizer_klass(provider)
|
22
24
|
credentials = load_credentials(provider: provider, credentials_source: options[:credentials])
|
@@ -30,19 +32,19 @@ module Chronicle
|
|
30
32
|
|
31
33
|
cli_exit(message: "Authorization saved to '#{secrets_namespace}' secrets")
|
32
34
|
rescue StandardError => e
|
33
|
-
cli_fail(message: "Authorization not successful.\n
|
35
|
+
cli_fail(message: "Authorization not successful.\n#{e.message}", exception: e)
|
34
36
|
end
|
35
37
|
|
36
38
|
private
|
37
39
|
|
38
40
|
def find_authorizer_klass(provider)
|
39
41
|
# TODO: this assumes provider:plugin one-to-one
|
40
|
-
unless Chronicle::ETL::Registry::
|
42
|
+
unless Chronicle::ETL::Registry::Plugins.installed?(provider)
|
41
43
|
cli_fail(message: "Plugin for #{provider} is not installed.")
|
42
44
|
end
|
43
45
|
|
44
46
|
begin
|
45
|
-
Chronicle::ETL::Registry::
|
47
|
+
Chronicle::ETL::Registry::Plugins.activate(provider)
|
46
48
|
rescue PluginError => e
|
47
49
|
cli_fail(message: "Could not load plugin '#{provider}'.\n" + e.message, exception: e)
|
48
50
|
end
|
@@ -10,15 +10,15 @@ module Chronicle
|
|
10
10
|
default_task 'list'
|
11
11
|
namespace :connectors
|
12
12
|
|
13
|
-
desc
|
13
|
+
desc 'list', 'Lists available connectors'
|
14
14
|
# Display all available connectors that chronicle-etl has access to
|
15
15
|
def list
|
16
|
-
connector_info = Chronicle::ETL::Registry.connectors.map do |connector_registration|
|
16
|
+
connector_info = Chronicle::ETL::Registry::Connectors.connectors.map do |connector_registration|
|
17
17
|
{
|
18
18
|
identifier: connector_registration.identifier,
|
19
19
|
phase: connector_registration.phase,
|
20
20
|
description: connector_registration.descriptive_phrase,
|
21
|
-
|
21
|
+
source: connector_registration.source,
|
22
22
|
core: connector_registration.built_in? ? '✓' : '',
|
23
23
|
class: connector_registration.klass_name
|
24
24
|
}
|
@@ -36,14 +36,14 @@ module Chronicle
|
|
36
36
|
puts table.render(indent: 0, padding: [0, 2])
|
37
37
|
end
|
38
38
|
|
39
|
-
desc
|
39
|
+
desc 'show PHASE IDENTIFIER', 'Show information about a connector'
|
40
40
|
def show(phase, identifier)
|
41
|
-
unless [
|
42
|
-
cli_fail(message:
|
41
|
+
unless %w[extractor transformer loader].include?(phase)
|
42
|
+
cli_fail(message: 'Phase argument must be one of: [extractor, transformer, loader]')
|
43
43
|
end
|
44
44
|
|
45
45
|
begin
|
46
|
-
connector = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase.to_sym, identifier)
|
46
|
+
connector = Chronicle::ETL::Registry::Connectors.find_by_phase_and_identifier(phase.to_sym, identifier)
|
47
47
|
rescue Chronicle::ETL::ConnectorNotAvailableError, Chronicle::ETL::PluginError => e
|
48
48
|
cli_fail(message: "Could not find #{phase} #{identifier}", exception: e)
|
49
49
|
end
|
@@ -51,9 +51,9 @@ module Chronicle
|
|
51
51
|
puts connector.klass.to_s.bold
|
52
52
|
puts " #{connector.descriptive_phrase}"
|
53
53
|
puts
|
54
|
-
puts
|
54
|
+
puts 'Settings:'
|
55
55
|
|
56
|
-
headers = [
|
56
|
+
headers = %w[name default required].map { |h| h.to_s.upcase.bold }
|
57
57
|
|
58
58
|
settings = connector.klass.settings.map do |name, setting|
|
59
59
|
[
|
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'tty-prompt'
|
3
4
|
|
4
5
|
module Chronicle
|
@@ -6,43 +7,67 @@ module Chronicle
|
|
6
7
|
module CLI
|
7
8
|
# CLI commands for working with ETL jobs
|
8
9
|
class Jobs < SubcommandBase
|
9
|
-
default_task
|
10
|
+
default_task 'start'
|
10
11
|
namespace :jobs
|
11
12
|
|
12
|
-
class_option :extractor, aliases: '-e', desc:
|
13
|
+
class_option :extractor, aliases: '-e', desc: 'Extractor class. Default: stdin', banner: 'NAME'
|
13
14
|
class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
14
|
-
class_option :transformer,
|
15
|
-
|
15
|
+
class_option :transformer,
|
16
|
+
aliases: '-t',
|
17
|
+
desc: 'Transformer identifier. Default: null',
|
18
|
+
banner: 'NAME',
|
19
|
+
type: 'array',
|
20
|
+
repeatable: true
|
16
21
|
class_option :loader, aliases: '-l', desc: 'Loader class. Default: table', banner: 'NAME'
|
17
22
|
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
18
23
|
|
19
24
|
# This is an array to deal with shell globbing
|
20
|
-
class_option :input,
|
21
|
-
|
22
|
-
|
23
|
-
|
25
|
+
class_option :input,
|
26
|
+
aliases: '-i',
|
27
|
+
desc: 'Input filename or directory',
|
28
|
+
default: [],
|
29
|
+
type: 'array',
|
30
|
+
banner: 'FILENAME'
|
31
|
+
class_option :since, desc: 'Load records SINCE this date (or fuzzy time duration)', banner: 'DATE'
|
32
|
+
class_option :until, desc: 'Load records UNTIL this date (or fuzzy time duration)', banner: 'DATE'
|
33
|
+
class_option :limit, desc: 'Only extract the first LIMIT records', banner: 'N'
|
34
|
+
|
35
|
+
class_option :schema,
|
36
|
+
desc: 'Which Schema to transform',
|
37
|
+
banner: 'SCHEMA_NAME',
|
38
|
+
type: 'string',
|
39
|
+
enum: %w[chronicle activitystream schemaorg chronobase]
|
40
|
+
class_option :format,
|
41
|
+
desc: 'How to serialize results',
|
42
|
+
banner: 'SCHEMA_NAME',
|
43
|
+
type: 'string',
|
44
|
+
enum: %w[jsonapi jsonld]
|
24
45
|
|
25
46
|
class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
|
26
47
|
class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
|
48
|
+
class_option :'fields-limit', desc: 'Output first N fields', type: :numeric
|
49
|
+
class_option :filter, desc: 'Filter records', type: 'array', banner: 'field=value'
|
27
50
|
class_option :header_row, desc: 'Output the header row of tabular output', type: 'boolean'
|
28
51
|
|
29
52
|
# Thor doesn't like `run` as a command name
|
30
53
|
map run: :start
|
31
|
-
desc
|
54
|
+
desc 'run', 'Start a job'
|
32
55
|
option :dry_run, desc: 'Only run the extraction and transform steps, not the loading', type: :boolean
|
33
56
|
long_desc <<-LONG_DESC
|
34
57
|
This will run an ETL job. Each job needs three parts:
|
35
58
|
|
36
59
|
1. #{'Extractor'.underline}: pulls data from an external source. By default, this is stdout. Other common options including pulling data from an API or reading JSON from a file.
|
37
60
|
|
38
|
-
2. #{'
|
61
|
+
2. #{'Transformers'.underline}: transform data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
|
39
62
|
|
40
63
|
3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout. With the --dry-run option, this step won't be run.
|
41
64
|
|
42
65
|
If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
|
43
|
-
LONG_DESC
|
66
|
+
LONG_DESC
|
44
67
|
# Run an ETL job
|
45
|
-
def start(
|
68
|
+
def start(*args)
|
69
|
+
name = args.first
|
70
|
+
|
46
71
|
# If someone runs `$ chronicle-etl` with no arguments, show help menu.
|
47
72
|
# TODO: decide if we should check that there's nothing in stdin pipe
|
48
73
|
# in case user wants to actually run this sort of job stdin->null->stdout
|
@@ -52,7 +77,7 @@ LONG_DESC
|
|
52
77
|
cli_exit
|
53
78
|
end
|
54
79
|
|
55
|
-
cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?(
|
80
|
+
cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?('jobs', name)
|
56
81
|
|
57
82
|
job_definition = build_job_definition(name, options)
|
58
83
|
|
@@ -66,7 +91,7 @@ LONG_DESC
|
|
66
91
|
|
67
92
|
run_job(job_definition)
|
68
93
|
rescue Chronicle::ETL::JobDefinitionError => e
|
69
|
-
message =
|
94
|
+
message = ''
|
70
95
|
job_definition.errors.each_pair do |category, errors|
|
71
96
|
message << "Problem with #{category}:\n - #{errors.map(&:to_s).join("\n - ")}"
|
72
97
|
end
|
@@ -74,14 +99,14 @@ LONG_DESC
|
|
74
99
|
end
|
75
100
|
|
76
101
|
option :'skip-confirmation', aliases: '-y', type: :boolean
|
77
|
-
desc
|
102
|
+
desc 'save', 'Save a job'
|
78
103
|
# Create an ETL job
|
79
104
|
def save(name)
|
80
105
|
write_config = true
|
81
106
|
job_definition = build_job_definition(name, options)
|
82
107
|
job_definition.validate!
|
83
108
|
|
84
|
-
if Chronicle::ETL::Config.exists?(
|
109
|
+
if Chronicle::ETL::Config.exists?('jobs', name) && !options[:'skip-confirmation']
|
85
110
|
prompt = TTY::Prompt.new
|
86
111
|
write_config = false
|
87
112
|
message = "Job '#{name}' exists already. Ovewrite it?"
|
@@ -92,34 +117,50 @@ LONG_DESC
|
|
92
117
|
end
|
93
118
|
|
94
119
|
if write_config
|
95
|
-
Chronicle::ETL::Config.write(
|
120
|
+
Chronicle::ETL::Config.write('jobs', name, job_definition.definition)
|
96
121
|
cli_exit(message: "Job saved. Run it with `$ chronicle-etl jobs:run #{name}`")
|
97
122
|
else
|
98
123
|
cli_fail(message: "\nJob not saved")
|
99
124
|
end
|
100
125
|
rescue Chronicle::ETL::JobDefinitionError => e
|
101
|
-
cli_fail(message:
|
126
|
+
cli_fail(message: 'Job definition error', exception: e)
|
102
127
|
end
|
103
128
|
|
104
|
-
desc
|
129
|
+
desc 'show', 'Show details about a job'
|
105
130
|
# Show an ETL job
|
106
131
|
def show(name = nil)
|
107
|
-
cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?(
|
132
|
+
cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?('jobs', name)
|
108
133
|
|
109
134
|
job_definition = build_job_definition(name, options)
|
110
135
|
job_definition.validate!
|
111
136
|
puts Chronicle::ETL::Job.new(job_definition)
|
112
137
|
rescue Chronicle::ETL::JobDefinitionError => e
|
113
|
-
cli_fail(message:
|
138
|
+
cli_fail(message: 'Job definition error', exception: e)
|
139
|
+
end
|
140
|
+
|
141
|
+
desc 'edit', 'Edit a job in default editor ($EDITOR)'
|
142
|
+
def edit(name = nil)
|
143
|
+
cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?('jobs', name)
|
144
|
+
|
145
|
+
filename = Chronicle::ETL::Config.path('jobs', name)
|
146
|
+
system "${VISUAL:-${EDITOR:-vi}} \"#{filename}\""
|
147
|
+
|
148
|
+
definition = Chronicle::ETL::JobDefinition.new
|
149
|
+
definition.add_config(load_job_config(name))
|
150
|
+
definition.validate!
|
151
|
+
|
152
|
+
cli_exit(message: "Job '#{name}' saved")
|
153
|
+
rescue Chronicle::ETL::JobDefinitionError => e
|
154
|
+
cli_fail(message: 'Job definition error', exception: e)
|
114
155
|
end
|
115
156
|
|
116
|
-
desc
|
157
|
+
desc 'list', 'List all available jobs'
|
117
158
|
# List available ETL jobs
|
118
159
|
def list
|
119
160
|
jobs = Chronicle::ETL::Config.available_jobs
|
120
161
|
|
121
162
|
job_details = jobs.map do |job|
|
122
|
-
r = Chronicle::ETL::Config.load(
|
163
|
+
r = Chronicle::ETL::Config.load('jobs', job)
|
123
164
|
|
124
165
|
extractor = r[:extractor][:name] if r[:extractor]
|
125
166
|
transformer = r[:transformer][:name] if r[:transformer]
|
@@ -128,9 +169,9 @@ LONG_DESC
|
|
128
169
|
[job, extractor, transformer, loader]
|
129
170
|
end
|
130
171
|
|
131
|
-
headers = [
|
172
|
+
headers = %w[name extractor transformer loader].map { |h| h.upcase.bold }
|
132
173
|
|
133
|
-
puts
|
174
|
+
puts 'Available jobs:'
|
134
175
|
table = TTY::Table.new(headers, job_details)
|
135
176
|
puts table.render(indent: 0, padding: [0, 2])
|
136
177
|
rescue Chronicle::ETL::ConfigError => e
|
@@ -148,18 +189,19 @@ LONG_DESC
|
|
148
189
|
runner = Chronicle::ETL::Runner.new(job)
|
149
190
|
runner.run!
|
150
191
|
rescue RunnerError => e
|
151
|
-
cli_fail(message:
|
192
|
+
cli_fail(message: e.message.to_s, exception: e)
|
152
193
|
end
|
153
194
|
|
154
195
|
# TODO: probably could merge this with something in cli/plugin
|
155
196
|
def install_missing_plugins(missing_plugins)
|
156
197
|
prompt = TTY::Prompt.new
|
157
198
|
message = "Plugin#{'s' if missing_plugins.count > 1} specified by job not installed.\n"
|
158
|
-
message +=
|
159
|
-
message += missing_plugins.map { |name| "chronicle-#{name}".bold}
|
160
|
-
|
199
|
+
message += 'Do you want to install '
|
200
|
+
message += missing_plugins.map { |name| "chronicle-#{name}".bold }
|
201
|
+
.join(', ')
|
202
|
+
message += ' and start the job?'
|
161
203
|
will_install = prompt.yes?(message)
|
162
|
-
cli_fail(message: "Must install #{missing_plugins.join(
|
204
|
+
cli_fail(message: "Must install #{missing_plugins.join(', ')} plugin to run job") unless will_install
|
163
205
|
|
164
206
|
Chronicle::ETL::CLI::Plugins.new.install(*missing_plugins)
|
165
207
|
end
|
@@ -172,43 +214,78 @@ LONG_DESC
|
|
172
214
|
definition
|
173
215
|
end
|
174
216
|
|
175
|
-
def load_job_config
|
217
|
+
def load_job_config(name)
|
176
218
|
Chronicle::ETL::Config.read_job(name)
|
177
219
|
end
|
178
220
|
|
179
221
|
# Takes flag options and turns them into a runner config
|
180
222
|
# TODO: this needs a lot of refactoring
|
181
|
-
def process_flag_options
|
182
|
-
extractor_options = options[:'extractor-opts'].transform_keys(&:to_sym).merge(
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
loader_options = options[:'loader-opts'].transform_keys(&:to_sym).merge(
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
223
|
+
def process_flag_options(options)
|
224
|
+
extractor_options = options[:'extractor-opts'].transform_keys(&:to_sym).merge(
|
225
|
+
{
|
226
|
+
input: (options[:input] if options[:input].any?),
|
227
|
+
since: options[:since],
|
228
|
+
until: options[:until],
|
229
|
+
limit: options[:limit]
|
230
|
+
}.compact
|
231
|
+
)
|
232
|
+
|
233
|
+
loader_options = options[:'loader-opts'].transform_keys(&:to_sym).merge(
|
234
|
+
{
|
235
|
+
output: options[:output],
|
236
|
+
header_row: options[:header_row]
|
237
|
+
}.compact
|
238
|
+
)
|
239
|
+
|
240
|
+
processed_options = {
|
198
241
|
dry_run: options[:dry_run],
|
199
242
|
extractor: {
|
200
243
|
name: options[:extractor],
|
201
244
|
options: extractor_options
|
202
245
|
}.compact,
|
203
|
-
transformer: {
|
204
|
-
name: options[:transformer],
|
205
|
-
options: transformer_options
|
206
|
-
}.compact,
|
207
246
|
loader: {
|
208
247
|
name: options[:loader],
|
209
248
|
options: loader_options
|
210
249
|
}.compact
|
211
250
|
}
|
251
|
+
|
252
|
+
add_transformer(processed_options, 'chronicle') if options[:schema]
|
253
|
+
add_transformer(processed_options, options[:schema]) if options[:schema] && options[:schema] != 'chronicle'
|
254
|
+
add_transformers_from_option(processed_options, options[:transformer]) if options[:transformer]&.any?
|
255
|
+
if options[:filter]
|
256
|
+
add_transformer(processed_options, :filter, { filters: options[:filter].to_h do |f|
|
257
|
+
f.split('=')
|
258
|
+
end })
|
259
|
+
end
|
260
|
+
add_transformer(processed_options, :format, { format: options[:format] }) if options[:format]
|
261
|
+
add_transformer(processed_options, :filter_fields, { fields: options[:fields] }) if options[:fields]
|
262
|
+
if options[:'fields-limit']
|
263
|
+
add_transformer(processed_options, :fields_limit,
|
264
|
+
{ limit: options[:'fields-limit'] })
|
265
|
+
end
|
266
|
+
|
267
|
+
processed_options
|
268
|
+
end
|
269
|
+
|
270
|
+
def add_transformer(processed_options, name, options = {})
|
271
|
+
processed_options[:transformers] ||= []
|
272
|
+
processed_options[:transformers] << { name:, options: }
|
273
|
+
end
|
274
|
+
|
275
|
+
def add_transformers_from_option(processed_options, transformer_option)
|
276
|
+
processed_options[:transformers] ||= []
|
277
|
+
processed_options[:transformers] += transformer_option.map do |transformer_args|
|
278
|
+
transformer_name, *transformer_options = transformer_args
|
279
|
+
transformer_options = transformer_options.filter { |opt| opt.include?('=') }
|
280
|
+
|
281
|
+
{
|
282
|
+
name: transformer_name,
|
283
|
+
options: transformer_options.to_h do |opt|
|
284
|
+
key, value = opt.split('=')
|
285
|
+
[key.to_sym, value]
|
286
|
+
end
|
287
|
+
}
|
288
|
+
end
|
212
289
|
end
|
213
290
|
end
|
214
291
|
end
|
@@ -13,7 +13,7 @@ module Chronicle
|
|
13
13
|
class_option :silent, desc: 'Silence all output', type: :boolean
|
14
14
|
class_option :'no-color', desc: 'Disable colour output', type: :boolean
|
15
15
|
|
16
|
-
default_task
|
16
|
+
default_task 'jobs'
|
17
17
|
|
18
18
|
desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
|
19
19
|
subcommand 'connectors', Connectors
|
@@ -45,49 +45,49 @@ module Chronicle
|
|
45
45
|
true
|
46
46
|
end
|
47
47
|
|
48
|
-
desc
|
49
|
-
map %w
|
48
|
+
desc 'version', 'Show version'
|
49
|
+
map %w[--version -v] => :version
|
50
50
|
def version
|
51
51
|
shell.say "chronicle-etl #{Chronicle::ETL::VERSION}"
|
52
52
|
end
|
53
53
|
|
54
54
|
# Displays help options for chronicle-etl
|
55
|
-
def help(meth = nil,
|
55
|
+
def help(meth = nil, _subcommand = false)
|
56
56
|
if meth && !respond_to?(meth)
|
57
57
|
klass, task = ::Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
|
58
|
-
klass.start(['-h', task].compact, shell:
|
58
|
+
klass.start(['-h', task].compact, shell:)
|
59
59
|
else
|
60
|
-
shell.say
|
60
|
+
shell.say 'ABOUT:'.bold
|
61
61
|
shell.say " #{'chronicle-etl'.italic} is a toolkit for extracting and working with your digital"
|
62
|
-
shell.say
|
62
|
+
shell.say ' history. 📜'
|
63
63
|
shell.say
|
64
64
|
shell.say " A job #{'extracts'.underline} personal data from a source, #{'transforms'.underline} it (Chronicle"
|
65
65
|
shell.say " Schema or preserves raw data), and then #{'loads'.underline} it to a destination. Use"
|
66
|
-
shell.say
|
67
|
-
shell.say
|
66
|
+
shell.say ' built-in extractors (json, csv, stdin) and loaders (csv, json, table,'
|
67
|
+
shell.say ' rest) or use plugins to connect to third-party services.'
|
68
68
|
shell.say
|
69
|
-
shell.say
|
69
|
+
shell.say ' Plugins: https://github.com/chronicle-app/chronicle-etl#currently-available'
|
70
70
|
shell.say
|
71
|
-
shell.say
|
72
|
-
shell.say
|
73
|
-
shell.say
|
71
|
+
shell.say 'USAGE:'.bold
|
72
|
+
shell.say ' # Basic job usage:'.italic.light_black
|
73
|
+
shell.say ' $ chronicle-etl --extractor NAME --transformer NAME --loader NAME'
|
74
74
|
shell.say
|
75
|
-
shell.say
|
76
|
-
shell.say
|
75
|
+
shell.say ' # Read test.csv and display it to stdout as a table:'.italic.light_black
|
76
|
+
shell.say ' $ chronicle-etl --extractor csv --input data.csv --loader table'
|
77
77
|
shell.say
|
78
|
-
shell.say
|
79
|
-
shell.say
|
78
|
+
shell.say ' # Show available plugins:'.italic.light_black
|
79
|
+
shell.say ' $ chronicle-etl plugins:list'
|
80
80
|
shell.say
|
81
|
-
shell.say
|
82
|
-
shell.say
|
83
|
-
shell.say
|
84
|
-
shell.say
|
81
|
+
shell.say ' # Save an access token as a secret and use it in a job:'.italic.light_black
|
82
|
+
shell.say ' $ chronicle-etl secrets:set pinboard access_token username:foo123'
|
83
|
+
shell.say ' $ chronicle-etl secrets:list'
|
84
|
+
shell.say ' $ chronicle-etl -e pinboard --since 1mo'
|
85
85
|
shell.say
|
86
|
-
shell.say
|
87
|
-
shell.say
|
86
|
+
shell.say ' # Show full job options:'.italic.light_black
|
87
|
+
shell.say ' $ chronicle-etl jobs help run'
|
88
88
|
shell.say
|
89
|
-
shell.say
|
90
|
-
shell.say
|
89
|
+
shell.say 'FULL DOCUMENTATION:'.bold
|
90
|
+
shell.say ' https://github.com/chronicle-app/chronicle-etl'.blue
|
91
91
|
shell.say
|
92
92
|
|
93
93
|
list = []
|
@@ -95,17 +95,17 @@ module Chronicle
|
|
95
95
|
list += thor_class.printable_tasks(false)
|
96
96
|
end
|
97
97
|
list.sort! { |a, b| a[0] <=> b[0] }
|
98
|
-
list.unshift [
|
98
|
+
list.unshift ['help', '# This help menu']
|
99
99
|
|
100
100
|
shell.say
|
101
101
|
shell.say 'ALL COMMANDS:'.bold
|
102
102
|
shell.print_table(list, indent: 2, truncate: true)
|
103
103
|
shell.say
|
104
|
-
shell.say
|
104
|
+
shell.say 'VERSION:'.bold
|
105
105
|
shell.say " #{Chronicle::ETL::VERSION}"
|
106
106
|
shell.say
|
107
|
-
shell.say
|
108
|
-
shell.say
|
107
|
+
shell.say ' Display current version:'.italic.light_black
|
108
|
+
shell.say ' $ chronicle-etl --version'
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'tty-prompt'
|
4
|
+
require 'tty-spinner'
|
5
5
|
|
6
6
|
module Chronicle
|
7
7
|
module ETL
|
@@ -11,60 +11,63 @@ module Chronicle
|
|
11
11
|
default_task 'list'
|
12
12
|
namespace :plugins
|
13
13
|
|
14
|
-
desc
|
14
|
+
desc 'install', 'Install a plugin'
|
15
15
|
def install(*plugins)
|
16
|
-
cli_fail(message:
|
16
|
+
cli_fail(message: 'Please specify a plugin to install') unless plugins.any?
|
17
17
|
|
18
18
|
installed, not_installed = plugins.partition do |plugin|
|
19
|
-
Chronicle::ETL::Registry::
|
19
|
+
Chronicle::ETL::Registry::Plugins.installed?(plugin)
|
20
20
|
end
|
21
21
|
|
22
|
-
puts "Already installed: #{installed.join(
|
22
|
+
puts "Already installed: #{installed.join(', ')}" if installed.any?
|
23
23
|
cli_exit unless not_installed.any?
|
24
24
|
|
25
|
-
spinner = TTY::Spinner.new("[:spinner] Installing #{not_installed.join(
|
25
|
+
spinner = TTY::Spinner.new("[:spinner] Installing #{not_installed.join(', ')}...", format: :dots_2)
|
26
26
|
spinner.auto_spin
|
27
27
|
|
28
28
|
not_installed.each do |plugin|
|
29
29
|
spinner.update(title: "Installing #{plugin}")
|
30
|
-
Chronicle::ETL::Registry::
|
31
|
-
|
30
|
+
Chronicle::ETL::Registry::Plugins.install(plugin)
|
32
31
|
rescue Chronicle::ETL::PluginError => e
|
33
|
-
spinner.error(
|
32
|
+
spinner.error('Error'.red)
|
34
33
|
cli_fail(message: "Plugin '#{plugin}' could not be installed", exception: e)
|
35
34
|
end
|
36
35
|
|
37
36
|
spinner.success("(#{'successful'.green})")
|
38
37
|
end
|
39
38
|
|
40
|
-
desc
|
39
|
+
desc 'uninstall', 'Unintall a plugin'
|
41
40
|
def uninstall(name)
|
42
41
|
spinner = TTY::Spinner.new("[:spinner] Uninstalling plugin #{name}...", format: :dots_2)
|
43
42
|
spinner.auto_spin
|
44
|
-
Chronicle::ETL::Registry::
|
43
|
+
Chronicle::ETL::Registry::Plugins.uninstall(name)
|
45
44
|
spinner.success("(#{'successful'.green})")
|
46
45
|
rescue Chronicle::ETL::PluginError => e
|
47
|
-
spinner.error(
|
46
|
+
spinner.error('Error'.red)
|
48
47
|
cli_fail(message: "Plugin '#{name}' could not be uninstalled (was it installed?)", exception: e)
|
49
48
|
end
|
50
49
|
|
51
|
-
desc
|
50
|
+
desc 'list', 'Lists available plugins'
|
52
51
|
# Display all available plugins that chronicle-etl has access to
|
53
52
|
def list
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
53
|
+
values = Chronicle::ETL::Registry::Plugins.all
|
54
|
+
.map do |plugin|
|
55
|
+
[
|
56
|
+
plugin.name,
|
57
|
+
plugin.description,
|
58
|
+
plugin.installed ? '✓' : '',
|
59
|
+
plugin.version
|
60
|
+
]
|
62
61
|
end
|
63
62
|
|
64
|
-
headers = [
|
65
|
-
table = TTY::Table.new(headers,
|
66
|
-
puts
|
67
|
-
puts table.render(
|
63
|
+
headers = %w[name description installed version].map { |h| h.to_s.upcase.bold }
|
64
|
+
table = TTY::Table.new(headers, values)
|
65
|
+
puts 'Available plugins:'
|
66
|
+
puts table.render(
|
67
|
+
indent: 2,
|
68
|
+
padding: [0, 0],
|
69
|
+
alignments: %i[left left center left]
|
70
|
+
)
|
68
71
|
end
|
69
72
|
end
|
70
73
|
end
|