chronicle-etl 0.5.5 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +75 -68
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +51 -49
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +8 -6
- data/lib/chronicle/etl/cli/connectors.rb +7 -7
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +14 -15
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +18 -8
- data/lib/chronicle/etl/configurable.rb +20 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +38 -26
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +2 -2
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +93 -36
- data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
- data/lib/chronicle/etl/registry/plugins.rb +27 -19
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +4 -4
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +49 -47
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
@@ -7,7 +7,7 @@ module Chronicle
|
|
7
7
|
|
8
8
|
# Macro for setting provider on an Authorizer
|
9
9
|
def provider(provider_name)
|
10
|
-
@provider_name = provider_name
|
10
|
+
@provider_name = provider_name.to_sym
|
11
11
|
end
|
12
12
|
|
13
13
|
# From all loaded Authorizers, return the first one that matches
|
@@ -16,15 +16,14 @@ module Chronicle
|
|
16
16
|
# @todo Have a proper identifier system for authorizers
|
17
17
|
# (to have more than one per plugin)
|
18
18
|
def find_by_provider(provider)
|
19
|
-
ObjectSpace.each_object(::Class).select {|klass| klass < self }.find do |authorizer|
|
19
|
+
ObjectSpace.each_object(::Class).select { |klass| klass < self }.find do |authorizer|
|
20
20
|
authorizer.provider_name == provider
|
21
21
|
end
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
25
|
# Construct a new authorizer
|
26
|
-
def initialize(args)
|
27
|
-
end
|
26
|
+
def initialize(args); end
|
28
27
|
|
29
28
|
# Main entry-point for authorization flows. Implemented by subclass
|
30
29
|
def authorize!
|
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
require 'sinatra'
|
4
4
|
require 'launchy'
|
5
|
-
require 'pp'
|
6
5
|
|
7
6
|
module Chronicle
|
8
7
|
module ETL
|
@@ -12,11 +11,14 @@ module Chronicle
|
|
12
11
|
default_task 'new'
|
13
12
|
namespace :authorizations
|
14
13
|
|
15
|
-
desc
|
14
|
+
desc 'authorize', 'Authorize with a third-party provider'
|
16
15
|
option :port, desc: 'Port to run authorization server on', type: :numeric, default: 4567
|
17
|
-
option :credentials, desc: 'Secrets namespace for where to read credentials from (default: PROVIDER)',
|
18
|
-
|
19
|
-
option :
|
16
|
+
option :credentials, desc: 'Secrets namespace for where to read credentials from (default: PROVIDER)',
|
17
|
+
type: :string, banner: 'NAMESPACE'
|
18
|
+
option :secrets, desc: 'Secrets namespace for where authorization should be saved to (default: PROVIDER)',
|
19
|
+
type: :string, banner: 'NAMESPACE'
|
20
|
+
option :print, desc: 'Show authorization results (instead of just saving secrets)', type: :boolean,
|
21
|
+
default: false
|
20
22
|
def new(provider)
|
21
23
|
authorizer_klass = find_authorizer_klass(provider)
|
22
24
|
credentials = load_credentials(provider: provider, credentials_source: options[:credentials])
|
@@ -30,7 +32,7 @@ module Chronicle
|
|
30
32
|
|
31
33
|
cli_exit(message: "Authorization saved to '#{secrets_namespace}' secrets")
|
32
34
|
rescue StandardError => e
|
33
|
-
cli_fail(message: "Authorization not successful.\n
|
35
|
+
cli_fail(message: "Authorization not successful.\n#{e.message}", exception: e)
|
34
36
|
end
|
35
37
|
|
36
38
|
private
|
@@ -10,7 +10,7 @@ module Chronicle
|
|
10
10
|
default_task 'list'
|
11
11
|
namespace :connectors
|
12
12
|
|
13
|
-
desc
|
13
|
+
desc 'list', 'Lists available connectors'
|
14
14
|
# Display all available connectors that chronicle-etl has access to
|
15
15
|
def list
|
16
16
|
connector_info = Chronicle::ETL::Registry::Connectors.connectors.map do |connector_registration|
|
@@ -18,7 +18,7 @@ module Chronicle
|
|
18
18
|
identifier: connector_registration.identifier,
|
19
19
|
phase: connector_registration.phase,
|
20
20
|
description: connector_registration.descriptive_phrase,
|
21
|
-
|
21
|
+
source: connector_registration.source,
|
22
22
|
core: connector_registration.built_in? ? '✓' : '',
|
23
23
|
class: connector_registration.klass_name
|
24
24
|
}
|
@@ -36,10 +36,10 @@ module Chronicle
|
|
36
36
|
puts table.render(indent: 0, padding: [0, 2])
|
37
37
|
end
|
38
38
|
|
39
|
-
desc
|
39
|
+
desc 'show PHASE IDENTIFIER', 'Show information about a connector'
|
40
40
|
def show(phase, identifier)
|
41
|
-
unless [
|
42
|
-
cli_fail(message:
|
41
|
+
unless %w[extractor transformer loader].include?(phase)
|
42
|
+
cli_fail(message: 'Phase argument must be one of: [extractor, transformer, loader]')
|
43
43
|
end
|
44
44
|
|
45
45
|
begin
|
@@ -51,9 +51,9 @@ module Chronicle
|
|
51
51
|
puts connector.klass.to_s.bold
|
52
52
|
puts " #{connector.descriptive_phrase}"
|
53
53
|
puts
|
54
|
-
puts
|
54
|
+
puts 'Settings:'
|
55
55
|
|
56
|
-
headers = [
|
56
|
+
headers = %w[name default required].map { |h| h.to_s.upcase.bold }
|
57
57
|
|
58
58
|
settings = connector.klass.settings.map do |name, setting|
|
59
59
|
[
|
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'tty-prompt'
|
3
4
|
|
4
5
|
module Chronicle
|
@@ -6,43 +7,67 @@ module Chronicle
|
|
6
7
|
module CLI
|
7
8
|
# CLI commands for working with ETL jobs
|
8
9
|
class Jobs < SubcommandBase
|
9
|
-
default_task
|
10
|
+
default_task 'start'
|
10
11
|
namespace :jobs
|
11
12
|
|
12
|
-
class_option :extractor, aliases: '-e', desc:
|
13
|
+
class_option :extractor, aliases: '-e', desc: 'Extractor class. Default: stdin', banner: 'NAME'
|
13
14
|
class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
14
|
-
class_option :transformer,
|
15
|
-
|
15
|
+
class_option :transformer,
|
16
|
+
aliases: '-t',
|
17
|
+
desc: 'Transformer identifier. Default: null',
|
18
|
+
banner: 'NAME',
|
19
|
+
type: 'array',
|
20
|
+
repeatable: true
|
16
21
|
class_option :loader, aliases: '-l', desc: 'Loader class. Default: table', banner: 'NAME'
|
17
22
|
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
18
23
|
|
19
24
|
# This is an array to deal with shell globbing
|
20
|
-
class_option :input,
|
21
|
-
|
22
|
-
|
23
|
-
|
25
|
+
class_option :input,
|
26
|
+
aliases: '-i',
|
27
|
+
desc: 'Input filename or directory',
|
28
|
+
default: [],
|
29
|
+
type: 'array',
|
30
|
+
banner: 'FILENAME'
|
31
|
+
class_option :since, desc: 'Load records SINCE this date (or fuzzy time duration)', banner: 'DATE'
|
32
|
+
class_option :until, desc: 'Load records UNTIL this date (or fuzzy time duration)', banner: 'DATE'
|
33
|
+
class_option :limit, desc: 'Only extract the first LIMIT records', banner: 'N'
|
34
|
+
|
35
|
+
class_option :schema,
|
36
|
+
desc: 'Which Schema to transform',
|
37
|
+
banner: 'SCHEMA_NAME',
|
38
|
+
type: 'string',
|
39
|
+
enum: %w[chronicle activitystream schemaorg chronobase]
|
40
|
+
class_option :format,
|
41
|
+
desc: 'How to serialize results',
|
42
|
+
banner: 'SCHEMA_NAME',
|
43
|
+
type: 'string',
|
44
|
+
enum: %w[jsonapi jsonld]
|
24
45
|
|
25
46
|
class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
|
26
47
|
class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
|
48
|
+
class_option :'fields-limit', desc: 'Output first N fields', type: :numeric
|
49
|
+
class_option :filter, desc: 'Filter records', type: 'array', banner: 'field=value'
|
27
50
|
class_option :header_row, desc: 'Output the header row of tabular output', type: 'boolean'
|
28
51
|
|
29
52
|
# Thor doesn't like `run` as a command name
|
30
53
|
map run: :start
|
31
|
-
desc
|
54
|
+
desc 'run', 'Start a job'
|
32
55
|
option :dry_run, desc: 'Only run the extraction and transform steps, not the loading', type: :boolean
|
33
56
|
long_desc <<-LONG_DESC
|
34
57
|
This will run an ETL job. Each job needs three parts:
|
35
58
|
|
36
59
|
1. #{'Extractor'.underline}: pulls data from an external source. By default, this is stdout. Other common options including pulling data from an API or reading JSON from a file.
|
37
60
|
|
38
|
-
2. #{'
|
61
|
+
2. #{'Transformers'.underline}: transform data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
|
39
62
|
|
40
63
|
3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout. With the --dry-run option, this step won't be run.
|
41
64
|
|
42
65
|
If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
|
43
|
-
LONG_DESC
|
66
|
+
LONG_DESC
|
44
67
|
# Run an ETL job
|
45
|
-
def start(
|
68
|
+
def start(*args)
|
69
|
+
name = args.first
|
70
|
+
|
46
71
|
# If someone runs `$ chronicle-etl` with no arguments, show help menu.
|
47
72
|
# TODO: decide if we should check that there's nothing in stdin pipe
|
48
73
|
# in case user wants to actually run this sort of job stdin->null->stdout
|
@@ -52,7 +77,7 @@ LONG_DESC
|
|
52
77
|
cli_exit
|
53
78
|
end
|
54
79
|
|
55
|
-
cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?(
|
80
|
+
cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?('jobs', name)
|
56
81
|
|
57
82
|
job_definition = build_job_definition(name, options)
|
58
83
|
|
@@ -66,7 +91,7 @@ LONG_DESC
|
|
66
91
|
|
67
92
|
run_job(job_definition)
|
68
93
|
rescue Chronicle::ETL::JobDefinitionError => e
|
69
|
-
message =
|
94
|
+
message = ''
|
70
95
|
job_definition.errors.each_pair do |category, errors|
|
71
96
|
message << "Problem with #{category}:\n - #{errors.map(&:to_s).join("\n - ")}"
|
72
97
|
end
|
@@ -74,14 +99,14 @@ LONG_DESC
|
|
74
99
|
end
|
75
100
|
|
76
101
|
option :'skip-confirmation', aliases: '-y', type: :boolean
|
77
|
-
desc
|
102
|
+
desc 'save', 'Save a job'
|
78
103
|
# Create an ETL job
|
79
104
|
def save(name)
|
80
105
|
write_config = true
|
81
106
|
job_definition = build_job_definition(name, options)
|
82
107
|
job_definition.validate!
|
83
108
|
|
84
|
-
if Chronicle::ETL::Config.exists?(
|
109
|
+
if Chronicle::ETL::Config.exists?('jobs', name) && !options[:'skip-confirmation']
|
85
110
|
prompt = TTY::Prompt.new
|
86
111
|
write_config = false
|
87
112
|
message = "Job '#{name}' exists already. Ovewrite it?"
|
@@ -92,34 +117,50 @@ LONG_DESC
|
|
92
117
|
end
|
93
118
|
|
94
119
|
if write_config
|
95
|
-
Chronicle::ETL::Config.write(
|
120
|
+
Chronicle::ETL::Config.write('jobs', name, job_definition.definition)
|
96
121
|
cli_exit(message: "Job saved. Run it with `$ chronicle-etl jobs:run #{name}`")
|
97
122
|
else
|
98
123
|
cli_fail(message: "\nJob not saved")
|
99
124
|
end
|
100
125
|
rescue Chronicle::ETL::JobDefinitionError => e
|
101
|
-
cli_fail(message:
|
126
|
+
cli_fail(message: 'Job definition error', exception: e)
|
102
127
|
end
|
103
128
|
|
104
|
-
desc
|
129
|
+
desc 'show', 'Show details about a job'
|
105
130
|
# Show an ETL job
|
106
131
|
def show(name = nil)
|
107
|
-
cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?(
|
132
|
+
cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?('jobs', name)
|
108
133
|
|
109
134
|
job_definition = build_job_definition(name, options)
|
110
135
|
job_definition.validate!
|
111
136
|
puts Chronicle::ETL::Job.new(job_definition)
|
112
137
|
rescue Chronicle::ETL::JobDefinitionError => e
|
113
|
-
cli_fail(message:
|
138
|
+
cli_fail(message: 'Job definition error', exception: e)
|
139
|
+
end
|
140
|
+
|
141
|
+
desc 'edit', 'Edit a job in default editor ($EDITOR)'
|
142
|
+
def edit(name = nil)
|
143
|
+
cli_fail(message: "Job '#{name}' does not exist") if name && !Chronicle::ETL::Config.exists?('jobs', name)
|
144
|
+
|
145
|
+
filename = Chronicle::ETL::Config.path('jobs', name)
|
146
|
+
system "${VISUAL:-${EDITOR:-vi}} \"#{filename}\""
|
147
|
+
|
148
|
+
definition = Chronicle::ETL::JobDefinition.new
|
149
|
+
definition.add_config(load_job_config(name))
|
150
|
+
definition.validate!
|
151
|
+
|
152
|
+
cli_exit(message: "Job '#{name}' saved")
|
153
|
+
rescue Chronicle::ETL::JobDefinitionError => e
|
154
|
+
cli_fail(message: 'Job definition error', exception: e)
|
114
155
|
end
|
115
156
|
|
116
|
-
desc
|
157
|
+
desc 'list', 'List all available jobs'
|
117
158
|
# List available ETL jobs
|
118
159
|
def list
|
119
160
|
jobs = Chronicle::ETL::Config.available_jobs
|
120
161
|
|
121
162
|
job_details = jobs.map do |job|
|
122
|
-
r = Chronicle::ETL::Config.load(
|
163
|
+
r = Chronicle::ETL::Config.load('jobs', job)
|
123
164
|
|
124
165
|
extractor = r[:extractor][:name] if r[:extractor]
|
125
166
|
transformer = r[:transformer][:name] if r[:transformer]
|
@@ -128,9 +169,9 @@ LONG_DESC
|
|
128
169
|
[job, extractor, transformer, loader]
|
129
170
|
end
|
130
171
|
|
131
|
-
headers = [
|
172
|
+
headers = %w[name extractor transformer loader].map { |h| h.upcase.bold }
|
132
173
|
|
133
|
-
puts
|
174
|
+
puts 'Available jobs:'
|
134
175
|
table = TTY::Table.new(headers, job_details)
|
135
176
|
puts table.render(indent: 0, padding: [0, 2])
|
136
177
|
rescue Chronicle::ETL::ConfigError => e
|
@@ -148,18 +189,19 @@ LONG_DESC
|
|
148
189
|
runner = Chronicle::ETL::Runner.new(job)
|
149
190
|
runner.run!
|
150
191
|
rescue RunnerError => e
|
151
|
-
cli_fail(message:
|
192
|
+
cli_fail(message: e.message.to_s, exception: e)
|
152
193
|
end
|
153
194
|
|
154
195
|
# TODO: probably could merge this with something in cli/plugin
|
155
196
|
def install_missing_plugins(missing_plugins)
|
156
197
|
prompt = TTY::Prompt.new
|
157
198
|
message = "Plugin#{'s' if missing_plugins.count > 1} specified by job not installed.\n"
|
158
|
-
message +=
|
159
|
-
message += missing_plugins.map { |name| "chronicle-#{name}".bold}
|
160
|
-
|
199
|
+
message += 'Do you want to install '
|
200
|
+
message += missing_plugins.map { |name| "chronicle-#{name}".bold }
|
201
|
+
.join(', ')
|
202
|
+
message += ' and start the job?'
|
161
203
|
will_install = prompt.yes?(message)
|
162
|
-
cli_fail(message: "Must install #{missing_plugins.join(
|
204
|
+
cli_fail(message: "Must install #{missing_plugins.join(', ')} plugin to run job") unless will_install
|
163
205
|
|
164
206
|
Chronicle::ETL::CLI::Plugins.new.install(*missing_plugins)
|
165
207
|
end
|
@@ -172,43 +214,78 @@ LONG_DESC
|
|
172
214
|
definition
|
173
215
|
end
|
174
216
|
|
175
|
-
def load_job_config
|
217
|
+
def load_job_config(name)
|
176
218
|
Chronicle::ETL::Config.read_job(name)
|
177
219
|
end
|
178
220
|
|
179
221
|
# Takes flag options and turns them into a runner config
|
180
222
|
# TODO: this needs a lot of refactoring
|
181
|
-
def process_flag_options
|
182
|
-
extractor_options = options[:'extractor-opts'].transform_keys(&:to_sym).merge(
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
loader_options = options[:'loader-opts'].transform_keys(&:to_sym).merge(
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
223
|
+
def process_flag_options(options)
|
224
|
+
extractor_options = options[:'extractor-opts'].transform_keys(&:to_sym).merge(
|
225
|
+
{
|
226
|
+
input: (options[:input] if options[:input].any?),
|
227
|
+
since: options[:since],
|
228
|
+
until: options[:until],
|
229
|
+
limit: options[:limit]
|
230
|
+
}.compact
|
231
|
+
)
|
232
|
+
|
233
|
+
loader_options = options[:'loader-opts'].transform_keys(&:to_sym).merge(
|
234
|
+
{
|
235
|
+
output: options[:output],
|
236
|
+
header_row: options[:header_row]
|
237
|
+
}.compact
|
238
|
+
)
|
239
|
+
|
240
|
+
processed_options = {
|
198
241
|
dry_run: options[:dry_run],
|
199
242
|
extractor: {
|
200
243
|
name: options[:extractor],
|
201
244
|
options: extractor_options
|
202
245
|
}.compact,
|
203
|
-
transformer: {
|
204
|
-
name: options[:transformer],
|
205
|
-
options: transformer_options
|
206
|
-
}.compact,
|
207
246
|
loader: {
|
208
247
|
name: options[:loader],
|
209
248
|
options: loader_options
|
210
249
|
}.compact
|
211
250
|
}
|
251
|
+
|
252
|
+
add_transformer(processed_options, 'chronicle') if options[:schema]
|
253
|
+
add_transformer(processed_options, options[:schema]) if options[:schema] && options[:schema] != 'chronicle'
|
254
|
+
add_transformers_from_option(processed_options, options[:transformer]) if options[:transformer]&.any?
|
255
|
+
if options[:filter]
|
256
|
+
add_transformer(processed_options, :filter, { filters: options[:filter].to_h do |f|
|
257
|
+
f.split('=')
|
258
|
+
end })
|
259
|
+
end
|
260
|
+
add_transformer(processed_options, :format, { format: options[:format] }) if options[:format]
|
261
|
+
add_transformer(processed_options, :filter_fields, { fields: options[:fields] }) if options[:fields]
|
262
|
+
if options[:'fields-limit']
|
263
|
+
add_transformer(processed_options, :fields_limit,
|
264
|
+
{ limit: options[:'fields-limit'] })
|
265
|
+
end
|
266
|
+
|
267
|
+
processed_options
|
268
|
+
end
|
269
|
+
|
270
|
+
def add_transformer(processed_options, name, options = {})
|
271
|
+
processed_options[:transformers] ||= []
|
272
|
+
processed_options[:transformers] << { name:, options: }
|
273
|
+
end
|
274
|
+
|
275
|
+
def add_transformers_from_option(processed_options, transformer_option)
|
276
|
+
processed_options[:transformers] ||= []
|
277
|
+
processed_options[:transformers] += transformer_option.map do |transformer_args|
|
278
|
+
transformer_name, *transformer_options = transformer_args
|
279
|
+
transformer_options = transformer_options.filter { |opt| opt.include?('=') }
|
280
|
+
|
281
|
+
{
|
282
|
+
name: transformer_name,
|
283
|
+
options: transformer_options.to_h do |opt|
|
284
|
+
key, value = opt.split('=')
|
285
|
+
[key.to_sym, value]
|
286
|
+
end
|
287
|
+
}
|
288
|
+
end
|
212
289
|
end
|
213
290
|
end
|
214
291
|
end
|
@@ -13,7 +13,7 @@ module Chronicle
|
|
13
13
|
class_option :silent, desc: 'Silence all output', type: :boolean
|
14
14
|
class_option :'no-color', desc: 'Disable colour output', type: :boolean
|
15
15
|
|
16
|
-
default_task
|
16
|
+
default_task 'jobs'
|
17
17
|
|
18
18
|
desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
|
19
19
|
subcommand 'connectors', Connectors
|
@@ -45,49 +45,49 @@ module Chronicle
|
|
45
45
|
true
|
46
46
|
end
|
47
47
|
|
48
|
-
desc
|
49
|
-
map %w
|
48
|
+
desc 'version', 'Show version'
|
49
|
+
map %w[--version -v] => :version
|
50
50
|
def version
|
51
51
|
shell.say "chronicle-etl #{Chronicle::ETL::VERSION}"
|
52
52
|
end
|
53
53
|
|
54
54
|
# Displays help options for chronicle-etl
|
55
|
-
def help(meth = nil,
|
55
|
+
def help(meth = nil, _subcommand = false)
|
56
56
|
if meth && !respond_to?(meth)
|
57
57
|
klass, task = ::Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
|
58
|
-
klass.start(['-h', task].compact, shell:
|
58
|
+
klass.start(['-h', task].compact, shell:)
|
59
59
|
else
|
60
|
-
shell.say
|
60
|
+
shell.say 'ABOUT:'.bold
|
61
61
|
shell.say " #{'chronicle-etl'.italic} is a toolkit for extracting and working with your digital"
|
62
|
-
shell.say
|
62
|
+
shell.say ' history. 📜'
|
63
63
|
shell.say
|
64
64
|
shell.say " A job #{'extracts'.underline} personal data from a source, #{'transforms'.underline} it (Chronicle"
|
65
65
|
shell.say " Schema or preserves raw data), and then #{'loads'.underline} it to a destination. Use"
|
66
|
-
shell.say
|
67
|
-
shell.say
|
66
|
+
shell.say ' built-in extractors (json, csv, stdin) and loaders (csv, json, table,'
|
67
|
+
shell.say ' rest) or use plugins to connect to third-party services.'
|
68
68
|
shell.say
|
69
|
-
shell.say
|
69
|
+
shell.say ' Plugins: https://github.com/chronicle-app/chronicle-etl#currently-available'
|
70
70
|
shell.say
|
71
|
-
shell.say
|
72
|
-
shell.say
|
73
|
-
shell.say
|
71
|
+
shell.say 'USAGE:'.bold
|
72
|
+
shell.say ' # Basic job usage:'.italic.light_black
|
73
|
+
shell.say ' $ chronicle-etl --extractor NAME --transformer NAME --loader NAME'
|
74
74
|
shell.say
|
75
|
-
shell.say
|
76
|
-
shell.say
|
75
|
+
shell.say ' # Read test.csv and display it to stdout as a table:'.italic.light_black
|
76
|
+
shell.say ' $ chronicle-etl --extractor csv --input data.csv --loader table'
|
77
77
|
shell.say
|
78
|
-
shell.say
|
79
|
-
shell.say
|
78
|
+
shell.say ' # Show available plugins:'.italic.light_black
|
79
|
+
shell.say ' $ chronicle-etl plugins:list'
|
80
80
|
shell.say
|
81
|
-
shell.say
|
82
|
-
shell.say
|
83
|
-
shell.say
|
84
|
-
shell.say
|
81
|
+
shell.say ' # Save an access token as a secret and use it in a job:'.italic.light_black
|
82
|
+
shell.say ' $ chronicle-etl secrets:set pinboard access_token username:foo123'
|
83
|
+
shell.say ' $ chronicle-etl secrets:list'
|
84
|
+
shell.say ' $ chronicle-etl -e pinboard --since 1mo'
|
85
85
|
shell.say
|
86
|
-
shell.say
|
87
|
-
shell.say
|
86
|
+
shell.say ' # Show full job options:'.italic.light_black
|
87
|
+
shell.say ' $ chronicle-etl jobs help run'
|
88
88
|
shell.say
|
89
|
-
shell.say
|
90
|
-
shell.say
|
89
|
+
shell.say 'FULL DOCUMENTATION:'.bold
|
90
|
+
shell.say ' https://github.com/chronicle-app/chronicle-etl'.blue
|
91
91
|
shell.say
|
92
92
|
|
93
93
|
list = []
|
@@ -95,17 +95,17 @@ module Chronicle
|
|
95
95
|
list += thor_class.printable_tasks(false)
|
96
96
|
end
|
97
97
|
list.sort! { |a, b| a[0] <=> b[0] }
|
98
|
-
list.unshift [
|
98
|
+
list.unshift ['help', '# This help menu']
|
99
99
|
|
100
100
|
shell.say
|
101
101
|
shell.say 'ALL COMMANDS:'.bold
|
102
102
|
shell.print_table(list, indent: 2, truncate: true)
|
103
103
|
shell.say
|
104
|
-
shell.say
|
104
|
+
shell.say 'VERSION:'.bold
|
105
105
|
shell.say " #{Chronicle::ETL::VERSION}"
|
106
106
|
shell.say
|
107
|
-
shell.say
|
108
|
-
shell.say
|
107
|
+
shell.say ' Display current version:'.italic.light_black
|
108
|
+
shell.say ' $ chronicle-etl --version'
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'tty-prompt'
|
4
|
+
require 'tty-spinner'
|
5
5
|
|
6
6
|
module Chronicle
|
7
7
|
module ETL
|
@@ -11,63 +11,62 @@ module Chronicle
|
|
11
11
|
default_task 'list'
|
12
12
|
namespace :plugins
|
13
13
|
|
14
|
-
desc
|
14
|
+
desc 'install', 'Install a plugin'
|
15
15
|
def install(*plugins)
|
16
|
-
cli_fail(message:
|
16
|
+
cli_fail(message: 'Please specify a plugin to install') unless plugins.any?
|
17
17
|
|
18
18
|
installed, not_installed = plugins.partition do |plugin|
|
19
19
|
Chronicle::ETL::Registry::Plugins.installed?(plugin)
|
20
20
|
end
|
21
21
|
|
22
|
-
puts "Already installed: #{installed.join(
|
22
|
+
puts "Already installed: #{installed.join(', ')}" if installed.any?
|
23
23
|
cli_exit unless not_installed.any?
|
24
24
|
|
25
|
-
spinner = TTY::Spinner.new("[:spinner] Installing #{not_installed.join(
|
25
|
+
spinner = TTY::Spinner.new("[:spinner] Installing #{not_installed.join(', ')}...", format: :dots_2)
|
26
26
|
spinner.auto_spin
|
27
27
|
|
28
28
|
not_installed.each do |plugin|
|
29
29
|
spinner.update(title: "Installing #{plugin}")
|
30
30
|
Chronicle::ETL::Registry::Plugins.install(plugin)
|
31
|
-
|
32
31
|
rescue Chronicle::ETL::PluginError => e
|
33
|
-
spinner.error(
|
32
|
+
spinner.error('Error'.red)
|
34
33
|
cli_fail(message: "Plugin '#{plugin}' could not be installed", exception: e)
|
35
34
|
end
|
36
35
|
|
37
36
|
spinner.success("(#{'successful'.green})")
|
38
37
|
end
|
39
38
|
|
40
|
-
desc
|
39
|
+
desc 'uninstall', 'Unintall a plugin'
|
41
40
|
def uninstall(name)
|
42
41
|
spinner = TTY::Spinner.new("[:spinner] Uninstalling plugin #{name}...", format: :dots_2)
|
43
42
|
spinner.auto_spin
|
44
43
|
Chronicle::ETL::Registry::Plugins.uninstall(name)
|
45
44
|
spinner.success("(#{'successful'.green})")
|
46
45
|
rescue Chronicle::ETL::PluginError => e
|
47
|
-
spinner.error(
|
46
|
+
spinner.error('Error'.red)
|
48
47
|
cli_fail(message: "Plugin '#{name}' could not be uninstalled (was it installed?)", exception: e)
|
49
48
|
end
|
50
49
|
|
51
|
-
desc
|
50
|
+
desc 'list', 'Lists available plugins'
|
52
51
|
# Display all available plugins that chronicle-etl has access to
|
53
52
|
def list
|
54
53
|
values = Chronicle::ETL::Registry::Plugins.all
|
55
54
|
.map do |plugin|
|
56
55
|
[
|
57
|
-
plugin.name,
|
56
|
+
plugin.name,
|
58
57
|
plugin.description,
|
59
58
|
plugin.installed ? '✓' : '',
|
60
59
|
plugin.version
|
61
60
|
]
|
62
61
|
end
|
63
62
|
|
64
|
-
headers = [
|
63
|
+
headers = %w[name description installed version].map { |h| h.to_s.upcase.bold }
|
65
64
|
table = TTY::Table.new(headers, values)
|
66
|
-
puts
|
65
|
+
puts 'Available plugins:'
|
67
66
|
puts table.render(
|
68
67
|
indent: 2,
|
69
68
|
padding: [0, 0],
|
70
|
-
alignments: [
|
69
|
+
alignments: %i[left left center left]
|
71
70
|
)
|
72
71
|
end
|
73
72
|
end
|