chronicle-etl 0.2.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/.gitignore +3 -0
- data/.rubocop.yml +31 -1
- data/Guardfile +7 -0
- data/README.md +21 -14
- data/Rakefile +4 -2
- data/chronicle-etl.gemspec +18 -10
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/cli/connectors.rb +53 -7
- data/lib/chronicle/etl/cli/jobs.rb +59 -24
- data/lib/chronicle/etl/cli/main.rb +18 -16
- data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
- data/lib/chronicle/etl/cli.rb +7 -0
- data/lib/chronicle/etl/config.rb +1 -1
- data/lib/chronicle/etl/configurable.rb +150 -0
- data/lib/chronicle/etl/exceptions.rb +14 -1
- data/lib/chronicle/etl/extraction.rb +12 -0
- data/lib/chronicle/etl/extractors/csv_extractor.rb +32 -31
- data/lib/chronicle/etl/extractors/extractor.rb +25 -13
- data/lib/chronicle/etl/extractors/file_extractor.rb +17 -32
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +104 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +37 -0
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +6 -1
- data/lib/chronicle/etl/job.rb +30 -29
- data/lib/chronicle/etl/job_definition.rb +45 -7
- data/lib/chronicle/etl/job_log.rb +10 -0
- data/lib/chronicle/etl/job_logger.rb +23 -20
- data/lib/chronicle/etl/loaders/csv_loader.rb +5 -1
- data/lib/chronicle/etl/loaders/loader.rb +5 -2
- data/lib/chronicle/etl/loaders/rest_loader.rb +9 -5
- data/lib/chronicle/etl/loaders/stdout_loader.rb +6 -1
- data/lib/chronicle/etl/loaders/table_loader.rb +51 -7
- data/lib/chronicle/etl/logger.rb +48 -0
- data/lib/chronicle/etl/models/attachment.rb +14 -0
- data/lib/chronicle/etl/models/base.rb +23 -7
- data/lib/chronicle/etl/models/entity.rb +9 -3
- data/lib/chronicle/etl/registry/connector_registration.rb +62 -0
- data/lib/chronicle/etl/registry/registry.rb +52 -0
- data/lib/chronicle/etl/registry/self_registering.rb +25 -0
- data/lib/chronicle/etl/runner.rb +58 -7
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +25 -0
- data/lib/chronicle/etl/serializers/serializer.rb +27 -0
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +247 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +10 -1
- data/lib/chronicle/etl/transformers/transformer.rb +41 -10
- data/lib/chronicle/etl/utils/binary_attachments.rb +21 -0
- data/lib/chronicle/etl/utils/progress_bar.rb +3 -1
- data/lib/chronicle/etl/utils/text_recognition.rb +15 -0
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +8 -2
- metadata +146 -34
- data/.ruby-version +0 -1
- data/Gemfile.lock +0 -91
- data/lib/chronicle/etl/catalog.rb +0 -108
- data/lib/chronicle/etl/utils/jsonapi.rb +0 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5fd411a9a41a645b85780230c79b09f361e121d0e8ca7f3270ca8eba55a76ca8
|
4
|
+
data.tar.gz: c09053715910ab4f027fbdc3a5b7d10c042eee962f7fa93c6571ce8359f51009
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2c9ec14b6c0a51f1c5ec77ee8d9a7f016d16bdc35db5634f9fa5d38aabc30dec201cd4b8bef06a31b86773a0c1cda2d271d7008dcb247a86d956c094919f3c0f
|
7
|
+
data.tar.gz: 0dca41e1654e5b2b98a148f853492a67126cdac767000b3c5f97c5c8ff88b77464e17a2fab38b72c1f014f3515c911e5f3f391eaf68d64e73dcfcff5d8e6cb6a
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ master ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ master ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby-version: ['2.7', '3.0']
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- uses: actions/checkout@v2
|
26
|
+
- name: Set up Ruby
|
27
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
28
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
29
|
+
# uses: ruby/setup-ruby@v1
|
30
|
+
uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
|
31
|
+
with:
|
32
|
+
ruby-version: ${{ matrix.ruby-version }}
|
33
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
34
|
+
- name: Run tests
|
35
|
+
run: bundle exec rake
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -1,8 +1,38 @@
|
|
1
1
|
AllCops:
|
2
2
|
EnabledByDefault: true
|
3
|
+
TargetRubyVersion: 2.7
|
4
|
+
|
5
|
+
Style/FrozenStringLiteralComment:
|
6
|
+
SafeAutoCorrect: true
|
3
7
|
|
4
8
|
Style/StringLiterals:
|
5
9
|
Enabled: false
|
6
10
|
|
11
|
+
Layout/MultilineAssignmentLayout:
|
12
|
+
Enabled: false
|
13
|
+
|
14
|
+
Layout/RedundantLineBreak:
|
15
|
+
Enabled: false
|
16
|
+
|
7
17
|
Style/MethodCallWithArgsParentheses:
|
8
|
-
Enabled: false
|
18
|
+
Enabled: false
|
19
|
+
|
20
|
+
Style/MethodCalledOnDoEndBlock:
|
21
|
+
Exclude:
|
22
|
+
- 'spec/**/*'
|
23
|
+
|
24
|
+
Style/OpenStructUse:
|
25
|
+
Enabled: false
|
26
|
+
|
27
|
+
Style/Copyright:
|
28
|
+
Enabled: false
|
29
|
+
|
30
|
+
Style/SymbolArray:
|
31
|
+
EnforcedStyle: brackets
|
32
|
+
|
33
|
+
Style/WordArray:
|
34
|
+
EnforcedStyle: brackets
|
35
|
+
|
36
|
+
Lint/ConstantResolution:
|
37
|
+
Enabled: false
|
38
|
+
|
data/Guardfile
ADDED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Chronicle::ETL
|
2
2
|
|
3
|
-
[](https://badge.fury.io/rb/chronicle-etl)
|
3
|
+
[](https://badge.fury.io/rb/chronicle-etl) [](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml)
|
4
4
|
|
5
5
|
Chronicle ETL is a utility that helps you archive and process personal data. You can *extract* it from a variety of sources, *transform* it, and *load* it to an external API, file, or stdout.
|
6
6
|
|
@@ -31,6 +31,9 @@ Connectors are available to read, process, and load data from different formats
|
|
31
31
|
```bash
|
32
32
|
# List all available connectors
|
33
33
|
$ chronicle-etl connectors:list
|
34
|
+
|
35
|
+
# Install a connector
|
36
|
+
$ chronicle-etl connectors:install imessage
|
34
37
|
```
|
35
38
|
|
36
39
|
Built in connectors:
|
@@ -44,16 +47,18 @@ Built in connectors:
|
|
44
47
|
- `null` - (default) Don't do anything
|
45
48
|
|
46
49
|
### Loaders
|
47
|
-
- `stdout` - (default) output
|
50
|
+
- `stdout` - (default) output records to stdout serialized as JSON
|
48
51
|
- `csv` - Load records to a csv file
|
52
|
+
- `rest` - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
|
49
53
|
- `table` - Output an ascii table of records. Useful for debugging.
|
50
54
|
|
51
55
|
### Provider-specific importers
|
52
56
|
|
53
57
|
In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
|
54
58
|
|
55
|
-
- [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` and other email files
|
56
|
-
- [
|
59
|
+
- [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` and other email files
|
60
|
+
- [shell](https://github.com/chronicle-app/chronicle-shell). Extract shell history from Bash or Zsh
|
61
|
+
- [imessage](https://github.com/chronicle-app/chronicle-imessage). Extract iMessage messages from a local macOS installation
|
57
62
|
|
58
63
|
To install any of these, run `gem install chronicle-PROVIDER`.
|
59
64
|
|
@@ -61,7 +66,7 @@ If you don't want to use the available rubygem importers, `chronicle-etl` can us
|
|
61
66
|
|
62
67
|
I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
|
63
68
|
|
64
|
-
|
69
|
+
## Full commands
|
65
70
|
|
66
71
|
```
|
67
72
|
$ chronicle-etl help
|
@@ -75,26 +80,28 @@ ALL COMMANDS
|
|
75
80
|
jobs:create # Create a job
|
76
81
|
jobs:list # List all available jobs
|
77
82
|
jobs:run # Start a job
|
78
|
-
jobs:show # Show a job
|
83
|
+
jobs:show # Show details about a job
|
79
84
|
```
|
80
85
|
|
81
|
-
###
|
86
|
+
### Running a job
|
82
87
|
|
83
88
|
```
|
84
89
|
Usage:
|
85
90
|
chronicle-etl jobs:run
|
86
91
|
|
87
92
|
Options:
|
88
|
-
|
89
|
-
# Default:
|
93
|
+
[--log-level=LOG_LEVEL] # Log level (debug, info, warn, error, fatal)
|
94
|
+
# Default: info
|
95
|
+
-v, [--verbose], [--no-verbose] # Set log level to verbose
|
96
|
+
[--dry-run], [--no-dry-run] # Only run the extraction and transform steps, not the loading
|
97
|
+
-e, [--extractor=extractor-name] # Extractor class. Default: stdin
|
90
98
|
[--extractor-opts=key:value] # Extractor options
|
91
|
-
-t, [--transformer=transformer-name] # Transformer class
|
92
|
-
# Default: null
|
99
|
+
-t, [--transformer=transformer-name] # Transformer class. Default: null
|
93
100
|
[--transformer-opts=key:value] # Transformer options
|
94
|
-
-l, [--loader=loader-name] # Loader class
|
95
|
-
# Default: stdout
|
101
|
+
-l, [--loader=loader-name] # Loader class. Default: stdout
|
96
102
|
[--loader-opts=key:value] # Loader options
|
97
|
-
-j, [--
|
103
|
+
-j, [--name=NAME] # Job configuration name
|
104
|
+
|
98
105
|
|
99
106
|
Runs an ETL job
|
100
107
|
```
|
data/Rakefile
CHANGED
data/chronicle-etl.gemspec
CHANGED
@@ -17,11 +17,11 @@ Gem::Specification.new do |spec|
|
|
17
17
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
18
18
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
19
|
if spec.respond_to?(:metadata)
|
20
|
-
|
20
|
+
spec.metadata['allowed_push_host'] = "https://rubygems.org"
|
21
21
|
|
22
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
23
23
|
spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
|
24
|
-
spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/
|
24
|
+
spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/releases"
|
25
25
|
else
|
26
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
27
27
|
"public gem pushes."
|
@@ -35,19 +35,27 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.bindir = "exe"
|
36
36
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
37
37
|
spec.require_paths = ["lib"]
|
38
|
+
spec.required_ruby_version = ">= 2.7"
|
38
39
|
|
39
|
-
spec.add_dependency "
|
40
|
+
spec.add_dependency "activesupport", "~> 7.0"
|
41
|
+
spec.add_dependency "chronic_duration", "~> 0.10.6"
|
40
42
|
spec.add_dependency "colorize", "~> 0.8.1"
|
41
|
-
spec.add_dependency "
|
43
|
+
spec.add_dependency "marcel", "~> 1.0.2"
|
44
|
+
spec.add_dependency "mini_exiftool", "~> 2.10"
|
45
|
+
spec.add_dependency "nokogiri", "~> 1.13"
|
46
|
+
spec.add_dependency "runcom", ">= 6.0"
|
47
|
+
spec.add_dependency "sequel", "~> 5.35"
|
48
|
+
spec.add_dependency "sqlite3", "~> 1.4"
|
49
|
+
spec.add_dependency "thor", "~> 1.2"
|
42
50
|
spec.add_dependency "tty-progressbar", "~> 0.17"
|
43
|
-
spec.add_dependency
|
44
|
-
spec.add_dependency 'deep_merge', '~> 1.2'
|
51
|
+
spec.add_dependency "tty-table", "~> 0.11"
|
45
52
|
|
46
53
|
spec.add_development_dependency "bundler", "~> 2.1"
|
54
|
+
spec.add_development_dependency "pry-byebug", "~> 3.9"
|
47
55
|
spec.add_development_dependency "rake", "~> 13.0"
|
48
56
|
spec.add_development_dependency "rspec", "~> 3.9"
|
49
|
-
spec.add_development_dependency "
|
50
|
-
spec.add_development_dependency
|
51
|
-
spec.add_development_dependency
|
52
|
-
spec.add_development_dependency
|
57
|
+
spec.add_development_dependency "simplecov", "~> 0.21"
|
58
|
+
spec.add_development_dependency "guard-rspec", "~> 4.7.3"
|
59
|
+
spec.add_development_dependency "yard", "~> 0.9.7"
|
60
|
+
spec.add_development_dependency "rubocop", "~> 1.25.1"
|
53
61
|
end
|
data/exe/chronicle-etl
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
module CLI
|
@@ -7,23 +9,67 @@ module Chronicle
|
|
7
9
|
namespace :connectors
|
8
10
|
|
9
11
|
desc "install NAME", "Installs connector NAME"
|
10
|
-
def install
|
11
|
-
|
12
|
+
def install(name)
|
13
|
+
Chronicle::ETL::Registry.install_connector(name)
|
12
14
|
end
|
13
15
|
|
14
16
|
desc "list", "Lists available connectors"
|
15
17
|
# Display all available connectors that chronicle-etl has access to
|
16
18
|
def list
|
17
|
-
|
18
|
-
|
19
|
-
|
19
|
+
Chronicle::ETL::Registry.load_all!
|
20
|
+
|
21
|
+
connector_info = Chronicle::ETL::Registry.connectors.map do |connector_registration|
|
22
|
+
{
|
23
|
+
identifier: connector_registration.identifier,
|
24
|
+
phase: connector_registration.phase,
|
25
|
+
description: connector_registration.descriptive_phrase,
|
26
|
+
provider: connector_registration.provider,
|
27
|
+
core: connector_registration.built_in? ? '✓' : '',
|
28
|
+
class: connector_registration.klass_name
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
connector_info = connector_info.sort_by do |a|
|
33
|
+
[a[:core].to_s, a[:provider], a[:phase], a[:identifier]]
|
20
34
|
end
|
21
35
|
|
22
|
-
headers =
|
36
|
+
headers = connector_info.first.keys.map do |key|
|
23
37
|
key.to_s.upcase.bold
|
24
38
|
end
|
25
39
|
|
26
|
-
table = TTY::Table.new(headers,
|
40
|
+
table = TTY::Table.new(headers, connector_info.map(&:values))
|
41
|
+
puts table.render(indent: 0, padding: [0, 2])
|
42
|
+
end
|
43
|
+
|
44
|
+
desc "show PHASE IDENTIFIER", "Show information about a connector"
|
45
|
+
def show(phase, identifier)
|
46
|
+
unless ['extractor', 'transformer', 'loader'].include?(phase)
|
47
|
+
puts "phase argument must be one of: [extractor, transformer, loader]"
|
48
|
+
return
|
49
|
+
end
|
50
|
+
|
51
|
+
begin
|
52
|
+
connector = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase.to_sym, identifier)
|
53
|
+
rescue Chronicle::ETL::ConnectorNotAvailableError
|
54
|
+
puts "Could not find #{phase} #{identifier}"
|
55
|
+
return
|
56
|
+
end
|
57
|
+
|
58
|
+
puts connector.klass.to_s.bold
|
59
|
+
puts " #{connector.descriptive_phrase}"
|
60
|
+
puts
|
61
|
+
puts "OPTIONS"
|
62
|
+
|
63
|
+
headers = ['name', 'default', 'required'].map{ |h| h.to_s.upcase.bold }
|
64
|
+
|
65
|
+
settings = connector.klass.settings.map do |name, setting|
|
66
|
+
[
|
67
|
+
name,
|
68
|
+
setting.default,
|
69
|
+
setting.required ? 'yes' : 'no'
|
70
|
+
]
|
71
|
+
end
|
72
|
+
table = TTY::Table.new(headers, settings)
|
27
73
|
puts table.render(indent: 0, padding: [0, 2])
|
28
74
|
end
|
29
75
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'pp'
|
2
|
+
|
2
3
|
module Chronicle
|
3
4
|
module ETL
|
4
5
|
module CLI
|
@@ -7,16 +8,33 @@ module Chronicle
|
|
7
8
|
default_task "start"
|
8
9
|
namespace :jobs
|
9
10
|
|
10
|
-
class_option :
|
11
|
+
class_option :name, aliases: '-j', desc: 'Job configuration name'
|
12
|
+
|
13
|
+
class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'extractor-name'
|
11
14
|
class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
12
|
-
class_option :transformer, aliases: '-t', desc: 'Transformer class
|
15
|
+
class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'transformer-name'
|
13
16
|
class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
14
|
-
class_option :loader, aliases: '-l', desc: 'Loader class
|
17
|
+
class_option :loader, aliases: '-l', desc: 'Loader class. Default: stdout', banner: 'loader-name'
|
15
18
|
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
16
|
-
class_option :name, aliases: '-j', desc: 'Job configuration name'
|
17
19
|
|
18
|
-
|
20
|
+
# This is an array to deal with shell globbing
|
21
|
+
class_option :input, aliases: '-i', desc: 'Input filename or directory', default: [], type: 'array', banner: 'FILENAME'
|
22
|
+
class_option :since, desc: "Load records SINCE this date. Overrides job's `load_since` configuration option in extractor's options", banner: 'DATE'
|
23
|
+
class_option :until, desc: "Load records UNTIL this date", banner: 'DATE'
|
24
|
+
class_option :limit, desc: "Only extract the first LIMIT records", banner: 'N'
|
25
|
+
|
26
|
+
class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
|
27
|
+
class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
|
28
|
+
|
29
|
+
class_option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
|
30
|
+
class_option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
|
31
|
+
|
32
|
+
# Thor doesn't like `run` as a command name
|
33
|
+
map run: :start
|
19
34
|
desc "run", "Start a job"
|
35
|
+
option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
|
36
|
+
option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
|
37
|
+
option :dry_run, desc: 'Only run the extraction and transform steps, not the loading', type: :boolean
|
20
38
|
long_desc <<-LONG_DESC
|
21
39
|
This will run an ETL job. Each job needs three parts:
|
22
40
|
|
@@ -24,23 +42,17 @@ module Chronicle
|
|
24
42
|
|
25
43
|
2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
|
26
44
|
|
27
|
-
3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout.
|
45
|
+
3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout. With the --dry-run option, this step won't be run.
|
28
46
|
|
29
47
|
If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
|
30
48
|
LONG_DESC
|
31
49
|
# Run an ETL job
|
32
50
|
def start
|
51
|
+
setup_log_level
|
33
52
|
job_definition = build_job_definition(options)
|
34
53
|
job = Chronicle::ETL::Job.new(job_definition)
|
35
54
|
runner = Chronicle::ETL::Runner.new(job)
|
36
55
|
runner.run!
|
37
|
-
rescue Chronicle::ETL::ProviderNotAvailableError => e
|
38
|
-
warn(e.message.red)
|
39
|
-
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{e.provider}`")
|
40
|
-
exit(false)
|
41
|
-
rescue Chronicle::ETL::ConnectorNotAvailableError => e
|
42
|
-
warn(e.message.red)
|
43
|
-
exit(false)
|
44
56
|
end
|
45
57
|
|
46
58
|
desc "create", "Create a job"
|
@@ -48,14 +60,13 @@ LONG_DESC
|
|
48
60
|
def create
|
49
61
|
job_definition = build_job_definition(options)
|
50
62
|
path = File.join('chronicle', 'etl', 'jobs', options[:name])
|
51
|
-
Chronicle::ETL::Config.write(path, job_definition)
|
63
|
+
Chronicle::ETL::Config.write(path, job_definition.definition)
|
52
64
|
end
|
53
65
|
|
54
66
|
desc "show", "Show details about a job"
|
55
67
|
# Show an ETL job
|
56
68
|
def show
|
57
|
-
|
58
|
-
pp job_config
|
69
|
+
puts Chronicle::ETL::Job.new(build_job_definition(options))
|
59
70
|
end
|
60
71
|
|
61
72
|
desc "list", "List all available jobs"
|
@@ -73,7 +84,7 @@ LONG_DESC
|
|
73
84
|
[job, extractor, transformer, loader]
|
74
85
|
end
|
75
86
|
|
76
|
-
headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
|
87
|
+
headers = ['name', 'extractor', 'transformer', 'loader'].map { |h| h.upcase.bold }
|
77
88
|
|
78
89
|
table = TTY::Table.new(headers, job_details)
|
79
90
|
puts table.render(indent: 0, padding: [0, 2])
|
@@ -81,11 +92,20 @@ LONG_DESC
|
|
81
92
|
|
82
93
|
private
|
83
94
|
|
95
|
+
def setup_log_level
|
96
|
+
if options[:verbose]
|
97
|
+
Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::DEBUG
|
98
|
+
elsif options[:log_level]
|
99
|
+
level = Chronicle::ETL::Logger.const_get(options[:log_level].upcase)
|
100
|
+
Chronicle::ETL::Logger.log_level = level
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
84
104
|
# Create job definition by reading config file and then overwriting with flag options
|
85
105
|
def build_job_definition(options)
|
86
106
|
definition = Chronicle::ETL::JobDefinition.new
|
87
|
-
definition.add_config(process_flag_options(options))
|
88
107
|
definition.add_config(load_job_config(options[:name]))
|
108
|
+
definition.add_config(process_flag_options(options).transform_keys(&:to_sym))
|
89
109
|
definition
|
90
110
|
end
|
91
111
|
|
@@ -95,19 +115,34 @@ LONG_DESC
|
|
95
115
|
|
96
116
|
# Takes flag options and turns them into a runner config
|
97
117
|
def process_flag_options options
|
118
|
+
extractor_options = options[:'extractor-opts'].merge({
|
119
|
+
filename: (options[:input] if options[:input].any?),
|
120
|
+
since: options[:since],
|
121
|
+
until: options[:until],
|
122
|
+
limit: options[:limit],
|
123
|
+
}.compact)
|
124
|
+
|
125
|
+
transformer_options = options[:'transformer-opts']
|
126
|
+
|
127
|
+
loader_options = options[:'loader-opts'].merge({
|
128
|
+
output: options[:output],
|
129
|
+
fields: options[:fields]
|
130
|
+
}.compact)
|
131
|
+
|
98
132
|
{
|
133
|
+
dry_run: options[:dry_run],
|
99
134
|
extractor: {
|
100
135
|
name: options[:extractor],
|
101
|
-
options:
|
102
|
-
},
|
136
|
+
options: extractor_options
|
137
|
+
}.compact,
|
103
138
|
transformer: {
|
104
139
|
name: options[:transformer],
|
105
|
-
options:
|
106
|
-
},
|
140
|
+
options: transformer_options
|
141
|
+
}.compact,
|
107
142
|
loader: {
|
108
143
|
name: options[:loader],
|
109
|
-
options:
|
110
|
-
}
|
144
|
+
options: loader_options
|
145
|
+
}.compact
|
111
146
|
}
|
112
147
|
end
|
113
148
|
end
|
@@ -1,17 +1,10 @@
|
|
1
|
-
require 'thor'
|
2
|
-
require 'chronicle/etl'
|
3
1
|
require 'colorize'
|
4
2
|
|
5
|
-
require 'chronicle/etl/cli/subcommand_base'
|
6
|
-
require 'chronicle/etl/cli/connectors'
|
7
|
-
require 'chronicle/etl/cli/jobs'
|
8
|
-
|
9
3
|
module Chronicle
|
10
4
|
module ETL
|
11
5
|
module CLI
|
12
6
|
# Main entrypoint for CLI app
|
13
|
-
class Main < Thor
|
14
|
-
class_option "verbose", type: :boolean, default: false
|
7
|
+
class Main < ::Thor
|
15
8
|
default_task "jobs"
|
16
9
|
|
17
10
|
desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
|
@@ -22,10 +15,6 @@ module Chronicle
|
|
22
15
|
|
23
16
|
# Entrypoint for the CLI
|
24
17
|
def self.start(given_args = ARGV, config = {})
|
25
|
-
if given_args.none?
|
26
|
-
abort "No command entered or job specified. To see commands, run `chronicle-etl help`".red
|
27
|
-
end
|
28
|
-
|
29
18
|
# Takes a subcommand:command argument and splits it so Thor knows how to hand off to the subcommand class
|
30
19
|
if given_args.any? && given_args[0].include?(':')
|
31
20
|
commands = given_args.shift.split(':')
|
@@ -35,10 +24,20 @@ module Chronicle
|
|
35
24
|
super(given_args, config)
|
36
25
|
end
|
37
26
|
|
27
|
+
def self.exit_on_failure?
|
28
|
+
true
|
29
|
+
end
|
30
|
+
|
31
|
+
desc "version", "Show version"
|
32
|
+
map %w(--version -v) => :version
|
33
|
+
def version
|
34
|
+
shell.say "chronicle-etl #{Chronicle::ETL::VERSION}"
|
35
|
+
end
|
36
|
+
|
38
37
|
# Displays help options for chronicle-etl
|
39
38
|
def help(meth = nil, subcommand = false)
|
40
39
|
if meth && !respond_to?(meth)
|
41
|
-
klass, task = Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
|
40
|
+
klass, task = ::Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
|
42
41
|
klass.start(['-h', task].compact, shell: shell)
|
43
42
|
else
|
44
43
|
shell.say "ABOUT".bold
|
@@ -52,14 +51,14 @@ module Chronicle
|
|
52
51
|
shell.say " $ chronicle-etl connectors:list"
|
53
52
|
shell.say
|
54
53
|
shell.say " Run a simple job:".italic.light_black
|
55
|
-
shell.say " $ chronicle-etl jobs:
|
54
|
+
shell.say " $ chronicle-etl jobs:run --extractor stdin --transformer null --loader stdout"
|
56
55
|
shell.say
|
57
56
|
shell.say " Show full job options:".italic.light_black
|
58
|
-
shell.say " $ chronicle-etl jobs help
|
57
|
+
shell.say " $ chronicle-etl jobs help run"
|
59
58
|
|
60
59
|
list = []
|
61
60
|
|
62
|
-
Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
|
61
|
+
::Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
|
63
62
|
list += thor_class.printable_tasks(false)
|
64
63
|
end
|
65
64
|
list.sort! { |a, b| a[0] <=> b[0] }
|
@@ -72,6 +71,9 @@ module Chronicle
|
|
72
71
|
shell.say "VERSION".bold
|
73
72
|
shell.say " #{Chronicle::ETL::VERSION}"
|
74
73
|
shell.say
|
74
|
+
shell.say " Display current version:".italic.light_black
|
75
|
+
shell.say " $ chronicle-etl --version"
|
76
|
+
shell.say
|
75
77
|
shell.say "FULL DOCUMENTATION".bold
|
76
78
|
shell.say " https://github.com/chronicle-app/chronicle-etl".blue
|
77
79
|
shell.say
|
@@ -2,11 +2,11 @@ module Chronicle
|
|
2
2
|
module ETL
|
3
3
|
module CLI
|
4
4
|
# Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
|
5
|
-
class SubcommandBase < Thor
|
5
|
+
class SubcommandBase < ::Thor
|
6
6
|
# Print usage instructions for a subcommand
|
7
7
|
def self.help(shell, subcommand = false)
|
8
8
|
list = printable_commands(true, subcommand)
|
9
|
-
Thor::Util.thor_classes_in(self).each do |klass|
|
9
|
+
::Thor::Util.thor_classes_in(self).each do |klass|
|
10
10
|
list += klass.printable_commands(false)
|
11
11
|
end
|
12
12
|
list.sort! { |a, b| a[0] <=> b[0] }
|
data/lib/chronicle/etl/config.rb
CHANGED
@@ -30,7 +30,7 @@ module Chronicle
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
-
# Returns all available credentials available in ~/.config/
|
33
|
+
# Returns all credentials available in ~/.config/chronicle/etl/credentials/*.yml
|
34
34
|
def available_credentials
|
35
35
|
job_directory = Runcom::Config.new('chronicle/etl/credentials').current
|
36
36
|
Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
|