chronicle-etl 0.2.4 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/.gitignore +3 -0
- data/.rubocop.yml +31 -1
- data/Guardfile +7 -0
- data/README.md +21 -14
- data/Rakefile +4 -2
- data/chronicle-etl.gemspec +18 -10
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/cli/connectors.rb +53 -7
- data/lib/chronicle/etl/cli/jobs.rb +59 -24
- data/lib/chronicle/etl/cli/main.rb +18 -16
- data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
- data/lib/chronicle/etl/cli.rb +7 -0
- data/lib/chronicle/etl/config.rb +1 -1
- data/lib/chronicle/etl/configurable.rb +150 -0
- data/lib/chronicle/etl/exceptions.rb +14 -1
- data/lib/chronicle/etl/extraction.rb +12 -0
- data/lib/chronicle/etl/extractors/csv_extractor.rb +32 -31
- data/lib/chronicle/etl/extractors/extractor.rb +25 -13
- data/lib/chronicle/etl/extractors/file_extractor.rb +17 -32
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +104 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +37 -0
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +6 -1
- data/lib/chronicle/etl/job.rb +30 -29
- data/lib/chronicle/etl/job_definition.rb +45 -7
- data/lib/chronicle/etl/job_log.rb +10 -0
- data/lib/chronicle/etl/job_logger.rb +23 -20
- data/lib/chronicle/etl/loaders/csv_loader.rb +5 -1
- data/lib/chronicle/etl/loaders/loader.rb +5 -2
- data/lib/chronicle/etl/loaders/rest_loader.rb +9 -5
- data/lib/chronicle/etl/loaders/stdout_loader.rb +6 -1
- data/lib/chronicle/etl/loaders/table_loader.rb +51 -7
- data/lib/chronicle/etl/logger.rb +48 -0
- data/lib/chronicle/etl/models/attachment.rb +14 -0
- data/lib/chronicle/etl/models/base.rb +23 -7
- data/lib/chronicle/etl/models/entity.rb +9 -3
- data/lib/chronicle/etl/registry/connector_registration.rb +62 -0
- data/lib/chronicle/etl/registry/registry.rb +52 -0
- data/lib/chronicle/etl/registry/self_registering.rb +25 -0
- data/lib/chronicle/etl/runner.rb +58 -7
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +25 -0
- data/lib/chronicle/etl/serializers/serializer.rb +27 -0
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +247 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +10 -1
- data/lib/chronicle/etl/transformers/transformer.rb +41 -10
- data/lib/chronicle/etl/utils/binary_attachments.rb +21 -0
- data/lib/chronicle/etl/utils/progress_bar.rb +3 -1
- data/lib/chronicle/etl/utils/text_recognition.rb +15 -0
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +8 -2
- metadata +146 -34
- data/.ruby-version +0 -1
- data/Gemfile.lock +0 -91
- data/lib/chronicle/etl/catalog.rb +0 -108
- data/lib/chronicle/etl/utils/jsonapi.rb +0 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5fd411a9a41a645b85780230c79b09f361e121d0e8ca7f3270ca8eba55a76ca8
|
4
|
+
data.tar.gz: c09053715910ab4f027fbdc3a5b7d10c042eee962f7fa93c6571ce8359f51009
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2c9ec14b6c0a51f1c5ec77ee8d9a7f016d16bdc35db5634f9fa5d38aabc30dec201cd4b8bef06a31b86773a0c1cda2d271d7008dcb247a86d956c094919f3c0f
|
7
|
+
data.tar.gz: 0dca41e1654e5b2b98a148f853492a67126cdac767000b3c5f97c5c8ff88b77464e17a2fab38b72c1f014f3515c911e5f3f391eaf68d64e73dcfcff5d8e6cb6a
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ master ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ master ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby-version: ['2.7', '3.0']
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- uses: actions/checkout@v2
|
26
|
+
- name: Set up Ruby
|
27
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
28
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
29
|
+
# uses: ruby/setup-ruby@v1
|
30
|
+
uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
|
31
|
+
with:
|
32
|
+
ruby-version: ${{ matrix.ruby-version }}
|
33
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
34
|
+
- name: Run tests
|
35
|
+
run: bundle exec rake
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -1,8 +1,38 @@
|
|
1
1
|
AllCops:
|
2
2
|
EnabledByDefault: true
|
3
|
+
TargetRubyVersion: 2.7
|
4
|
+
|
5
|
+
Style/FrozenStringLiteralComment:
|
6
|
+
SafeAutoCorrect: true
|
3
7
|
|
4
8
|
Style/StringLiterals:
|
5
9
|
Enabled: false
|
6
10
|
|
11
|
+
Layout/MultilineAssignmentLayout:
|
12
|
+
Enabled: false
|
13
|
+
|
14
|
+
Layout/RedundantLineBreak:
|
15
|
+
Enabled: false
|
16
|
+
|
7
17
|
Style/MethodCallWithArgsParentheses:
|
8
|
-
Enabled: false
|
18
|
+
Enabled: false
|
19
|
+
|
20
|
+
Style/MethodCalledOnDoEndBlock:
|
21
|
+
Exclude:
|
22
|
+
- 'spec/**/*'
|
23
|
+
|
24
|
+
Style/OpenStructUse:
|
25
|
+
Enabled: false
|
26
|
+
|
27
|
+
Style/Copyright:
|
28
|
+
Enabled: false
|
29
|
+
|
30
|
+
Style/SymbolArray:
|
31
|
+
EnforcedStyle: brackets
|
32
|
+
|
33
|
+
Style/WordArray:
|
34
|
+
EnforcedStyle: brackets
|
35
|
+
|
36
|
+
Lint/ConstantResolution:
|
37
|
+
Enabled: false
|
38
|
+
|
data/Guardfile
ADDED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Chronicle::ETL
|
2
2
|
|
3
|
-
[![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl) [![Ruby](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml/badge.svg)](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml)
|
4
4
|
|
5
5
|
Chronicle ETL is a utility that helps you archive and process personal data. You can *extract* it from a variety of sources, *transform* it, and *load* it to an external API, file, or stdout.
|
6
6
|
|
@@ -31,6 +31,9 @@ Connectors are available to read, process, and load data from different formats
|
|
31
31
|
```bash
|
32
32
|
# List all available connectors
|
33
33
|
$ chronicle-etl connectors:list
|
34
|
+
|
35
|
+
# Install a connector
|
36
|
+
$ chronicle-etl connectors:install imessage
|
34
37
|
```
|
35
38
|
|
36
39
|
Built in connectors:
|
@@ -44,16 +47,18 @@ Built in connectors:
|
|
44
47
|
- `null` - (default) Don't do anything
|
45
48
|
|
46
49
|
### Loaders
|
47
|
-
- `stdout` - (default) output
|
50
|
+
- `stdout` - (default) output records to stdout serialized as JSON
|
48
51
|
- `csv` - Load records to a csv file
|
52
|
+
- `rest` - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
|
49
53
|
- `table` - Output an ascii table of records. Useful for debugging.
|
50
54
|
|
51
55
|
### Provider-specific importers
|
52
56
|
|
53
57
|
In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
|
54
58
|
|
55
|
-
- [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` and other email files
|
56
|
-
- [
|
59
|
+
- [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` and other email files
|
60
|
+
- [shell](https://github.com/chronicle-app/chronicle-shell). Extract shell history from Bash or Zsh
|
61
|
+
- [imessage](https://github.com/chronicle-app/chronicle-imessage). Extract iMessage messages from a local macOS installation
|
57
62
|
|
58
63
|
To install any of these, run `gem install chronicle-PROVIDER`.
|
59
64
|
|
@@ -61,7 +66,7 @@ If you don't want to use the available rubygem importers, `chronicle-etl` can us
|
|
61
66
|
|
62
67
|
I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
|
63
68
|
|
64
|
-
|
69
|
+
## Full commands
|
65
70
|
|
66
71
|
```
|
67
72
|
$ chronicle-etl help
|
@@ -75,26 +80,28 @@ ALL COMMANDS
|
|
75
80
|
jobs:create # Create a job
|
76
81
|
jobs:list # List all available jobs
|
77
82
|
jobs:run # Start a job
|
78
|
-
jobs:show # Show a job
|
83
|
+
jobs:show # Show details about a job
|
79
84
|
```
|
80
85
|
|
81
|
-
###
|
86
|
+
### Running a job
|
82
87
|
|
83
88
|
```
|
84
89
|
Usage:
|
85
90
|
chronicle-etl jobs:run
|
86
91
|
|
87
92
|
Options:
|
88
|
-
|
89
|
-
# Default:
|
93
|
+
[--log-level=LOG_LEVEL] # Log level (debug, info, warn, error, fatal)
|
94
|
+
# Default: info
|
95
|
+
-v, [--verbose], [--no-verbose] # Set log level to verbose
|
96
|
+
[--dry-run], [--no-dry-run] # Only run the extraction and transform steps, not the loading
|
97
|
+
-e, [--extractor=extractor-name] # Extractor class. Default: stdin
|
90
98
|
[--extractor-opts=key:value] # Extractor options
|
91
|
-
-t, [--transformer=transformer-name] # Transformer class
|
92
|
-
# Default: null
|
99
|
+
-t, [--transformer=transformer-name] # Transformer class. Default: null
|
93
100
|
[--transformer-opts=key:value] # Transformer options
|
94
|
-
-l, [--loader=loader-name] # Loader class
|
95
|
-
# Default: stdout
|
101
|
+
-l, [--loader=loader-name] # Loader class. Default: stdout
|
96
102
|
[--loader-opts=key:value] # Loader options
|
97
|
-
-j, [--
|
103
|
+
-j, [--name=NAME] # Job configuration name
|
104
|
+
|
98
105
|
|
99
106
|
Runs an ETL job
|
100
107
|
```
|
data/Rakefile
CHANGED
data/chronicle-etl.gemspec
CHANGED
@@ -17,11 +17,11 @@ Gem::Specification.new do |spec|
|
|
17
17
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
18
18
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
19
|
if spec.respond_to?(:metadata)
|
20
|
-
|
20
|
+
spec.metadata['allowed_push_host'] = "https://rubygems.org"
|
21
21
|
|
22
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
23
23
|
spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
|
24
|
-
spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/
|
24
|
+
spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/releases"
|
25
25
|
else
|
26
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
27
27
|
"public gem pushes."
|
@@ -35,19 +35,27 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.bindir = "exe"
|
36
36
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
37
37
|
spec.require_paths = ["lib"]
|
38
|
+
spec.required_ruby_version = ">= 2.7"
|
38
39
|
|
39
|
-
spec.add_dependency "
|
40
|
+
spec.add_dependency "activesupport", "~> 7.0"
|
41
|
+
spec.add_dependency "chronic_duration", "~> 0.10.6"
|
40
42
|
spec.add_dependency "colorize", "~> 0.8.1"
|
41
|
-
spec.add_dependency "
|
43
|
+
spec.add_dependency "marcel", "~> 1.0.2"
|
44
|
+
spec.add_dependency "mini_exiftool", "~> 2.10"
|
45
|
+
spec.add_dependency "nokogiri", "~> 1.13"
|
46
|
+
spec.add_dependency "runcom", ">= 6.0"
|
47
|
+
spec.add_dependency "sequel", "~> 5.35"
|
48
|
+
spec.add_dependency "sqlite3", "~> 1.4"
|
49
|
+
spec.add_dependency "thor", "~> 1.2"
|
42
50
|
spec.add_dependency "tty-progressbar", "~> 0.17"
|
43
|
-
spec.add_dependency
|
44
|
-
spec.add_dependency 'deep_merge', '~> 1.2'
|
51
|
+
spec.add_dependency "tty-table", "~> 0.11"
|
45
52
|
|
46
53
|
spec.add_development_dependency "bundler", "~> 2.1"
|
54
|
+
spec.add_development_dependency "pry-byebug", "~> 3.9"
|
47
55
|
spec.add_development_dependency "rake", "~> 13.0"
|
48
56
|
spec.add_development_dependency "rspec", "~> 3.9"
|
49
|
-
spec.add_development_dependency "
|
50
|
-
spec.add_development_dependency
|
51
|
-
spec.add_development_dependency
|
52
|
-
spec.add_development_dependency
|
57
|
+
spec.add_development_dependency "simplecov", "~> 0.21"
|
58
|
+
spec.add_development_dependency "guard-rspec", "~> 4.7.3"
|
59
|
+
spec.add_development_dependency "yard", "~> 0.9.7"
|
60
|
+
spec.add_development_dependency "rubocop", "~> 1.25.1"
|
53
61
|
end
|
data/exe/chronicle-etl
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
module CLI
|
@@ -7,23 +9,67 @@ module Chronicle
|
|
7
9
|
namespace :connectors
|
8
10
|
|
9
11
|
desc "install NAME", "Installs connector NAME"
|
10
|
-
def install
|
11
|
-
|
12
|
+
def install(name)
|
13
|
+
Chronicle::ETL::Registry.install_connector(name)
|
12
14
|
end
|
13
15
|
|
14
16
|
desc "list", "Lists available connectors"
|
15
17
|
# Display all available connectors that chronicle-etl has access to
|
16
18
|
def list
|
17
|
-
|
18
|
-
|
19
|
-
|
19
|
+
Chronicle::ETL::Registry.load_all!
|
20
|
+
|
21
|
+
connector_info = Chronicle::ETL::Registry.connectors.map do |connector_registration|
|
22
|
+
{
|
23
|
+
identifier: connector_registration.identifier,
|
24
|
+
phase: connector_registration.phase,
|
25
|
+
description: connector_registration.descriptive_phrase,
|
26
|
+
provider: connector_registration.provider,
|
27
|
+
core: connector_registration.built_in? ? '✓' : '',
|
28
|
+
class: connector_registration.klass_name
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
connector_info = connector_info.sort_by do |a|
|
33
|
+
[a[:core].to_s, a[:provider], a[:phase], a[:identifier]]
|
20
34
|
end
|
21
35
|
|
22
|
-
headers =
|
36
|
+
headers = connector_info.first.keys.map do |key|
|
23
37
|
key.to_s.upcase.bold
|
24
38
|
end
|
25
39
|
|
26
|
-
table = TTY::Table.new(headers,
|
40
|
+
table = TTY::Table.new(headers, connector_info.map(&:values))
|
41
|
+
puts table.render(indent: 0, padding: [0, 2])
|
42
|
+
end
|
43
|
+
|
44
|
+
desc "show PHASE IDENTIFIER", "Show information about a connector"
|
45
|
+
def show(phase, identifier)
|
46
|
+
unless ['extractor', 'transformer', 'loader'].include?(phase)
|
47
|
+
puts "phase argument must be one of: [extractor, transformer, loader]"
|
48
|
+
return
|
49
|
+
end
|
50
|
+
|
51
|
+
begin
|
52
|
+
connector = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase.to_sym, identifier)
|
53
|
+
rescue Chronicle::ETL::ConnectorNotAvailableError
|
54
|
+
puts "Could not find #{phase} #{identifier}"
|
55
|
+
return
|
56
|
+
end
|
57
|
+
|
58
|
+
puts connector.klass.to_s.bold
|
59
|
+
puts " #{connector.descriptive_phrase}"
|
60
|
+
puts
|
61
|
+
puts "OPTIONS"
|
62
|
+
|
63
|
+
headers = ['name', 'default', 'required'].map{ |h| h.to_s.upcase.bold }
|
64
|
+
|
65
|
+
settings = connector.klass.settings.map do |name, setting|
|
66
|
+
[
|
67
|
+
name,
|
68
|
+
setting.default,
|
69
|
+
setting.required ? 'yes' : 'no'
|
70
|
+
]
|
71
|
+
end
|
72
|
+
table = TTY::Table.new(headers, settings)
|
27
73
|
puts table.render(indent: 0, padding: [0, 2])
|
28
74
|
end
|
29
75
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'pp'
|
2
|
+
|
2
3
|
module Chronicle
|
3
4
|
module ETL
|
4
5
|
module CLI
|
@@ -7,16 +8,33 @@ module Chronicle
|
|
7
8
|
default_task "start"
|
8
9
|
namespace :jobs
|
9
10
|
|
10
|
-
class_option :
|
11
|
+
class_option :name, aliases: '-j', desc: 'Job configuration name'
|
12
|
+
|
13
|
+
class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'extractor-name'
|
11
14
|
class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
12
|
-
class_option :transformer, aliases: '-t', desc: 'Transformer class
|
15
|
+
class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'transformer-name'
|
13
16
|
class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
14
|
-
class_option :loader, aliases: '-l', desc: 'Loader class
|
17
|
+
class_option :loader, aliases: '-l', desc: 'Loader class. Default: stdout', banner: 'loader-name'
|
15
18
|
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
16
|
-
class_option :name, aliases: '-j', desc: 'Job configuration name'
|
17
19
|
|
18
|
-
|
20
|
+
# This is an array to deal with shell globbing
|
21
|
+
class_option :input, aliases: '-i', desc: 'Input filename or directory', default: [], type: 'array', banner: 'FILENAME'
|
22
|
+
class_option :since, desc: "Load records SINCE this date. Overrides job's `load_since` configuration option in extractor's options", banner: 'DATE'
|
23
|
+
class_option :until, desc: "Load records UNTIL this date", banner: 'DATE'
|
24
|
+
class_option :limit, desc: "Only extract the first LIMIT records", banner: 'N'
|
25
|
+
|
26
|
+
class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
|
27
|
+
class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
|
28
|
+
|
29
|
+
class_option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
|
30
|
+
class_option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
|
31
|
+
|
32
|
+
# Thor doesn't like `run` as a command name
|
33
|
+
map run: :start
|
19
34
|
desc "run", "Start a job"
|
35
|
+
option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
|
36
|
+
option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
|
37
|
+
option :dry_run, desc: 'Only run the extraction and transform steps, not the loading', type: :boolean
|
20
38
|
long_desc <<-LONG_DESC
|
21
39
|
This will run an ETL job. Each job needs three parts:
|
22
40
|
|
@@ -24,23 +42,17 @@ module Chronicle
|
|
24
42
|
|
25
43
|
2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
|
26
44
|
|
27
|
-
3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout.
|
45
|
+
3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout. With the --dry-run option, this step won't be run.
|
28
46
|
|
29
47
|
If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
|
30
48
|
LONG_DESC
|
31
49
|
# Run an ETL job
|
32
50
|
def start
|
51
|
+
setup_log_level
|
33
52
|
job_definition = build_job_definition(options)
|
34
53
|
job = Chronicle::ETL::Job.new(job_definition)
|
35
54
|
runner = Chronicle::ETL::Runner.new(job)
|
36
55
|
runner.run!
|
37
|
-
rescue Chronicle::ETL::ProviderNotAvailableError => e
|
38
|
-
warn(e.message.red)
|
39
|
-
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{e.provider}`")
|
40
|
-
exit(false)
|
41
|
-
rescue Chronicle::ETL::ConnectorNotAvailableError => e
|
42
|
-
warn(e.message.red)
|
43
|
-
exit(false)
|
44
56
|
end
|
45
57
|
|
46
58
|
desc "create", "Create a job"
|
@@ -48,14 +60,13 @@ LONG_DESC
|
|
48
60
|
def create
|
49
61
|
job_definition = build_job_definition(options)
|
50
62
|
path = File.join('chronicle', 'etl', 'jobs', options[:name])
|
51
|
-
Chronicle::ETL::Config.write(path, job_definition)
|
63
|
+
Chronicle::ETL::Config.write(path, job_definition.definition)
|
52
64
|
end
|
53
65
|
|
54
66
|
desc "show", "Show details about a job"
|
55
67
|
# Show an ETL job
|
56
68
|
def show
|
57
|
-
|
58
|
-
pp job_config
|
69
|
+
puts Chronicle::ETL::Job.new(build_job_definition(options))
|
59
70
|
end
|
60
71
|
|
61
72
|
desc "list", "List all available jobs"
|
@@ -73,7 +84,7 @@ LONG_DESC
|
|
73
84
|
[job, extractor, transformer, loader]
|
74
85
|
end
|
75
86
|
|
76
|
-
headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
|
87
|
+
headers = ['name', 'extractor', 'transformer', 'loader'].map { |h| h.upcase.bold }
|
77
88
|
|
78
89
|
table = TTY::Table.new(headers, job_details)
|
79
90
|
puts table.render(indent: 0, padding: [0, 2])
|
@@ -81,11 +92,20 @@ LONG_DESC
|
|
81
92
|
|
82
93
|
private
|
83
94
|
|
95
|
+
def setup_log_level
|
96
|
+
if options[:verbose]
|
97
|
+
Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::DEBUG
|
98
|
+
elsif options[:log_level]
|
99
|
+
level = Chronicle::ETL::Logger.const_get(options[:log_level].upcase)
|
100
|
+
Chronicle::ETL::Logger.log_level = level
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
84
104
|
# Create job definition by reading config file and then overwriting with flag options
|
85
105
|
def build_job_definition(options)
|
86
106
|
definition = Chronicle::ETL::JobDefinition.new
|
87
|
-
definition.add_config(process_flag_options(options))
|
88
107
|
definition.add_config(load_job_config(options[:name]))
|
108
|
+
definition.add_config(process_flag_options(options).transform_keys(&:to_sym))
|
89
109
|
definition
|
90
110
|
end
|
91
111
|
|
@@ -95,19 +115,34 @@ LONG_DESC
|
|
95
115
|
|
96
116
|
# Takes flag options and turns them into a runner config
|
97
117
|
def process_flag_options options
|
118
|
+
extractor_options = options[:'extractor-opts'].merge({
|
119
|
+
filename: (options[:input] if options[:input].any?),
|
120
|
+
since: options[:since],
|
121
|
+
until: options[:until],
|
122
|
+
limit: options[:limit],
|
123
|
+
}.compact)
|
124
|
+
|
125
|
+
transformer_options = options[:'transformer-opts']
|
126
|
+
|
127
|
+
loader_options = options[:'loader-opts'].merge({
|
128
|
+
output: options[:output],
|
129
|
+
fields: options[:fields]
|
130
|
+
}.compact)
|
131
|
+
|
98
132
|
{
|
133
|
+
dry_run: options[:dry_run],
|
99
134
|
extractor: {
|
100
135
|
name: options[:extractor],
|
101
|
-
options:
|
102
|
-
},
|
136
|
+
options: extractor_options
|
137
|
+
}.compact,
|
103
138
|
transformer: {
|
104
139
|
name: options[:transformer],
|
105
|
-
options:
|
106
|
-
},
|
140
|
+
options: transformer_options
|
141
|
+
}.compact,
|
107
142
|
loader: {
|
108
143
|
name: options[:loader],
|
109
|
-
options:
|
110
|
-
}
|
144
|
+
options: loader_options
|
145
|
+
}.compact
|
111
146
|
}
|
112
147
|
end
|
113
148
|
end
|
@@ -1,17 +1,10 @@
|
|
1
|
-
require 'thor'
|
2
|
-
require 'chronicle/etl'
|
3
1
|
require 'colorize'
|
4
2
|
|
5
|
-
require 'chronicle/etl/cli/subcommand_base'
|
6
|
-
require 'chronicle/etl/cli/connectors'
|
7
|
-
require 'chronicle/etl/cli/jobs'
|
8
|
-
|
9
3
|
module Chronicle
|
10
4
|
module ETL
|
11
5
|
module CLI
|
12
6
|
# Main entrypoint for CLI app
|
13
|
-
class Main < Thor
|
14
|
-
class_option "verbose", type: :boolean, default: false
|
7
|
+
class Main < ::Thor
|
15
8
|
default_task "jobs"
|
16
9
|
|
17
10
|
desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
|
@@ -22,10 +15,6 @@ module Chronicle
|
|
22
15
|
|
23
16
|
# Entrypoint for the CLI
|
24
17
|
def self.start(given_args = ARGV, config = {})
|
25
|
-
if given_args.none?
|
26
|
-
abort "No command entered or job specified. To see commands, run `chronicle-etl help`".red
|
27
|
-
end
|
28
|
-
|
29
18
|
# Takes a subcommand:command argument and splits it so Thor knows how to hand off to the subcommand class
|
30
19
|
if given_args.any? && given_args[0].include?(':')
|
31
20
|
commands = given_args.shift.split(':')
|
@@ -35,10 +24,20 @@ module Chronicle
|
|
35
24
|
super(given_args, config)
|
36
25
|
end
|
37
26
|
|
27
|
+
def self.exit_on_failure?
|
28
|
+
true
|
29
|
+
end
|
30
|
+
|
31
|
+
desc "version", "Show version"
|
32
|
+
map %w(--version -v) => :version
|
33
|
+
def version
|
34
|
+
shell.say "chronicle-etl #{Chronicle::ETL::VERSION}"
|
35
|
+
end
|
36
|
+
|
38
37
|
# Displays help options for chronicle-etl
|
39
38
|
def help(meth = nil, subcommand = false)
|
40
39
|
if meth && !respond_to?(meth)
|
41
|
-
klass, task = Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
|
40
|
+
klass, task = ::Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
|
42
41
|
klass.start(['-h', task].compact, shell: shell)
|
43
42
|
else
|
44
43
|
shell.say "ABOUT".bold
|
@@ -52,14 +51,14 @@ module Chronicle
|
|
52
51
|
shell.say " $ chronicle-etl connectors:list"
|
53
52
|
shell.say
|
54
53
|
shell.say " Run a simple job:".italic.light_black
|
55
|
-
shell.say " $ chronicle-etl jobs:
|
54
|
+
shell.say " $ chronicle-etl jobs:run --extractor stdin --transformer null --loader stdout"
|
56
55
|
shell.say
|
57
56
|
shell.say " Show full job options:".italic.light_black
|
58
|
-
shell.say " $ chronicle-etl jobs help
|
57
|
+
shell.say " $ chronicle-etl jobs help run"
|
59
58
|
|
60
59
|
list = []
|
61
60
|
|
62
|
-
Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
|
61
|
+
::Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
|
63
62
|
list += thor_class.printable_tasks(false)
|
64
63
|
end
|
65
64
|
list.sort! { |a, b| a[0] <=> b[0] }
|
@@ -72,6 +71,9 @@ module Chronicle
|
|
72
71
|
shell.say "VERSION".bold
|
73
72
|
shell.say " #{Chronicle::ETL::VERSION}"
|
74
73
|
shell.say
|
74
|
+
shell.say " Display current version:".italic.light_black
|
75
|
+
shell.say " $ chronicle-etl --version"
|
76
|
+
shell.say
|
75
77
|
shell.say "FULL DOCUMENTATION".bold
|
76
78
|
shell.say " https://github.com/chronicle-app/chronicle-etl".blue
|
77
79
|
shell.say
|
@@ -2,11 +2,11 @@ module Chronicle
|
|
2
2
|
module ETL
|
3
3
|
module CLI
|
4
4
|
# Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
|
5
|
-
class SubcommandBase < Thor
|
5
|
+
class SubcommandBase < ::Thor
|
6
6
|
# Print usage instructions for a subcommand
|
7
7
|
def self.help(shell, subcommand = false)
|
8
8
|
list = printable_commands(true, subcommand)
|
9
|
-
Thor::Util.thor_classes_in(self).each do |klass|
|
9
|
+
::Thor::Util.thor_classes_in(self).each do |klass|
|
10
10
|
list += klass.printable_commands(false)
|
11
11
|
end
|
12
12
|
list.sort! { |a, b| a[0] <=> b[0] }
|
data/lib/chronicle/etl/config.rb
CHANGED
@@ -30,7 +30,7 @@ module Chronicle
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
-
# Returns all available credentials available in ~/.config/
|
33
|
+
# Returns all credentials available in ~/.config/chronicle/etl/credentials/*.yml
|
34
34
|
def available_credentials
|
35
35
|
job_directory = Runcom::Config.new('chronicle/etl/credentials').current
|
36
36
|
Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
|