chronicle-etl 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.ruby-version +1 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +23 -0
- data/Gemfile.lock +42 -10
- data/README.md +64 -11
- data/bin/console +16 -4
- data/chronicle-etl.gemspec +9 -7
- data/exe/chronicle-etl +2 -2
- data/lib/chronicle/etl.rb +5 -2
- data/lib/chronicle/etl/catalog.rb +62 -0
- data/lib/chronicle/etl/cli/connectors.rb +32 -0
- data/lib/chronicle/etl/cli/jobs.rb +111 -0
- data/lib/chronicle/etl/cli/main.rb +83 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
- data/lib/chronicle/etl/config.rb +32 -0
- data/lib/chronicle/etl/extractors/{csv.rb → csv_extractor.rb} +3 -3
- data/lib/chronicle/etl/extractors/extractor.rb +23 -12
- data/lib/chronicle/etl/extractors/file_extractor.rb +52 -0
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +11 -0
- data/lib/chronicle/etl/loaders/csv_loader.rb +29 -0
- data/lib/chronicle/etl/loaders/loader.rb +23 -16
- data/lib/chronicle/etl/loaders/rest_loader.rb +30 -0
- data/lib/chronicle/etl/loaders/stdout_loader.rb +9 -0
- data/lib/chronicle/etl/loaders/table_loader.rb +21 -0
- data/lib/chronicle/etl/runner.rb +33 -11
- data/lib/chronicle/etl/transformers/json_transformer.rb +11 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +10 -0
- data/lib/chronicle/etl/transformers/transformer.rb +27 -11
- data/lib/chronicle/etl/utils/progress_bar.rb +76 -0
- data/lib/chronicle/etl/version.rb +2 -2
- metadata +69 -30
- data/lib/chronicle/etl/cli.rb +0 -38
- data/lib/chronicle/etl/extractors/stdin.rb +0 -13
- data/lib/chronicle/etl/loaders/csv.rb +0 -31
- data/lib/chronicle/etl/loaders/stdout.rb +0 -11
- data/lib/chronicle/etl/loaders/table.rb +0 -22
- data/lib/chronicle/etl/transformers/json.rb +0 -13
- data/lib/chronicle/etl/transformers/null.rb +0 -11
- data/lib/chronicle/etl/utils/progress_bar_wrapper.rb +0 -43
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3d9be4b073385d9758a8b709bb6726d8dae648b8c4dbef474343840c011d1178
|
4
|
+
data.tar.gz: 0d95395407d37f7e322287a0920bba60e1b4f81eb8649190d021e13580604a2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 386c96518aa2d2810ae2a93bbe3af5bb08e26e132608b4e6ed8a278da076783e453854a2120c5016b6d02cd5dea406146d10ef3c7c1e77d854acd8ff2608eaf7
|
7
|
+
data.tar.gz: eb14402be5d6db44a6f06e6ec930acc5103b36e5c2e5a13e89137c9ee45f5f11c1e9e6ab13d4d44e6ee06bd9b02309e02ac33e81256645ef71d5b431c97eb199
|
data/.rubocop.yml
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.7.1
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--markup=markdown
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
4
|
+
|
5
|
+
## [0.1.4] - 2020-08-18
|
6
|
+
### Updated
|
7
|
+
- Better display of available ETL classes
|
8
|
+
- Updated documentation
|
9
|
+
|
10
|
+
## [0.1.3] - 2020-08-13
|
11
|
+
### Added
|
12
|
+
- Ability to list all available ETL classes
|
13
|
+
- Refactored E, T, L module and class structure
|
14
|
+
- Better progress bar
|
15
|
+
|
16
|
+
## [0.1.2] - 2020-08-02
|
17
|
+
### Added
|
18
|
+
- This changelog
|
19
|
+
- Ability to use extractors, transformers, and loaders from other gems
|
20
|
+
|
21
|
+
## [0.1.0] - 2020-08-01
|
22
|
+
### Added
|
23
|
+
- Basic job runner and ETL classes
|
data/Gemfile.lock
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
chronicle-etl (0.
|
4
|
+
chronicle-etl (0.2.1)
|
5
5
|
colorize (~> 0.8.1)
|
6
|
-
ruby-progressbar (~> 1.10)
|
7
|
-
table_print
|
8
6
|
thor (~> 0.20)
|
7
|
+
tty-progressbar (~> 0.17)
|
8
|
+
tty-table (~> 0.11)
|
9
9
|
|
10
10
|
GEM
|
11
11
|
remote: https://rubygems.org/
|
@@ -14,14 +14,21 @@ GEM
|
|
14
14
|
coderay (1.1.3)
|
15
15
|
colorize (0.8.1)
|
16
16
|
diff-lcs (1.4.4)
|
17
|
+
equatable (0.6.1)
|
17
18
|
method_source (1.0.0)
|
19
|
+
necromancer (0.6.0)
|
20
|
+
pastel (0.7.4)
|
21
|
+
equatable (~> 0.6)
|
22
|
+
tty-color (~> 0.5)
|
18
23
|
pry (0.13.1)
|
19
24
|
coderay (~> 1.1)
|
20
25
|
method_source (~> 1.0)
|
21
26
|
pry-byebug (3.9.0)
|
22
27
|
byebug (~> 11.0)
|
23
28
|
pry (~> 0.13.0)
|
24
|
-
rake (
|
29
|
+
rake (13.0.1)
|
30
|
+
redcarpet (3.5.0)
|
31
|
+
refinements (7.7.0)
|
25
32
|
rspec (3.9.0)
|
26
33
|
rspec-core (~> 3.9.0)
|
27
34
|
rspec-expectations (~> 3.9.0)
|
@@ -35,19 +42,44 @@ GEM
|
|
35
42
|
diff-lcs (>= 1.2.0, < 2.0)
|
36
43
|
rspec-support (~> 3.9.0)
|
37
44
|
rspec-support (3.9.3)
|
38
|
-
|
39
|
-
|
45
|
+
runcom (6.2.0)
|
46
|
+
refinements (~> 7.4)
|
47
|
+
xdg (~> 4.0)
|
48
|
+
strings (0.1.8)
|
49
|
+
strings-ansi (~> 0.1)
|
50
|
+
unicode-display_width (~> 1.5)
|
51
|
+
unicode_utils (~> 1.4)
|
52
|
+
strings-ansi (0.1.0)
|
40
53
|
thor (0.20.3)
|
54
|
+
tty-color (0.5.2)
|
55
|
+
tty-cursor (0.7.1)
|
56
|
+
tty-progressbar (0.17.0)
|
57
|
+
strings-ansi (~> 0.1.0)
|
58
|
+
tty-cursor (~> 0.7)
|
59
|
+
tty-screen (~> 0.7)
|
60
|
+
unicode-display_width (~> 1.6)
|
61
|
+
tty-screen (0.8.1)
|
62
|
+
tty-table (0.11.0)
|
63
|
+
equatable (~> 0.6)
|
64
|
+
necromancer (~> 0.5)
|
65
|
+
pastel (~> 0.7.2)
|
66
|
+
strings (~> 0.1.5)
|
67
|
+
tty-screen (~> 0.7)
|
68
|
+
unicode-display_width (1.7.0)
|
69
|
+
unicode_utils (1.4.0)
|
70
|
+
xdg (4.2.0)
|
41
71
|
|
42
72
|
PLATFORMS
|
43
73
|
ruby
|
44
74
|
|
45
75
|
DEPENDENCIES
|
46
|
-
bundler (~> 1
|
76
|
+
bundler (~> 2.1)
|
47
77
|
chronicle-etl!
|
48
78
|
pry-byebug (~> 3.9)
|
49
|
-
rake (~>
|
50
|
-
|
79
|
+
rake (~> 13.0)
|
80
|
+
redcarpet (~> 3.5)
|
81
|
+
rspec (~> 3.9)
|
82
|
+
runcom (~> 6.2)
|
51
83
|
|
52
84
|
BUNDLED WITH
|
53
|
-
1.
|
85
|
+
2.1.4
|
data/README.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
-
# Chronicle::
|
1
|
+
# Chronicle::ETL
|
2
2
|
|
3
|
-
|
3
|
+
[Gem Version](https://badge.fury.io/rb/chronicle-etl)
|
4
|
+
|
5
|
+
Chronicle ETL is a utility tool for archiving and processing personal data. You can extract it from a variety of sources, transform it, and load it to different APIs or file formats.
|
6
|
+
|
7
|
+
This project is an adaptation of Andrew Louis's experimental [Memex project](https://hyfen.net/memex).
|
4
8
|
|
5
9
|
## Installation
|
6
10
|
|
@@ -8,28 +12,77 @@ Chronicle ETL is a utility tool for manipulating personal data. You can extract
|
|
8
12
|
$ gem install chronicle-etl
|
9
13
|
```
|
10
14
|
|
11
|
-
##
|
15
|
+
## Usage
|
12
16
|
|
13
17
|
After installing the gem, `chronicle-etl` is available to run in your shell.
|
14
18
|
|
19
|
+
```bash
|
20
|
+
# read test.csv and display it as a table
|
21
|
+
$ chronicle-etl jobs:run --extractor csv --extractor-opts filename:test.csv --loader table
|
22
|
+
|
23
|
+
# Display help for the jobs:run command
|
24
|
+
$ chronicle-etl jobs help run
|
15
25
|
```
|
16
|
-
|
17
|
-
|
26
|
+
|
27
|
+
## Connectors
|
28
|
+
|
29
|
+
Connectors are available to read, process, and load data from different formats or external services.
|
30
|
+
|
31
|
+
```bash
|
32
|
+
# List all available connectors
|
33
|
+
$ chronicle-etl connectors:list
|
18
34
|
```
|
19
35
|
|
20
|
-
|
36
|
+
Built in connectors:
|
37
|
+
|
38
|
+
### Extractors
|
39
|
+
- `stdin` - (default) Load records from line-separated stdin
|
40
|
+
- `csv`
|
41
|
+
- `file` - load from a single file or directory (with a glob pattern)
|
42
|
+
|
43
|
+
### Transformers
|
44
|
+
- `null` - (default) Don't do anything
|
45
|
+
|
46
|
+
### Loaders
|
47
|
+
- `stdout` - (default) output transformed records to stdout
|
48
|
+
- `csv` - Load records to a csv file
|
49
|
+
- `table` - Output an ascii table of records. Useful for debugging.
|
50
|
+
|
51
|
+
### Provider-specific importers
|
52
|
+
|
53
|
+
In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
|
54
|
+
|
55
|
+
- [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` files. Transformers for chronicle schema
|
56
|
+
- [bash](https://github.com/chronicle-app/chronicle-bash). Extract bash history from `~/.bash_history`. Transform it for chronicle schema
|
57
|
+
|
58
|
+
To install any of these, run `gem install chronicle-PROVIDER`.
|
59
|
+
|
60
|
+
If you don't want to use the available rubygem importers, `chronicle-etl` can use `stdin` as an Extractor source (newline separated records). You can also use `stdout` as a loader — transformed records will be outputted separated by newlines.
|
61
|
+
|
62
|
+
I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
|
63
|
+
|
64
|
+
### Full commands
|
21
65
|
|
22
66
|
```
|
23
|
-
|
24
|
-
|
25
|
-
|
67
|
+
$ chronicle-etl help
|
68
|
+
|
69
|
+
ALL COMMANDS
|
70
|
+
help # This help menu
|
71
|
+
connectors help [COMMAND] # Describe subcommands or one specific subcommand
|
72
|
+
connectors:install NAME # Installs connector NAME
|
73
|
+
connectors:list # Lists available connectors
|
74
|
+
jobs help [COMMAND] # Describe subcommands or one specific subcommand
|
75
|
+
jobs:create # Create a job
|
76
|
+
jobs:list # List all available jobs
|
77
|
+
jobs:run # Start a job
|
78
|
+
jobs:show # Show a job
|
26
79
|
```
|
27
80
|
|
28
81
|
### Job options
|
29
82
|
|
30
83
|
```
|
31
84
|
Usage:
|
32
|
-
chronicle-etl
|
85
|
+
chronicle-etl jobs:run
|
33
86
|
|
34
87
|
Options:
|
35
88
|
-e, [--extractor=extractor-name] # Extractor class (available: stdin, csv, file)
|
@@ -62,4 +115,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
62
115
|
|
63
116
|
## Code of Conduct
|
64
117
|
|
65
|
-
Everyone interacting in the Chronicle::
|
118
|
+
Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
|
data/bin/console
CHANGED
@@ -7,8 +7,20 @@ require "chronicle/etl"
|
|
7
7
|
# with your gem easier. You can also use a different console, if you like.
|
8
8
|
|
9
9
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
|
11
|
-
|
10
|
+
require "pry"
|
11
|
+
Pry.start
|
12
|
+
|
13
|
+
def reload!(print = true)
|
14
|
+
puts 'Reloading ...' if print
|
15
|
+
# Main project directory.
|
16
|
+
root_dir = File.expand_path('..', __dir__)
|
17
|
+
# Directories within the project that should be reloaded.
|
18
|
+
reload_dirs = %w{lib}
|
19
|
+
# Loop through and reload every file in all relevant project directories.
|
20
|
+
reload_dirs.each do |dir|
|
21
|
+
Dir.glob("#{root_dir}/#{dir}/**/*.rb").each { |f| load(f) }
|
22
|
+
end
|
23
|
+
# Return true when complete.
|
24
|
+
true
|
25
|
+
end
|
12
26
|
|
13
|
-
require "irb"
|
14
|
-
IRB.start(__FILE__)
|
data/chronicle-etl.gemspec
CHANGED
@@ -5,7 +5,7 @@ require "chronicle/etl/version"
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "chronicle-etl"
|
8
|
-
spec.version = Chronicle::
|
8
|
+
spec.version = Chronicle::ETL::VERSION
|
9
9
|
spec.authors = ["Andrew Louis"]
|
10
10
|
spec.email = ["andrew@hyfen.net"]
|
11
11
|
|
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
|
22
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
23
23
|
spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
|
24
|
-
spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl"
|
24
|
+
spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md"
|
25
25
|
else
|
26
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
27
27
|
"public gem pushes."
|
@@ -38,11 +38,13 @@ Gem::Specification.new do |spec|
|
|
38
38
|
|
39
39
|
spec.add_dependency "thor", "~> 0.20"
|
40
40
|
spec.add_dependency "colorize", "~> 0.8.1"
|
41
|
-
spec.add_dependency "
|
42
|
-
spec.add_dependency "
|
41
|
+
spec.add_dependency "tty-table", "~> 0.11"
|
42
|
+
spec.add_dependency "tty-progressbar", "~> 0.17"
|
43
43
|
|
44
|
-
spec.add_development_dependency "bundler", "~> 1
|
45
|
-
spec.add_development_dependency "rake", "~>
|
46
|
-
spec.add_development_dependency "rspec", "~> 3.
|
44
|
+
spec.add_development_dependency "bundler", "~> 2.1"
|
45
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
46
|
+
spec.add_development_dependency "rspec", "~> 3.9"
|
47
47
|
spec.add_development_dependency "pry-byebug", "~> 3.9"
|
48
|
+
spec.add_development_dependency 'runcom', '~> 6.2'
|
49
|
+
spec.add_development_dependency 'redcarpet', '~> 3.5'
|
48
50
|
end
|
data/exe/chronicle-etl
CHANGED
data/lib/chronicle/etl.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
+
require_relative 'etl/catalog'
|
2
|
+
require_relative 'etl/config'
|
1
3
|
require_relative 'etl/extractors/extractor'
|
2
|
-
require_relative 'etl/transformers/transformer'
|
3
4
|
require_relative 'etl/loaders/loader'
|
4
|
-
require_relative 'etl/utils/progress_bar_wrapper'
|
5
5
|
require_relative 'etl/runner'
|
6
|
+
require_relative 'etl/transformers/transformer'
|
7
|
+
require_relative 'etl/utils/progress_bar'
|
8
|
+
require_relative 'etl/version'
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
# Utility methods to catalogue which Extractor, Transformer, and
|
4
|
+
# Loader connector classes are available to chronicle-etl
|
5
|
+
module Catalog
|
6
|
+
PLUGINS = ['email', 'bash']
|
7
|
+
|
8
|
+
# Return which ETL connectors are available, both built in and externally-defined
|
9
|
+
def self.available_classes
|
10
|
+
# TODO: have a registry of plugins
|
11
|
+
|
12
|
+
# Attempt to load each chronicle plugin that we might know about so
|
13
|
+
# that we can later search for subclasses to build our list of
|
14
|
+
# available classes
|
15
|
+
PLUGINS.each do |plugin|
|
16
|
+
require "chronicle/#{plugin}"
|
17
|
+
rescue LoadError
|
18
|
+
# this will happen if the gem isn't available globally
|
19
|
+
end
|
20
|
+
|
21
|
+
parent_klasses = [
|
22
|
+
::Chronicle::ETL::Extractor,
|
23
|
+
::Chronicle::ETL::Transformer,
|
24
|
+
::Chronicle::ETL::Loader
|
25
|
+
]
|
26
|
+
klasses = []
|
27
|
+
parent_klasses.map do |parent|
|
28
|
+
klasses += ::ObjectSpace.each_object(::Class).select { |klass| klass < parent }
|
29
|
+
end
|
30
|
+
|
31
|
+
klasses.map do |klass|
|
32
|
+
{
|
33
|
+
name: klass.name,
|
34
|
+
built_in: klass.built_in?,
|
35
|
+
provider: klass.provider,
|
36
|
+
phase: klass.phase
|
37
|
+
}
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Returns whether a class is an Extractor, Transformer, or Loader
|
42
|
+
def phase
|
43
|
+
ancestors = self.ancestors
|
44
|
+
return :extractor if ancestors.include? Chronicle::ETL::Extractor
|
45
|
+
return :transformer if ancestors.include? Chronicle::ETL::Transformer
|
46
|
+
return :loader if ancestors.include? Chronicle::ETL::Loader
|
47
|
+
end
|
48
|
+
|
49
|
+
# Returns which third-party provider this connector is associated with
|
50
|
+
def provider
|
51
|
+
# TODO: needs better convention for a gem reporting its provider name
|
52
|
+
provider = to_s.split('::')[1].downcase
|
53
|
+
provider == 'etl' ? 'chronicle' : provider
|
54
|
+
end
|
55
|
+
|
56
|
+
# Returns whether this connector is a built-in one
|
57
|
+
def built_in?
|
58
|
+
to_s.include? 'Chronicle::ETL'
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
module CLI
|
4
|
+
# CLI commands for working with ETL connectors
|
5
|
+
class Connectors < SubcommandBase
|
6
|
+
default_task 'list'
|
7
|
+
namespace :connectors
|
8
|
+
|
9
|
+
desc "install NAME", "Installs connector NAME"
|
10
|
+
def install
|
11
|
+
puts "Installing"
|
12
|
+
end
|
13
|
+
|
14
|
+
desc "list", "Lists available connectors"
|
15
|
+
# Display all available connectors that chronicle-etl has access to
|
16
|
+
def list
|
17
|
+
klasses = Chronicle::ETL::Catalog.available_classes
|
18
|
+
klasses = klasses.sort_by do |a|
|
19
|
+
[a[:built_in].to_s, a[:provider], a[:phase]]
|
20
|
+
end
|
21
|
+
|
22
|
+
headers = klasses.first.keys.map do |key|
|
23
|
+
key.to_s.upcase.bold
|
24
|
+
end
|
25
|
+
|
26
|
+
table = TTY::Table.new(headers, klasses.map(&:values))
|
27
|
+
puts table.render(indent: 0, padding: [0, 2])
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'pp'
|
2
|
+
require 'pry'
|
3
|
+
|
4
|
+
module Chronicle
|
5
|
+
module ETL
|
6
|
+
module CLI
|
7
|
+
# CLI commands for working with ETL jobs
|
8
|
+
class Jobs < SubcommandBase
|
9
|
+
default_task "start"
|
10
|
+
namespace :jobs
|
11
|
+
|
12
|
+
class_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
|
13
|
+
class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
14
|
+
class_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
|
15
|
+
class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
16
|
+
class_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
17
|
+
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
18
|
+
class_option :job, aliases: '-j', desc: 'Job configuration name (or filename)'
|
19
|
+
|
20
|
+
map run: :start # Thor doesn't like `run` as a command name
|
21
|
+
desc "run", "Start a job"
|
22
|
+
long_desc <<-LONG_DESC
|
23
|
+
This will run an ETL job. Each job needs three parts:
|
24
|
+
|
25
|
+
1. #{'Extractor'.underline}: pulls data from an external source. By default, this is stdout. Other common options including pulling data from an API or reading JSON from a file.
|
26
|
+
|
27
|
+
2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
|
28
|
+
|
29
|
+
3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout.
|
30
|
+
|
31
|
+
If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
|
32
|
+
LONG_DESC
|
33
|
+
# Run an ETL job
|
34
|
+
def start
|
35
|
+
runner_options = build_runner_options(options)
|
36
|
+
runner = Chronicle::ETL::Runner.new(runner_options)
|
37
|
+
runner.run!
|
38
|
+
end
|
39
|
+
|
40
|
+
desc "create", "Create a job"
|
41
|
+
# Create an ETL job
|
42
|
+
def create
|
43
|
+
runner_options = build_runner_options(options)
|
44
|
+
path = File.join('chronicle', 'etl', 'jobs', options[:job])
|
45
|
+
Chronicle::ETL::Config.write(path, runner_options)
|
46
|
+
end
|
47
|
+
|
48
|
+
desc "show", "Show details about a job"
|
49
|
+
# Show an ETL job
|
50
|
+
def show
|
51
|
+
runner_options = build_runner_options(options)
|
52
|
+
pp runner_options
|
53
|
+
end
|
54
|
+
|
55
|
+
desc "list", "List all available jobs"
|
56
|
+
# List available ETL jobs
|
57
|
+
def list
|
58
|
+
jobs = Chronicle::ETL::Config.jobs
|
59
|
+
|
60
|
+
job_details = jobs.map do |job|
|
61
|
+
r = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
|
62
|
+
|
63
|
+
extractor = r[:extractor][:name] if r[:extractor]
|
64
|
+
transformer = r[:transformer][:name] if r[:transformer]
|
65
|
+
loader = r[:loader][:name] if r[:loader]
|
66
|
+
|
67
|
+
[job, extractor, transformer, loader]
|
68
|
+
end
|
69
|
+
|
70
|
+
headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
|
71
|
+
|
72
|
+
table = TTY::Table.new(headers, job_details)
|
73
|
+
puts table.render(indent: 0, padding: [0, 2])
|
74
|
+
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
# Create runner options by reading config file and then overwriting with flag options
|
79
|
+
def build_runner_options options
|
80
|
+
flag_options = process_flag_options(options)
|
81
|
+
job_options = load_job(options[:job])
|
82
|
+
flag_options.merge(job_options)
|
83
|
+
end
|
84
|
+
|
85
|
+
def load_job job
|
86
|
+
yml_config = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
|
87
|
+
# FIXME: use better trick to deeply symbolize keys
|
88
|
+
JSON.parse(yml_config.to_json, symbolize_names: true)
|
89
|
+
end
|
90
|
+
|
91
|
+
# Takes flag options and turns them into a runner config
|
92
|
+
def process_flag_options options
|
93
|
+
{
|
94
|
+
extractor: {
|
95
|
+
name: options[:extractor],
|
96
|
+
options: options[:'extractor-opts']
|
97
|
+
},
|
98
|
+
transformer: {
|
99
|
+
name: options[:transformer],
|
100
|
+
options: options[:'transformer-opts']
|
101
|
+
},
|
102
|
+
loader: {
|
103
|
+
name: options[:loader],
|
104
|
+
options: options[:'loader-opts']
|
105
|
+
}
|
106
|
+
}
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|