chronicle-etl 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.ruby-version +1 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +23 -0
- data/Gemfile.lock +42 -10
- data/README.md +64 -11
- data/bin/console +16 -4
- data/chronicle-etl.gemspec +9 -7
- data/exe/chronicle-etl +2 -2
- data/lib/chronicle/etl.rb +5 -2
- data/lib/chronicle/etl/catalog.rb +62 -0
- data/lib/chronicle/etl/cli/connectors.rb +32 -0
- data/lib/chronicle/etl/cli/jobs.rb +111 -0
- data/lib/chronicle/etl/cli/main.rb +83 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
- data/lib/chronicle/etl/config.rb +32 -0
- data/lib/chronicle/etl/extractors/{csv.rb → csv_extractor.rb} +3 -5
- data/lib/chronicle/etl/extractors/extractor.rb +23 -12
- data/lib/chronicle/etl/extractors/file_extractor.rb +52 -0
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +11 -0
- data/lib/chronicle/etl/loaders/csv_loader.rb +29 -0
- data/lib/chronicle/etl/loaders/loader.rb +22 -16
- data/lib/chronicle/etl/loaders/stdout_loader.rb +9 -0
- data/lib/chronicle/etl/loaders/table_loader.rb +21 -0
- data/lib/chronicle/etl/runner.rb +33 -11
- data/lib/chronicle/etl/transformers/json_transformer.rb +11 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +10 -0
- data/lib/chronicle/etl/transformers/transformer.rb +27 -11
- data/lib/chronicle/etl/utils/progress_bar.rb +76 -0
- data/lib/chronicle/etl/version.rb +2 -2
- metadata +68 -30
- data/lib/chronicle/etl/cli.rb +0 -38
- data/lib/chronicle/etl/extractors/stdin.rb +0 -13
- data/lib/chronicle/etl/loaders/csv.rb +0 -31
- data/lib/chronicle/etl/loaders/stdout.rb +0 -13
- data/lib/chronicle/etl/loaders/table.rb +0 -22
- data/lib/chronicle/etl/transformers/json.rb +0 -13
- data/lib/chronicle/etl/transformers/null.rb +0 -11
- data/lib/chronicle/etl/utils/progress_bar_wrapper.rb +0 -43
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77abdc6f98e01300e0cf0ff4c8737fdc3bfe395754e9ddba1b6d3de86f2d6be8
|
4
|
+
data.tar.gz: 4a76565cfe9448b8ee7a6aa253a98923d7beccf02cc1ea7c8bacf3e7f7ab88ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc6aaafde633a7316e26a23984f6a6965977e7bba388d2bb09751d3ef8ba0ed6a1ff78e50816af3a9806d5b03720fc524a270e228be0cc2894298acbbc342155
|
7
|
+
data.tar.gz: 32dd999d57c307b9e57db3b877362b0e780f8a618d5c573a5295241ec47dee65472b1e98414e4cb0959d978f83cb7cfde1177c9fb1d11c54e5edf1ca5ba4419a
|
data/.rubocop.yml
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.7.1
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--markup=markdown
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
4
|
+
|
5
|
+
## [0.1.4] - 2020-08-18
|
6
|
+
### Updated
|
7
|
+
- Better display of available ETL classes
|
8
|
+
- Updated documentation
|
9
|
+
|
10
|
+
## [0.1.3] - 2020-08-13
|
11
|
+
### Added
|
12
|
+
- Ability to list all available ETL classes
|
13
|
+
- Refactored E, T, L module and class structure
|
14
|
+
- Better progress bar
|
15
|
+
|
16
|
+
## [0.1.2] - 2020-08-02
|
17
|
+
### Added
|
18
|
+
- This changelog
|
19
|
+
- Ability to use extractors, transformers, and loaders from other gems
|
20
|
+
|
21
|
+
## [0.1.0] - 2020-08-01
|
22
|
+
### Added
|
23
|
+
- Basic job runner and ETL classes
|
data/Gemfile.lock
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
chronicle-etl (0.
|
4
|
+
chronicle-etl (0.2.0)
|
5
5
|
colorize (~> 0.8.1)
|
6
|
-
ruby-progressbar (~> 1.10)
|
7
|
-
table_print
|
8
6
|
thor (~> 0.20)
|
7
|
+
tty-progressbar (~> 0.17)
|
8
|
+
tty-table (~> 0.11)
|
9
9
|
|
10
10
|
GEM
|
11
11
|
remote: https://rubygems.org/
|
@@ -14,14 +14,21 @@ GEM
|
|
14
14
|
coderay (1.1.3)
|
15
15
|
colorize (0.8.1)
|
16
16
|
diff-lcs (1.4.4)
|
17
|
+
equatable (0.6.1)
|
17
18
|
method_source (1.0.0)
|
19
|
+
necromancer (0.6.0)
|
20
|
+
pastel (0.7.4)
|
21
|
+
equatable (~> 0.6)
|
22
|
+
tty-color (~> 0.5)
|
18
23
|
pry (0.13.1)
|
19
24
|
coderay (~> 1.1)
|
20
25
|
method_source (~> 1.0)
|
21
26
|
pry-byebug (3.9.0)
|
22
27
|
byebug (~> 11.0)
|
23
28
|
pry (~> 0.13.0)
|
24
|
-
rake (
|
29
|
+
rake (13.0.1)
|
30
|
+
redcarpet (3.5.0)
|
31
|
+
refinements (7.7.0)
|
25
32
|
rspec (3.9.0)
|
26
33
|
rspec-core (~> 3.9.0)
|
27
34
|
rspec-expectations (~> 3.9.0)
|
@@ -35,19 +42,44 @@ GEM
|
|
35
42
|
diff-lcs (>= 1.2.0, < 2.0)
|
36
43
|
rspec-support (~> 3.9.0)
|
37
44
|
rspec-support (3.9.3)
|
38
|
-
|
39
|
-
|
45
|
+
runcom (6.2.0)
|
46
|
+
refinements (~> 7.4)
|
47
|
+
xdg (~> 4.0)
|
48
|
+
strings (0.1.8)
|
49
|
+
strings-ansi (~> 0.1)
|
50
|
+
unicode-display_width (~> 1.5)
|
51
|
+
unicode_utils (~> 1.4)
|
52
|
+
strings-ansi (0.1.0)
|
40
53
|
thor (0.20.3)
|
54
|
+
tty-color (0.5.2)
|
55
|
+
tty-cursor (0.7.1)
|
56
|
+
tty-progressbar (0.17.0)
|
57
|
+
strings-ansi (~> 0.1.0)
|
58
|
+
tty-cursor (~> 0.7)
|
59
|
+
tty-screen (~> 0.7)
|
60
|
+
unicode-display_width (~> 1.6)
|
61
|
+
tty-screen (0.8.1)
|
62
|
+
tty-table (0.11.0)
|
63
|
+
equatable (~> 0.6)
|
64
|
+
necromancer (~> 0.5)
|
65
|
+
pastel (~> 0.7.2)
|
66
|
+
strings (~> 0.1.5)
|
67
|
+
tty-screen (~> 0.7)
|
68
|
+
unicode-display_width (1.7.0)
|
69
|
+
unicode_utils (1.4.0)
|
70
|
+
xdg (4.2.0)
|
41
71
|
|
42
72
|
PLATFORMS
|
43
73
|
ruby
|
44
74
|
|
45
75
|
DEPENDENCIES
|
46
|
-
bundler (~> 1
|
76
|
+
bundler (~> 2.1)
|
47
77
|
chronicle-etl!
|
48
78
|
pry-byebug (~> 3.9)
|
49
|
-
rake (~>
|
50
|
-
|
79
|
+
rake (~> 13.0)
|
80
|
+
redcarpet (~> 3.5)
|
81
|
+
rspec (~> 3.9)
|
82
|
+
runcom (~> 6.2)
|
51
83
|
|
52
84
|
BUNDLED WITH
|
53
|
-
1.
|
85
|
+
2.1.4
|
data/README.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
-
# Chronicle::
|
1
|
+
# Chronicle::ETL
|
2
2
|
|
3
|
-
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
|
4
|
+
|
5
|
+
Chronicle ETL is a utility tool for archiving and processing personal data. You can extract it from a variety of sources, transform it, and load it to different APIs or file formats.
|
6
|
+
|
7
|
+
This project is an adaptation of Andrew Louis's experimental [Memex project](https://hyfen.net/memex).
|
4
8
|
|
5
9
|
## Installation
|
6
10
|
|
@@ -8,28 +12,77 @@ Chronicle ETL is a utility tool for manipulating personal data. You can extract
|
|
8
12
|
$ gem install chronicle-etl
|
9
13
|
```
|
10
14
|
|
11
|
-
##
|
15
|
+
## Usage
|
12
16
|
|
13
17
|
After installing the gem, `chronicle-etl` is available to run in your shell.
|
14
18
|
|
19
|
+
```bash
|
20
|
+
# read test.csv and display it as a table
|
21
|
+
$ chronicle-etl jobs:run --extractor csv --extractor-opts filename:test.csv --loader table
|
22
|
+
|
23
|
+
# Display help for the jobs:run command
|
24
|
+
$ chronicle-etl jobs help run
|
15
25
|
```
|
16
|
-
|
17
|
-
|
26
|
+
|
27
|
+
## Connectors
|
28
|
+
|
29
|
+
Connectors are available to read, process, and load data from different formats or external services.
|
30
|
+
|
31
|
+
```bash
|
32
|
+
# List all available connectors
|
33
|
+
$ chronicle-etl connectors:list
|
18
34
|
```
|
19
35
|
|
20
|
-
|
36
|
+
Built in connectors:
|
37
|
+
|
38
|
+
### Extractors
|
39
|
+
- `stdin` - (default) Load records from line-separated stdin
|
40
|
+
- `csv`
|
41
|
+
- `file` - load from a single file or directory (with a glob pattern)
|
42
|
+
|
43
|
+
### Transformers
|
44
|
+
- `null` - (default) Don't do anything
|
45
|
+
|
46
|
+
### Loaders
|
47
|
+
- `stdout` - (default) output transformed records to stdout
|
48
|
+
- `csv` - Load records to a csv file
|
49
|
+
- `table` - Output an ascii table of records. Useful for debugging.
|
50
|
+
|
51
|
+
### Provider-specific importers
|
52
|
+
|
53
|
+
In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
|
54
|
+
|
55
|
+
- [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` files. Transformers for chronicle schema
|
56
|
+
- [bash](https://github.com/chronicle-app/chronicle-bash). Extract bash history from `~/.bash_history`. Transform it for chronicle schema
|
57
|
+
|
58
|
+
To install any of these, run `gem install chronicle-PROVIDER`.
|
59
|
+
|
60
|
+
If you don't want to use the available rubygem importers, `chronicle-etl` can use `stdin` as an Extractor source (newline separated records). You can also use `stdout` as a loader — transformed records will be outputted separated by newlines.
|
61
|
+
|
62
|
+
I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
|
63
|
+
|
64
|
+
### Full commands
|
21
65
|
|
22
66
|
```
|
23
|
-
|
24
|
-
|
25
|
-
|
67
|
+
$ chronicle-etl help
|
68
|
+
|
69
|
+
ALL COMMANDS
|
70
|
+
help # This help menu
|
71
|
+
connectors help [COMMAND] # Describe subcommands or one specific subcommand
|
72
|
+
connectors:install NAME # Installs connector NAME
|
73
|
+
connectors:list # Lists available connectors
|
74
|
+
jobs help [COMMAND] # Describe subcommands or one specific subcommand
|
75
|
+
jobs:create # Create a job
|
76
|
+
jobs:list # List all available jobs
|
77
|
+
jobs:run # Start a job
|
78
|
+
jobs:show # Show a job
|
26
79
|
```
|
27
80
|
|
28
81
|
### Job options
|
29
82
|
|
30
83
|
```
|
31
84
|
Usage:
|
32
|
-
chronicle-etl
|
85
|
+
chronicle-etl jobs:run
|
33
86
|
|
34
87
|
Options:
|
35
88
|
-e, [--extractor=extractor-name] # Extractor class (available: stdin, csv, file)
|
@@ -62,4 +115,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
62
115
|
|
63
116
|
## Code of Conduct
|
64
117
|
|
65
|
-
Everyone interacting in the Chronicle::
|
118
|
+
Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
|
data/bin/console
CHANGED
@@ -7,8 +7,20 @@ require "chronicle/etl"
|
|
7
7
|
# with your gem easier. You can also use a different console, if you like.
|
8
8
|
|
9
9
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
|
11
|
-
|
10
|
+
require "pry"
|
11
|
+
Pry.start
|
12
|
+
|
13
|
+
def reload!(print = true)
|
14
|
+
puts 'Reloading ...' if print
|
15
|
+
# Main project directory.
|
16
|
+
root_dir = File.expand_path('..', __dir__)
|
17
|
+
# Directories within the project that should be reloaded.
|
18
|
+
reload_dirs = %w{lib}
|
19
|
+
# Loop through and reload every file in all relevant project directories.
|
20
|
+
reload_dirs.each do |dir|
|
21
|
+
Dir.glob("#{root_dir}/#{dir}/**/*.rb").each { |f| load(f) }
|
22
|
+
end
|
23
|
+
# Return true when complete.
|
24
|
+
true
|
25
|
+
end
|
12
26
|
|
13
|
-
require "irb"
|
14
|
-
IRB.start(__FILE__)
|
data/chronicle-etl.gemspec
CHANGED
@@ -5,7 +5,7 @@ require "chronicle/etl/version"
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "chronicle-etl"
|
8
|
-
spec.version = Chronicle::
|
8
|
+
spec.version = Chronicle::ETL::VERSION
|
9
9
|
spec.authors = ["Andrew Louis"]
|
10
10
|
spec.email = ["andrew@hyfen.net"]
|
11
11
|
|
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
|
22
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
23
23
|
spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
|
24
|
-
spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl"
|
24
|
+
spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md"
|
25
25
|
else
|
26
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
27
27
|
"public gem pushes."
|
@@ -38,11 +38,13 @@ Gem::Specification.new do |spec|
|
|
38
38
|
|
39
39
|
spec.add_dependency "thor", "~> 0.20"
|
40
40
|
spec.add_dependency "colorize", "~> 0.8.1"
|
41
|
-
spec.add_dependency "
|
42
|
-
spec.add_dependency "
|
41
|
+
spec.add_dependency "tty-table", "~> 0.11"
|
42
|
+
spec.add_dependency "tty-progressbar", "~> 0.17"
|
43
43
|
|
44
|
-
spec.add_development_dependency "bundler", "~> 1
|
45
|
-
spec.add_development_dependency "rake", "~>
|
46
|
-
spec.add_development_dependency "rspec", "~> 3.
|
44
|
+
spec.add_development_dependency "bundler", "~> 2.1"
|
45
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
46
|
+
spec.add_development_dependency "rspec", "~> 3.9"
|
47
47
|
spec.add_development_dependency "pry-byebug", "~> 3.9"
|
48
|
+
spec.add_development_dependency 'runcom', '~> 6.2'
|
49
|
+
spec.add_development_dependency 'redcarpet', '~> 3.5'
|
48
50
|
end
|
data/exe/chronicle-etl
CHANGED
data/lib/chronicle/etl.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
+
require_relative 'etl/catalog'
|
2
|
+
require_relative 'etl/config'
|
1
3
|
require_relative 'etl/extractors/extractor'
|
2
|
-
require_relative 'etl/transformers/transformer'
|
3
4
|
require_relative 'etl/loaders/loader'
|
4
|
-
require_relative 'etl/utils/progress_bar_wrapper'
|
5
5
|
require_relative 'etl/runner'
|
6
|
+
require_relative 'etl/transformers/transformer'
|
7
|
+
require_relative 'etl/utils/progress_bar'
|
8
|
+
require_relative 'etl/version'
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
# Utility methods to catalogue which Extractor, Transformer, and
|
4
|
+
# Loader connector classes are available to chronicle-etl
|
5
|
+
module Catalog
|
6
|
+
PLUGINS = ['email', 'bash']
|
7
|
+
|
8
|
+
# Return which ETL connectors are available, both built in and externally-defined
|
9
|
+
def self.available_classes
|
10
|
+
# TODO: have a registry of plugins
|
11
|
+
|
12
|
+
# Attempt to load each chronicle plugin that we might know about so
|
13
|
+
# that we can later search for subclasses to build our list of
|
14
|
+
# available classes
|
15
|
+
PLUGINS.each do |plugin|
|
16
|
+
require "chronicle/#{plugin}"
|
17
|
+
rescue LoadError
|
18
|
+
# this will happen if the gem isn't available globally
|
19
|
+
end
|
20
|
+
|
21
|
+
parent_klasses = [
|
22
|
+
::Chronicle::ETL::Extractor,
|
23
|
+
::Chronicle::ETL::Transformer,
|
24
|
+
::Chronicle::ETL::Loader
|
25
|
+
]
|
26
|
+
klasses = []
|
27
|
+
parent_klasses.map do |parent|
|
28
|
+
klasses += ::ObjectSpace.each_object(::Class).select { |klass| klass < parent }
|
29
|
+
end
|
30
|
+
|
31
|
+
klasses.map do |klass|
|
32
|
+
{
|
33
|
+
name: klass.name,
|
34
|
+
built_in: klass.built_in?,
|
35
|
+
provider: klass.provider,
|
36
|
+
phase: klass.phase
|
37
|
+
}
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Returns whether a class is an Extractor, Transformer, or Loader
|
42
|
+
def phase
|
43
|
+
ancestors = self.ancestors
|
44
|
+
return :extractor if ancestors.include? Chronicle::ETL::Extractor
|
45
|
+
return :transformer if ancestors.include? Chronicle::ETL::Transformer
|
46
|
+
return :loader if ancestors.include? Chronicle::ETL::Loader
|
47
|
+
end
|
48
|
+
|
49
|
+
# Returns which third-party provider this connector is associated with
|
50
|
+
def provider
|
51
|
+
# TODO: needs better convention for a gem reporting its provider name
|
52
|
+
provider = to_s.split('::')[1].downcase
|
53
|
+
provider == 'etl' ? 'chronicle' : provider
|
54
|
+
end
|
55
|
+
|
56
|
+
# Returns whether this connector is a built-in one
|
57
|
+
def built_in?
|
58
|
+
to_s.include? 'Chronicle::ETL'
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
module CLI
|
4
|
+
# CLI commands for working with ETL connectors
|
5
|
+
class Connectors < SubcommandBase
|
6
|
+
default_task 'list'
|
7
|
+
namespace :connectors
|
8
|
+
|
9
|
+
desc "install NAME", "Installs connector NAME"
|
10
|
+
def install
|
11
|
+
puts "Installing"
|
12
|
+
end
|
13
|
+
|
14
|
+
desc "list", "Lists available connectors"
|
15
|
+
# Display all available connectors that chronicle-etl has access to
|
16
|
+
def list
|
17
|
+
klasses = Chronicle::ETL::Catalog.available_classes
|
18
|
+
klasses = klasses.sort_by do |a|
|
19
|
+
[a[:built_in].to_s, a[:provider], a[:phase]]
|
20
|
+
end
|
21
|
+
|
22
|
+
headers = klasses.first.keys.map do |key|
|
23
|
+
key.to_s.upcase.bold
|
24
|
+
end
|
25
|
+
|
26
|
+
table = TTY::Table.new(headers, klasses.map(&:values))
|
27
|
+
puts table.render(indent: 0, padding: [0, 2])
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'pp'
|
2
|
+
require 'pry'
|
3
|
+
|
4
|
+
module Chronicle
|
5
|
+
module ETL
|
6
|
+
module CLI
|
7
|
+
# CLI commands for working with ETL jobs
|
8
|
+
class Jobs < SubcommandBase
|
9
|
+
default_task "start"
|
10
|
+
namespace :jobs
|
11
|
+
|
12
|
+
class_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
|
13
|
+
class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
14
|
+
class_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
|
15
|
+
class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
16
|
+
class_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
17
|
+
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
18
|
+
class_option :job, aliases: '-j', desc: 'Job configuration name (or filename)'
|
19
|
+
|
20
|
+
map run: :start # Thor doesn't like `run` as a command name
|
21
|
+
desc "run", "Start a job"
|
22
|
+
long_desc <<-LONG_DESC
|
23
|
+
This will run an ETL job. Each job needs three parts:
|
24
|
+
|
25
|
+
1. #{'Extractor'.underline}: pulls data from an external source. By default, this is stdin. Other common options include pulling data from an API or reading JSON from a file.
|
26
|
+
|
27
|
+
2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
|
28
|
+
|
29
|
+
3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout.
|
30
|
+
|
31
|
+
If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
|
32
|
+
LONG_DESC
|
33
|
+
# Run an ETL job
|
34
|
+
def start
|
35
|
+
runner_options = build_runner_options(options)
|
36
|
+
runner = Chronicle::ETL::Runner.new(runner_options)
|
37
|
+
runner.run!
|
38
|
+
end
|
39
|
+
|
40
|
+
desc "create", "Create a job"
|
41
|
+
# Create an ETL job
|
42
|
+
def create
|
43
|
+
runner_options = build_runner_options(options)
|
44
|
+
path = File.join('chronicle', 'etl', 'jobs', options[:job])
|
45
|
+
Chronicle::ETL::Config.write(path, runner_options)
|
46
|
+
end
|
47
|
+
|
48
|
+
desc "show", "Show details about a job"
|
49
|
+
# Show an ETL job
|
50
|
+
def show
|
51
|
+
runner_options = build_runner_options(options)
|
52
|
+
pp runner_options
|
53
|
+
end
|
54
|
+
|
55
|
+
desc "list", "List all available jobs"
|
56
|
+
# List available ETL jobs
|
57
|
+
def list
|
58
|
+
jobs = Chronicle::ETL::Config.jobs
|
59
|
+
|
60
|
+
job_details = jobs.map do |job|
|
61
|
+
r = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
|
62
|
+
|
63
|
+
extractor = r[:extractor][:name] if r[:extractor]
|
64
|
+
transformer = r[:transformer][:name] if r[:transformer]
|
65
|
+
loader = r[:loader][:name] if r[:loader]
|
66
|
+
|
67
|
+
[job, extractor, transformer, loader]
|
68
|
+
end
|
69
|
+
|
70
|
+
headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
|
71
|
+
|
72
|
+
table = TTY::Table.new(headers, job_details)
|
73
|
+
puts table.render(indent: 0, padding: [0, 2])
|
74
|
+
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
# Create runner options by reading config file and then overwriting with flag options
|
79
|
+
def build_runner_options options
|
80
|
+
flag_options = process_flag_options(options)
|
81
|
+
job_options = load_job(options[:job])
|
82
|
+
flag_options.merge(job_options)
|
83
|
+
end
|
84
|
+
|
85
|
+
def load_job job
|
86
|
+
yml_config = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
|
87
|
+
# FIXME: use better trick to deeply symbolize keys
|
88
|
+
JSON.parse(yml_config.to_json, symbolize_names: true)
|
89
|
+
end
|
90
|
+
|
91
|
+
# Takes flag options and turns them into a runner config
|
92
|
+
def process_flag_options options
|
93
|
+
{
|
94
|
+
extractor: {
|
95
|
+
name: options[:extractor],
|
96
|
+
options: options[:'extractor-opts']
|
97
|
+
},
|
98
|
+
transformer: {
|
99
|
+
name: options[:transformer],
|
100
|
+
options: options[:'transformer-opts']
|
101
|
+
},
|
102
|
+
loader: {
|
103
|
+
name: options[:loader],
|
104
|
+
options: options[:'loader-opts']
|
105
|
+
}
|
106
|
+
}
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|