chronicle-etl 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.yardopts +1 -0
- data/Gemfile.lock +9 -1
- data/README.md +28 -10
- data/chronicle-etl.gemspec +3 -1
- data/exe/chronicle-etl +2 -2
- data/lib/chronicle/etl.rb +4 -2
- data/lib/chronicle/etl/catalog.rb +20 -16
- data/lib/chronicle/etl/cli/connectors.rb +32 -0
- data/lib/chronicle/etl/cli/jobs.rb +111 -0
- data/lib/chronicle/etl/cli/main.rb +83 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
- data/lib/chronicle/etl/config.rb +32 -0
- data/lib/chronicle/etl/extractors/csv_extractor.rb +1 -1
- data/lib/chronicle/etl/extractors/extractor.rb +11 -5
- data/lib/chronicle/etl/extractors/file_extractor.rb +2 -2
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +2 -2
- data/lib/chronicle/etl/loaders/csv_loader.rb +2 -2
- data/lib/chronicle/etl/loaders/loader.rb +10 -2
- data/lib/chronicle/etl/loaders/stdout_loader.rb +2 -2
- data/lib/chronicle/etl/loaders/table_loader.rb +2 -2
- data/lib/chronicle/etl/runner.rb +4 -4
- data/lib/chronicle/etl/transformers/json_transformer.rb +2 -2
- data/lib/chronicle/etl/transformers/null_transformer.rb +2 -2
- data/lib/chronicle/etl/transformers/transformer.rb +18 -2
- data/lib/chronicle/etl/utils/progress_bar.rb +1 -1
- data/lib/chronicle/etl/version.rb +2 -2
- metadata +37 -3
- data/lib/chronicle/etl/cli.rb +0 -56
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77abdc6f98e01300e0cf0ff4c8737fdc3bfe395754e9ddba1b6d3de86f2d6be8
|
4
|
+
data.tar.gz: 4a76565cfe9448b8ee7a6aa253a98923d7beccf02cc1ea7c8bacf3e7f7ab88ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc6aaafde633a7316e26a23984f6a6965977e7bba388d2bb09751d3ef8ba0ed6a1ff78e50816af3a9806d5b03720fc524a270e228be0cc2894298acbbc342155
|
7
|
+
data.tar.gz: 32dd999d57c307b9e57db3b877362b0e780f8a618d5c573a5295241ec47dee65472b1e98414e4cb0959d978f83cb7cfde1177c9fb1d11c54e5edf1ca5ba4419a
|
data/.rubocop.yml
ADDED
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--markup=markdown
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
chronicle-etl (0.
|
4
|
+
chronicle-etl (0.2.0)
|
5
5
|
colorize (~> 0.8.1)
|
6
6
|
thor (~> 0.20)
|
7
7
|
tty-progressbar (~> 0.17)
|
@@ -27,6 +27,8 @@ GEM
|
|
27
27
|
byebug (~> 11.0)
|
28
28
|
pry (~> 0.13.0)
|
29
29
|
rake (13.0.1)
|
30
|
+
redcarpet (3.5.0)
|
31
|
+
refinements (7.7.0)
|
30
32
|
rspec (3.9.0)
|
31
33
|
rspec-core (~> 3.9.0)
|
32
34
|
rspec-expectations (~> 3.9.0)
|
@@ -40,6 +42,9 @@ GEM
|
|
40
42
|
diff-lcs (>= 1.2.0, < 2.0)
|
41
43
|
rspec-support (~> 3.9.0)
|
42
44
|
rspec-support (3.9.3)
|
45
|
+
runcom (6.2.0)
|
46
|
+
refinements (~> 7.4)
|
47
|
+
xdg (~> 4.0)
|
43
48
|
strings (0.1.8)
|
44
49
|
strings-ansi (~> 0.1)
|
45
50
|
unicode-display_width (~> 1.5)
|
@@ -62,6 +67,7 @@ GEM
|
|
62
67
|
tty-screen (~> 0.7)
|
63
68
|
unicode-display_width (1.7.0)
|
64
69
|
unicode_utils (1.4.0)
|
70
|
+
xdg (4.2.0)
|
65
71
|
|
66
72
|
PLATFORMS
|
67
73
|
ruby
|
@@ -71,7 +77,9 @@ DEPENDENCIES
|
|
71
77
|
chronicle-etl!
|
72
78
|
pry-byebug (~> 3.9)
|
73
79
|
rake (~> 13.0)
|
80
|
+
redcarpet (~> 3.5)
|
74
81
|
rspec (~> 3.9)
|
82
|
+
runcom (~> 6.2)
|
75
83
|
|
76
84
|
BUNDLED WITH
|
77
85
|
2.1.4
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Chronicle::
|
1
|
+
# Chronicle::ETL
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
|
4
4
|
|
@@ -16,12 +16,24 @@ $ gem install chronicle-etl
|
|
16
16
|
|
17
17
|
After installing the gem, `chronicle-etl` is available to run in your shell.
|
18
18
|
|
19
|
+
```bash
|
20
|
+
# read test.csv and display it as a table
|
21
|
+
$ chronicle-etl jobs:run --extractor csv --extractor-opts filename:test.csv --loader table
|
22
|
+
|
23
|
+
# Display help for the jobs:run command
|
24
|
+
$ chronicle-etl jobs help run
|
19
25
|
```
|
20
|
-
|
21
|
-
|
26
|
+
|
27
|
+
## Connectors
|
28
|
+
|
29
|
+
Connectors are available to read, process, and load data from different formats or external services.
|
30
|
+
|
31
|
+
```bash
|
32
|
+
# List all available connectors
|
33
|
+
$ chronicle-etl connectors:list
|
22
34
|
```
|
23
35
|
|
24
|
-
|
36
|
+
Built in connectors:
|
25
37
|
|
26
38
|
### Extractors
|
27
39
|
- `stdin` - (default) Load records from line-separated stdin
|
@@ -54,17 +66,23 @@ I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.ne
|
|
54
66
|
```
|
55
67
|
$ chronicle-etl help
|
56
68
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
69
|
+
ALL COMMANDS
|
70
|
+
help # This help menu
|
71
|
+
connectors help [COMMAND] # Describe subcommands or one specific subcommand
|
72
|
+
connectors:install NAME # Installs connector NAME
|
73
|
+
connectors:list # Lists available connectors
|
74
|
+
jobs help [COMMAND] # Describe subcommands or one specific subcommand
|
75
|
+
jobs:create # Create a job
|
76
|
+
jobs:list # List all available jobs
|
77
|
+
jobs:run # Start a job
|
78
|
+
jobs:show # Show a job
|
61
79
|
```
|
62
80
|
|
63
81
|
### Job options
|
64
82
|
|
65
83
|
```
|
66
84
|
Usage:
|
67
|
-
chronicle-etl
|
85
|
+
chronicle-etl jobs:run
|
68
86
|
|
69
87
|
Options:
|
70
88
|
-e, [--extractor=extractor-name] # Extractor class (available: stdin, csv, file)
|
@@ -97,4 +115,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
97
115
|
|
98
116
|
## Code of Conduct
|
99
117
|
|
100
|
-
Everyone interacting in the Chronicle::
|
118
|
+
Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
|
data/chronicle-etl.gemspec
CHANGED
@@ -5,7 +5,7 @@ require "chronicle/etl/version"
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "chronicle-etl"
|
8
|
-
spec.version = Chronicle::
|
8
|
+
spec.version = Chronicle::ETL::VERSION
|
9
9
|
spec.authors = ["Andrew Louis"]
|
10
10
|
spec.email = ["andrew@hyfen.net"]
|
11
11
|
|
@@ -45,4 +45,6 @@ Gem::Specification.new do |spec|
|
|
45
45
|
spec.add_development_dependency "rake", "~> 13.0"
|
46
46
|
spec.add_development_dependency "rspec", "~> 3.9"
|
47
47
|
spec.add_development_dependency "pry-byebug", "~> 3.9"
|
48
|
+
spec.add_development_dependency 'runcom', '~> 6.2'
|
49
|
+
spec.add_development_dependency 'redcarpet', '~> 3.5'
|
48
50
|
end
|
data/exe/chronicle-etl
CHANGED
data/lib/chronicle/etl.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require_relative 'etl/catalog'
|
2
|
+
require_relative 'etl/config'
|
2
3
|
require_relative 'etl/extractors/extractor'
|
3
|
-
require_relative 'etl/transformers/transformer'
|
4
4
|
require_relative 'etl/loaders/loader'
|
5
|
-
require_relative 'etl/utils/progress_bar'
|
6
5
|
require_relative 'etl/runner'
|
6
|
+
require_relative 'etl/transformers/transformer'
|
7
|
+
require_relative 'etl/utils/progress_bar'
|
8
|
+
require_relative 'etl/version'
|
@@ -1,30 +1,31 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module
|
2
|
+
module ETL
|
3
3
|
# Utility methods to catalogue which Extractor, Transformer, and
|
4
|
-
# Loader classes are available to chronicle-etl
|
4
|
+
# Loader connector classes are available to chronicle-etl
|
5
5
|
module Catalog
|
6
|
-
|
7
|
-
parent_klasses = [
|
8
|
-
Chronicle::Etl::Extractor,
|
9
|
-
Chronicle::Etl::Transformer,
|
10
|
-
Chronicle::Etl::Loader
|
11
|
-
]
|
6
|
+
PLUGINS = ['email', 'bash']
|
12
7
|
|
8
|
+
# Return which ETL connectors are available, both built in and externally-defined
|
9
|
+
def self.available_classes
|
13
10
|
# TODO: have a registry of plugins
|
14
|
-
plugins = ['email', 'bash']
|
15
11
|
|
16
12
|
# Attempt to load each chronicle plugin that we might know about so
|
17
13
|
# that we can later search for subclasses to build our list of
|
18
14
|
# available classes
|
19
|
-
|
15
|
+
PLUGINS.each do |plugin|
|
20
16
|
require "chronicle/#{plugin}"
|
21
17
|
rescue LoadError
|
22
18
|
# this will happen if the gem isn't available globally
|
23
19
|
end
|
24
20
|
|
21
|
+
parent_klasses = [
|
22
|
+
::Chronicle::ETL::Extractor,
|
23
|
+
::Chronicle::ETL::Transformer,
|
24
|
+
::Chronicle::ETL::Loader
|
25
|
+
]
|
25
26
|
klasses = []
|
26
|
-
parent_klasses.
|
27
|
-
klasses += ObjectSpace.each_object(Class).select { |klass| klass < parent }
|
27
|
+
parent_klasses.map do |parent|
|
28
|
+
klasses += ::ObjectSpace.each_object(::Class).select { |klass| klass < parent }
|
28
29
|
end
|
29
30
|
|
30
31
|
klasses.map do |klass|
|
@@ -37,21 +38,24 @@ module Chronicle
|
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
41
|
+
# Returns whether a class is an Extractor, Transformer, or Loader
|
40
42
|
def phase
|
41
43
|
ancestors = self.ancestors
|
42
|
-
return :extractor if ancestors.include? Chronicle::
|
43
|
-
return :transformer if ancestors.include? Chronicle::
|
44
|
-
return :loader if ancestors.include? Chronicle::
|
44
|
+
return :extractor if ancestors.include? Chronicle::ETL::Extractor
|
45
|
+
return :transformer if ancestors.include? Chronicle::ETL::Transformer
|
46
|
+
return :loader if ancestors.include? Chronicle::ETL::Loader
|
45
47
|
end
|
46
48
|
|
49
|
+
# Returns which third-party provider this connector is associated with
|
47
50
|
def provider
|
48
51
|
# TODO: needs better convention for a gem reporting its provider name
|
49
52
|
provider = to_s.split('::')[1].downcase
|
50
53
|
provider == 'etl' ? 'chronicle' : provider
|
51
54
|
end
|
52
55
|
|
56
|
+
# Returns whether this connector is a built-in one
|
53
57
|
def built_in?
|
54
|
-
to_s.include? 'Chronicle::
|
58
|
+
to_s.include? 'Chronicle::ETL'
|
55
59
|
end
|
56
60
|
end
|
57
61
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
module CLI
|
4
|
+
# CLI commands for working with ETL connectors
|
5
|
+
class Connectors < SubcommandBase
|
6
|
+
default_task 'list'
|
7
|
+
namespace :connectors
|
8
|
+
|
9
|
+
desc "install NAME", "Installs connector NAME"
|
10
|
+
def install
|
11
|
+
puts "Installing"
|
12
|
+
end
|
13
|
+
|
14
|
+
desc "list", "Lists available connectors"
|
15
|
+
# Display all available connectors that chronicle-etl has access to
|
16
|
+
def list
|
17
|
+
klasses = Chronicle::ETL::Catalog.available_classes
|
18
|
+
klasses = klasses.sort_by do |a|
|
19
|
+
[a[:built_in].to_s, a[:provider], a[:phase]]
|
20
|
+
end
|
21
|
+
|
22
|
+
headers = klasses.first.keys.map do |key|
|
23
|
+
key.to_s.upcase.bold
|
24
|
+
end
|
25
|
+
|
26
|
+
table = TTY::Table.new(headers, klasses.map(&:values))
|
27
|
+
puts table.render(indent: 0, padding: [0, 2])
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'pp'
|
2
|
+
require 'pry'
|
3
|
+
|
4
|
+
module Chronicle
|
5
|
+
module ETL
|
6
|
+
module CLI
|
7
|
+
# CLI commands for working with ETL jobs
|
8
|
+
class Jobs < SubcommandBase
|
9
|
+
default_task "start"
|
10
|
+
namespace :jobs
|
11
|
+
|
12
|
+
class_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
|
13
|
+
class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
14
|
+
class_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
|
15
|
+
class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
16
|
+
class_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
17
|
+
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
18
|
+
class_option :job, aliases: '-j', desc: 'Job configuration name (or filename)'
|
19
|
+
|
20
|
+
map run: :start # Thor doesn't like `run` as a command name
|
21
|
+
desc "run", "Start a job"
|
22
|
+
long_desc <<-LONG_DESC
|
23
|
+
This will run an ETL job. Each job needs three parts:
|
24
|
+
|
25
|
+
1. #{'Extractor'.underline}: pulls data from an external source. By default, this is stdout. Other common options including pulling data from an API or reading JSON from a file.
|
26
|
+
|
27
|
+
2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
|
28
|
+
|
29
|
+
3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout.
|
30
|
+
|
31
|
+
If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
|
32
|
+
LONG_DESC
|
33
|
+
# Run an ETL job
|
34
|
+
def start
|
35
|
+
runner_options = build_runner_options(options)
|
36
|
+
runner = Chronicle::ETL::Runner.new(runner_options)
|
37
|
+
runner.run!
|
38
|
+
end
|
39
|
+
|
40
|
+
desc "create", "Create a job"
|
41
|
+
# Create an ETL job
|
42
|
+
def create
|
43
|
+
runner_options = build_runner_options(options)
|
44
|
+
path = File.join('chronicle', 'etl', 'jobs', options[:job])
|
45
|
+
Chronicle::ETL::Config.write(path, runner_options)
|
46
|
+
end
|
47
|
+
|
48
|
+
desc "show", "Show details about a job"
|
49
|
+
# Show an ETL job
|
50
|
+
def show
|
51
|
+
runner_options = build_runner_options(options)
|
52
|
+
pp runner_options
|
53
|
+
end
|
54
|
+
|
55
|
+
desc "list", "List all available jobs"
|
56
|
+
# List available ETL jobs
|
57
|
+
def list
|
58
|
+
jobs = Chronicle::ETL::Config.jobs
|
59
|
+
|
60
|
+
job_details = jobs.map do |job|
|
61
|
+
r = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
|
62
|
+
|
63
|
+
extractor = r[:extractor][:name] if r[:extractor]
|
64
|
+
transformer = r[:transformer][:name] if r[:transformer]
|
65
|
+
loader = r[:loader][:name] if r[:loader]
|
66
|
+
|
67
|
+
[job, extractor, transformer, loader]
|
68
|
+
end
|
69
|
+
|
70
|
+
headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
|
71
|
+
|
72
|
+
table = TTY::Table.new(headers, job_details)
|
73
|
+
puts table.render(indent: 0, padding: [0, 2])
|
74
|
+
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
# Create runner options by reading config file and then overwriting with flag options
|
79
|
+
def build_runner_options options
|
80
|
+
flag_options = process_flag_options(options)
|
81
|
+
job_options = load_job(options[:job])
|
82
|
+
flag_options.merge(job_options)
|
83
|
+
end
|
84
|
+
|
85
|
+
def load_job job
|
86
|
+
yml_config = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
|
87
|
+
# FIXME: use a better trick to deeply symbolize keys
|
88
|
+
JSON.parse(yml_config.to_json, symbolize_names: true)
|
89
|
+
end
|
90
|
+
|
91
|
+
# Takes flag options and turns them into a runner config
|
92
|
+
def process_flag_options options
|
93
|
+
{
|
94
|
+
extractor: {
|
95
|
+
name: options[:extractor],
|
96
|
+
options: options[:'extractor-opts']
|
97
|
+
},
|
98
|
+
transformer: {
|
99
|
+
name: options[:transformer],
|
100
|
+
options: options[:'transformer-opts']
|
101
|
+
},
|
102
|
+
loader: {
|
103
|
+
name: options[:loader],
|
104
|
+
options: options[:'loader-opts']
|
105
|
+
}
|
106
|
+
}
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'chronicle/etl'
|
3
|
+
require 'colorize'
|
4
|
+
|
5
|
+
require 'chronicle/etl/cli/subcommand_base'
|
6
|
+
require 'chronicle/etl/cli/connectors'
|
7
|
+
require 'chronicle/etl/cli/jobs'
|
8
|
+
|
9
|
+
module Chronicle
|
10
|
+
module ETL
|
11
|
+
module CLI
|
12
|
+
# Main entrypoint for CLI app
|
13
|
+
class Main < Thor
|
14
|
+
class_option "verbose", type: :boolean, default: false
|
15
|
+
default_task "jobs"
|
16
|
+
|
17
|
+
desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
|
18
|
+
subcommand 'connectors', Connectors
|
19
|
+
|
20
|
+
desc 'jobs:COMMAND', 'Configure and run jobs', hide: true
|
21
|
+
subcommand 'jobs', Jobs
|
22
|
+
|
23
|
+
# Entrypoint for the CLI
|
24
|
+
def self.start(given_args = ARGV, config = {})
|
25
|
+
if given_args.none?
|
26
|
+
abort "No command entered or job specified. To see commands, run `chronicle-etl help`".red
|
27
|
+
end
|
28
|
+
|
29
|
+
# Takes a subcommand:command argument and splits it so Thor knows how to hand off to the subcommand class
|
30
|
+
if given_args.any? && given_args[0].include?(':')
|
31
|
+
commands = given_args.shift.split(':')
|
32
|
+
given_args = given_args.unshift(commands).flatten
|
33
|
+
end
|
34
|
+
|
35
|
+
super(given_args, config)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Displays help options for chronicle-etl
|
39
|
+
def help(meth = nil, subcommand = false)
|
40
|
+
if meth && !respond_to?(meth)
|
41
|
+
klass, task = Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
|
42
|
+
klass.start(['-h', task].compact, shell: shell)
|
43
|
+
else
|
44
|
+
shell.say "ABOUT".bold
|
45
|
+
shell.say " #{'chronicle-etl'.italic} is a utility tool for #{'extracting'.underline}, #{'transforming'.underline}, and #{'loading'.underline} personal data."
|
46
|
+
shell.say
|
47
|
+
shell.say "USAGE".bold
|
48
|
+
shell.say " $ chronicle-etl COMMAND"
|
49
|
+
shell.say
|
50
|
+
shell.say "EXAMPLES".bold
|
51
|
+
shell.say " Show available connectors:".italic.light_black
|
52
|
+
shell.say " $ chronicle-etl connectors:list"
|
53
|
+
shell.say
|
54
|
+
shell.say " Run a simple job:".italic.light_black
|
55
|
+
shell.say " $ chronicle-etl jobs:start --extractor stdin --transformer null --loader stdout"
|
56
|
+
shell.say
|
57
|
+
shell.say " Show full job options:".italic.light_black
|
58
|
+
shell.say " $ chronicle-etl jobs help start"
|
59
|
+
|
60
|
+
list = []
|
61
|
+
|
62
|
+
Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
|
63
|
+
list += thor_class.printable_tasks(false)
|
64
|
+
end
|
65
|
+
list.sort! { |a, b| a[0] <=> b[0] }
|
66
|
+
list.unshift ["help", "# This help menu"]
|
67
|
+
|
68
|
+
shell.say
|
69
|
+
shell.say 'ALL COMMANDS'.bold
|
70
|
+
shell.print_table(list, indent: 2, truncate: true)
|
71
|
+
shell.say
|
72
|
+
shell.say "VERSION".bold
|
73
|
+
shell.say " #{Chronicle::ETL::VERSION}"
|
74
|
+
shell.say
|
75
|
+
shell.say "FULL DOCUMENTATION".bold
|
76
|
+
shell.say " https://github.com/chronicle-app/chronicle-etl".blue
|
77
|
+
shell.say
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
module CLI
|
4
|
+
# Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
|
5
|
+
class SubcommandBase < Thor
|
6
|
+
# Print usage instructions for a subcommand
|
7
|
+
def self.help(shell, subcommand = false)
|
8
|
+
list = printable_commands(true, subcommand)
|
9
|
+
Thor::Util.thor_classes_in(self).each do |klass|
|
10
|
+
list += klass.printable_commands(false)
|
11
|
+
end
|
12
|
+
list.sort! { |a, b| a[0] <=> b[0] }
|
13
|
+
|
14
|
+
shell.say "COMMANDS".bold
|
15
|
+
shell.print_table(list, indent: 2, truncate: true)
|
16
|
+
shell.say
|
17
|
+
class_options_help(shell)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Show docs with command:subcommand pattern.
|
21
|
+
# For `help` command, don't use colon
|
22
|
+
def self.banner(command, namespace = nil, subcommand = false)
|
23
|
+
if command.name == 'help'
|
24
|
+
"#{subcommand_prefix} #{command.usage}"
|
25
|
+
else
|
26
|
+
"#{subcommand_prefix}:#{command.usage}"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# Use subcommand classname to derive display name for subcommand
|
31
|
+
def self.subcommand_prefix
|
32
|
+
self.name.gsub(%r{.*::}, '').gsub(%r{^[A-Z]}) { |match| match[0].downcase }.gsub(%r{[A-Z]}) { |match| "-#{match[0].downcase}" }
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'runcom'
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
# Utility methods to read, write, and access config files
|
6
|
+
module Config
|
7
|
+
# Loads a yml config file
|
8
|
+
def self.load(path)
|
9
|
+
config = Runcom::Config.new(path)
|
10
|
+
# FIXME: hack to deeply symbolize keys
|
11
|
+
JSON.parse(config.to_h.to_json, symbolize_names: true)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Writes a hash as a yml config file
|
15
|
+
def self.write(path, data)
|
16
|
+
config = Runcom::Config.new(path)
|
17
|
+
filename = config.all[0].to_s + '.yml'
|
18
|
+
File.open(filename, 'w') do |f|
|
19
|
+
f << data.to_yaml
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
|
24
|
+
def self.jobs
|
25
|
+
job_directory = Runcom::Config.new('chronicle/etl/jobs').current
|
26
|
+
Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
|
27
|
+
File.basename(filename, ".*")
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -1,20 +1,26 @@
|
|
1
1
|
require 'chronicle/etl'
|
2
2
|
|
3
3
|
module Chronicle
|
4
|
-
module
|
4
|
+
module ETL
|
5
|
+
# Abstract class representing an Extractor for an ETL job
|
5
6
|
class Extractor
|
6
|
-
extend Chronicle::
|
7
|
-
|
8
|
-
ETL_PHASE = :extractor
|
7
|
+
extend Chronicle::ETL::Catalog
|
9
8
|
|
9
|
+
# Construct a new instance of this extractor. Options are passed in from a Runner
|
10
|
+
# == Parameters:
|
11
|
+
# options::
|
12
|
+
# Options for configuring this Extractor
|
10
13
|
def initialize(options = {})
|
11
|
-
@options = options.transform_keys!(&:to_sym)
|
14
|
+
@options = options.transform_keys!(&:to_sym)
|
12
15
|
end
|
13
16
|
|
17
|
+
# Entrypoint for this Extractor. Called by a Runner. Expects a series of records to be yielded
|
14
18
|
def extract
|
15
19
|
raise NotImplementedError
|
16
20
|
end
|
17
21
|
|
22
|
+
# An optional method to calculate how many records there are to extract. Used primarily for
|
23
|
+
# building the progress bar
|
18
24
|
def results_count; end
|
19
25
|
end
|
20
26
|
end
|
@@ -1,18 +1,26 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module
|
2
|
+
module ETL
|
3
|
+
# Abstract class representing a Loader for an ETL job
|
3
4
|
class Loader
|
4
|
-
extend Chronicle::
|
5
|
+
extend Chronicle::ETL::Catalog
|
5
6
|
|
7
|
+
# Construct a new instance of this loader. Options are passed in from a Runner
|
8
|
+
# == Parameters:
|
9
|
+
# options::
|
10
|
+
# Options for configuring this Loader
|
6
11
|
def initialize(options = {})
|
7
12
|
@options = options
|
8
13
|
end
|
9
14
|
|
15
|
+
# Called once before processing records
|
10
16
|
def start; end
|
11
17
|
|
18
|
+
# Load a single record
|
12
19
|
def load
|
13
20
|
raise NotImplementedError
|
14
21
|
end
|
15
22
|
|
23
|
+
# Called once there are no more records to process
|
16
24
|
def finish; end
|
17
25
|
end
|
18
26
|
end
|
data/lib/chronicle/etl/runner.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class Chronicle::
|
1
|
+
class Chronicle::ETL::Runner
|
2
2
|
BUILTIN = {
|
3
3
|
extractor: ['stdin', 'json', 'csv', 'file'],
|
4
4
|
transformer: ['null'],
|
@@ -13,7 +13,7 @@ class Chronicle::Etl::Runner
|
|
13
13
|
|
14
14
|
def run!
|
15
15
|
total = @extractor.results_count
|
16
|
-
progress_bar = Chronicle::
|
16
|
+
progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
|
17
17
|
count = 0
|
18
18
|
|
19
19
|
@loader.start
|
@@ -40,7 +40,7 @@ class Chronicle::Etl::Runner
|
|
40
40
|
|
41
41
|
def load_etl_class(phase, x)
|
42
42
|
if BUILTIN[phase].include? x
|
43
|
-
klass_name = "Chronicle::
|
43
|
+
klass_name = "Chronicle::ETL::#{x.capitalize}#{phase.to_s.capitalize}"
|
44
44
|
else
|
45
45
|
# TODO: come up with syntax for specifying a particular extractor in a provider library
|
46
46
|
provider, name = x.split(":")
|
@@ -48,7 +48,7 @@ class Chronicle::Etl::Runner
|
|
48
48
|
begin
|
49
49
|
require "chronicle/#{provider}"
|
50
50
|
rescue LoadError => e
|
51
|
-
warn("Error loading #{phase} '#{provider}'")
|
51
|
+
warn("Error loading #{phase} '#{provider}'".red)
|
52
52
|
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{provider}`")
|
53
53
|
exit(false)
|
54
54
|
end
|
@@ -1,15 +1,31 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module
|
2
|
+
module ETL
|
3
|
+
# Abstract class representing a Transformer for an ETL job
|
3
4
|
class Transformer
|
4
|
-
extend Chronicle::
|
5
|
+
extend Chronicle::ETL::Catalog
|
5
6
|
|
7
|
+
# Construct a new instance of this transformer. Options are passed in from a Runner
|
8
|
+
# == Parameters:
|
9
|
+
# options::
|
10
|
+
# Options for configuring this Transformer
|
6
11
|
def initialize(options = {})
|
7
12
|
@options = options
|
8
13
|
end
|
9
14
|
|
15
|
+
# The main entrypoint for transforming a record. Called by a Runner on each extracted record
|
10
16
|
def transform data
|
11
17
|
raise NotImplementedError
|
12
18
|
end
|
19
|
+
|
20
|
+
# The domain or provider-specific id of the record this transformer is working on.
|
21
|
+
# Used for building a cursor so an extractor doesn't have to start from the beginning of a
|
22
|
+
# data source each time.
|
23
|
+
def id; end
|
24
|
+
|
25
|
+
# The domain or provider-specific timestamp of the record this transformer is working on.
|
26
|
+
# Used for building a cursor so an extractor doesn't have to start from the beginning of a
|
27
|
+
# data source each time.
|
28
|
+
def timestamp; end
|
13
29
|
end
|
14
30
|
end
|
15
31
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -122,6 +122,34 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '3.9'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: runcom
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '6.2'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '6.2'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: redcarpet
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '3.5'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '3.5'
|
125
153
|
description: Chronicle-ETL allows you to extract personal data from a variety of services,
|
126
154
|
transform it, and load it.
|
127
155
|
email:
|
@@ -133,8 +161,10 @@ extra_rdoc_files: []
|
|
133
161
|
files:
|
134
162
|
- ".gitignore"
|
135
163
|
- ".rspec"
|
164
|
+
- ".rubocop.yml"
|
136
165
|
- ".ruby-version"
|
137
166
|
- ".travis.yml"
|
167
|
+
- ".yardopts"
|
138
168
|
- CHANGELOG.md
|
139
169
|
- CODE_OF_CONDUCT.md
|
140
170
|
- Gemfile
|
@@ -148,7 +178,11 @@ files:
|
|
148
178
|
- exe/chronicle-etl
|
149
179
|
- lib/chronicle/etl.rb
|
150
180
|
- lib/chronicle/etl/catalog.rb
|
151
|
-
- lib/chronicle/etl/cli.rb
|
181
|
+
- lib/chronicle/etl/cli/connectors.rb
|
182
|
+
- lib/chronicle/etl/cli/jobs.rb
|
183
|
+
- lib/chronicle/etl/cli/main.rb
|
184
|
+
- lib/chronicle/etl/cli/subcommand_base.rb
|
185
|
+
- lib/chronicle/etl/config.rb
|
152
186
|
- lib/chronicle/etl/extractors/csv_extractor.rb
|
153
187
|
- lib/chronicle/etl/extractors/extractor.rb
|
154
188
|
- lib/chronicle/etl/extractors/file_extractor.rb
|
data/lib/chronicle/etl/cli.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
require 'thor'
|
2
|
-
require 'chronicle/etl'
|
3
|
-
require 'colorize'
|
4
|
-
|
5
|
-
module Chronicle
|
6
|
-
module Etl
|
7
|
-
class CLI < Thor
|
8
|
-
default_task :job
|
9
|
-
|
10
|
-
desc 'job', 'Runs an ETL job'
|
11
|
-
method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
|
12
|
-
method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
13
|
-
method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
|
14
|
-
method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
15
|
-
method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
16
|
-
method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
17
|
-
method_option :job, aliases: '-j', desc: 'Job configuration file'
|
18
|
-
|
19
|
-
def job
|
20
|
-
runner_options = {
|
21
|
-
extractor: {
|
22
|
-
name: options[:extractor],
|
23
|
-
options: options[:'extractor-opts']
|
24
|
-
},
|
25
|
-
transformer: {
|
26
|
-
name: options[:transformer],
|
27
|
-
options: options[:'transformer-opts']
|
28
|
-
},
|
29
|
-
loader: {
|
30
|
-
name: options[:loader],
|
31
|
-
options: options[:'loader-opts']
|
32
|
-
}
|
33
|
-
}
|
34
|
-
|
35
|
-
runner = Runner.new(runner_options)
|
36
|
-
runner.run!
|
37
|
-
end
|
38
|
-
|
39
|
-
# FIXME: namespace this differently
|
40
|
-
desc 'list', 'List all ETL classes'
|
41
|
-
def list
|
42
|
-
klasses = Chronicle::Etl::Catalog.available_classes
|
43
|
-
klasses = klasses.sort_by do |a|
|
44
|
-
[a[:built_in].to_s, a[:provider], a[:phase]]
|
45
|
-
end
|
46
|
-
|
47
|
-
headers = klasses.first.keys.map do |key|
|
48
|
-
key.to_s.capitalize.light_white
|
49
|
-
end
|
50
|
-
|
51
|
-
table = TTY::Table.new(headers, klasses.map(&:values))
|
52
|
-
puts table.render(padding: [0, 2])
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|