chronicle-etl 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.yardopts +1 -0
- data/Gemfile.lock +9 -1
- data/README.md +28 -10
- data/chronicle-etl.gemspec +3 -1
- data/exe/chronicle-etl +2 -2
- data/lib/chronicle/etl.rb +4 -2
- data/lib/chronicle/etl/catalog.rb +20 -16
- data/lib/chronicle/etl/cli/connectors.rb +32 -0
- data/lib/chronicle/etl/cli/jobs.rb +111 -0
- data/lib/chronicle/etl/cli/main.rb +83 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
- data/lib/chronicle/etl/config.rb +32 -0
- data/lib/chronicle/etl/extractors/csv_extractor.rb +1 -1
- data/lib/chronicle/etl/extractors/extractor.rb +11 -5
- data/lib/chronicle/etl/extractors/file_extractor.rb +2 -2
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +2 -2
- data/lib/chronicle/etl/loaders/csv_loader.rb +2 -2
- data/lib/chronicle/etl/loaders/loader.rb +10 -2
- data/lib/chronicle/etl/loaders/stdout_loader.rb +2 -2
- data/lib/chronicle/etl/loaders/table_loader.rb +2 -2
- data/lib/chronicle/etl/runner.rb +4 -4
- data/lib/chronicle/etl/transformers/json_transformer.rb +2 -2
- data/lib/chronicle/etl/transformers/null_transformer.rb +2 -2
- data/lib/chronicle/etl/transformers/transformer.rb +18 -2
- data/lib/chronicle/etl/utils/progress_bar.rb +1 -1
- data/lib/chronicle/etl/version.rb +2 -2
- metadata +37 -3
- data/lib/chronicle/etl/cli.rb +0 -56
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77abdc6f98e01300e0cf0ff4c8737fdc3bfe395754e9ddba1b6d3de86f2d6be8
|
4
|
+
data.tar.gz: 4a76565cfe9448b8ee7a6aa253a98923d7beccf02cc1ea7c8bacf3e7f7ab88ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc6aaafde633a7316e26a23984f6a6965977e7bba388d2bb09751d3ef8ba0ed6a1ff78e50816af3a9806d5b03720fc524a270e228be0cc2894298acbbc342155
|
7
|
+
data.tar.gz: 32dd999d57c307b9e57db3b877362b0e780f8a618d5c573a5295241ec47dee65472b1e98414e4cb0959d978f83cb7cfde1177c9fb1d11c54e5edf1ca5ba4419a
|
data/.rubocop.yml
ADDED
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--markup=markdown
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
chronicle-etl (0.1.4)
|
4
|
+
chronicle-etl (0.2.0)
|
5
5
|
colorize (~> 0.8.1)
|
6
6
|
thor (~> 0.20)
|
7
7
|
tty-progressbar (~> 0.17)
|
@@ -27,6 +27,8 @@ GEM
|
|
27
27
|
byebug (~> 11.0)
|
28
28
|
pry (~> 0.13.0)
|
29
29
|
rake (13.0.1)
|
30
|
+
redcarpet (3.5.0)
|
31
|
+
refinements (7.7.0)
|
30
32
|
rspec (3.9.0)
|
31
33
|
rspec-core (~> 3.9.0)
|
32
34
|
rspec-expectations (~> 3.9.0)
|
@@ -40,6 +42,9 @@ GEM
|
|
40
42
|
diff-lcs (>= 1.2.0, < 2.0)
|
41
43
|
rspec-support (~> 3.9.0)
|
42
44
|
rspec-support (3.9.3)
|
45
|
+
runcom (6.2.0)
|
46
|
+
refinements (~> 7.4)
|
47
|
+
xdg (~> 4.0)
|
43
48
|
strings (0.1.8)
|
44
49
|
strings-ansi (~> 0.1)
|
45
50
|
unicode-display_width (~> 1.5)
|
@@ -62,6 +67,7 @@ GEM
|
|
62
67
|
tty-screen (~> 0.7)
|
63
68
|
unicode-display_width (1.7.0)
|
64
69
|
unicode_utils (1.4.0)
|
70
|
+
xdg (4.2.0)
|
65
71
|
|
66
72
|
PLATFORMS
|
67
73
|
ruby
|
@@ -71,7 +77,9 @@ DEPENDENCIES
|
|
71
77
|
chronicle-etl!
|
72
78
|
pry-byebug (~> 3.9)
|
73
79
|
rake (~> 13.0)
|
80
|
+
redcarpet (~> 3.5)
|
74
81
|
rspec (~> 3.9)
|
82
|
+
runcom (~> 6.2)
|
75
83
|
|
76
84
|
BUNDLED WITH
|
77
85
|
2.1.4
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Chronicle::Etl
|
1
|
+
# Chronicle::ETL
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
|
4
4
|
|
@@ -16,12 +16,24 @@ $ gem install chronicle-etl
|
|
16
16
|
|
17
17
|
After installing the gem, `chronicle-etl` is available to run in your shell.
|
18
18
|
|
19
|
+
```bash
|
20
|
+
# read test.csv and display it as a table
|
21
|
+
$ chronicle-etl jobs:run --extractor csv --extractor-opts filename:test.csv --loader table
|
22
|
+
|
23
|
+
# Display help for the jobs:run command
|
24
|
+
$ chronicle-etl jobs help run
|
19
25
|
```
|
20
|
-
|
21
|
-
|
26
|
+
|
27
|
+
## Connectors
|
28
|
+
|
29
|
+
Connectors are available to read, process, and load data from different formats or external services.
|
30
|
+
|
31
|
+
```bash
|
32
|
+
# List all available connectors
|
33
|
+
$ chronicle-etl connectors:list
|
22
34
|
```
|
23
35
|
|
24
|
-
|
36
|
+
Built in connectors:
|
25
37
|
|
26
38
|
### Extractors
|
27
39
|
- `stdin` - (default) Load records from line-separated stdin
|
@@ -54,17 +66,23 @@ I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.ne
|
|
54
66
|
```
|
55
67
|
$ chronicle-etl help
|
56
68
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
69
|
+
ALL COMMANDS
|
70
|
+
help # This help menu
|
71
|
+
connectors help [COMMAND] # Describe subcommands or one specific subcommand
|
72
|
+
connectors:install NAME # Installs connector NAME
|
73
|
+
connectors:list # Lists available connectors
|
74
|
+
jobs help [COMMAND] # Describe subcommands or one specific subcommand
|
75
|
+
jobs:create # Create a job
|
76
|
+
jobs:list # List all available jobs
|
77
|
+
jobs:run # Start a job
|
78
|
+
jobs:show # Show a job
|
61
79
|
```
|
62
80
|
|
63
81
|
### Job options
|
64
82
|
|
65
83
|
```
|
66
84
|
Usage:
|
67
|
-
chronicle-etl
|
85
|
+
chronicle-etl jobs:run
|
68
86
|
|
69
87
|
Options:
|
70
88
|
-e, [--extractor=extractor-name] # Extractor class (available: stdin, csv, file)
|
@@ -97,4 +115,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
97
115
|
|
98
116
|
## Code of Conduct
|
99
117
|
|
100
|
-
Everyone interacting in the Chronicle::
|
118
|
+
Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
|
data/chronicle-etl.gemspec
CHANGED
@@ -5,7 +5,7 @@ require "chronicle/etl/version"
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "chronicle-etl"
|
8
|
-
spec.version = Chronicle::Etl::VERSION
|
8
|
+
spec.version = Chronicle::ETL::VERSION
|
9
9
|
spec.authors = ["Andrew Louis"]
|
10
10
|
spec.email = ["andrew@hyfen.net"]
|
11
11
|
|
@@ -45,4 +45,6 @@ Gem::Specification.new do |spec|
|
|
45
45
|
spec.add_development_dependency "rake", "~> 13.0"
|
46
46
|
spec.add_development_dependency "rspec", "~> 3.9"
|
47
47
|
spec.add_development_dependency "pry-byebug", "~> 3.9"
|
48
|
+
spec.add_development_dependency 'runcom', '~> 6.2'
|
49
|
+
spec.add_development_dependency 'redcarpet', '~> 3.5'
|
48
50
|
end
|
data/exe/chronicle-etl
CHANGED
data/lib/chronicle/etl.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require_relative 'etl/catalog'
|
2
|
+
require_relative 'etl/config'
|
2
3
|
require_relative 'etl/extractors/extractor'
|
3
|
-
require_relative 'etl/transformers/transformer'
|
4
4
|
require_relative 'etl/loaders/loader'
|
5
|
-
require_relative 'etl/utils/progress_bar'
|
6
5
|
require_relative 'etl/runner'
|
6
|
+
require_relative 'etl/transformers/transformer'
|
7
|
+
require_relative 'etl/utils/progress_bar'
|
8
|
+
require_relative 'etl/version'
|
@@ -1,30 +1,31 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module Etl
|
2
|
+
module ETL
|
3
3
|
# Utility methods to catalogue which Extractor, Transformer, and
|
4
|
-
# Loader classes are available to chronicle-etl
|
4
|
+
# Loader connector classes are available to chronicle-etl
|
5
5
|
module Catalog
|
6
|
-
|
7
|
-
parent_klasses = [
|
8
|
-
Chronicle::Etl::Extractor,
|
9
|
-
Chronicle::Etl::Transformer,
|
10
|
-
Chronicle::Etl::Loader
|
11
|
-
]
|
6
|
+
PLUGINS = ['email', 'bash']
|
12
7
|
|
8
|
+
# Return which ETL connectors are available, both built in and externally-defined
|
9
|
+
def self.available_classes
|
13
10
|
# TODO: have a registry of plugins
|
14
|
-
plugins = ['email', 'bash']
|
15
11
|
|
16
12
|
# Attempt to load each chronicle plugin that we might know about so
|
17
13
|
# that we can later search for subclasses to build our list of
|
18
14
|
# available classes
|
19
|
-
|
15
|
+
PLUGINS.each do |plugin|
|
20
16
|
require "chronicle/#{plugin}"
|
21
17
|
rescue LoadError
|
22
18
|
# this will happen if the gem isn't available globally
|
23
19
|
end
|
24
20
|
|
21
|
+
parent_klasses = [
|
22
|
+
::Chronicle::ETL::Extractor,
|
23
|
+
::Chronicle::ETL::Transformer,
|
24
|
+
::Chronicle::ETL::Loader
|
25
|
+
]
|
25
26
|
klasses = []
|
26
|
-
parent_klasses.
|
27
|
-
klasses += ObjectSpace.each_object(Class).select { |klass| klass < parent }
|
27
|
+
parent_klasses.map do |parent|
|
28
|
+
klasses += ::ObjectSpace.each_object(::Class).select { |klass| klass < parent }
|
28
29
|
end
|
29
30
|
|
30
31
|
klasses.map do |klass|
|
@@ -37,21 +38,24 @@ module Chronicle
|
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
41
|
+
# Returns whether a class is an Extractor, Transformer, or Loader
|
40
42
|
def phase
|
41
43
|
ancestors = self.ancestors
|
42
|
-
return :extractor if ancestors.include? Chronicle::Etl::Extractor
|
43
|
-
return :transformer if ancestors.include? Chronicle::Etl::Transformer
|
44
|
-
return :loader if ancestors.include? Chronicle::Etl::Loader
|
44
|
+
return :extractor if ancestors.include? Chronicle::ETL::Extractor
|
45
|
+
return :transformer if ancestors.include? Chronicle::ETL::Transformer
|
46
|
+
return :loader if ancestors.include? Chronicle::ETL::Loader
|
45
47
|
end
|
46
48
|
|
49
|
+
# Returns which third-party provider this connector is associated with
|
47
50
|
def provider
|
48
51
|
# TODO: needs better convention for a gem reporting its provider name
|
49
52
|
provider = to_s.split('::')[1].downcase
|
50
53
|
provider == 'etl' ? 'chronicle' : provider
|
51
54
|
end
|
52
55
|
|
56
|
+
# Returns whether this connector is a built-in one
|
53
57
|
def built_in?
|
54
|
-
to_s.include? 'Chronicle::Etl'
|
58
|
+
to_s.include? 'Chronicle::ETL'
|
55
59
|
end
|
56
60
|
end
|
57
61
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
module CLI
|
4
|
+
# CLI commands for working with ETL connectors
|
5
|
+
class Connectors < SubcommandBase
|
6
|
+
default_task 'list'
|
7
|
+
namespace :connectors
|
8
|
+
|
9
|
+
desc "install NAME", "Installs connector NAME"
|
10
|
+
def install
|
11
|
+
puts "Installing"
|
12
|
+
end
|
13
|
+
|
14
|
+
desc "list", "Lists available connectors"
|
15
|
+
# Display all available connectors that chronicle-etl has access to
|
16
|
+
def list
|
17
|
+
klasses = Chronicle::ETL::Catalog.available_classes
|
18
|
+
klasses = klasses.sort_by do |a|
|
19
|
+
[a[:built_in].to_s, a[:provider], a[:phase]]
|
20
|
+
end
|
21
|
+
|
22
|
+
headers = klasses.first.keys.map do |key|
|
23
|
+
key.to_s.upcase.bold
|
24
|
+
end
|
25
|
+
|
26
|
+
table = TTY::Table.new(headers, klasses.map(&:values))
|
27
|
+
puts table.render(indent: 0, padding: [0, 2])
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'pp'
|
2
|
+
require 'pry'
|
3
|
+
|
4
|
+
module Chronicle
|
5
|
+
module ETL
|
6
|
+
module CLI
|
7
|
+
# CLI commands for working with ETL jobs
|
8
|
+
class Jobs < SubcommandBase
|
9
|
+
default_task "start"
|
10
|
+
namespace :jobs
|
11
|
+
|
12
|
+
class_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
|
13
|
+
class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
14
|
+
class_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
|
15
|
+
class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
16
|
+
class_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
17
|
+
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
18
|
+
class_option :job, aliases: '-j', desc: 'Job configuration name (or filename)'
|
19
|
+
|
20
|
+
map run: :start # Thor doesn't like `run` as a command name
|
21
|
+
desc "run", "Start a job"
|
22
|
+
long_desc <<-LONG_DESC
|
23
|
+
This will run an ETL job. Each job needs three parts:
|
24
|
+
|
25
|
+
1. #{'Extractor'.underline}: pulls data from an external source. By default, this is stdout. Other common options including pulling data from an API or reading JSON from a file.
|
26
|
+
|
27
|
+
2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
|
28
|
+
|
29
|
+
3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout.
|
30
|
+
|
31
|
+
If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
|
32
|
+
LONG_DESC
|
33
|
+
# Run an ETL job
|
34
|
+
def start
|
35
|
+
runner_options = build_runner_options(options)
|
36
|
+
runner = Chronicle::ETL::Runner.new(runner_options)
|
37
|
+
runner.run!
|
38
|
+
end
|
39
|
+
|
40
|
+
desc "create", "Create a job"
|
41
|
+
# Create an ETL job
|
42
|
+
def create
|
43
|
+
runner_options = build_runner_options(options)
|
44
|
+
path = File.join('chronicle', 'etl', 'jobs', options[:job])
|
45
|
+
Chronicle::ETL::Config.write(path, runner_options)
|
46
|
+
end
|
47
|
+
|
48
|
+
desc "show", "Show details about a job"
|
49
|
+
# Show an ETL job
|
50
|
+
def show
|
51
|
+
runner_options = build_runner_options(options)
|
52
|
+
pp runner_options
|
53
|
+
end
|
54
|
+
|
55
|
+
desc "list", "List all available jobs"
|
56
|
+
# List available ETL jobs
|
57
|
+
def list
|
58
|
+
jobs = Chronicle::ETL::Config.jobs
|
59
|
+
|
60
|
+
job_details = jobs.map do |job|
|
61
|
+
r = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
|
62
|
+
|
63
|
+
extractor = r[:extractor][:name] if r[:extractor]
|
64
|
+
transformer = r[:transformer][:name] if r[:transformer]
|
65
|
+
loader = r[:loader][:name] if r[:loader]
|
66
|
+
|
67
|
+
[job, extractor, transformer, loader]
|
68
|
+
end
|
69
|
+
|
70
|
+
headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
|
71
|
+
|
72
|
+
table = TTY::Table.new(headers, job_details)
|
73
|
+
puts table.render(indent: 0, padding: [0, 2])
|
74
|
+
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
# Create runner options by reading config file and then overwriting with flag options
|
79
|
+
def build_runner_options options
|
80
|
+
flag_options = process_flag_options(options)
|
81
|
+
job_options = load_job(options[:job])
|
82
|
+
flag_options.merge(job_options)
|
83
|
+
end
|
84
|
+
|
85
|
+
def load_job job
|
86
|
+
yml_config = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
|
87
|
+
# FIXME: use better trick to deeply symbolize keys
|
88
|
+
JSON.parse(yml_config.to_json, symbolize_names: true)
|
89
|
+
end
|
90
|
+
|
91
|
+
# Takes flag options and turns them into a runner config
|
92
|
+
def process_flag_options options
|
93
|
+
{
|
94
|
+
extractor: {
|
95
|
+
name: options[:extractor],
|
96
|
+
options: options[:'extractor-opts']
|
97
|
+
},
|
98
|
+
transformer: {
|
99
|
+
name: options[:transformer],
|
100
|
+
options: options[:'transformer-opts']
|
101
|
+
},
|
102
|
+
loader: {
|
103
|
+
name: options[:loader],
|
104
|
+
options: options[:'loader-opts']
|
105
|
+
}
|
106
|
+
}
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'chronicle/etl'
|
3
|
+
require 'colorize'
|
4
|
+
|
5
|
+
require 'chronicle/etl/cli/subcommand_base'
|
6
|
+
require 'chronicle/etl/cli/connectors'
|
7
|
+
require 'chronicle/etl/cli/jobs'
|
8
|
+
|
9
|
+
module Chronicle
|
10
|
+
module ETL
|
11
|
+
module CLI
|
12
|
+
# Main entrypoint for CLI app
|
13
|
+
class Main < Thor
|
14
|
+
class_option "verbose", type: :boolean, default: false
|
15
|
+
default_task "jobs"
|
16
|
+
|
17
|
+
desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
|
18
|
+
subcommand 'connectors', Connectors
|
19
|
+
|
20
|
+
desc 'jobs:COMMAND', 'Configure and run jobs', hide: true
|
21
|
+
subcommand 'jobs', Jobs
|
22
|
+
|
23
|
+
# Entrypoint for the CLI
|
24
|
+
def self.start(given_args = ARGV, config = {})
|
25
|
+
if given_args.none?
|
26
|
+
abort "No command entered or job specified. To see commands, run `chronicle-etl help`".red
|
27
|
+
end
|
28
|
+
|
29
|
+
# take a subcommand:command and splits them so Thor knows how to hand off to the subcommand class
|
30
|
+
if given_args.any? && given_args[0].include?(':')
|
31
|
+
commands = given_args.shift.split(':')
|
32
|
+
given_args = given_args.unshift(commands).flatten
|
33
|
+
end
|
34
|
+
|
35
|
+
super(given_args, config)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Displays help options for chronicle-etl
|
39
|
+
def help(meth = nil, subcommand = false)
|
40
|
+
if meth && !respond_to?(meth)
|
41
|
+
klass, task = Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
|
42
|
+
klass.start(['-h', task].compact, shell: shell)
|
43
|
+
else
|
44
|
+
shell.say "ABOUT".bold
|
45
|
+
shell.say " #{'chronicle-etl'.italic} is a utility tool for #{'extracting'.underline}, #{'transforming'.underline}, and #{'loading'.underline} personal data."
|
46
|
+
shell.say
|
47
|
+
shell.say "USAGE".bold
|
48
|
+
shell.say " $ chronicle-etl COMMAND"
|
49
|
+
shell.say
|
50
|
+
shell.say "EXAMPLES".bold
|
51
|
+
shell.say " Show available connectors:".italic.light_black
|
52
|
+
shell.say " $ chronicle-etl connectors:list"
|
53
|
+
shell.say
|
54
|
+
shell.say " Run a simple job:".italic.light_black
|
55
|
+
shell.say " $ chronicle-etl jobs:start --extractor stdin --transformer null --loader stdout"
|
56
|
+
shell.say
|
57
|
+
shell.say " Show full job options:".italic.light_black
|
58
|
+
shell.say " $ chronicle-etl jobs help start"
|
59
|
+
|
60
|
+
list = []
|
61
|
+
|
62
|
+
Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
|
63
|
+
list += thor_class.printable_tasks(false)
|
64
|
+
end
|
65
|
+
list.sort! { |a, b| a[0] <=> b[0] }
|
66
|
+
list.unshift ["help", "# This help menu"]
|
67
|
+
|
68
|
+
shell.say
|
69
|
+
shell.say 'ALL COMMANDS'.bold
|
70
|
+
shell.print_table(list, indent: 2, truncate: true)
|
71
|
+
shell.say
|
72
|
+
shell.say "VERSION".bold
|
73
|
+
shell.say " #{Chronicle::ETL::VERSION}"
|
74
|
+
shell.say
|
75
|
+
shell.say "FULL DOCUMENTATION".bold
|
76
|
+
shell.say " https://github.com/chronicle-app/chronicle-etl".blue
|
77
|
+
shell.say
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
module CLI
|
4
|
+
# Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
|
5
|
+
class SubcommandBase < Thor
|
6
|
+
# Print usage instructions for a subcommand
|
7
|
+
def self.help(shell, subcommand = false)
|
8
|
+
list = printable_commands(true, subcommand)
|
9
|
+
Thor::Util.thor_classes_in(self).each do |klass|
|
10
|
+
list += klass.printable_commands(false)
|
11
|
+
end
|
12
|
+
list.sort! { |a, b| a[0] <=> b[0] }
|
13
|
+
|
14
|
+
shell.say "COMMANDS".bold
|
15
|
+
shell.print_table(list, indent: 2, truncate: true)
|
16
|
+
shell.say
|
17
|
+
class_options_help(shell)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Show docs with command:subcommand pattern.
|
21
|
+
# For `help` command, don't use colon
|
22
|
+
def self.banner(command, namespace = nil, subcommand = false)
|
23
|
+
if command.name == 'help'
|
24
|
+
"#{subcommand_prefix} #{command.usage}"
|
25
|
+
else
|
26
|
+
"#{subcommand_prefix}:#{command.usage}"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# Use subcommand classname to derive display name for subcommand
|
31
|
+
def self.subcommand_prefix
|
32
|
+
self.name.gsub(%r{.*::}, '').gsub(%r{^[A-Z]}) { |match| match[0].downcase }.gsub(%r{[A-Z]}) { |match| "-#{match[0].downcase}" }
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'runcom'
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
# Utility methods to read, write, and access config files
|
6
|
+
module Config
|
7
|
+
# Loads a yml config file
|
8
|
+
def self.load(path)
|
9
|
+
config = Runcom::Config.new(path)
|
10
|
+
# FIXME: hack to deeply symbolize keys
|
11
|
+
JSON.parse(config.to_h.to_json, symbolize_names: true)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Writes a hash as a yml config file
|
15
|
+
def self.write(path, data)
|
16
|
+
config = Runcom::Config.new(path)
|
17
|
+
filename = config.all[0].to_s + '.yml'
|
18
|
+
File.open(filename, 'w') do |f|
|
19
|
+
f << data.to_yaml
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
|
24
|
+
def self.jobs
|
25
|
+
job_directory = Runcom::Config.new('chronicle/etl/jobs').current
|
26
|
+
Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
|
27
|
+
File.basename(filename, ".*")
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -1,20 +1,26 @@
|
|
1
1
|
require 'chronicle/etl'
|
2
2
|
|
3
3
|
module Chronicle
|
4
|
-
module Etl
|
4
|
+
module ETL
|
5
|
+
# Abstract class representing an Extractor for an ETL job
|
5
6
|
class Extractor
|
6
|
-
extend Chronicle::Etl::Catalog
|
7
|
-
|
8
|
-
ETL_PHASE = :extractor
|
7
|
+
extend Chronicle::ETL::Catalog
|
9
8
|
|
9
|
+
# Construct a new instance of this extractor. Options are passed in from a Runner
|
10
|
+
# == Parameters:
|
11
|
+
# options::
|
12
|
+
# Options for configuring this Extractor
|
10
13
|
def initialize(options = {})
|
11
|
-
@options = options.transform_keys!(&:to_sym)
|
14
|
+
@options = options.transform_keys!(&:to_sym)
|
12
15
|
end
|
13
16
|
|
17
|
+
# Entrypoint for this Extractor. Called by a Runner. Expects a series of records to be yielded
|
14
18
|
def extract
|
15
19
|
raise NotImplementedError
|
16
20
|
end
|
17
21
|
|
22
|
+
# An optional method to calculate how many records there are to extract. Used primarily for
|
23
|
+
# building the progress bar
|
18
24
|
def results_count; end
|
19
25
|
end
|
20
26
|
end
|
@@ -1,18 +1,26 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module Etl
|
2
|
+
module ETL
|
3
|
+
# Abstract class representing a Loader for an ETL job
|
3
4
|
class Loader
|
4
|
-
extend Chronicle::Etl::Catalog
|
5
|
+
extend Chronicle::ETL::Catalog
|
5
6
|
|
7
|
+
# Construct a new instance of this loader. Options are passed in from a Runner
|
8
|
+
# == Parameters:
|
9
|
+
# options::
|
10
|
+
# Options for configuring this Loader
|
6
11
|
def initialize(options = {})
|
7
12
|
@options = options
|
8
13
|
end
|
9
14
|
|
15
|
+
# Called once before processing records
|
10
16
|
def start; end
|
11
17
|
|
18
|
+
# Load a single record
|
12
19
|
def load
|
13
20
|
raise NotImplementedError
|
14
21
|
end
|
15
22
|
|
23
|
+
# Called once there are no more records to process
|
16
24
|
def finish; end
|
17
25
|
end
|
18
26
|
end
|
data/lib/chronicle/etl/runner.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class Chronicle::Etl::Runner
|
1
|
+
class Chronicle::ETL::Runner
|
2
2
|
BUILTIN = {
|
3
3
|
extractor: ['stdin', 'json', 'csv', 'file'],
|
4
4
|
transformer: ['null'],
|
@@ -13,7 +13,7 @@ class Chronicle::Etl::Runner
|
|
13
13
|
|
14
14
|
def run!
|
15
15
|
total = @extractor.results_count
|
16
|
-
progress_bar = Chronicle::
|
16
|
+
progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
|
17
17
|
count = 0
|
18
18
|
|
19
19
|
@loader.start
|
@@ -40,7 +40,7 @@ class Chronicle::Etl::Runner
|
|
40
40
|
|
41
41
|
def load_etl_class(phase, x)
|
42
42
|
if BUILTIN[phase].include? x
|
43
|
-
klass_name = "Chronicle::
|
43
|
+
klass_name = "Chronicle::ETL::#{x.capitalize}#{phase.to_s.capitalize}"
|
44
44
|
else
|
45
45
|
# TODO: come up with syntax for specifying a particular extractor in a provider library
|
46
46
|
provider, name = x.split(":")
|
@@ -48,7 +48,7 @@ class Chronicle::Etl::Runner
|
|
48
48
|
begin
|
49
49
|
require "chronicle/#{provider}"
|
50
50
|
rescue LoadError => e
|
51
|
-
warn("Error loading #{phase} '#{provider}'")
|
51
|
+
warn("Error loading #{phase} '#{provider}'".red)
|
52
52
|
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{provider}`")
|
53
53
|
exit(false)
|
54
54
|
end
|
@@ -1,15 +1,31 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module Etl
|
2
|
+
module ETL
|
3
|
+
# Abstract class representing a Transformer for an ETL job
|
3
4
|
class Transformer
|
4
|
-
extend Chronicle::Etl::Catalog
|
5
|
+
extend Chronicle::ETL::Catalog
|
5
6
|
|
7
|
+
# Construct a new instance of this transformer. Options are passed in from a Runner
|
8
|
+
# == Parameters:
|
9
|
+
# options::
|
10
|
+
# Options for configuring this Transformer
|
6
11
|
def initialize(options = {})
|
7
12
|
@options = options
|
8
13
|
end
|
9
14
|
|
15
|
+
# The main entrypoint for transforming a record. Called by a Runner on each extracted record
|
10
16
|
def transform data
|
11
17
|
raise NotImplementedError
|
12
18
|
end
|
19
|
+
|
20
|
+
# The domain or provider-specific id of the record this transformer is working on.
|
21
|
+
# Used for building a cursor so an extractor doesn't have to start from the beginning of a
|
22
|
+
# data source each time.
|
23
|
+
def id; end
|
24
|
+
|
25
|
+
# The domain or provider-specific timestamp of the record this transformer is working on.
|
26
|
+
# Used for building a cursor so an extractor doesn't have to start from the beginning of a
|
27
|
+
# data source each time.
|
28
|
+
def timestamp; end
|
13
29
|
end
|
14
30
|
end
|
15
31
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -122,6 +122,34 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '3.9'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: runcom
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '6.2'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '6.2'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: redcarpet
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '3.5'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '3.5'
|
125
153
|
description: Chronicle-ETL allows you to extract personal data from a variety of services,
|
126
154
|
transform it, and load it.
|
127
155
|
email:
|
@@ -133,8 +161,10 @@ extra_rdoc_files: []
|
|
133
161
|
files:
|
134
162
|
- ".gitignore"
|
135
163
|
- ".rspec"
|
164
|
+
- ".rubocop.yml"
|
136
165
|
- ".ruby-version"
|
137
166
|
- ".travis.yml"
|
167
|
+
- ".yardopts"
|
138
168
|
- CHANGELOG.md
|
139
169
|
- CODE_OF_CONDUCT.md
|
140
170
|
- Gemfile
|
@@ -148,7 +178,11 @@ files:
|
|
148
178
|
- exe/chronicle-etl
|
149
179
|
- lib/chronicle/etl.rb
|
150
180
|
- lib/chronicle/etl/catalog.rb
|
151
|
-
- lib/chronicle/etl/cli.rb
|
181
|
+
- lib/chronicle/etl/cli/connectors.rb
|
182
|
+
- lib/chronicle/etl/cli/jobs.rb
|
183
|
+
- lib/chronicle/etl/cli/main.rb
|
184
|
+
- lib/chronicle/etl/cli/subcommand_base.rb
|
185
|
+
- lib/chronicle/etl/config.rb
|
152
186
|
- lib/chronicle/etl/extractors/csv_extractor.rb
|
153
187
|
- lib/chronicle/etl/extractors/extractor.rb
|
154
188
|
- lib/chronicle/etl/extractors/file_extractor.rb
|
data/lib/chronicle/etl/cli.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
require 'thor'
|
2
|
-
require 'chronicle/etl'
|
3
|
-
require 'colorize'
|
4
|
-
|
5
|
-
module Chronicle
|
6
|
-
module Etl
|
7
|
-
class CLI < Thor
|
8
|
-
default_task :job
|
9
|
-
|
10
|
-
desc 'job', 'Runs an ETL job'
|
11
|
-
method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
|
12
|
-
method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
13
|
-
method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
|
14
|
-
method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
15
|
-
method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
16
|
-
method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
17
|
-
method_option :job, aliases: '-j', desc: 'Job configuration file'
|
18
|
-
|
19
|
-
def job
|
20
|
-
runner_options = {
|
21
|
-
extractor: {
|
22
|
-
name: options[:extractor],
|
23
|
-
options: options[:'extractor-opts']
|
24
|
-
},
|
25
|
-
transformer: {
|
26
|
-
name: options[:transformer],
|
27
|
-
options: options[:'transformer-opts']
|
28
|
-
},
|
29
|
-
loader: {
|
30
|
-
name: options[:loader],
|
31
|
-
options: options[:'loader-opts']
|
32
|
-
}
|
33
|
-
}
|
34
|
-
|
35
|
-
runner = Runner.new(runner_options)
|
36
|
-
runner.run!
|
37
|
-
end
|
38
|
-
|
39
|
-
# FIXME: namespace this differently
|
40
|
-
desc 'list', 'List all ETL classes'
|
41
|
-
def list
|
42
|
-
klasses = Chronicle::Etl::Catalog.available_classes
|
43
|
-
klasses = klasses.sort_by do |a|
|
44
|
-
[a[:built_in].to_s, a[:provider], a[:phase]]
|
45
|
-
end
|
46
|
-
|
47
|
-
headers = klasses.first.keys.map do |key|
|
48
|
-
key.to_s.capitalize.light_white
|
49
|
-
end
|
50
|
-
|
51
|
-
table = TTY::Table.new(headers, klasses.map(&:values))
|
52
|
-
puts table.render(padding: [0, 2])
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|