chronicle-etl 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.ruby-version +1 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +23 -0
- data/Gemfile.lock +42 -10
- data/README.md +64 -11
- data/bin/console +16 -4
- data/chronicle-etl.gemspec +9 -7
- data/exe/chronicle-etl +2 -2
- data/lib/chronicle/etl.rb +5 -2
- data/lib/chronicle/etl/catalog.rb +62 -0
- data/lib/chronicle/etl/cli/connectors.rb +32 -0
- data/lib/chronicle/etl/cli/jobs.rb +111 -0
- data/lib/chronicle/etl/cli/main.rb +83 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
- data/lib/chronicle/etl/config.rb +32 -0
- data/lib/chronicle/etl/extractors/{csv.rb → csv_extractor.rb} +3 -3
- data/lib/chronicle/etl/extractors/extractor.rb +23 -12
- data/lib/chronicle/etl/extractors/file_extractor.rb +52 -0
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +11 -0
- data/lib/chronicle/etl/loaders/csv_loader.rb +29 -0
- data/lib/chronicle/etl/loaders/loader.rb +23 -16
- data/lib/chronicle/etl/loaders/rest_loader.rb +30 -0
- data/lib/chronicle/etl/loaders/stdout_loader.rb +9 -0
- data/lib/chronicle/etl/loaders/table_loader.rb +21 -0
- data/lib/chronicle/etl/runner.rb +33 -11
- data/lib/chronicle/etl/transformers/json_transformer.rb +11 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +10 -0
- data/lib/chronicle/etl/transformers/transformer.rb +27 -11
- data/lib/chronicle/etl/utils/progress_bar.rb +76 -0
- data/lib/chronicle/etl/version.rb +2 -2
- metadata +69 -30
- data/lib/chronicle/etl/cli.rb +0 -38
- data/lib/chronicle/etl/extractors/stdin.rb +0 -13
- data/lib/chronicle/etl/loaders/csv.rb +0 -31
- data/lib/chronicle/etl/loaders/stdout.rb +0 -11
- data/lib/chronicle/etl/loaders/table.rb +0 -22
- data/lib/chronicle/etl/transformers/json.rb +0 -13
- data/lib/chronicle/etl/transformers/null.rb +0 -11
- data/lib/chronicle/etl/utils/progress_bar_wrapper.rb +0 -43
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'chronicle/etl'
|
3
|
+
require 'colorize'
|
4
|
+
|
5
|
+
require 'chronicle/etl/cli/subcommand_base'
|
6
|
+
require 'chronicle/etl/cli/connectors'
|
7
|
+
require 'chronicle/etl/cli/jobs'
|
8
|
+
|
9
|
+
module Chronicle
  module ETL
    module CLI
      # Main entrypoint for the CLI app. Registers the `connectors` and `jobs`
      # subcommands and renders the top-level help screen.
      class Main < Thor
        class_option "verbose", type: :boolean, default: false
        default_task "jobs"

        desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
        subcommand 'connectors', Connectors

        desc 'jobs:COMMAND', 'Configure and run jobs', hide: true
        subcommand 'jobs', Jobs

        # Entrypoint for the CLI.
        #
        # Aborts with a usage hint when no arguments are given. When the first
        # argument uses the `subcommand:command` form, it is split on ':' so
        # Thor knows how to hand off to the subcommand class.
        #
        # given_args:: argument list (defaults to ARGV); it is never modified
        # config::     passed through unchanged to Thor's `start`
        def self.start(given_args = ARGV, config = {})
          if given_args.none?
            abort "No command entered or job specified. To see commands, run `chronicle-etl help`".red
          end

          # Build a fresh argument list instead of shifting/unshifting the
          # caller's array, so ARGV is left exactly as the user typed it.
          first, *rest = given_args
          given_args = first.split(':') + rest if first.include?(':')

          super(given_args, config)
        end

        # Displays help options for chronicle-etl.
        #
        # When +meth+ names something this class does not respond to, the
        # request is forwarded as `-h` to the class Thor resolves for the
        # "meth:meth" namespace; otherwise the full overview is printed.
        # +subcommand+ is accepted but not used here.
        def help(meth = nil, subcommand = false)
          if meth && !respond_to?(meth)
            klass, task = Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
            klass.start(['-h', task].compact, shell: shell)
          else
            shell.say "ABOUT".bold
            shell.say " #{'chronicle-etl'.italic} is a utility tool for #{'extracting'.underline}, #{'transforming'.underline}, and #{'loading'.underline} personal data."
            shell.say
            shell.say "USAGE".bold
            shell.say " $ chronicle-etl COMMAND"
            shell.say
            shell.say "EXAMPLES".bold
            shell.say " Show available connectors:".italic.light_black
            shell.say " $ chronicle-etl connectors:list"
            shell.say
            shell.say " Run a simple job:".italic.light_black
            shell.say " $ chronicle-etl jobs:start --extractor stdin --transformer null --loader stdout"
            shell.say
            shell.say " Show full job options:".italic.light_black
            shell.say " $ chronicle-etl jobs help start"

            # Collect the printable tasks of every Thor class in the CLI
            # namespace, sorted by usage string, with `help` pinned on top.
            list = Thor::Util.thor_classes_in(Chronicle::ETL::CLI).flat_map do |thor_class|
              thor_class.printable_tasks(false)
            end
            list.sort! { |a, b| a[0] <=> b[0] }
            list.unshift ["help", "# This help menu"]

            shell.say
            shell.say 'ALL COMMANDS'.bold
            shell.print_table(list, indent: 2, truncate: true)
            shell.say
            shell.say "VERSION".bold
            shell.say " #{Chronicle::ETL::VERSION}"
            shell.say
            shell.say "FULL DOCUMENTATION".bold
            shell.say " https://github.com/chronicle-app/chronicle-etl".blue
            shell.say
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Chronicle
  module ETL
    module CLI
      # Shared parent for CLI subcommands. Replaces a few Thor class methods so
      # that usage text is rendered in the command:subcommand form.
      class SubcommandBase < Thor
        # Prints the sorted command table for this subcommand (plus any Thor
        # classes nested in it), followed by the class options help.
        def self.help(shell, subcommand = false)
          own_commands = printable_commands(true, subcommand)
          nested_commands = Thor::Util.thor_classes_in(self).flat_map do |klass|
            klass.printable_commands(false)
          end
          rows = (own_commands + nested_commands).sort { |a, b| a[0] <=> b[0] }

          shell.say "COMMANDS".bold
          shell.print_table(rows, indent: 2, truncate: true)
          shell.say
          class_options_help(shell)
        end

        # Builds the usage banner for a command. The `help` command is joined
        # to the prefix with a space; every other command with a colon.
        def self.banner(command, namespace = nil, subcommand = false)
          separator = command.name == 'help' ? ' ' : ':'
          "#{subcommand_prefix}#{separator}#{command.usage}"
        end

        # Derives the display name from the class name: drops the namespace,
        # lowercases a leading capital, and turns each remaining capital into
        # "-<lowercase>".
        def self.subcommand_prefix
          base_name = self.name.gsub(%r{.*::}, '')
          lower_first = base_name.gsub(%r{^[A-Z]}) { |match| match[0].downcase }
          lower_first.gsub(%r{[A-Z]}) { |match| "-#{match[0].downcase}" }
        end
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'json'
require 'runcom'
require 'yaml'

module Chronicle
  module ETL
    # Utility methods to read, write, and access config files.
    #
    # `json` and `yaml` are required explicitly because this module calls
    # JSON.parse / #to_json and #to_yaml itself and should not depend on some
    # other file having loaded them first.
    module Config
      # Loads a yml config file.
      #
      # path:: config path handed to Runcom::Config
      #
      # Returns the config as a hash whose keys are symbols at every level.
      def self.load(path)
        config = Runcom::Config.new(path)
        # FIXME: hack to deeply symbolize keys
        JSON.parse(config.to_h.to_json, symbolize_names: true)
      end

      # Writes a hash as a yml config file.
      #
      # path:: config path handed to Runcom::Config
      # data:: object serialized with #to_yaml
      #
      # The target is the first entry of Runcom's `all` list with '.yml'
      # appended.
      def self.write(path, data)
        config = Runcom::Config.new(path)
        filename = config.all[0].to_s + '.yml'
        File.open(filename, 'w') do |f|
          f << data.to_yaml
        end
      end

      # Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
      # as a list of file names without their extension.
      def self.jobs
        job_directory = Runcom::Config.new('chronicle/etl/jobs').current
        Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
          File.basename(filename, ".*")
        end
      end
    end
  end
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'csv'
|
2
|
-
class Chronicle::
|
2
|
+
class Chronicle::ETL::CsvExtractor < Chronicle::ETL::Extractor
|
3
3
|
DEFAULT_OPTIONS = {
|
4
4
|
headers: true,
|
5
5
|
filename: $stdin
|
@@ -18,7 +18,7 @@ class Chronicle::Etl::Extractors::Csv < Chronicle::Etl::Extractors::Extractor
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def results_count
|
21
|
-
CSV.read(@options[:filename],
|
21
|
+
CSV.read(@options[:filename], headers: @options[:headers]).count if read_from_file?
|
22
22
|
end
|
23
23
|
|
24
24
|
private
|
@@ -33,7 +33,7 @@ class Chronicle::Etl::Extractors::Csv < Chronicle::Etl::Extractors::Extractor
|
|
33
33
|
}
|
34
34
|
|
35
35
|
stream = read_from_file? ? File.open(@options[:filename]) : @options[:filename]
|
36
|
-
CSV.new(stream, csv_options)
|
36
|
+
CSV.new(stream, **csv_options)
|
37
37
|
end
|
38
38
|
|
39
39
|
def read_from_file?
|
@@ -1,20 +1,31 @@
|
|
1
|
+
require 'chronicle/etl'
|
2
|
+
|
1
3
|
module Chronicle
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
@options = options.transform_keys!(&:to_sym)
|
7
|
-
end
|
4
|
+
module ETL
|
5
|
+
# Abstract class representing an Extractor for an ETL job
|
6
|
+
class Extractor
|
7
|
+
extend Chronicle::ETL::Catalog
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
# Construct a new instance of this extractor. Options are passed in from a Runner
|
10
|
+
# == Parameters:
|
11
|
+
# options::
|
12
|
+
# Options for configuring this Extractor
|
13
|
+
def initialize(options = {})
|
14
|
+
@options = options.transform_keys!(&:to_sym)
|
15
|
+
end
|
12
16
|
|
13
|
-
|
17
|
+
# Entrypoint for this Extractor. Called by a Runner. Expects a series of records to be yielded
|
18
|
+
def extract
|
19
|
+
raise NotImplementedError
|
14
20
|
end
|
21
|
+
|
22
|
+
# An optional method to calculate how many records there are to extract. Used primarily for
|
23
|
+
# building the progress bar
|
24
|
+
def results_count; end
|
15
25
|
end
|
16
26
|
end
|
17
27
|
end
|
18
28
|
|
19
|
-
require_relative '
|
20
|
-
require_relative '
|
29
|
+
require_relative 'csv_extractor'
|
30
|
+
require_relative 'file_extractor'
|
31
|
+
require_relative 'stdin_extractor'
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Extractor that reads the path given in the `:filename` option. A regular
    # file is yielded as a single record; a directory is searched recursively
    # for `.eml` files and each one is yielded as its own record.
    class FileExtractor < Chronicle::ETL::Extractor
      # Yields `(contents, metadata)` for every file to extract, where
      # metadata is `{ filename: <path> }`. Yields nothing when the path is
      # neither a regular file nor a directory.
      def extract(&block)
        if file?
          extract_file(&block)
        elsif directory?
          extract_from_directory(&block)
        end
      end

      # Number of records `extract` is expected to yield: 1 for a regular
      # file, otherwise the number of `.eml` files found under the path.
      def results_count
        return 1 if file?

        Dir.glob(search_pattern).count
      end

      private

      # Glob pattern matching every `.eml` file below the configured path.
      def search_pattern
        File.join(@options[:filename], '**/*.eml')
      end

      # Yields each matching file under the directory. File.read is used so
      # no file handle is left open, and the metadata carries the path (not
      # an IO object), matching what extract_file reports.
      def extract_from_directory
        Dir.glob(search_pattern).each do |filename|
          yield(File.read(filename), { filename: filename })
        end
      end

      # Yields the contents of the single configured file.
      def extract_file
        yield(File.read(@options[:filename]), { filename: @options[:filename] })
      end

      def directory?
        Pathname.new(@options[:filename]).directory?
      end

      def file?
        Pathname.new(@options[:filename]).file?
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Loader that collects every record in memory and writes them all to
    # stdout as CSV once the job is finished.
    class CsvLoader < Chronicle::ETL::Loader
      def initialize(options = {})
        super(options)
        @rows = []
      end

      # Buffers one record. A Hash contributes its values as the row; anything
      # else is stored as given.
      def load(result)
        @rows << (result.is_a?(Hash) ? result.values : result)
      end

      # Emits all buffered rows to $stdout in the order they were loaded.
      def finish
        CSV($stdout) do |output|
          @rows.each { |row| output << row }
        end
      end
    end
  end
end
|
@@ -1,25 +1,32 @@
|
|
1
1
|
module Chronicle
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
@options = options
|
7
|
-
end
|
2
|
+
module ETL
|
3
|
+
# Abstract class representing a Loader for an ETL job
|
4
|
+
class Loader
|
5
|
+
extend Chronicle::ETL::Catalog
|
8
6
|
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
# Construct a new instance of this loader. Options are passed in from a Runner
|
8
|
+
# == Parameters:
|
9
|
+
# options::
|
10
|
+
# Options for configuring this Loader
|
11
|
+
def initialize(options = {})
|
12
|
+
@options = options
|
13
|
+
end
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
-
end
|
15
|
+
# Called once before processing records
|
16
|
+
def start; end
|
16
17
|
|
17
|
-
|
18
|
+
# Load a single record
|
19
|
+
def load
|
20
|
+
raise NotImplementedError
|
18
21
|
end
|
22
|
+
|
23
|
+
# Called once there are no more records to process
|
24
|
+
def finish; end
|
19
25
|
end
|
20
26
|
end
|
21
27
|
end
|
22
28
|
|
23
|
-
require_relative '
|
24
|
-
require_relative '
|
25
|
-
require_relative '
|
29
|
+
require_relative 'csv_loader'
|
30
|
+
require_relative 'rest_loader'
|
31
|
+
require_relative 'stdout_loader'
|
32
|
+
require_relative 'table_loader'
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module Chronicle
  module ETL
    # Loader that POSTs each record as JSON to a REST endpoint.
    #
    # Options read by this loader:
    # hostname::     scheme and host, e.g. "https://example.com"
    # endpoint::     path appended to the hostname
    # access_token:: sent as a Bearer token in the Authorization header
    class RestLoader < Chronicle::ETL::Loader
      def initialize(options={})
        super(options)
      end

      # Sends a single record to the configured endpoint.
      #
      # A Hash that already has a truthy `:data` entry is sent as-is; any
      # other record is wrapped as `{ data: record }`.
      #
      # Returns the HTTP response object; the status is not inspected here.
      def load(result)
        uri = URI.parse("#{@options[:hostname]}#{@options[:endpoint]}")

        headers = {
          'Authorization' => "Bearer #{@options[:access_token]}",
          'Content-Type' => 'application/json'
        }

        http = Net::HTTP.new(uri.host, uri.port)
        # Without this an https hostname would be spoken to in plain HTTP.
        http.use_ssl = uri.scheme == 'https'
        request = Net::HTTP::Post.new(uri.request_uri, headers)

        # Previously an already-wrapped record left the payload nil, so the
        # request body was the JSON literal `null`.
        already_wrapped = result.is_a?(Hash) && result[:data]
        payload = already_wrapped ? result : { data: result }
        request.body = payload.to_json

        http.request(request)
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'tty/table'
|
2
|
+
|
3
|
+
module Chronicle
  module ETL
    # Loader that collects records into a table and prints it as ASCII once
    # the job is finished.
    class TableLoader < Chronicle::ETL::Loader
      # options:: loader options; defaults to an empty hash like the other
      #           loaders so the class can be built without arguments
      def initialize(options = {})
        super(options)
      end

      # Adds one record as a table row. The header is taken from the keys of
      # the first record; each value is stringified and cut to 31 characters.
      def load(result)
        @table ||= TTY::Table.new(header: result.keys)
        values = result.values.map { |value| value.to_s[0..30] }
        @table << values
      end

      # Prints the table. Does nothing when no record was ever loaded, since
      # there is then no table to render.
      def finish
        return unless @table

        puts @table.render(:ascii, padding: [0, 1])
      end
    end
  end
end
|
data/lib/chronicle/etl/runner.rb
CHANGED
@@ -1,4 +1,10 @@
|
|
1
|
-
class Chronicle::
|
1
|
+
class Chronicle::ETL::Runner
|
2
|
+
BUILTIN = {
|
3
|
+
extractor: ['stdin', 'json', 'csv', 'file'],
|
4
|
+
transformer: ['null'],
|
5
|
+
loader: ['stdout', 'csv', 'table']
|
6
|
+
}.freeze
|
7
|
+
|
2
8
|
def initialize(options)
|
3
9
|
@options = options
|
4
10
|
|
@@ -6,16 +12,18 @@ class Chronicle::Etl::Runner
|
|
6
12
|
end
|
7
13
|
|
8
14
|
def run!
|
9
|
-
|
10
|
-
|
15
|
+
total = @extractor.results_count
|
16
|
+
progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
|
17
|
+
count = 0
|
11
18
|
|
12
|
-
@
|
13
|
-
@loader.first_load(result) if i == 0
|
19
|
+
@loader.start
|
14
20
|
|
15
|
-
|
21
|
+
@extractor.extract do |data, metadata|
|
22
|
+
transformed_data = @transformer.transform(data)
|
16
23
|
@loader.load(transformed_data)
|
17
24
|
|
18
25
|
progress_bar.increment
|
26
|
+
count += 1
|
19
27
|
end
|
20
28
|
|
21
29
|
progress_bar.finish
|
@@ -25,13 +33,27 @@ class Chronicle::Etl::Runner
|
|
25
33
|
private
|
26
34
|
|
27
35
|
def instantiate_etl_classes
|
28
|
-
@extractor =
|
29
|
-
@transformer =
|
30
|
-
@loader =
|
36
|
+
@extractor = load_etl_class(:extractor, @options[:extractor][:name]).new(@options[:extractor][:options])
|
37
|
+
@transformer = load_etl_class(:transformer, @options[:transformer][:name]).new(@options[:transformer][:options])
|
38
|
+
@loader = load_etl_class(:loader, @options[:loader][:name]).new(@options[:loader][:options])
|
31
39
|
end
|
32
40
|
|
33
|
-
def
|
34
|
-
|
41
|
+
def load_etl_class(phase, x)
|
42
|
+
if BUILTIN[phase].include? x
|
43
|
+
klass_name = "Chronicle::ETL::#{x.capitalize}#{phase.to_s.capitalize}"
|
44
|
+
else
|
45
|
+
# TODO: come up with syntax for specifying a particular extractor in a provider library
|
46
|
+
provider, name = x.split(":")
|
47
|
+
provider = x unless provider
|
48
|
+
begin
|
49
|
+
require "chronicle/#{provider}"
|
50
|
+
rescue LoadError => e
|
51
|
+
warn("Error loading #{phase} '#{provider}'".red)
|
52
|
+
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{provider}`")
|
53
|
+
exit(false)
|
54
|
+
end
|
55
|
+
klass_name = "Chronicle::#{provider.capitalize}::#{name&.capitalize}#{phase.capitalize}"
|
56
|
+
end
|
35
57
|
Object.const_get(klass_name)
|
36
58
|
end
|
37
59
|
end
|