chronicle-etl 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -1
- data/chronicle-etl.gemspec +3 -0
- data/lib/chronicle/etl.rb +5 -1
- data/lib/chronicle/etl/catalog.rb +6 -0
- data/lib/chronicle/etl/cli/jobs.rb +25 -19
- data/lib/chronicle/etl/config.rb +24 -3
- data/lib/chronicle/etl/extractors/extractor.rb +7 -0
- data/lib/chronicle/etl/job.rb +62 -0
- data/lib/chronicle/etl/job_definition.rb +51 -0
- data/lib/chronicle/etl/job_log.rb +79 -0
- data/lib/chronicle/etl/job_logger.rb +76 -0
- data/lib/chronicle/etl/runner.rb +12 -26
- data/lib/chronicle/etl/version.rb +1 -1
- metadata +48 -3
- data/CHANGELOG.md +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 12a38a335c05b1626b9f259318956480df3f96e447cc2b1a25b8a9c23d591e49
|
4
|
+
data.tar.gz: d8ed027154403e68e5684213b0d0f58218a23dc2f667a882dcd3b2e8ab0c69b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 396863ed665137905cfa9fe51ee925776e0a0f616721658a889b9b587dda83b9cd1e0fa2a483b08fc65ec70797f07facec082c1c88403aa8d61e1ce4ae791779
|
7
|
+
data.tar.gz: 705d626f45c816494949d6bc5c4f83cc4a8cd1c527aef72911bb90000e2151f758889c6080e9ac489f235df4169fbc158a7713dbf76d5e7ba5fdaf2a6ad51567
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
chronicle-etl (0.2.
|
4
|
+
chronicle-etl (0.2.3)
|
5
5
|
colorize (~> 0.8.1)
|
6
|
+
deep_merge (~> 1.2)
|
7
|
+
sequel (~> 5.35)
|
6
8
|
thor (~> 0.20)
|
7
9
|
tty-progressbar (~> 0.17)
|
8
10
|
tty-table (~> 0.11)
|
@@ -13,6 +15,7 @@ GEM
|
|
13
15
|
byebug (11.1.3)
|
14
16
|
coderay (1.1.3)
|
15
17
|
colorize (0.8.1)
|
18
|
+
deep_merge (1.2.1)
|
16
19
|
diff-lcs (1.4.4)
|
17
20
|
equatable (0.6.1)
|
18
21
|
method_source (1.0.0)
|
@@ -45,6 +48,8 @@ GEM
|
|
45
48
|
runcom (6.2.0)
|
46
49
|
refinements (~> 7.4)
|
47
50
|
xdg (~> 4.0)
|
51
|
+
sequel (5.36.0)
|
52
|
+
sqlite3 (1.4.2)
|
48
53
|
strings (0.1.8)
|
49
54
|
strings-ansi (~> 0.1)
|
50
55
|
unicode-display_width (~> 1.5)
|
@@ -80,6 +85,7 @@ DEPENDENCIES
|
|
80
85
|
redcarpet (~> 3.5)
|
81
86
|
rspec (~> 3.9)
|
82
87
|
runcom (~> 6.2)
|
88
|
+
sqlite3 (~> 1.4)
|
83
89
|
|
84
90
|
BUNDLED WITH
|
85
91
|
2.1.4
|
data/chronicle-etl.gemspec
CHANGED
@@ -40,6 +40,8 @@ Gem::Specification.new do |spec|
|
|
40
40
|
spec.add_dependency "colorize", "~> 0.8.1"
|
41
41
|
spec.add_dependency "tty-table", "~> 0.11"
|
42
42
|
spec.add_dependency "tty-progressbar", "~> 0.17"
|
43
|
+
spec.add_dependency 'sequel', '~> 5.35'
|
44
|
+
spec.add_dependency 'deep_merge', '~> 1.2'
|
43
45
|
|
44
46
|
spec.add_development_dependency "bundler", "~> 2.1"
|
45
47
|
spec.add_development_dependency "rake", "~> 13.0"
|
@@ -47,4 +49,5 @@ Gem::Specification.new do |spec|
|
|
47
49
|
spec.add_development_dependency "pry-byebug", "~> 3.9"
|
48
50
|
spec.add_development_dependency 'runcom', '~> 6.2'
|
49
51
|
spec.add_development_dependency 'redcarpet', '~> 3.5'
|
52
|
+
spec.add_development_dependency 'sqlite3', '~> 1.4'
|
50
53
|
end
|
data/lib/chronicle/etl.rb
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
require_relative 'etl/catalog'
|
2
2
|
require_relative 'etl/config'
|
3
|
+
require_relative 'etl/job_definition'
|
3
4
|
require_relative 'etl/exceptions'
|
4
5
|
require_relative 'etl/extractors/extractor'
|
6
|
+
require_relative 'etl/job_log'
|
7
|
+
require_relative 'etl/job_logger'
|
8
|
+
require_relative 'etl/job'
|
5
9
|
require_relative 'etl/loaders/loader'
|
6
10
|
require_relative 'etl/runner'
|
7
11
|
require_relative 'etl/transformers/transformer'
|
8
12
|
require_relative 'etl/utils/progress_bar'
|
9
|
-
require_relative 'etl/version'
|
13
|
+
require_relative 'etl/version'
|
@@ -3,6 +3,7 @@ module Chronicle
|
|
3
3
|
# Utility methods to catalogue which Extractor, Transformer, and
|
4
4
|
# Loader connector classes are available to chronicle-etl
|
5
5
|
module Catalog
|
6
|
+
PHASES = [:extractor, :transformer, :loader]
|
6
7
|
PLUGINS = ['email', 'bash']
|
7
8
|
BUILTIN = {
|
8
9
|
extractor: ['stdin', 'json', 'csv', 'file'],
|
@@ -43,6 +44,11 @@ module Chronicle
|
|
43
44
|
end
|
44
45
|
end
|
45
46
|
|
47
|
+
# Take a phase (e, t, or l) and an identifier and return the right class
|
48
|
+
def self.phase_and_identifier_to_klass(phase, identifier)
|
49
|
+
Chronicle::ETL::Catalog.identifier_to_klass(phase: phase, identifier: identifier)
|
50
|
+
end
|
51
|
+
|
46
52
|
# For a given connector identifier, return the class (either builtin, or from a
|
47
53
|
# external chronicle gem)
|
48
54
|
def self.identifier_to_klass(identifier:, phase:)
|
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'pp'
|
2
|
-
|
3
2
|
module Chronicle
|
4
3
|
module ETL
|
5
4
|
module CLI
|
@@ -14,7 +13,7 @@ module Chronicle
|
|
14
13
|
class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
15
14
|
class_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
16
15
|
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
17
|
-
class_option :
|
16
|
+
class_option :name, aliases: '-j', desc: 'Job configuration name'
|
18
17
|
|
19
18
|
map run: :start # Thor doesn't like `run` as a command name
|
20
19
|
desc "run", "Start a job"
|
@@ -31,30 +30,38 @@ module Chronicle
|
|
31
30
|
LONG_DESC
|
32
31
|
# Run an ETL job
|
33
32
|
def start
|
34
|
-
|
35
|
-
|
33
|
+
job_definition = build_job_definition(options)
|
34
|
+
job = Chronicle::ETL::Job.new(job_definition)
|
35
|
+
runner = Chronicle::ETL::Runner.new(job)
|
36
36
|
runner.run!
|
37
|
+
rescue Chronicle::ETL::ProviderNotAvailableError => e
|
38
|
+
warn(e.message.red)
|
39
|
+
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{e.provider}`")
|
40
|
+
exit(false)
|
41
|
+
rescue Chronicle::ETL::ConnectorNotAvailableError => e
|
42
|
+
warn(e.message.red)
|
43
|
+
exit(false)
|
37
44
|
end
|
38
45
|
|
39
46
|
desc "create", "Create a job"
|
40
47
|
# Create an ETL job
|
41
48
|
def create
|
42
|
-
|
43
|
-
path = File.join('chronicle', 'etl', 'jobs', options[:
|
44
|
-
Chronicle::ETL::Config.write(path,
|
49
|
+
job_definition = build_job_definition(options)
|
50
|
+
path = File.join('chronicle', 'etl', 'jobs', options[:name])
|
51
|
+
Chronicle::ETL::Config.write(path, job_definition)
|
45
52
|
end
|
46
53
|
|
47
54
|
desc "show", "Show details about a job"
|
48
55
|
# Show an ETL job
|
49
56
|
def show
|
50
|
-
|
51
|
-
pp
|
57
|
+
job_config = build_job_definition(options)
|
58
|
+
pp job_config
|
52
59
|
end
|
53
60
|
|
54
61
|
desc "list", "List all available jobs"
|
55
62
|
# List available ETL jobs
|
56
63
|
def list
|
57
|
-
jobs = Chronicle::ETL::Config.
|
64
|
+
jobs = Chronicle::ETL::Config.available_jobs
|
58
65
|
|
59
66
|
job_details = jobs.map do |job|
|
60
67
|
r = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
|
@@ -74,17 +81,16 @@ LONG_DESC
|
|
74
81
|
|
75
82
|
private
|
76
83
|
|
77
|
-
# Create
|
78
|
-
def
|
79
|
-
|
80
|
-
|
81
|
-
|
84
|
+
# Create job definition by reading config file and then overwriting with flag options
|
85
|
+
def build_job_definition(options)
|
86
|
+
definition = Chronicle::ETL::JobDefinition.new
|
87
|
+
definition.add_config(process_flag_options(options))
|
88
|
+
definition.add_config(load_job_config(options[:name]))
|
89
|
+
definition
|
82
90
|
end
|
83
91
|
|
84
|
-
def
|
85
|
-
|
86
|
-
# FIXME: use better trick to depely symbolize keys
|
87
|
-
JSON.parse(yml_config.to_json, symbolize_names: true)
|
92
|
+
def load_job_config name
|
93
|
+
Chronicle::ETL::Config.load_job_from_config(name)
|
88
94
|
end
|
89
95
|
|
90
96
|
# Takes flag options and turns them into a runner config
|
data/lib/chronicle/etl/config.rb
CHANGED
@@ -4,15 +4,17 @@ module Chronicle
|
|
4
4
|
module ETL
|
5
5
|
# Utility methods to read, write, and access config files
|
6
6
|
module Config
|
7
|
+
module_function
|
8
|
+
|
7
9
|
# Loads a yml config file
|
8
|
-
def
|
10
|
+
def load(path)
|
9
11
|
config = Runcom::Config.new(path)
|
10
12
|
# FIXME: hack to deeply symbolize keys
|
11
13
|
JSON.parse(config.to_h.to_json, symbolize_names: true)
|
12
14
|
end
|
13
15
|
|
14
16
|
# Writes a hash as a yml config file
|
15
|
-
def
|
17
|
+
def write(path, data)
|
16
18
|
config = Runcom::Config.new(path)
|
17
19
|
filename = config.all[0].to_s + '.yml'
|
18
20
|
File.open(filename, 'w') do |f|
|
@@ -21,12 +23,31 @@ module Chronicle
|
|
21
23
|
end
|
22
24
|
|
23
25
|
# Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
|
24
|
-
def
|
26
|
+
def available_jobs
|
25
27
|
job_directory = Runcom::Config.new('chronicle/etl/jobs').current
|
26
28
|
Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
|
27
29
|
File.basename(filename, ".*")
|
28
30
|
end
|
29
31
|
end
|
32
|
+
|
33
|
+
# Returns all credentials available in ~/.config/chronicle/etl/credentials/*.yml
|
34
|
+
def available_credentials
|
35
|
+
job_directory = Runcom::Config.new('chronicle/etl/credentials').current
|
36
|
+
Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
|
37
|
+
File.basename(filename, ".*")
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Load a job definition from job config directory
|
42
|
+
def load_job_from_config(job_name)
|
43
|
+
definition = self.load("chronicle/etl/jobs/#{job_name}.yml")
|
44
|
+
definition[:name] = job_name
|
45
|
+
definition
|
46
|
+
end
|
47
|
+
|
48
|
+
def load_credentials(name)
|
49
|
+
config = self.load("chronicle/etl/credentials/#{name}.yml")
|
50
|
+
end
|
30
51
|
end
|
31
52
|
end
|
32
53
|
end
|
@@ -12,6 +12,7 @@ module Chronicle
|
|
12
12
|
# Options for configuring this Extractor
|
13
13
|
def initialize(options = {})
|
14
14
|
@options = options.transform_keys!(&:to_sym)
|
15
|
+
handle_continuation
|
15
16
|
end
|
16
17
|
|
17
18
|
# Entrypoint for this Extractor. Called by a Runner. Expects a series of records to be yielded
|
@@ -22,6 +23,12 @@ module Chronicle
|
|
22
23
|
# An optional method to calculate how many records there are to extract. Used primarily for
|
23
24
|
# building the progress bar
|
24
25
|
def results_count; end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
def handle_continuation
|
30
|
+
@options[:load_since] = @options[:continuation].highest_timestamp if @options[:continuation] && @options[:continuation].highest_timestamp
|
31
|
+
end
|
25
32
|
end
|
26
33
|
end
|
27
34
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
class Job
|
4
|
+
attr_accessor :name,
|
5
|
+
:extractor_klass,
|
6
|
+
:extractor_options,
|
7
|
+
:transformer_klass,
|
8
|
+
:transformer_options,
|
9
|
+
:loader_klass,
|
10
|
+
:loader_options
|
11
|
+
|
12
|
+
# TODO: build a proper id system
|
13
|
+
alias id name
|
14
|
+
|
15
|
+
def initialize(definition)
|
16
|
+
definition = definition.definition # FIXME
|
17
|
+
@name = definition[:name]
|
18
|
+
@extractor_klass = load_klass(:extractor, definition[:extractor][:name])
|
19
|
+
@extractor_options = definition[:extractor][:options] || {}
|
20
|
+
|
21
|
+
@transformer_klass = load_klass(:transformer, definition[:transformer][:name])
|
22
|
+
@transformer_options = definition[:transformer][:options] || {}
|
23
|
+
|
24
|
+
@loader_klass = load_klass(:loader, definition[:loader][:name])
|
25
|
+
@loader_options = definition[:loader][:options] || {}
|
26
|
+
|
27
|
+
set_continuation
|
28
|
+
yield self if block_given?
|
29
|
+
end
|
30
|
+
|
31
|
+
def instantiate_extractor
|
32
|
+
instantiate_klass(:extractor)
|
33
|
+
end
|
34
|
+
|
35
|
+
def instantiate_transformer data
|
36
|
+
instantiate_klass(:transformer, data)
|
37
|
+
end
|
38
|
+
|
39
|
+
def instantiate_loader
|
40
|
+
instantiate_klass(:loader)
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
def instantiate_klass(phase, *args)
|
46
|
+
options = self.send("#{phase.to_s}_options")
|
47
|
+
args = args.unshift(options)
|
48
|
+
klass = self.send("#{phase.to_s}_klass")
|
49
|
+
klass.new(*args)
|
50
|
+
end
|
51
|
+
|
52
|
+
def load_klass phase, identifier
|
53
|
+
Chronicle::ETL::Catalog.phase_and_identifier_to_klass(phase, identifier)
|
54
|
+
end
|
55
|
+
|
56
|
+
def set_continuation
|
57
|
+
continuation = Chronicle::ETL::JobLogger.load_latest(@job_id)
|
58
|
+
@extractor_options[:continuation] = continuation
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'deep_merge'
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
class JobDefinition
|
6
|
+
SKELETON_DEFINITION = {
|
7
|
+
extractor: {
|
8
|
+
name: nil,
|
9
|
+
options: {}
|
10
|
+
},
|
11
|
+
transformer: {
|
12
|
+
name: nil,
|
13
|
+
options: {}
|
14
|
+
},
|
15
|
+
loader: {
|
16
|
+
name: nil,
|
17
|
+
options: {}
|
18
|
+
}
|
19
|
+
}.freeze
|
20
|
+
|
21
|
+
attr_accessor :definition
|
22
|
+
|
23
|
+
def initialize()
|
24
|
+
@definition = SKELETON_DEFINITION
|
25
|
+
end
|
26
|
+
|
27
|
+
# Add config hash to this definition
|
28
|
+
def add_config(config = {})
|
29
|
+
@definition = config.deep_merge(@definition)
|
30
|
+
load_credentials
|
31
|
+
validate
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def load_credentials
|
37
|
+
Chronicle::ETL::Catalog::PHASES.each do |phase|
|
38
|
+
credentials_name = @definition[phase][:options][:credentials]
|
39
|
+
if credentials_name
|
40
|
+
credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
|
41
|
+
@definition[phase][:options].deep_merge(credentials)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def validate
|
47
|
+
return true # TODO
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'pry'
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
# A record of what happened in the running of a job. We're interested in
|
6
|
+
# tracking when it ran, if it was successful, and what the latest record
|
7
|
+
# we found is (to use as a cursor for the next time)
|
8
|
+
class JobLog
|
9
|
+
attr_accessor :job,
|
10
|
+
:job_id,
|
11
|
+
:last_id,
|
12
|
+
:highest_timestamp,
|
13
|
+
:num_records_processed,
|
14
|
+
:started_at,
|
15
|
+
:finished_at,
|
16
|
+
:success
|
17
|
+
|
18
|
+
# Create a new JobLog for a given Job
|
19
|
+
def initialize
|
20
|
+
@num_records_processed = 0
|
21
|
+
@success = false
|
22
|
+
yield self if block_given?
|
23
|
+
end
|
24
|
+
|
25
|
+
# Log the result of a single transformation in a job
|
26
|
+
# @param transformer [Chronicle::ETL::Transformer] The transformer that ran
|
27
|
+
def log_transformation(transformer)
|
28
|
+
@last_id = transformer.id if transformer.id
|
29
|
+
|
30
|
+
# Save the highest timestamp that we've encountered so far
|
31
|
+
@highest_timestamp = [transformer.timestamp, @highest_timestamp].compact.max if transformer.timestamp
|
32
|
+
|
33
|
+
# TODO: a transformer might yield nil. We might also want certain transformers to explode
|
34
|
+
# records into multiple new ones. Therefore, this variable will need more subtle behaviour
|
35
|
+
@num_records_processed += 1
|
36
|
+
end
|
37
|
+
|
38
|
+
# Indicate that a job has started
|
39
|
+
def start
|
40
|
+
@started_at = Time.now
|
41
|
+
end
|
42
|
+
|
43
|
+
# Indicate that a job has finished
|
44
|
+
def finish
|
45
|
+
@finished_at = Time.now
|
46
|
+
@success = true
|
47
|
+
end
|
48
|
+
|
49
|
+
def job= job
|
50
|
+
@job = job
|
51
|
+
@job_id = job.id
|
52
|
+
end
|
53
|
+
|
54
|
+
# Take a JobLog's instance variables and turn them into a hash representation
|
55
|
+
def serialize
|
56
|
+
{
|
57
|
+
job_id: @job_id,
|
58
|
+
last_id: @last_id,
|
59
|
+
highest_timestamp: @highest_timestamp,
|
60
|
+
num_records_processed: @num_records_processed,
|
61
|
+
started_at: @started_at,
|
62
|
+
finished_at: @finished_at,
|
63
|
+
success: @success
|
64
|
+
}
|
65
|
+
end
|
66
|
+
|
67
|
+
# Create a new JobLog and set its instance variables from a serialized hash
|
68
|
+
def self.build_from_serialized attrs
|
69
|
+
attrs.delete(:id)
|
70
|
+
new do |job_log|
|
71
|
+
attrs.each do |key, value|
|
72
|
+
setter = "#{key.to_s}=".to_sym
|
73
|
+
job_log.send(setter, value)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
require 'pry'
|
5
|
+
|
6
|
+
module Chronicle
|
7
|
+
module ETL
|
8
|
+
# Saves JobLogs to db and loads previous ones
|
9
|
+
class JobLogger
|
10
|
+
extend Forwardable
|
11
|
+
|
12
|
+
def_delegators :@job_log, :start, :finish, :log_transformation
|
13
|
+
|
14
|
+
# Create a new JobLogger
|
15
|
+
def initialize(job)
|
16
|
+
@job_log = JobLog.new do |job_log|
|
17
|
+
job_log.job = job
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# Save this JobLogger's JobLog to db
|
22
|
+
def save
|
23
|
+
JobLogger.with_db_connection do |db|
|
24
|
+
dataset = db[:job_logs]
|
25
|
+
dataset.insert(@job_log.serialize)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# For a given `job_id`, return the last successful log
|
30
|
+
def self.load_latest(job_id)
|
31
|
+
with_db_connection do |db|
|
32
|
+
attrs = db[:job_logs].reverse_order(:finished_at).where(success: true).first
|
33
|
+
JobLog.build_from_serialized(attrs) if attrs
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.with_db_connection
|
38
|
+
initialize_db unless db_exists?
|
39
|
+
Sequel.connect("sqlite://#{db_filename}") do |db|
|
40
|
+
initialize_schema(db) unless schema_exists?(db)
|
41
|
+
yield db
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.db_exists?
|
46
|
+
File.exists?(db_filename)
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.schema_exists?(db)
|
50
|
+
return db.tables.include? :job_logs
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.db_filename
|
54
|
+
data = Runcom::Data.new "chronicle/etl/job_log.db"
|
55
|
+
filename = data.all[0].to_s
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.initialize_db
|
59
|
+
FileUtils.mkdir_p(File.dirname(db_filename))
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.initialize_schema db
|
63
|
+
db.create_table :job_logs do
|
64
|
+
primary_key :id
|
65
|
+
String :job_id, null: false
|
66
|
+
String :last_id
|
67
|
+
Time :highest_timestamp
|
68
|
+
Integer :num_records_processed
|
69
|
+
boolean :success, default: false
|
70
|
+
Time :started_at
|
71
|
+
Time :finished_at
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
data/lib/chronicle/etl/runner.rb
CHANGED
@@ -1,46 +1,32 @@
|
|
1
1
|
require 'colorize'
|
2
2
|
|
3
3
|
class Chronicle::ETL::Runner
|
4
|
-
def initialize(
|
5
|
-
@
|
4
|
+
def initialize(job)
|
5
|
+
@job = job
|
6
|
+
@job_logger = Chronicle::ETL::JobLogger.new(@job)
|
6
7
|
end
|
7
8
|
|
8
9
|
def run!
|
9
|
-
extractor =
|
10
|
-
loader =
|
10
|
+
extractor = @job.instantiate_extractor
|
11
|
+
loader = @job.instantiate_loader
|
12
|
+
|
13
|
+
@job_logger.start
|
14
|
+
loader.start
|
11
15
|
|
12
16
|
total = extractor.results_count
|
13
17
|
progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
|
14
18
|
|
15
|
-
loader.start
|
16
|
-
|
17
19
|
extractor.extract do |data, metadata|
|
18
|
-
transformer =
|
20
|
+
transformer = @job.instantiate_transformer(data)
|
19
21
|
transformed_data = transformer.transform
|
20
|
-
|
22
|
+
@job_logger.log_transformation(transformer)
|
21
23
|
loader.load(transformed_data)
|
22
24
|
progress_bar.increment
|
23
25
|
end
|
24
26
|
|
25
27
|
progress_bar.finish
|
26
28
|
loader.finish
|
27
|
-
|
28
|
-
|
29
|
-
private
|
30
|
-
|
31
|
-
def instantiate_klass(phase, *args)
|
32
|
-
klass = load_etl_class(phase, @options[phase][:name])
|
33
|
-
klass.new(@options[phase][:options], *args)
|
34
|
-
end
|
35
|
-
|
36
|
-
def load_etl_class(phase, identifier)
|
37
|
-
Chronicle::ETL::Catalog.identifier_to_klass(phase: phase, identifier: identifier)
|
38
|
-
rescue Chronicle::ETL::ProviderNotAvailableError => e
|
39
|
-
warn(e.message.red)
|
40
|
-
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{e.provider}`")
|
41
|
-
exit(false)
|
42
|
-
rescue Chronicle::ETL::ConnectorNotAvailableError => e
|
43
|
-
warn(e.message.red)
|
44
|
-
exit(false)
|
29
|
+
@job_logger.finish
|
30
|
+
@job_logger.save
|
45
31
|
end
|
46
32
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -66,6 +66,34 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.17'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sequel
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '5.35'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '5.35'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: deep_merge
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.2'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.2'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: bundler
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,6 +178,20 @@ dependencies:
|
|
150
178
|
- - "~>"
|
151
179
|
- !ruby/object:Gem::Version
|
152
180
|
version: '3.5'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: sqlite3
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '1.4'
|
188
|
+
type: :development
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '1.4'
|
153
195
|
description: Chronicle-ETL allows you to extract personal data from a variety of services,
|
154
196
|
transformer it, and load it.
|
155
197
|
email:
|
@@ -165,7 +207,6 @@ files:
|
|
165
207
|
- ".ruby-version"
|
166
208
|
- ".travis.yml"
|
167
209
|
- ".yardopts"
|
168
|
-
- CHANGELOG.md
|
169
210
|
- CODE_OF_CONDUCT.md
|
170
211
|
- Gemfile
|
171
212
|
- Gemfile.lock
|
@@ -188,6 +229,10 @@ files:
|
|
188
229
|
- lib/chronicle/etl/extractors/extractor.rb
|
189
230
|
- lib/chronicle/etl/extractors/file_extractor.rb
|
190
231
|
- lib/chronicle/etl/extractors/stdin_extractor.rb
|
232
|
+
- lib/chronicle/etl/job.rb
|
233
|
+
- lib/chronicle/etl/job_definition.rb
|
234
|
+
- lib/chronicle/etl/job_log.rb
|
235
|
+
- lib/chronicle/etl/job_logger.rb
|
191
236
|
- lib/chronicle/etl/loaders/csv_loader.rb
|
192
237
|
- lib/chronicle/etl/loaders/loader.rb
|
193
238
|
- lib/chronicle/etl/loaders/rest_loader.rb
|
data/CHANGELOG.md
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# Changelog
|
2
|
-
|
3
|
-
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
4
|
-
|
5
|
-
## [0.1.4] - 2020-08-18
|
6
|
-
### Updated
|
7
|
-
- Better display of available ETL classes
|
8
|
-
- Updated documentation
|
9
|
-
|
10
|
-
## [0.1.3] - 2020-08-13
|
11
|
-
### Added
|
12
|
-
- Ability to list all available ETL classes
|
13
|
-
- Refactored E, T, L module and class structure
|
14
|
-
- Better progress bar
|
15
|
-
|
16
|
-
## [0.1.2] - 2020-08-02
|
17
|
-
### Added
|
18
|
-
- This changelog
|
19
|
-
- Ability to use extractors, transformers, and loaders from other gems
|
20
|
-
|
21
|
-
## [0.1.0] - 2020-08-01
|
22
|
-
### Added
|
23
|
-
- Basic job runner and ETL classes
|