chronicle-etl 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -1
- data/chronicle-etl.gemspec +3 -0
- data/lib/chronicle/etl.rb +5 -1
- data/lib/chronicle/etl/catalog.rb +6 -0
- data/lib/chronicle/etl/cli/jobs.rb +25 -19
- data/lib/chronicle/etl/config.rb +24 -3
- data/lib/chronicle/etl/extractors/extractor.rb +7 -0
- data/lib/chronicle/etl/job.rb +62 -0
- data/lib/chronicle/etl/job_definition.rb +51 -0
- data/lib/chronicle/etl/job_log.rb +79 -0
- data/lib/chronicle/etl/job_logger.rb +76 -0
- data/lib/chronicle/etl/runner.rb +12 -26
- data/lib/chronicle/etl/version.rb +1 -1
- metadata +48 -3
- data/CHANGELOG.md +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 12a38a335c05b1626b9f259318956480df3f96e447cc2b1a25b8a9c23d591e49
|
4
|
+
data.tar.gz: d8ed027154403e68e5684213b0d0f58218a23dc2f667a882dcd3b2e8ab0c69b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 396863ed665137905cfa9fe51ee925776e0a0f616721658a889b9b587dda83b9cd1e0fa2a483b08fc65ec70797f07facec082c1c88403aa8d61e1ce4ae791779
|
7
|
+
data.tar.gz: 705d626f45c816494949d6bc5c4f83cc4a8cd1c527aef72911bb90000e2151f758889c6080e9ac489f235df4169fbc158a7713dbf76d5e7ba5fdaf2a6ad51567
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
chronicle-etl (0.2.
|
4
|
+
chronicle-etl (0.2.3)
|
5
5
|
colorize (~> 0.8.1)
|
6
|
+
deep_merge (~> 1.2)
|
7
|
+
sequel (~> 5.35)
|
6
8
|
thor (~> 0.20)
|
7
9
|
tty-progressbar (~> 0.17)
|
8
10
|
tty-table (~> 0.11)
|
@@ -13,6 +15,7 @@ GEM
|
|
13
15
|
byebug (11.1.3)
|
14
16
|
coderay (1.1.3)
|
15
17
|
colorize (0.8.1)
|
18
|
+
deep_merge (1.2.1)
|
16
19
|
diff-lcs (1.4.4)
|
17
20
|
equatable (0.6.1)
|
18
21
|
method_source (1.0.0)
|
@@ -45,6 +48,8 @@ GEM
|
|
45
48
|
runcom (6.2.0)
|
46
49
|
refinements (~> 7.4)
|
47
50
|
xdg (~> 4.0)
|
51
|
+
sequel (5.36.0)
|
52
|
+
sqlite3 (1.4.2)
|
48
53
|
strings (0.1.8)
|
49
54
|
strings-ansi (~> 0.1)
|
50
55
|
unicode-display_width (~> 1.5)
|
@@ -80,6 +85,7 @@ DEPENDENCIES
|
|
80
85
|
redcarpet (~> 3.5)
|
81
86
|
rspec (~> 3.9)
|
82
87
|
runcom (~> 6.2)
|
88
|
+
sqlite3 (~> 1.4)
|
83
89
|
|
84
90
|
BUNDLED WITH
|
85
91
|
2.1.4
|
data/chronicle-etl.gemspec
CHANGED
@@ -40,6 +40,8 @@ Gem::Specification.new do |spec|
|
|
40
40
|
spec.add_dependency "colorize", "~> 0.8.1"
|
41
41
|
spec.add_dependency "tty-table", "~> 0.11"
|
42
42
|
spec.add_dependency "tty-progressbar", "~> 0.17"
|
43
|
+
spec.add_dependency 'sequel', '~> 5.35'
|
44
|
+
spec.add_dependency 'deep_merge', '~> 1.2'
|
43
45
|
|
44
46
|
spec.add_development_dependency "bundler", "~> 2.1"
|
45
47
|
spec.add_development_dependency "rake", "~> 13.0"
|
@@ -47,4 +49,5 @@ Gem::Specification.new do |spec|
|
|
47
49
|
spec.add_development_dependency "pry-byebug", "~> 3.9"
|
48
50
|
spec.add_development_dependency 'runcom', '~> 6.2'
|
49
51
|
spec.add_development_dependency 'redcarpet', '~> 3.5'
|
52
|
+
spec.add_development_dependency 'sqlite3', '~> 1.4'
|
50
53
|
end
|
data/lib/chronicle/etl.rb
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
require_relative 'etl/catalog'
|
2
2
|
require_relative 'etl/config'
|
3
|
+
require_relative 'etl/job_definition'
|
3
4
|
require_relative 'etl/exceptions'
|
4
5
|
require_relative 'etl/extractors/extractor'
|
6
|
+
require_relative 'etl/job_log'
|
7
|
+
require_relative 'etl/job_logger'
|
8
|
+
require_relative 'etl/job'
|
5
9
|
require_relative 'etl/loaders/loader'
|
6
10
|
require_relative 'etl/runner'
|
7
11
|
require_relative 'etl/transformers/transformer'
|
8
12
|
require_relative 'etl/utils/progress_bar'
|
9
|
-
require_relative 'etl/version'
|
13
|
+
require_relative 'etl/version'
|
@@ -3,6 +3,7 @@ module Chronicle
|
|
3
3
|
# Utility methods to catalogue which Extractor, Transformer, and
|
4
4
|
# Loader connector classes are available to chronicle-etl
|
5
5
|
module Catalog
|
6
|
+
PHASES = [:extractor, :transformer, :loader]
|
6
7
|
PLUGINS = ['email', 'bash']
|
7
8
|
BUILTIN = {
|
8
9
|
extractor: ['stdin', 'json', 'csv', 'file'],
|
@@ -43,6 +44,11 @@ module Chronicle
|
|
43
44
|
end
|
44
45
|
end
|
45
46
|
|
47
|
+
# Take a phase (e, t, or l) and an identifier and return the right class
|
48
|
+
def self.phase_and_identifier_to_klass(phase, identifier)
|
49
|
+
Chronicle::ETL::Catalog.identifier_to_klass(phase: phase, identifier: identifier)
|
50
|
+
end
|
51
|
+
|
46
52
|
# For a given connector identifier, return the class (either builtin, or from a
|
47
53
|
# external chronicle gem)
|
48
54
|
def self.identifier_to_klass(identifier:, phase:)
|
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'pp'
|
2
|
-
|
3
2
|
module Chronicle
|
4
3
|
module ETL
|
5
4
|
module CLI
|
@@ -14,7 +13,7 @@ module Chronicle
|
|
14
13
|
class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
15
14
|
class_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
|
16
15
|
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
17
|
-
class_option :
|
16
|
+
class_option :name, aliases: '-j', desc: 'Job configuration name'
|
18
17
|
|
19
18
|
map run: :start # Thor doesn't like `run` as a command name
|
20
19
|
desc "run", "Start a job"
|
@@ -31,30 +30,38 @@ module Chronicle
|
|
31
30
|
LONG_DESC
|
32
31
|
# Run an ETL job
|
33
32
|
def start
|
34
|
-
|
35
|
-
|
33
|
+
job_definition = build_job_definition(options)
|
34
|
+
job = Chronicle::ETL::Job.new(job_definition)
|
35
|
+
runner = Chronicle::ETL::Runner.new(job)
|
36
36
|
runner.run!
|
37
|
+
rescue Chronicle::ETL::ProviderNotAvailableError => e
|
38
|
+
warn(e.message.red)
|
39
|
+
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{e.provider}`")
|
40
|
+
exit(false)
|
41
|
+
rescue Chronicle::ETL::ConnectorNotAvailableError => e
|
42
|
+
warn(e.message.red)
|
43
|
+
exit(false)
|
37
44
|
end
|
38
45
|
|
39
46
|
desc "create", "Create a job"
|
40
47
|
# Create an ETL job
|
41
48
|
def create
|
42
|
-
|
43
|
-
path = File.join('chronicle', 'etl', 'jobs', options[:
|
44
|
-
Chronicle::ETL::Config.write(path,
|
49
|
+
job_definition = build_job_definition(options)
|
50
|
+
path = File.join('chronicle', 'etl', 'jobs', options[:name])
|
51
|
+
Chronicle::ETL::Config.write(path, job_definition)
|
45
52
|
end
|
46
53
|
|
47
54
|
desc "show", "Show details about a job"
|
48
55
|
# Show an ETL job
|
49
56
|
def show
|
50
|
-
|
51
|
-
pp
|
57
|
+
job_config = build_job_definition(options)
|
58
|
+
pp job_config
|
52
59
|
end
|
53
60
|
|
54
61
|
desc "list", "List all available jobs"
|
55
62
|
# List available ETL jobs
|
56
63
|
def list
|
57
|
-
jobs = Chronicle::ETL::Config.
|
64
|
+
jobs = Chronicle::ETL::Config.available_jobs
|
58
65
|
|
59
66
|
job_details = jobs.map do |job|
|
60
67
|
r = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
|
@@ -74,17 +81,16 @@ LONG_DESC
|
|
74
81
|
|
75
82
|
private
|
76
83
|
|
77
|
-
# Create
|
78
|
-
def
|
79
|
-
|
80
|
-
|
81
|
-
|
84
|
+
# Create job definition by reading config file and then overwriting with flag options
|
85
|
+
def build_job_definition(options)
|
86
|
+
definition = Chronicle::ETL::JobDefinition.new
|
87
|
+
definition.add_config(process_flag_options(options))
|
88
|
+
definition.add_config(load_job_config(options[:name]))
|
89
|
+
definition
|
82
90
|
end
|
83
91
|
|
84
|
-
def
|
85
|
-
|
86
|
-
# FIXME: use better trick to depely symbolize keys
|
87
|
-
JSON.parse(yml_config.to_json, symbolize_names: true)
|
92
|
+
def load_job_config name
|
93
|
+
Chronicle::ETL::Config.load_job_from_config(name)
|
88
94
|
end
|
89
95
|
|
90
96
|
# Takes flag options and turns them into a runner config
|
data/lib/chronicle/etl/config.rb
CHANGED
@@ -4,15 +4,17 @@ module Chronicle
|
|
4
4
|
module ETL
|
5
5
|
# Utility methods to read, write, and access config files
|
6
6
|
module Config
|
7
|
+
module_function
|
8
|
+
|
7
9
|
# Loads a yml config file
|
8
|
-
def
|
10
|
+
def load(path)
|
9
11
|
config = Runcom::Config.new(path)
|
10
12
|
# FIXME: hack to deeply symbolize keys
|
11
13
|
JSON.parse(config.to_h.to_json, symbolize_names: true)
|
12
14
|
end
|
13
15
|
|
14
16
|
# Writes a hash as a yml config file
|
15
|
-
def
|
17
|
+
def write(path, data)
|
16
18
|
config = Runcom::Config.new(path)
|
17
19
|
filename = config.all[0].to_s + '.yml'
|
18
20
|
File.open(filename, 'w') do |f|
|
@@ -21,12 +23,31 @@ module Chronicle
|
|
21
23
|
end
|
22
24
|
|
23
25
|
# Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
|
24
|
-
def
|
26
|
+
def available_jobs
|
25
27
|
job_directory = Runcom::Config.new('chronicle/etl/jobs').current
|
26
28
|
Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
|
27
29
|
File.basename(filename, ".*")
|
28
30
|
end
|
29
31
|
end
|
32
|
+
|
33
|
+
# Returns all credentials available in ~/.config/chronicle/etl/credentials/*.yml
|
34
|
+
def available_credentials
|
35
|
+
job_directory = Runcom::Config.new('chronicle/etl/credentials').current
|
36
|
+
Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
|
37
|
+
File.basename(filename, ".*")
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Load a job definition from job config directory
|
42
|
+
def load_job_from_config(job_name)
|
43
|
+
definition = self.load("chronicle/etl/jobs/#{job_name}.yml")
|
44
|
+
definition[:name] = job_name
|
45
|
+
definition
|
46
|
+
end
|
47
|
+
|
48
|
+
def load_credentials(name)
|
49
|
+
config = self.load("chronicle/etl/credentials/#{name}.yml")
|
50
|
+
end
|
30
51
|
end
|
31
52
|
end
|
32
53
|
end
|
@@ -12,6 +12,7 @@ module Chronicle
|
|
12
12
|
# Options for configuring this Extractor
|
13
13
|
def initialize(options = {})
|
14
14
|
@options = options.transform_keys!(&:to_sym)
|
15
|
+
handle_continuation
|
15
16
|
end
|
16
17
|
|
17
18
|
# Entrypoint for this Extractor. Called by a Runner. Expects a series of records to be yielded
|
@@ -22,6 +23,12 @@ module Chronicle
|
|
22
23
|
# An optional method to calculate how many records there are to extract. Used primarily for
|
23
24
|
# building the progress bar
|
24
25
|
def results_count; end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
def handle_continuation
|
30
|
+
@options[:load_since] = @options[:continuation].highest_timestamp if @options[:continuation] && @options[:continuation].highest_timestamp
|
31
|
+
end
|
25
32
|
end
|
26
33
|
end
|
27
34
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Chronicle
|
2
|
+
module ETL
|
3
|
+
class Job
|
4
|
+
attr_accessor :name,
|
5
|
+
:extractor_klass,
|
6
|
+
:extractor_options,
|
7
|
+
:transformer_klass,
|
8
|
+
:transformer_options,
|
9
|
+
:loader_klass,
|
10
|
+
:loader_options
|
11
|
+
|
12
|
+
# TODO: build a proper id system
|
13
|
+
alias id name
|
14
|
+
|
15
|
+
def initialize(definition)
|
16
|
+
definition = definition.definition # FIXME
|
17
|
+
@name = definition[:name]
|
18
|
+
@extractor_klass = load_klass(:extractor, definition[:extractor][:name])
|
19
|
+
@extractor_options = definition[:extractor][:options] || {}
|
20
|
+
|
21
|
+
@transformer_klass = load_klass(:transformer, definition[:transformer][:name])
|
22
|
+
@transformer_options = definition[:transformer][:options] || {}
|
23
|
+
|
24
|
+
@loader_klass = load_klass(:loader, definition[:loader][:name])
|
25
|
+
@loader_options = definition[:loader][:options] || {}
|
26
|
+
|
27
|
+
set_continuation
|
28
|
+
yield self if block_given?
|
29
|
+
end
|
30
|
+
|
31
|
+
def instantiate_extractor
|
32
|
+
instantiate_klass(:extractor)
|
33
|
+
end
|
34
|
+
|
35
|
+
def instantiate_transformer data
|
36
|
+
instantiate_klass(:transformer, data)
|
37
|
+
end
|
38
|
+
|
39
|
+
def instantiate_loader
|
40
|
+
instantiate_klass(:loader)
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
def instantiate_klass(phase, *args)
|
46
|
+
options = self.send("#{phase.to_s}_options")
|
47
|
+
args = args.unshift(options)
|
48
|
+
klass = self.send("#{phase.to_s}_klass")
|
49
|
+
klass.new(*args)
|
50
|
+
end
|
51
|
+
|
52
|
+
def load_klass phase, identifier
|
53
|
+
Chronicle::ETL::Catalog.phase_and_identifier_to_klass(phase, identifier)
|
54
|
+
end
|
55
|
+
|
56
|
+
def set_continuation
|
57
|
+
continuation = Chronicle::ETL::JobLogger.load_latest(@job_id)
|
58
|
+
@extractor_options[:continuation] = continuation
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'deep_merge'
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
class JobDefinition
|
6
|
+
SKELETON_DEFINITION = {
|
7
|
+
extractor: {
|
8
|
+
name: nil,
|
9
|
+
options: {}
|
10
|
+
},
|
11
|
+
transformer: {
|
12
|
+
name: nil,
|
13
|
+
options: {}
|
14
|
+
},
|
15
|
+
loader: {
|
16
|
+
name: nil,
|
17
|
+
options: {}
|
18
|
+
}
|
19
|
+
}.freeze
|
20
|
+
|
21
|
+
attr_accessor :definition
|
22
|
+
|
23
|
+
def initialize()
|
24
|
+
@definition = SKELETON_DEFINITION
|
25
|
+
end
|
26
|
+
|
27
|
+
# Add config hash to this definition
|
28
|
+
def add_config(config = {})
|
29
|
+
@definition = config.deep_merge(@definition)
|
30
|
+
load_credentials
|
31
|
+
validate
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def load_credentials
|
37
|
+
Chronicle::ETL::Catalog::PHASES.each do |phase|
|
38
|
+
credentials_name = @definition[phase][:options][:credentials]
|
39
|
+
if credentials_name
|
40
|
+
credentials = Chronicle::ETL::Config.load_credentials(credentials_name)
|
41
|
+
@definition[phase][:options].deep_merge(credentials)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def validate
|
47
|
+
return true # TODO
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'pry'
|
2
|
+
|
3
|
+
module Chronicle
|
4
|
+
module ETL
|
5
|
+
# A record of what happened in the running of a job. We're interested in
|
6
|
+
# tracking when it ran, if it was successful, and what the latest record
|
7
|
+
# we found is (to use as a cursor for the next time)
|
8
|
+
class JobLog
|
9
|
+
attr_accessor :job,
|
10
|
+
:job_id,
|
11
|
+
:last_id,
|
12
|
+
:highest_timestamp,
|
13
|
+
:num_records_processed,
|
14
|
+
:started_at,
|
15
|
+
:finished_at,
|
16
|
+
:success
|
17
|
+
|
18
|
+
# Create a new JobLog for a given Job
|
19
|
+
def initialize
|
20
|
+
@num_records_processed = 0
|
21
|
+
@success = false
|
22
|
+
yield self if block_given?
|
23
|
+
end
|
24
|
+
|
25
|
+
# Log the result of a single transformation in a job
|
26
|
+
# @param transformer [Chronicle::ETL::Transformer] The transformer that ran
|
27
|
+
def log_transformation(transformer)
|
28
|
+
@last_id = transformer.id if transformer.id
|
29
|
+
|
30
|
+
# Save the highest timestamp that we've encountered so far
|
31
|
+
@highest_timestamp = [transformer.timestamp, @highest_timestamp].compact.max if transformer.timestamp
|
32
|
+
|
33
|
+
# TODO: a transformer might yield nil. We might also want certain transformers to explode
|
34
|
+
# records into multiple new ones. Therefore, this variable will need more subtle behaviour
|
35
|
+
@num_records_processed += 1
|
36
|
+
end
|
37
|
+
|
38
|
+
# Indicate that a job has started
|
39
|
+
def start
|
40
|
+
@started_at = Time.now
|
41
|
+
end
|
42
|
+
|
43
|
+
# Indicate that a job has finished
|
44
|
+
def finish
|
45
|
+
@finished_at = Time.now
|
46
|
+
@success = true
|
47
|
+
end
|
48
|
+
|
49
|
+
def job= job
|
50
|
+
@job = job
|
51
|
+
@job_id = job.id
|
52
|
+
end
|
53
|
+
|
54
|
+
# Take a JobLog's instance variables and turn them into a hash representation
|
55
|
+
def serialize
|
56
|
+
{
|
57
|
+
job_id: @job_id,
|
58
|
+
last_id: @last_id,
|
59
|
+
highest_timestamp: @highest_timestamp,
|
60
|
+
num_records_processed: @num_records_processed,
|
61
|
+
started_at: @started_at,
|
62
|
+
finished_at: @finished_at,
|
63
|
+
success: @success
|
64
|
+
}
|
65
|
+
end
|
66
|
+
|
67
|
+
# Create a new JobLog and set its instance variables from a serialized hash
|
68
|
+
def self.build_from_serialized attrs
|
69
|
+
attrs.delete(:id)
|
70
|
+
new do |job_log|
|
71
|
+
attrs.each do |key, value|
|
72
|
+
setter = "#{key.to_s}=".to_sym
|
73
|
+
job_log.send(setter, value)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
require 'pry'
|
5
|
+
|
6
|
+
module Chronicle
|
7
|
+
module ETL
|
8
|
+
# Saves JobLogs to db and loads previous ones
|
9
|
+
class JobLogger
|
10
|
+
extend Forwardable
|
11
|
+
|
12
|
+
def_delegators :@job_log, :start, :finish, :log_transformation
|
13
|
+
|
14
|
+
# Create a new JobLogger
|
15
|
+
def initialize(job)
|
16
|
+
@job_log = JobLog.new do |job_log|
|
17
|
+
job_log.job = job
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# Save this JobLogger's JobLog to db
|
22
|
+
def save
|
23
|
+
JobLogger.with_db_connection do |db|
|
24
|
+
dataset = db[:job_logs]
|
25
|
+
dataset.insert(@job_log.serialize)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# For a given `job_id`, return the last successful log
|
30
|
+
def self.load_latest(job_id)
|
31
|
+
with_db_connection do |db|
|
32
|
+
attrs = db[:job_logs].reverse_order(:finished_at).where(success: true).first
|
33
|
+
JobLog.build_from_serialized(attrs) if attrs
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.with_db_connection
|
38
|
+
initialize_db unless db_exists?
|
39
|
+
Sequel.connect("sqlite://#{db_filename}") do |db|
|
40
|
+
initialize_schema(db) unless schema_exists?(db)
|
41
|
+
yield db
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.db_exists?
|
46
|
+
File.exists?(db_filename)
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.schema_exists?(db)
|
50
|
+
return db.tables.include? :job_logs
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.db_filename
|
54
|
+
data = Runcom::Data.new "chronicle/etl/job_log.db"
|
55
|
+
filename = data.all[0].to_s
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.initialize_db
|
59
|
+
FileUtils.mkdir_p(File.dirname(db_filename))
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.initialize_schema db
|
63
|
+
db.create_table :job_logs do
|
64
|
+
primary_key :id
|
65
|
+
String :job_id, null: false
|
66
|
+
String :last_id
|
67
|
+
Time :highest_timestamp
|
68
|
+
Integer :num_records_processed
|
69
|
+
boolean :success, default: false
|
70
|
+
Time :started_at
|
71
|
+
Time :finished_at
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
data/lib/chronicle/etl/runner.rb
CHANGED
@@ -1,46 +1,32 @@
|
|
1
1
|
require 'colorize'
|
2
2
|
|
3
3
|
class Chronicle::ETL::Runner
|
4
|
-
def initialize(
|
5
|
-
@
|
4
|
+
def initialize(job)
|
5
|
+
@job = job
|
6
|
+
@job_logger = Chronicle::ETL::JobLogger.new(@job)
|
6
7
|
end
|
7
8
|
|
8
9
|
def run!
|
9
|
-
extractor =
|
10
|
-
loader =
|
10
|
+
extractor = @job.instantiate_extractor
|
11
|
+
loader = @job.instantiate_loader
|
12
|
+
|
13
|
+
@job_logger.start
|
14
|
+
loader.start
|
11
15
|
|
12
16
|
total = extractor.results_count
|
13
17
|
progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
|
14
18
|
|
15
|
-
loader.start
|
16
|
-
|
17
19
|
extractor.extract do |data, metadata|
|
18
|
-
transformer =
|
20
|
+
transformer = @job.instantiate_transformer(data)
|
19
21
|
transformed_data = transformer.transform
|
20
|
-
|
22
|
+
@job_logger.log_transformation(transformer)
|
21
23
|
loader.load(transformed_data)
|
22
24
|
progress_bar.increment
|
23
25
|
end
|
24
26
|
|
25
27
|
progress_bar.finish
|
26
28
|
loader.finish
|
27
|
-
|
28
|
-
|
29
|
-
private
|
30
|
-
|
31
|
-
def instantiate_klass(phase, *args)
|
32
|
-
klass = load_etl_class(phase, @options[phase][:name])
|
33
|
-
klass.new(@options[phase][:options], *args)
|
34
|
-
end
|
35
|
-
|
36
|
-
def load_etl_class(phase, identifier)
|
37
|
-
Chronicle::ETL::Catalog.identifier_to_klass(phase: phase, identifier: identifier)
|
38
|
-
rescue Chronicle::ETL::ProviderNotAvailableError => e
|
39
|
-
warn(e.message.red)
|
40
|
-
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{e.provider}`")
|
41
|
-
exit(false)
|
42
|
-
rescue Chronicle::ETL::ConnectorNotAvailableError => e
|
43
|
-
warn(e.message.red)
|
44
|
-
exit(false)
|
29
|
+
@job_logger.finish
|
30
|
+
@job_logger.save
|
45
31
|
end
|
46
32
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -66,6 +66,34 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.17'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sequel
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '5.35'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '5.35'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: deep_merge
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.2'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.2'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: bundler
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,6 +178,20 @@ dependencies:
|
|
150
178
|
- - "~>"
|
151
179
|
- !ruby/object:Gem::Version
|
152
180
|
version: '3.5'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: sqlite3
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '1.4'
|
188
|
+
type: :development
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '1.4'
|
153
195
|
description: Chronicle-ETL allows you to extract personal data from a variety of services,
|
154
196
|
transformer it, and load it.
|
155
197
|
email:
|
@@ -165,7 +207,6 @@ files:
|
|
165
207
|
- ".ruby-version"
|
166
208
|
- ".travis.yml"
|
167
209
|
- ".yardopts"
|
168
|
-
- CHANGELOG.md
|
169
210
|
- CODE_OF_CONDUCT.md
|
170
211
|
- Gemfile
|
171
212
|
- Gemfile.lock
|
@@ -188,6 +229,10 @@ files:
|
|
188
229
|
- lib/chronicle/etl/extractors/extractor.rb
|
189
230
|
- lib/chronicle/etl/extractors/file_extractor.rb
|
190
231
|
- lib/chronicle/etl/extractors/stdin_extractor.rb
|
232
|
+
- lib/chronicle/etl/job.rb
|
233
|
+
- lib/chronicle/etl/job_definition.rb
|
234
|
+
- lib/chronicle/etl/job_log.rb
|
235
|
+
- lib/chronicle/etl/job_logger.rb
|
191
236
|
- lib/chronicle/etl/loaders/csv_loader.rb
|
192
237
|
- lib/chronicle/etl/loaders/loader.rb
|
193
238
|
- lib/chronicle/etl/loaders/rest_loader.rb
|
data/CHANGELOG.md
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# Changelog
|
2
|
-
|
3
|
-
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
4
|
-
|
5
|
-
## [0.1.4] - 2020-08-18
|
6
|
-
### Updated
|
7
|
-
- Better display of available ETL classes
|
8
|
-
- Updated documentation
|
9
|
-
|
10
|
-
## [0.1.3] - 2020-08-13
|
11
|
-
### Added
|
12
|
-
- Ability to list all available ETL classes
|
13
|
-
- Refactored E, T, L module and class structure
|
14
|
-
- Better progress bar
|
15
|
-
|
16
|
-
## [0.1.2] - 2020-08-02
|
17
|
-
### Added
|
18
|
-
- This changelog
|
19
|
-
- Ability to use extractors, transformers, and loaders from other gems
|
20
|
-
|
21
|
-
## [0.1.0] - 2020-08-01
|
22
|
-
### Added
|
23
|
-
- Basic job runner and ETL classes
|