ductr 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +14 -0
- data/.vscode/settings.json +18 -0
- data/COPYING +674 -0
- data/COPYING.LESSER +165 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +121 -0
- data/README.md +37 -0
- data/Rakefile +37 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ductr.gemspec +50 -0
- data/exe/ductr +24 -0
- data/lib/ductr/adapter.rb +94 -0
- data/lib/ductr/cli/default.rb +25 -0
- data/lib/ductr/cli/main.rb +60 -0
- data/lib/ductr/cli/new_project_generator.rb +72 -0
- data/lib/ductr/cli/templates/project/bin_ductr.rb +7 -0
- data/lib/ductr/cli/templates/project/config_app.rb +5 -0
- data/lib/ductr/cli/templates/project/config_development.yml +8 -0
- data/lib/ductr/cli/templates/project/config_environment_development.rb +18 -0
- data/lib/ductr/cli/templates/project/gemfile.rb +6 -0
- data/lib/ductr/cli/templates/project/rubocop.yml +14 -0
- data/lib/ductr/cli/templates/project/tool-versions +1 -0
- data/lib/ductr/configuration.rb +145 -0
- data/lib/ductr/etl/controls/buffered_destination.rb +65 -0
- data/lib/ductr/etl/controls/buffered_transform.rb +76 -0
- data/lib/ductr/etl/controls/control.rb +46 -0
- data/lib/ductr/etl/controls/destination.rb +28 -0
- data/lib/ductr/etl/controls/paginated_source.rb +47 -0
- data/lib/ductr/etl/controls/source.rb +21 -0
- data/lib/ductr/etl/controls/transform.rb +28 -0
- data/lib/ductr/etl/fiber_control.rb +136 -0
- data/lib/ductr/etl/fiber_runner.rb +68 -0
- data/lib/ductr/etl/kiba_runner.rb +26 -0
- data/lib/ductr/etl/parser.rb +115 -0
- data/lib/ductr/etl/runner.rb +37 -0
- data/lib/ductr/etl_job.rb +161 -0
- data/lib/ductr/job.rb +58 -0
- data/lib/ductr/job_etl_runner.rb +37 -0
- data/lib/ductr/job_status.rb +56 -0
- data/lib/ductr/kiba_job.rb +130 -0
- data/lib/ductr/log/formatters/color_formatter.rb +48 -0
- data/lib/ductr/log/logger.rb +169 -0
- data/lib/ductr/log/outputs/file_output.rb +30 -0
- data/lib/ductr/log/outputs/standard_output.rb +39 -0
- data/lib/ductr/pipeline.rb +133 -0
- data/lib/ductr/pipeline_runner.rb +95 -0
- data/lib/ductr/pipeline_step.rb +92 -0
- data/lib/ductr/registry.rb +55 -0
- data/lib/ductr/rufus_trigger.rb +106 -0
- data/lib/ductr/scheduler.rb +117 -0
- data/lib/ductr/store/job_serializer.rb +59 -0
- data/lib/ductr/store/job_store.rb +59 -0
- data/lib/ductr/store/pipeline_serializer.rb +106 -0
- data/lib/ductr/store/pipeline_store.rb +48 -0
- data/lib/ductr/store.rb +81 -0
- data/lib/ductr/trigger.rb +49 -0
- data/lib/ductr/version.rb +6 -0
- data/lib/ductr.rb +143 -0
- data/sig/ductr.rbs +1107 -0
- metadata +292 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
module ETL
|
5
|
+
#
|
6
|
+
# Glues ETL controls and the associated fibers together.
|
7
|
+
#
|
8
|
+
class FiberControl
  extend Forwardable

  #
  # @!method resume
  #   Resumes the control's fiber.
  #   @param [Object] row The row handed over to the fiber
  #   @return [void]
  def_delegators :fiber, :resume

  # @return [Array<FiberControl>] The fiber controls fed by this one
  attr_accessor :right
  # @return [Control] The wrapped ETL control instance
  attr_reader :control

  #
  # Wraps the given control; the fiber itself is only built on first access.
  #
  # @param [Control] control The ETL control to work with in the fiber
  # @param [Symbol] type The ETL control type, one of [:source, :transform, :destination]
  #
  def initialize(control, type:)
    @control = control
    @type = type

    @right = []
  end

  #
  # Builds the fiber matching the control type on first call, then reuses it.
  #
  # @return [Fiber] The fiber in charge of executing the control's logic
  #
  def fiber
    return @fiber if @fiber

    # The type doubles as the name of the private builder method below.
    @fiber = send(@type)
  end

  private

  #
  # Builds the fiber running an ETL source: every row is pushed to the
  # right fibers, followed by the :end marker once the source is exhausted.
  #
  # @return [Fiber]
  #
  def source
    Fiber.new do
      control.each { |row| resume_right_fibers(row) }

      resume_right_fibers(:end)
    end
  end

  #
  # Builds the fiber running an ETL transform. The fiber is resumed once
  # so it is already waiting for its first row when returned.
  #
  # @return [Fiber]
  #
  def transform
    worker = Fiber.new do
      loop do
        row_in = Fiber.yield

        if row_in == :end
          close_transform
        else
          # Rows yielded by the control are forwarded as they come,
          # the returned row (if any) is forwarded last.
          row_out = control.process(row_in) { |yielded| resume_right_fibers(yielded) }
          resume_right_fibers(row_out) if row_out
        end
      end
    end

    resume_control(worker)
  end

  #
  # Builds the fiber running an ETL destination. The fiber is resumed once
  # so it is already waiting for its first row when returned.
  #
  # @return [Fiber]
  #
  def destination
    worker = Fiber.new do
      loop do
        row = Fiber.yield

        if row == :end
          control.close
        else
          control.write(row)
        end
      end
    end

    resume_control(worker)
  end

  #
  # Calls #close on the control, forwards the rows it yields,
  # then sends the :end marker to the following fibers.
  #
  # @return [void]
  #
  def close_transform
    control.close { |row| resume_right_fibers(row) }

    resume_right_fibers(:end)
  end

  #
  # Resumes every fiber control placed at the right of the current one.
  #
  # @param [Object] row The row to pass to the next fibers
  #
  # @return [void]
  #
  def resume_right_fibers(row)
    right.each { |next_control| next_control.resume(row) }
  end

  #
  # Resumes the given fiber and returns it.
  #
  # @param [Fiber] fiber The fiber to resume
  #
  # @return [Fiber] The resumed fiber
  #
  def resume_control(fiber)
    fiber.tap(&:resume)
  end
end
|
135
|
+
end
|
136
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
module ETL
|
5
|
+
#
|
6
|
+
# A runner built with fibers. Compared to KibaRunner,
|
7
|
+
# this one allows defining how controls are related to each other.
|
8
|
+
# These definitions can be found in Runner#pipes method.
|
9
|
+
#
|
10
|
+
class FiberRunner < Runner
|
11
|
+
#
|
12
|
+
# Initializes fibers and waits for them to finish.
|
13
|
+
#
|
14
|
+
# @return [void]
|
15
|
+
#
|
16
|
+
#
# Builds the fibers, then starts the pipeline by resuming each source fiber in turn.
#
# @return [void]
#
def run
  create_fibers!

  @source_fibers.each_value { |source_fiber| source_fiber.resume }
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
#
|
24
|
+
# Initializes control fibers and pipes them together.
|
25
|
+
#
|
26
|
+
# @return [void]
|
27
|
+
#
|
28
|
+
#
# Wraps every source, transform and destination control into a fiber control,
# then pipes them together.
#
# @return [void]
#
def create_fibers!
  wrap = lambda do |controls, type|
    create_control_fibers(controls) { |control| FiberControl.new(control, type: type) }
  end

  @source_fibers = wrap.call(sources, :source)
  @transform_fibers = wrap.call(transforms, :transform)
  @destination_fibers = wrap.call(destinations, :destination)

  apply_fibers_plumbing!
end
|
35
|
+
|
36
|
+
#
|
37
|
+
# Pipes fiber controls together based on the control plumbing hash.
|
38
|
+
#
|
39
|
+
# @return [void]
|
40
|
+
#
|
41
|
+
#
# Pipes fiber controls together based on the control plumbing hashes.
# Each pipe maps the name of an emitting control (source or transform) to the
# names of the controls receiving its rows (transforms or destinations).
#
# @raise [KeyError] When a pipe references a control name that is not a known
#   emitter (left side) or receiver (right side)
#
# @return [void]
#
def apply_fibers_plumbing!
  # The lookup tables do not change while piping, so they are merged only once.
  emitters = @source_fibers.merge(@transform_fibers)
  receivers = @transform_fibers.merge(@destination_fibers)

  pipes.each do |from_to|
    # Only the first pair of each plumbing hash is used.
    from, to = from_to.first

    # #fetch fails right away on an unknown name instead of storing nil,
    # which would only blow up later when a row is pushed through the pipe.
    emitters.fetch(from).right = to.map { |name| receivers.fetch(name) }
  end
end
|
52
|
+
|
53
|
+
#
|
54
|
+
# Maps controls into a hash with job's method name as keys and control fibers as values.
|
55
|
+
#
|
56
|
+
# @param [Array<Control>] controls The controls to map on the hash
|
57
|
+
# @yield [control] The block in which the control fiber has to be initialized
|
58
|
+
#
|
59
|
+
# @return [Hash{Symbol => FiberControl}] The mapped hash
|
60
|
+
#
|
61
|
+
#
# Indexes the fiber controls built by the block by the name of the job method
# each control is bound to.
#
# @param [Array<Control>] controls The controls to map on the hash
# @yield [control] The block in which the control fiber has to be initialized
#
# @return [Hash{Symbol => FiberControl}] The mapped hash
#
def create_control_fibers(controls, &)
  controls.each_with_object({}) do |control, fibers|
    fibers[control.job_method.name] = yield(control)
  end
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "kiba"
|
4
|
+
|
5
|
+
module Ductr
|
6
|
+
module ETL
|
7
|
+
#
|
8
|
+
# A runner based on kiba's streaming runner
|
9
|
+
# @see Kiba's streaming runner source code to get details about its forwarded methods
|
10
|
+
#
|
11
|
+
class KibaRunner < Runner
  extend Forwardable

  # Methods forwarded to kiba's streaming runner.
  def_delegators Kiba::StreamingRunner, *%i[source_stream transform_stream process_rows close_destinations]

  #
  # Processes the rows through kiba's streaming runner, then closes the destinations,
  # like Kiba::StreamingRunner#run.
  #
  # @return [void]
  #
  def run
    process_rows(sources, transforms, destinations)

    close_destinations(destinations)
  end
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
module ETL
|
5
|
+
#
|
6
|
+
# Contains anything to "parse" ETL jobs annotations.
|
7
|
+
# #parse_annotations handles ETL controls and send_to directives.
|
8
|
+
#
|
9
|
+
module Parser
  #
  # Builds every control declared through the job's annotations along with
  # the send_to plumbing between them.
  # Used for both kiba and fiber runners initialization.
  #
  # @return [Array(Array<Source>, Array<Transform>, Array<Destination>, Array<Hash{Symbol => Array<Symbol>}>)]
  #   The source, transform and destination controls, followed by the plumbing hashes
  #
  def parse_annotations
    [
      init_adapter_controls(:source),
      init_transform_controls(:transform, :lookup),
      init_adapter_controls(:destination),
      find_method(:send_to) do |annotated|
        { annotated.name => annotated.find_annotation(:send_to).params }
      end
    ]
  end

  #
  # The set of adapters used by the controls initialized so far.
  #
  # @return [Set] The current adapters
  #
  def adapters
    @adapters = Set.new unless @adapters

    @adapters
  end

  private

  #
  # Finds the job class methods carrying one of the given annotations
  # and maps them through the block.
  #
  # @param [Array<Symbol>] *annotation_names The annotation names of the searched methods
  # @yield [method] The block to execute on each found method
  # @yieldparam method [Annotable::Method] A job's annotated method
  #
  # @return [Array] The values returned by the block, one per found method
  #
  def find_method(*annotation_names, &block)
    annotated = self.class.annotated_methods(*annotation_names)

    annotated.map(&block)
  end

  #
  # Initializes adapter controls for the given type.
  #
  # @param [Symbol] control_type The adapter control type, one of :source or :destination
  #
  # @return [Array<Source, Destination>] The initialized adapter controls
  #
  def init_adapter_controls(control_type)
    find_method(control_type) { |annotated| adapter_control(annotated) }
  end

  #
  # Initializes transform controls for the given types.
  # Lookups are adapter-backed, so they are built like adapter controls.
  #
  # @param [Array<Symbol>] *control_types The transform control types, :transform and/or :lookup
  #
  # @return [Array<Transform>] The initialized transform controls
  #
  def init_transform_controls(*control_types)
    find_method(*control_types) do |annotated|
      annotated.annotation_exist?(:lookup) ? adapter_control(annotated) : transform_control(annotated)
    end
  end

  #
  # Initializes an adapter control (source, lookup or destination) based on the given annotated method.
  # The adapter is recorded in #adapters once the control class and job method are resolved.
  #
  # @param [Annotable::Method] annotated_method The control's method
  #
  # @return [Control] The adapter control instance
  #
  def adapter_control(annotated_method)
    annotation = annotated_method.find_annotation(:source, :destination, :lookup)
    adapter_name, control_type = annotation.params

    adapter = Ductr.config.adapter(adapter_name)
    # The registry is picked from the annotation name, e.g. `source_registry`.
    registry = adapter.class.send("#{annotation.name}_registry")
    control_class = registry.find(control_type)
    job_method = method(annotated_method.name)

    adapters << adapter
    control_class.new(job_method, adapter, **annotation.options)
  end

  #
  # Initializes a transform control, falling back to Transform
  # when the annotation does not name a transform class.
  #
  # @param [Annotable::Method] annotated_method The transform's method
  #
  # @return [Transform] The transform control instance
  #
  def transform_control(annotated_method)
    annotation = annotated_method.find_annotation(:transform)
    job_method_class = annotation.params.first || Transform
    job_method = method(annotated_method.name)

    job_method_class.new(job_method, **annotation.options)
  end
end
|
114
|
+
end
|
115
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
module ETL
|
5
|
+
#
|
6
|
+
# The base class for all runners
|
7
|
+
#
|
8
|
+
class Runner
  # @!attribute sources
  #   @return [Array<Source>] The runner source controls
  # @!attribute transforms
  #   @return [Array<Transform>] The runner transform controls
  # @!attribute destinations
  #   @return [Array<Destination>] The runner destination controls
  # @!attribute pipes
  #   @return [Array<Hash{Symbol => Array<Symbol>}>] The controls plumbing hashes
  attr_accessor :sources, :transforms, :destinations, :pipes

  #
  # Stores the controls and plumbing the runner will work with.
  #
  # @param [Array<Source>] sources The job's source controls
  # @param [Array<Transform>] transforms The job's transform controls
  # @param [Array<Destination>] destinations The job's destination controls
  # @param [Array<Hash{Symbol => Array<Symbol>}>] pipes The controls plumbing hashes
  #
  def initialize(sources, transforms, destinations, pipes = [])
    @sources, @transforms, @destinations, @pipes = sources, transforms, destinations, pipes
  end
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,161 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
#
|
5
|
+
# Base class for ETL job using the experimental fiber runner.
|
6
|
+
# Usage example:
|
7
|
+
#
|
8
|
+
# class MyETLJob < Ductr::ETLJob
|
9
|
+
# source :first_db, :basic
|
10
|
+
# send_to :the_transform, :the_other_transform
|
11
|
+
# def the_source(db)
|
12
|
+
# # ...
|
13
|
+
# end
|
14
|
+
#
|
15
|
+
# transform
|
16
|
+
# send_to :the_destination
|
17
|
+
# def the_transform(row)
|
18
|
+
# # ...
|
19
|
+
# end
|
20
|
+
#
|
21
|
+
# destination :first_db, :basic
|
22
|
+
# def the_destination(row, db)
|
23
|
+
# # ...
|
24
|
+
# end
|
25
|
+
#
|
26
|
+
# transform
|
27
|
+
# send_to :the_other_destination
|
28
|
+
# def the_other_transform(row)
|
29
|
+
# # ...
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# destination :second_db, :basic
|
33
|
+
# def the_other_destination(row, db)
|
34
|
+
# # ...
|
35
|
+
# end
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
class ETLJob < Job
|
39
|
+
# @return [Class] The ETL runner class used by the job
|
40
|
+
ETL_RUNNER_CLASS = ETL::FiberRunner
|
41
|
+
include JobETLRunner
|
42
|
+
|
43
|
+
include ETL::Parser
|
44
|
+
|
45
|
+
#
|
46
|
+
# @!method self.source(adapter_name, source_type, **source_options)
|
47
|
+
# Annotation to define a source method
|
48
|
+
# @param adapter_name [Symbol] The adapter the source is running on
|
49
|
+
# @param source_type [Symbol] The type of source to run
|
50
|
+
# @param **source_options [Hash{Symbol => Object}] The options to pass to the source
|
51
|
+
#
|
52
|
+
# @example Source with Sequel SQLite adapter
|
53
|
+
# source :my_adapter, :paginated, page_size: 42
|
54
|
+
# def my_source(db, offset, limit)
|
55
|
+
# db[:items].offset(offset).limit(limit)
|
56
|
+
# end
|
57
|
+
#
|
58
|
+
# @see The chosen adapter documentation for further information on sources usage.
|
59
|
+
#
|
60
|
+
# @return [void]
|
61
|
+
#
|
62
|
+
annotable :source
|
63
|
+
|
64
|
+
#
|
65
|
+
# @!method self.transform(transform_class, **transform_options)
|
66
|
+
# Annotation to define a transform method
|
67
|
+
# @param transform_class [Class, nil] The class the transform is running on
|
68
|
+
# @param **transform_options [Hash{Symbol => Object}] The options to pass to the transform
|
69
|
+
#
|
70
|
+
# @example Transform without params
|
71
|
+
# transform
|
72
|
+
# def rename_keys(row)
|
73
|
+
# row[:new_name] = row.delete(:old_name)
|
74
|
+
# row[:new_email] = row.delete(:old_email)
|
75
|
+
# end
|
76
|
+
#
|
77
|
+
# @example Transform with params
|
78
|
+
# class RenameTransform < Ductr::ETL::Transform
|
79
|
+
# def process(row)
|
80
|
+
# call_method.each do |actual_name, new_name|
|
81
|
+
# new_key = "#{options[:prefix]}#{new_name}".to_sym
|
82
|
+
#
|
83
|
+
# row[new_key] = row.delete(actual_name)
|
84
|
+
# end
|
85
|
+
# end
|
86
|
+
# end
|
87
|
+
#
|
88
|
+
# transform RenameTransform, prefix: "some_"
|
89
|
+
# def rename
|
90
|
+
# { old_name: :new_name, old_email: :new_email }
|
91
|
+
# end
|
92
|
+
#
|
93
|
+
# @return [void]
|
94
|
+
#
|
95
|
+
annotable :transform
|
96
|
+
|
97
|
+
#
|
98
|
+
# @!method self.lookup(adapter_name, lookup_type, **lookup_options)
|
99
|
+
# Annotation to define a lookup method
|
100
|
+
# @param adapter_name [Symbol] The adapter the lookup is running on
|
101
|
+
# @param lookup_type [Symbol] The type of lookup to run
|
102
|
+
# @param **lookup_options [Hash{Symbol => Object}] The options to pass to the lookup
|
103
|
+
#
|
104
|
+
# @example Lookup with Sequel SQLite adapter
|
105
|
+
# lookup :my_other_adapter, :match, merge: [:id, :item], buffer_size: 4
|
106
|
+
# def joining_different_adapters(db, ids)
|
107
|
+
# db[:items_bis].select(:id, :item, :name).where(item: ids)
|
108
|
+
# end
|
109
|
+
#
|
110
|
+
# @see The chosen adapter documentation for further information on lookups usage.
|
111
|
+
#
|
112
|
+
# @return [void]
|
113
|
+
#
|
114
|
+
annotable :lookup
|
115
|
+
|
116
|
+
#
|
117
|
+
# @!method self.destination(adapter_name, destination_type, **destination_options)
|
118
|
+
# Annotation to define a destination method
|
119
|
+
# @param adapter_name [Symbol] The adapter the destination is running on
|
120
|
+
# @param destination_type [Symbol] The type of destination to run
|
121
|
+
# @param **destination_options [Hash{Symbol => Object}] The options to pass to the destination
|
122
|
+
#
|
123
|
+
# @example Destination with Sequel SQLite adapter
|
124
|
+
# destination :my_other_adapter, :basic
|
125
|
+
# def my_destination(row, db)
|
126
|
+
# db[:new_items].insert(name: row[:name], new_name: row[:new_name])
|
127
|
+
# end
|
128
|
+
#
|
129
|
+
# @see The chosen adapter documentation for further information on destinations usage.
|
130
|
+
#
|
131
|
+
# @return [void]
|
132
|
+
#
|
133
|
+
annotable :destination
|
134
|
+
|
135
|
+
#
|
136
|
+
# @!method self.send_to(*methods)
|
137
|
+
# Annotation to define which methods will follow the current one
|
138
|
+
# @param *methods [Array<Symbol>] The names of the following methods
|
139
|
+
#
|
140
|
+
# @example Source with Sequel SQLite adapter sending rows to two transforms
|
141
|
+
# source :my_adapter, :paginated, page_size: 42
|
142
|
+
# send_to :my_first_transform, :my_second_transform
|
143
|
+
# def my_source(db, offset, limit)
|
144
|
+
# db[:items].offset(offset).limit(limit)
|
145
|
+
# end
|
146
|
+
#
|
147
|
+
# transform
|
148
|
+
# def my_first_transform(row)
|
149
|
+
# # ...
|
150
|
+
# end
|
151
|
+
#
|
152
|
+
# transform
|
153
|
+
# def my_second_transform(row)
|
154
|
+
# # ...
|
155
|
+
# end
|
156
|
+
#
|
157
|
+
# @return [void]
|
158
|
+
#
|
159
|
+
annotable :send_to
|
160
|
+
end
|
161
|
+
end
|
data/lib/ductr/job.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
#
|
5
|
+
# The base class for any job, you can use it directly if you don't need an ETL job.
|
6
|
+
#
|
7
|
+
class Job < ActiveJob::Base
  extend Annotable
  extend Forwardable

  include JobStatus

  # @return [Exception] The error that occurred, if any
  attr_reader :error
  # @return [Symbol] The job's status, one of `:queued`, `:working`, `:completed` and `:failed`
  attr_reader :status

  queue_as :ductr_jobs

  #
  # The active job's perform method, it ignores its arguments and calls #run.
  # DO NOT override it, implement the #run method instead.
  #
  # @return [void]
  #
  def perform(*_) = run

  #
  # Looks up a configured adapter instance.
  #
  # @param [Symbol] name The adapter name
  #
  # @return [Adapter] The adapter corresponding to the given name
  #
  def adapter(name) = Ductr.config.adapter(name)

  #
  # The job's logger instance, created on first call from the configured logging class.
  #
  # @return [Ductr::Log::Logger] The logger instance
  #
  def logger
    return @logger if @logger

    @logger = Ductr.config.logging.new(self.class)
  end

  #
  # The entry point of jobs, to be implemented by subclasses.
  #
  # @raise [NotImplementedError] Always, unless overridden
  #
  # @return [void]
  #
  def run
    message = "A job must implement the `#run` method"

    raise NotImplementedError, message
  end
end
|
58
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
#
|
5
|
+
# Allows a job to execute ETL runners.
|
6
|
+
# You need to declare the ETL_RUNNER_CLASS constant in the including class:
|
7
|
+
#
|
8
|
+
# class CustomJobClass < Job
|
9
|
+
# ETL_RUNNER_CLASS = ETL::KibaRunner
|
10
|
+
# include JobETLRunner
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# The job must have the #parse_annotations method defined, which can be added by including ETL::Parser.
|
14
|
+
#
|
15
|
+
module JobETLRunner
  #
  # Forwards every argument to the parent initializer, then builds the runner
  # from the parsed job annotations.
  #
  def initialize(...)
    super(...)

    runner_class = self.class::ETL_RUNNER_CLASS
    @runner = runner_class.new(*parse_annotations)
  end

  #
  # Opens the adapters and executes the runner.
  # The adapters are closed afterwards, even when an error is raised.
  #
  # @return [void]
  #
  def run
    adapters.each { |adapter| adapter.open! }

    @runner.run
  ensure
    adapters.each { |adapter| adapter.close! }
  end
end
|
37
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "annotable"
|
4
|
+
|
5
|
+
module Ductr
|
6
|
+
#
|
7
|
+
# This module contains the job's status tracking logic.
|
8
|
+
# It relies on Active Job's callbacks to write status into the store.
|
9
|
+
#
|
10
|
+
module JobStatus
  #
  # Registers the ActiveJob's `before_enqueue`, `before_perform` and `after_perform` callbacks
  # on the including class so each step writes the matching status.
  # Also intercepts job exceptions to record the error and the `:failed` status, then re-raises them.
  #
  # @param [Class<Job>] job_class The job's class
  #
  # @return [void]
  #
  def self.included(job_class)
    job_class.before_enqueue do |job|
      job.status = :queued
    end
    job_class.before_perform do |job|
      job.status = :working
    end
    job_class.after_perform do |job|
      job.status = :completed
    end

    job_class.rescue_from(Exception) do |e|
      @error = e
      self.status = :failed

      raise e
    end
  end

  #
  # Stores the given status on the job, then writes the job into the Ductr's store.
  #
  # @param [Symbol] status The status of the job
  #
  # @return [void]
  #
  def status=(status)
    @status = status

    Store.write_job(self)
  end

  #
  # Determines whether the job has a `completed` or `failed` status.
  #
  # @return [Boolean] True when the status is `completed` or `failed`
  #
  def stopped?
    case status
    when :completed, :failed then true
    else false
    end
  end
end
|
56
|
+
end
|