ductr 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +14 -0
- data/.vscode/settings.json +18 -0
- data/COPYING +674 -0
- data/COPYING.LESSER +165 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +121 -0
- data/README.md +37 -0
- data/Rakefile +37 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ductr.gemspec +50 -0
- data/exe/ductr +24 -0
- data/lib/ductr/adapter.rb +94 -0
- data/lib/ductr/cli/default.rb +25 -0
- data/lib/ductr/cli/main.rb +60 -0
- data/lib/ductr/cli/new_project_generator.rb +72 -0
- data/lib/ductr/cli/templates/project/bin_ductr.rb +7 -0
- data/lib/ductr/cli/templates/project/config_app.rb +5 -0
- data/lib/ductr/cli/templates/project/config_development.yml +8 -0
- data/lib/ductr/cli/templates/project/config_environment_development.rb +18 -0
- data/lib/ductr/cli/templates/project/gemfile.rb +6 -0
- data/lib/ductr/cli/templates/project/rubocop.yml +14 -0
- data/lib/ductr/cli/templates/project/tool-versions +1 -0
- data/lib/ductr/configuration.rb +145 -0
- data/lib/ductr/etl/controls/buffered_destination.rb +65 -0
- data/lib/ductr/etl/controls/buffered_transform.rb +76 -0
- data/lib/ductr/etl/controls/control.rb +46 -0
- data/lib/ductr/etl/controls/destination.rb +28 -0
- data/lib/ductr/etl/controls/paginated_source.rb +47 -0
- data/lib/ductr/etl/controls/source.rb +21 -0
- data/lib/ductr/etl/controls/transform.rb +28 -0
- data/lib/ductr/etl/fiber_control.rb +136 -0
- data/lib/ductr/etl/fiber_runner.rb +68 -0
- data/lib/ductr/etl/kiba_runner.rb +26 -0
- data/lib/ductr/etl/parser.rb +115 -0
- data/lib/ductr/etl/runner.rb +37 -0
- data/lib/ductr/etl_job.rb +161 -0
- data/lib/ductr/job.rb +58 -0
- data/lib/ductr/job_etl_runner.rb +37 -0
- data/lib/ductr/job_status.rb +56 -0
- data/lib/ductr/kiba_job.rb +130 -0
- data/lib/ductr/log/formatters/color_formatter.rb +48 -0
- data/lib/ductr/log/logger.rb +169 -0
- data/lib/ductr/log/outputs/file_output.rb +30 -0
- data/lib/ductr/log/outputs/standard_output.rb +39 -0
- data/lib/ductr/pipeline.rb +133 -0
- data/lib/ductr/pipeline_runner.rb +95 -0
- data/lib/ductr/pipeline_step.rb +92 -0
- data/lib/ductr/registry.rb +55 -0
- data/lib/ductr/rufus_trigger.rb +106 -0
- data/lib/ductr/scheduler.rb +117 -0
- data/lib/ductr/store/job_serializer.rb +59 -0
- data/lib/ductr/store/job_store.rb +59 -0
- data/lib/ductr/store/pipeline_serializer.rb +106 -0
- data/lib/ductr/store/pipeline_store.rb +48 -0
- data/lib/ductr/store.rb +81 -0
- data/lib/ductr/trigger.rb +49 -0
- data/lib/ductr/version.rb +6 -0
- data/lib/ductr.rb +143 -0
- data/sig/ductr.rbs +1107 -0
- metadata +292 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
module ETL
|
5
|
+
#
|
6
|
+
# Glues ETL controls and the associated fibers together.
|
7
|
+
#
|
8
|
+
class FiberControl
  extend Forwardable

  # Control types a fiber can be built for. Each entry is also the name of the
  # private builder method that creates the matching fiber.
  TYPES = %i[source transform destination].freeze

  #
  # @!method resume
  #   Resumes the control's fiber.
  #   @param [Object] row The row to pass to right fiber controls
  #   @return [void]
  def_delegators :fiber, :resume

  # @return [Array<FiberControl>] The next fiber controls
  attr_accessor :right
  # @return [Control] The ETL control instance
  attr_reader :control

  #
  # Creates a new fiber control with the given control and control type.
  # The fiber itself is only created on the first call to #fiber.
  #
  # @param [Control] control The ETL control to work with in the fiber
  # @param [Symbol] type The ETL control type, one of [:source, :transform, :destination]
  #
  def initialize(control, type:)
    @control = control
    @type = type

    @right = []
  end

  #
  # Memoizes the fiber to be associated with the ETL control based on its type.
  #
  # @raise [ArgumentError] When the control type is not one of TYPES
  #
  # @return [Fiber] The fiber in charge of executing the control's logic
  #
  def fiber
    @fiber ||= build_fiber
  end

  private

  #
  # Checks the control type and calls the builder method of the same name.
  # The type is validated before being dispatched so that an unexpected value
  # cannot reach an unrelated method (e.g. `:fiber` would recurse forever).
  #
  # @raise [ArgumentError] When the control type is not one of TYPES
  #
  # @return [Fiber]
  #
  def build_fiber
    type = @type.respond_to?(:to_sym) ? @type.to_sym : @type
    unless TYPES.include?(type)
      raise ArgumentError, "Unknown control type #{@type.inspect}, expected one of #{TYPES.inspect}"
    end

    send(type)
  end

  #
  # Creates the fiber to run ETL sources.
  # The fiber is not started here, it runs when it is resumed for the first time.
  #
  # @return [Fiber]
  #
  def source
    Fiber.new do
      control.each do |row|
        resume_right_fibers(row)
      end

      # Every row has been sent, notify the following fibers.
      resume_right_fibers(:end)
    end
  end

  #
  # Creates the fiber to run ETL transforms.
  # The fiber is resumed once right away, so it is waiting on its first
  # `Fiber.yield` when the first row is passed to it.
  #
  # @return [Fiber]
  #
  def transform
    resume_control(Fiber.new do
      loop do
        row_in = Fiber.yield
        next close_transform if row_in == :end

        # Rows yielded by the control are forwarded as they come...
        row_out = control.process(row_in) do |r|
          resume_right_fibers(r)
        end

        # ...and so is the returned row, unless the control returned nil or false.
        resume_right_fibers(row_out) if row_out
      end
    end)
  end

  #
  # Creates the fiber to run ETL Destinations.
  # The fiber is resumed once right away, so it is waiting on its first
  # `Fiber.yield` when the first row is passed to it.
  #
  # @return [Fiber]
  #
  def destination
    resume_control(Fiber.new do
      loop do
        row = Fiber.yield
        next control.close if row == :end

        control.write(row)
      end
    end)
  end

  #
  # Call #close on control, resume resulting rows then ends following fibers.
  #
  # @return [void]
  #
  def close_transform
    control.close do |row|
      resume_right_fibers(row)
    end
    resume_right_fibers(:end)
  end

  #
  # Resumes all fibers at the right of the current one.
  #
  # @param [Object] row The row to pass to the next fibers
  #
  # @return [void]
  #
  def resume_right_fibers(row)
    right.each do |fiber_control|
      fiber_control.resume(row)
    end
  end

  #
  # Resumes the given fiber and returns it.
  #
  # @param [Fiber] fiber The fiber to resume
  #
  # @return [Fiber] The resumed fiber
  #
  def resume_control(fiber)
    fiber.resume
    fiber
  end
end
|
135
|
+
end
|
136
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
module ETL
|
5
|
+
#
|
6
|
+
# A runner built with fibers. Compared to KibaRunner,
|
7
|
+
# this one allows defining how controls are related to each other.
|
8
|
+
# These definitions can be found in Runner#pipes method.
|
9
|
+
#
|
10
|
+
class FiberRunner < Runner
|
11
|
+
#
# Builds the control fibers, then resumes each source fiber in turn.
#
# @return [void]
#
def run
  create_fibers!

  @source_fibers.each_value do |source_fiber|
    source_fiber.resume
  end
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
#
# Wraps every source, transform and destination control into a fiber control,
# then pipes the fiber controls together.
#
# @return [void]
#
def create_fibers!
  @source_fibers = create_control_fibers(sources) do |source|
    FiberControl.new(source, type: :source)
  end
  @transform_fibers = create_control_fibers(transforms) do |transform|
    FiberControl.new(transform, type: :transform)
  end
  @destination_fibers = create_control_fibers(destinations) do |destination|
    FiberControl.new(destination, type: :destination)
  end

  apply_fibers_plumbing!
end
|
35
|
+
|
36
|
+
#
# Pipes fiber controls together based on the control plumbing hashes.
# Each hash maps the name of a source or transform to the names of the
# transforms and/or destinations its rows are sent to.
#
# @raise [ArgumentError] When a pipe references a control that does not exist
#
# @return [void]
#
def apply_fibers_plumbing!
  # Build both lookup tables once instead of merging them again for every pipe.
  inputs = @source_fibers.merge(@transform_fibers)
  outputs = @transform_fibers.merge(@destination_fibers)

  pipes.each do |from_to|
    from_to.each do |from, to|
      input = inputs.fetch(from) do
        raise ArgumentError, "Cannot pipe from `#{from}`: no source or transform control with that name"
      end

      # Fail here with a clear message rather than later, inside a fiber, on a nil output.
      input.right = to.map do |out|
        outputs.fetch(out) do
          raise ArgumentError, "Cannot pipe `#{from}` to `#{out}`: no transform or destination control with that name"
        end
      end
    end
  end
end
|
52
|
+
|
53
|
+
#
# Builds a hash of control fibers indexed by the name of each control's job method.
#
# @param [Array<Control>] controls The controls to map on the hash
# @yield [control] The block in which the control fiber has to be initialized
#
# @return [Hash{Symbol => FiberControl}] The mapped hash
#
def create_control_fibers(controls, &)
  controls.each_with_object({}) do |control, fibers|
    fibers[control.job_method.name] = yield(control)
  end
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "kiba"
|
4
|
+
|
5
|
+
module Ductr
|
6
|
+
module ETL
|
7
|
+
#
|
8
|
+
# A runner based on kiba's streaming runner
|
9
|
+
# @see Kiba's streaming runner source code to get details about its forwarded methods
|
10
|
+
#
|
11
|
+
class KibaRunner < Runner
  extend Forwardable

  # Each of these instance methods is forwarded to Kiba::StreamingRunner.
  %i[source_stream transform_stream process_rows close_destinations].each do |kiba_method|
    def_delegator Kiba::StreamingRunner, kiba_method
  end

  #
  # Calls kiba's streaming runner #process_rows and #close_destinations like Kiba::StreamingRunner#run
  #
  # @return [void]
  #
  def run
    # Stream the rows through the controls first, destinations are closed afterwards.
    process_rows(sources, transforms, destinations)
    close_destinations(destinations)
  end
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
module ETL
|
5
|
+
#
|
6
|
+
# Contains anything to "parse" ETL jobs annotations.
|
7
|
+
# #parse_annotations handles ETL controls and send_to directives.
|
8
|
+
#
|
9
|
+
module Parser
  #
  # Builds the job's source, transform and destination controls from its annotations,
  # along with the send_to directives used to do the plumbing between controls.
  # Used for both kiba and fiber runners initialization.
  #
  # @return [Array<Source, Transform, Destination, Hash{Symbol => Array<Symbol>}>] The job's controls
  #
  def parse_annotations
    [
      init_adapter_controls(:source),
      init_transform_controls(:transform, :lookup),
      init_adapter_controls(:destination),
      find_method(:send_to) { |sender| { sender.name => sender.find_annotation(:send_to).params } }
    ]
  end

  #
  # The set of adapters used by the controls initialized so far.
  #
  # @return [Set] The current adapters
  #
  def adapters
    return @adapters if @adapters

    @adapters = Set.new
  end

  private

  #
  # Finds the method(s) associated to the given annotation names in the job class.
  #
  # @param [Array<Symbol>] *annotation_names The annotation names of the searched methods
  # @yield [method] The block to execute on each found method
  # @yieldparam method [Annotable::Method] A job's annotated method
  #
  # @return [Array] Returns mapped array containing the block's returned value
  #
  def find_method(*annotation_names, &block)
    annotated_methods = self.class.annotated_methods(*annotation_names)
    annotated_methods.map(&block)
  end

  #
  # Initializes adapter controls for the given type.
  #
  # @param [Symbol] control_type The adapter control type, one of :source or :destination
  #
  # @return [Array<Source, Destination>] The initialized adapter controls
  #
  def init_adapter_controls(control_type)
    find_method(control_type) { |annotated| adapter_control(annotated) }
  end

  #
  # Initializes transform controls for the given types.
  # Methods annotated with :lookup are built as adapter controls, the others as plain transforms.
  #
  # @param [Array<Symbol>] *control_types The transform control types, :transform and/or :lookup
  #
  # @return [Array<Transform>] The initialized transform controls
  #
  def init_transform_controls(*control_types)
    find_method(*control_types) do |annotated|
      if annotated.annotation_exist?(:lookup)
        adapter_control(annotated)
      else
        transform_control(annotated)
      end
    end
  end

  #
  # Initializes an adapter control (source, lookup or destination) based on the given annotated method.
  # The adapter is also added to the #adapters set.
  #
  # @param [Annotable::Method] annotated_method The control's method
  #
  # @return [Control] The adapter control instance
  #
  def adapter_control(annotated_method)
    annotation = annotated_method.find_annotation(:source, :destination, :lookup)
    adapter_name, control_type = annotation.params

    adapter = Ductr.config.adapter(adapter_name)
    # The registry is picked from the annotation name, e.g. `source_registry` for a source.
    registry = adapter.class.send("#{annotation.name}_registry")
    control_class = registry.find(control_type)
    bound_method = method(annotated_method.name)

    adapters << adapter
    control_class.new(bound_method, adapter, **annotation.options)
  end

  #
  # Initializes a transform control.
  # The annotation's first param is used as the transform class, Transform otherwise.
  #
  # @param [Annotable::Method] annotated_method The transform's method
  #
  # @return [Transform] The transform control instance
  #
  def transform_control(annotated_method)
    annotation = annotated_method.find_annotation(:transform)
    transform_class = annotation.params.first
    transform_class ||= Transform

    transform_class.new(method(annotated_method.name), **annotation.options)
  end
end
|
114
|
+
end
|
115
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
module ETL
|
5
|
+
#
|
6
|
+
# The base class for all runners
|
7
|
+
#
|
8
|
+
class Runner
  # @return [Array<Source>] The runner source controls
  attr_accessor :sources

  # @return [Array<Transform>] The runner transform controls
  attr_accessor :transforms

  # @return [Array<Destination>] The runner destination controls
  attr_accessor :destinations

  # @return [Array<Hash{Symbol => Array<Symbol>}>] The controls plumbing hashes
  attr_accessor :pipes

  #
  # Creates the runner instance from the job's controls.
  #
  # @param [Array<Source>] sources The job's source controls
  # @param [Array<Transform>] transforms The job's transform controls
  # @param [Array<Destination>] destinations The job's destination controls
  # @param [Array<Hash{Symbol => Array<Symbol>}>] pipes The controls plumbing hashes,
  #   an empty array when omitted
  #
  def initialize(sources, transforms, destinations, pipes = [])
    @sources, @transforms, @destinations, @pipes = sources, transforms, destinations, pipes
  end
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,161 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
#
|
5
|
+
# Base class for ETL job using the experimental fiber runner.
|
6
|
+
# Usage example:
|
7
|
+
#
|
8
|
+
# class MyETLJob < Ductr::ETLJob
|
9
|
+
# source :first_db, :basic
|
10
|
+
# send_to :the_transform, :the_other_transform
|
11
|
+
# def the_source(db)
|
12
|
+
# # ...
|
13
|
+
# end
|
14
|
+
#
|
15
|
+
# transform
|
16
|
+
# send_to :the_destination
|
17
|
+
# def the_transform(row)
|
18
|
+
# # ...
|
19
|
+
# end
|
20
|
+
#
|
21
|
+
# destination :first_db, :basic
|
22
|
+
# def the_destination(row, db)
|
23
|
+
# # ...
|
24
|
+
# end
|
25
|
+
#
|
26
|
+
# transform
|
27
|
+
# send_to :the_other_destination
|
28
|
+
# def the_other_transform(row)
|
29
|
+
# # ...
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# destination :second_db, :basic
|
33
|
+
# def the_other_destination(row, db)
|
34
|
+
# # ...
|
35
|
+
# end
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
class ETLJob < Job
|
39
|
+
# @return [Class] The ETL runner class used by the job
|
40
|
+
ETL_RUNNER_CLASS = ETL::FiberRunner
|
41
|
+
include JobETLRunner
|
42
|
+
|
43
|
+
include ETL::Parser
|
44
|
+
|
45
|
+
#
|
46
|
+
# @!method self.source(adapter_name, source_type, **source_options)
|
47
|
+
# Annotation to define a source method
|
48
|
+
# @param adapter_name [Symbol] The adapter the source is running on
|
49
|
+
# @param source_type [Symbol] The type of source to run
|
50
|
+
# @param **source_options [Hash<Symbol: Object>] The options to pass to the source
|
51
|
+
#
|
52
|
+
# @example Source with Sequel SQLite adapter
|
53
|
+
# source :my_adapter, :paginated, page_size: 42
|
54
|
+
# def my_source(db, offset, limit)
|
55
|
+
# db[:items].offset(offset).limit(limit)
|
56
|
+
# end
|
57
|
+
#
|
58
|
+
# @see The chosen adapter documentation for further information on sources usage.
|
59
|
+
#
|
60
|
+
# @return [void]
|
61
|
+
#
|
62
|
+
annotable :source
|
63
|
+
|
64
|
+
#
|
65
|
+
# @!method self.transform(transform_class, **transform_options)
|
66
|
+
# Annotation to define a transform method
|
67
|
+
# @param transform_class [Class, nil] The class the transform is running on
|
68
|
+
# @param **transform_options [Hash<Symbol: Object>] The options to pass to the transform
|
69
|
+
#
|
70
|
+
# @example Transform without params
|
71
|
+
# transform
|
72
|
+
# def rename_keys(row)
|
73
|
+
# row[:new_name] = row.delete(:old_name)
|
74
|
+
# row[:new_email] = row.delete(:old_email)
|
75
|
+
# end
|
76
|
+
#
|
77
|
+
# @example Transform with params
|
78
|
+
# class RenameTransform < Ductr::ETL::Transform
|
79
|
+
# def process(row)
|
80
|
+
# call_method.each do |actual_name, new_name|
|
81
|
+
# new_key = "#{options[:prefix]}#{new_name}".to_sym
|
82
|
+
#
|
83
|
+
# row[new_key] = row.delete(actual_name)
|
84
|
+
# end
|
85
|
+
# end
|
86
|
+
# end
|
87
|
+
#
|
88
|
+
# transform RenameTransform, prefix: "some_"
|
89
|
+
# def rename
|
90
|
+
# { old_name: :new_name, old_email: :new_email }
|
91
|
+
# end
|
92
|
+
#
|
93
|
+
# @return [void]
|
94
|
+
#
|
95
|
+
annotable :transform
|
96
|
+
|
97
|
+
#
|
98
|
+
# @!method self.lookup(adapter_name, lookup_type, **lookup_options)
|
99
|
+
# Annotation to define a lookup method
|
100
|
+
# @param adapter_name [Symbol] The adapter the lookup is running on
|
101
|
+
# @param lookup_type [Symbol] The type of lookup to run
|
102
|
+
# @param **lookup_options [Hash<Symbol: Object>] The options to pass to the lookup
|
103
|
+
#
|
104
|
+
# @example Lookup with Sequel SQLite adapter
|
105
|
+
# lookup :my_other_adapter, :match, merge: [:id, :item], buffer_size: 4
|
106
|
+
# def joining_different_adapters(db, ids)
|
107
|
+
# db[:items_bis].select(:id, :item, :name).where(item: ids)
|
108
|
+
# end
|
109
|
+
#
|
110
|
+
# @see The chosen adapter documentation for further information on lookups usage.
|
111
|
+
#
|
112
|
+
# @return [void]
|
113
|
+
#
|
114
|
+
annotable :lookup
|
115
|
+
|
116
|
+
#
|
117
|
+
# @!method self.destination(adapter_name, destination_type, **destination_options)
|
118
|
+
# Annotation to define a destination method
|
119
|
+
# @param adapter_name [Symbol] The adapter the destination is running on
|
120
|
+
# @param destination_type [Symbol] The type of destination to run
|
121
|
+
# @param **destination_options [Hash<Symbol: Object>] The options to pass to the destination
|
122
|
+
#
|
123
|
+
# @example Destination with Sequel SQLite adapter
|
124
|
+
# destination :my_other_adapter, :basic
|
125
|
+
# def my_destination(row, db)
|
126
|
+
# db[:new_items].insert(name: row[:name], new_name: row[:new_name])
|
127
|
+
# end
|
128
|
+
#
|
129
|
+
# @see The chosen adapter documentation for further information on destinations usage.
|
130
|
+
#
|
131
|
+
# @return [void]
|
132
|
+
#
|
133
|
+
annotable :destination
|
134
|
+
|
135
|
+
#
|
136
|
+
# @!method self.send_to(*methods)
|
137
|
+
# Annotation to define which methods will follow the current one
|
138
|
+
# @param *methods [Array<Symbol>] The names of the following methods
|
139
|
+
#
|
140
|
+
# @example Source with Sequel SQLite adapter sending rows to two transforms
|
141
|
+
# source :my_adapter, :paginated, page_size: 42
|
142
|
+
# send_to :my_first_transform, :my_second_transform
|
143
|
+
# def my_source(db, offset, limit)
|
144
|
+
# db[:items].offset(offset).limit(limit)
|
145
|
+
# end
|
146
|
+
#
|
147
|
+
# transform
|
148
|
+
# def my_first_transform(row)
|
149
|
+
# # ...
|
150
|
+
# end
|
151
|
+
#
|
152
|
+
# transform
|
153
|
+
# def my_second_transform(row)
|
154
|
+
# # ...
|
155
|
+
# end
|
156
|
+
#
|
157
|
+
# @return [void]
|
158
|
+
#
|
159
|
+
annotable :send_to
|
160
|
+
end
|
161
|
+
end
|
data/lib/ductr/job.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
#
|
5
|
+
# The base class for any job, you can use it directly if you don't need an ETL job.
|
6
|
+
#
|
7
|
+
class Job < ActiveJob::Base
  extend Annotable
  extend Forwardable

  include JobStatus

  # @return [Exception] The occurred error if any
  attr_reader :error
  # @return [Symbol] The job's status, one of `:queued`, `:working`, `:completed` and `:failed`
  attr_reader :status

  queue_as :ductr_jobs

  #
  # The active job's perform method, it ignores its arguments and calls #run.
  # DO NOT override it, implement the #run method instead.
  #
  # @return [void]
  #
  def perform(*_) = run

  #
  # Gets a configured adapter instance from the Ductr configuration.
  #
  # @param [Symbol] name The adapter name
  #
  # @return [Adapter] The adapter corresponding to the given name
  #
  def adapter(name) = Ductr.config.adapter(name)

  #
  # The job's logger instance, created on first call with the job's class.
  #
  # @return [Ductr::Log::Logger] The logger instance
  #
  def logger
    return @logger if @logger

    @logger = Ductr.config.logging.new(self.class)
  end

  #
  # The entry point of jobs, to be implemented by subclasses.
  #
  # @raise [NotImplementedError] Unless overridden
  #
  # @return [void]
  #
  def run
    raise NotImplementedError, "A job must implement the `#run` method"
  end
end
|
58
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
#
|
5
|
+
# Allows a job to execute ETL runners.
|
6
|
+
# You need to declare the ETL_RUNNER_CLASS constant in the including class:
|
7
|
+
#
|
8
|
+
# class CustomJobClass < Job
|
9
|
+
# ETL_RUNNER_CLASS = ETL::KibaRunner
|
10
|
+
# include JobETLRunner
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# The job must have the #parse_annotations method defined, which can be added by including ETL::Parser.
|
14
|
+
#
|
15
|
+
module JobETLRunner
  #
  # Forwards every argument to the parent initializer, then parses the job's
  # annotations to create the runner instance.
  #
  def initialize(...)
    super(...)

    controls = parse_annotations
    @runner = self.class::ETL_RUNNER_CLASS.new(*controls)
  end

  #
  # Opens adapters, executes the runner and then closes back adapters.
  # Adapters are closed even when opening them or running raises.
  #
  # @return [void]
  #
  def run
    begin
      adapters.each { |adapter| adapter.open! }
      @runner.run
    ensure
      adapters.each { |adapter| adapter.close! }
    end
  end
end
|
37
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "annotable"
|
4
|
+
|
5
|
+
module Ductr
|
6
|
+
#
|
7
|
+
# This module contains the job's status tracking logic.
|
8
|
+
# It relies on Active Job's callbacks to write status into the store.
|
9
|
+
#
|
10
|
+
module JobStatus
  #
  # Registers the ActiveJob's `before_enqueue`, `before_perform` and `after_perform` callbacks
  # to write status in the Ductr's store.
  # Intercepts and re-raises job's exceptions to write the `:failed` status.
  #
  # @param [Class<Job>] job_class The job's class
  #
  # @return [void]
  #
  def self.included(job_class)
    { before_enqueue: :queued, before_perform: :working, after_perform: :completed }.each do |callback, status|
      job_class.public_send(callback) { |job| job.status = status }
    end

    # The error is stored on the job and the status written before re-raising.
    job_class.rescue_from(Exception) do |e|
      @error = e
      self.status = :failed

      raise e
    end
  end

  #
  # Sets the job's status and writes the job into the Ductr's store.
  #
  # @param [Symbol] status The status of the job
  #
  # @return [void]
  #
  def status=(status)
    @status = status
    Store.write_job(self)
  end

  #
  # Determines whether the job has a `completed` or `failed` status.
  #
  # @return [Boolean] True when the status is `completed` or `failed`
  #
  def stopped?
    case status
    when :completed, :failed then true
    else false
    end
  end
end
|
56
|
+
end
|