ductr 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +14 -0
- data/.vscode/settings.json +18 -0
- data/COPYING +674 -0
- data/COPYING.LESSER +165 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +121 -0
- data/README.md +37 -0
- data/Rakefile +37 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ductr.gemspec +50 -0
- data/exe/ductr +24 -0
- data/lib/ductr/adapter.rb +94 -0
- data/lib/ductr/cli/default.rb +25 -0
- data/lib/ductr/cli/main.rb +60 -0
- data/lib/ductr/cli/new_project_generator.rb +72 -0
- data/lib/ductr/cli/templates/project/bin_ductr.rb +7 -0
- data/lib/ductr/cli/templates/project/config_app.rb +5 -0
- data/lib/ductr/cli/templates/project/config_development.yml +8 -0
- data/lib/ductr/cli/templates/project/config_environment_development.rb +18 -0
- data/lib/ductr/cli/templates/project/gemfile.rb +6 -0
- data/lib/ductr/cli/templates/project/rubocop.yml +14 -0
- data/lib/ductr/cli/templates/project/tool-versions +1 -0
- data/lib/ductr/configuration.rb +145 -0
- data/lib/ductr/etl/controls/buffered_destination.rb +65 -0
- data/lib/ductr/etl/controls/buffered_transform.rb +76 -0
- data/lib/ductr/etl/controls/control.rb +46 -0
- data/lib/ductr/etl/controls/destination.rb +28 -0
- data/lib/ductr/etl/controls/paginated_source.rb +47 -0
- data/lib/ductr/etl/controls/source.rb +21 -0
- data/lib/ductr/etl/controls/transform.rb +28 -0
- data/lib/ductr/etl/fiber_control.rb +136 -0
- data/lib/ductr/etl/fiber_runner.rb +68 -0
- data/lib/ductr/etl/kiba_runner.rb +26 -0
- data/lib/ductr/etl/parser.rb +115 -0
- data/lib/ductr/etl/runner.rb +37 -0
- data/lib/ductr/etl_job.rb +161 -0
- data/lib/ductr/job.rb +58 -0
- data/lib/ductr/job_etl_runner.rb +37 -0
- data/lib/ductr/job_status.rb +56 -0
- data/lib/ductr/kiba_job.rb +130 -0
- data/lib/ductr/log/formatters/color_formatter.rb +48 -0
- data/lib/ductr/log/logger.rb +169 -0
- data/lib/ductr/log/outputs/file_output.rb +30 -0
- data/lib/ductr/log/outputs/standard_output.rb +39 -0
- data/lib/ductr/pipeline.rb +133 -0
- data/lib/ductr/pipeline_runner.rb +95 -0
- data/lib/ductr/pipeline_step.rb +92 -0
- data/lib/ductr/registry.rb +55 -0
- data/lib/ductr/rufus_trigger.rb +106 -0
- data/lib/ductr/scheduler.rb +117 -0
- data/lib/ductr/store/job_serializer.rb +59 -0
- data/lib/ductr/store/job_store.rb +59 -0
- data/lib/ductr/store/pipeline_serializer.rb +106 -0
- data/lib/ductr/store/pipeline_store.rb +48 -0
- data/lib/ductr/store.rb +81 -0
- data/lib/ductr/trigger.rb +49 -0
- data/lib/ductr/version.rb +6 -0
- data/lib/ductr.rb +143 -0
- data/sig/ductr.rbs +1107 -0
- metadata +292 -0
# frozen_string_literal: true

module Ductr
  #
  # Base class for ETL jobs backed by kiba's streaming runner.
  #
  # Controls (sources, transforms, lookups and destinations) are declared with
  # annotations on plain instance methods. Example using the SQLite adapter:
  #
  #   class MyKibaJob < Ductr::KibaJob
  #     source :some_adapter, :paginated, page_size: 4
  #     def select_some_stuff(db, offset, limit)
  #       db[:items].offset(offset).limit(limit)
  #     end
  #
  #     lookup :some_adapter, :match, merge: [:id, :item], buffer_size: 4
  #     def merge_with_stuff(db, ids)
  #       db[:items_bis].select(:id, Sequel.as(:name, :name_bis), :item).where(item: ids)
  #     end
  #
  #     transform
  #     def generate_more_stuff(row)
  #       { name: "#{row[:name]}_#{row[:name_bis]}" }
  #     end
  #
  #     destination :some_other_adapter, :basic
  #     def my_destination(row, db)
  #       logger.trace("Hello destination: #{row}")
  #       db[:new_items].insert(name: row[:name])
  #     end
  #   end
  #
  # @see The chosen adapter documentation for further information on controls usage.
  #
  class KibaJob < Job
    # @return [Class] The ETL runner class used by the job
    ETL_RUNNER_CLASS = ETL::KibaRunner
    include JobETLRunner

    include ETL::Parser

    #
    # @!method self.source(adapter_name, source_type, **source_options)
    #   Annotation declaring the annotated method as an ETL source.
    #   @param adapter_name [Symbol] The adapter the source is running on
    #   @param source_type [Symbol] The type of source to run
    #   @param **source_options [Hash<Symbol: Object>] The options to pass to the source
    #
    #   @example Source with Sequel SQLite adapter
    #     source :my_adapter, :paginated, page_size: 42
    #     def my_source(db, offset, limit)
    #       db[:items].offset(offset).limit(limit)
    #     end
    #
    #   @see The chosen adapter documentation for further information on sources usage.
    #
    #   @return [void]
    #
    annotable :source

    #
    # @!method self.transform(transform_class, **transform_options)
    #   Annotation declaring the annotated method as an ETL transform.
    #   @param transform_class [Class, nil] The class the transform is running on
    #   @param **transform_options [Hash<Symbol: Object>] The options to pass to the transform
    #
    #   @example Transform without params
    #     transform
    #     def rename_keys(row)
    #       row[:new_name] = row.delete[:old_name]
    #       row[:new_email] = row.delete[:old_email]
    #     end
    #
    #   @example Transform with params
    #     class RenameTransform < Ductr::ETL::Transform
    #       def process(row)
    #         call_method.each do |actual_name, new_name|
    #           new_key = "#{options[:prefix]}#{new_name}".to_sym
    #
    #           row[new_key] = row.delete(actual_name)
    #         end
    #       end
    #     end
    #
    #     transform RenameTransform, prefix: "some_"
    #     def rename
    #       { old_name: :new_name, old_email: :new_email }
    #     end
    #
    #   @return [void]
    #
    annotable :transform

    #
    # @!method self.lookup(adapter_name, lookup_type, **lookup_options)
    #   Annotation declaring the annotated method as an ETL lookup.
    #   @param adapter_name [Symbol] The adapter the lookup is running on
    #   @param lookup_type [Symbol] The type of lookup to run
    #   @param **lookup_options [Hash<Symbol: Object>] The options to pass to the lookup
    #
    #   @example Lookup with Sequel SQLite adapter
    #     lookup :my_other_adapter, :match, merge: [:id, :item], buffer_size: 4
    #     def joining_different_adapters(db, ids)
    #       db[:items_bis].select(:id, :item, :name).where(item: ids)
    #     end
    #
    #   @see The chosen adapter documentation for further information on lookups usage.
    #
    #   @return [void]
    #
    annotable :lookup

    #
    # @!method self.destination(adapter_name, destination_type, **destination_options)
    #   Annotation declaring the annotated method as an ETL destination.
    #   @param adapter_name [Symbol] The adapter the destination is running on
    #   @param destination_type [Symbol] The type of destination to run
    #   @param **destination_options [Hash<Symbol: Object>] The options to pass to the destination
    #
    #   @example Destination with Sequel SQLite adapter
    #     destination :my_other_adapter, :basic
    #     def my_destination(row, db)
    #       db[:new_items].insert(name: row[:name], new_name: row[:new_name])
    #     end
    #
    #   @see The chosen adapter documentation for further information on destinations usage.
    #
    #   @return [void]
    #
    annotable :destination
  end
end
# frozen_string_literal: true

require "logger"
require "colorized_string"

module Ductr
  module Log
    #
    # A log formatter which colorizes the text with ANSI colors.
    #
    class ColorFormatter < ::Logger::Formatter
      # @return [Hash{String => Symbol, Hash}] The color to apply per severity label.
      #   FATAL maps to a background color instead of a foreground one.
      #   Hoisted to a frozen constant so the hash isn't rebuilt on every log line.
      LEVEL_COLORS = {
        "DEBUG" => :green, "INFO" => :cyan, "WARN" => :yellow, "ERROR" => :red, "FATAL" => { background: :red }
      }.freeze

      #
      # Colorizes the given log entry.
      #
      # @param [Integer] level The log's severity level
      # @param [Time] time The log's timestamp
      # @param [Symbol] prog_name The log's "program" name, used to add job method name to the log
      # @param [String] message The log's message
      #
      # @return [String] The formatted log
      #
      def call(level, time, prog_name, message)
        format(format_str(level), level[0], format_datetime(time), Process.pid, level, prog_name, msg2str(message))
      end

      private

      #
      # Generates the colorized format string based on the log level.
      #
      # @param [String] level The log level
      #
      # @return [String] The colored format string
      #
      def format_str(level)
        timestamp = ColorizedString["%s, [%s #%d]"].colorize(:light_black)
        level_name = ColorizedString["%5s"].colorize(LEVEL_COLORS[level])
        prog_name = ColorizedString["%s:"].colorize(:blue)

        "#{timestamp} #{level_name} -- #{prog_name} %s\n"
      end
    end
  end
end
# frozen_string_literal: true

require "logger"

module Ductr
  module Log
    #
    # A ractor compatible logger to be used inside jobs or anywhere else in your ductr project.
    #
    class Logger
      # @return [Hash{Symbol => Integer}] Mapping between symbolic level names and
      #   the stdlib ::Logger severity integers.
      LEVELS = {
        debug: ::Logger::DEBUG,
        info: ::Logger::INFO,
        warn: ::Logger::WARN,
        error: ::Logger::ERROR,
        fatal: ::Logger::FATAL
      }.freeze

      class << self
        #
        # Allows to add another log output.
        # Making possible to write logs in multiple places at the same time, e.g. in STDOUT and in logs files
        #
        # @param [StandardOutput] output The new output to write logs to
        # @param [::Logger::Formatter] formatter The formatter to use when writing logs
        # @param [Hash] **options The formatter options
        #
        # @return [void]
        #
        def add_output(output, formatter = ::Logger::Formatter, **options)
          @outputs ||= []
          @outputs.push([output, [formatter, options]])
        end

        #
        # The configured outputs list
        #
        # @return [Array<Array<StandardOutput, Array<::Logger::Formatter, Hash>>>]
        #   The list of outputs with their formatters and configurations
        #
        def outputs
          @outputs || [[StandardOutput, [::Logger::Formatter]]]
        end

        #
        # Configure the logging level.
        #
        # @param [Symbol, String] lvl The desired logging level
        #
        # @raise [ArgumentError] When the given level is unknown
        # @return [void]
        #
        def level=(lvl)
          new_level = LEVELS[lvl.to_s.downcase.to_sym]
          # Validate before assigning: the previous implementation wrote the lookup
          # result into @level first, so an invalid value wiped the configured
          # level (falling back to DEBUG) even though it raised.
          raise ArgumentError, "invalid log level: #{lvl}" unless new_level

          @level = new_level
        end

        #
        # @return [Integer] The current logging level, default ::Logger::DEBUG
        #
        def level
          @level || ::Logger::DEBUG
        end
      end

      #
      # Create configured outputs instances, meaning that you can't add outputs in an already instantiated logger.
      #
      def initialize(prog_name = nil)
        @prog_name = prog_name

        @outputs = self.class.outputs.map do |output_with_params|
          out, params = *output_with_params
          formatter, options = *params

          out.new(formatter, **options || {})
        end
      end

      #
      # Logs a message with the `debug` level.
      #
      # @param [String] message The message to log
      # @param [String, Symbol] prog_name The program name of the message
      #
      # @return [void]
      # @yield The message
      #
      def debug(...)
        write(::Logger::DEBUG, ...)
      end

      #
      # Logs a message with the `info` level.
      #
      # @param [String] message The message to log
      # @param [String, Symbol] prog_name The program name of the message
      #
      # @return [void]
      # @yield The message
      #
      def info(...)
        write(::Logger::INFO, ...)
      end

      #
      # Logs a message with the `warn` level.
      #
      # @param [String] message The message to log
      # @param [String, Symbol] prog_name The program name of the message
      #
      # @return [void]
      # @yield The message
      #
      def warn(...)
        write(::Logger::WARN, ...)
      end

      #
      # Logs a message with the `error` level.
      #
      # @param [String] message The message to log
      # @param [String, Symbol] prog_name The program name of the message
      #
      # @return [void]
      # @yield The message
      #
      def error(...)
        write(::Logger::ERROR, ...)
      end

      #
      # Logs a message with the `fatal` level.
      #
      # @param [String] message The message to log
      # @param [String, Symbol] prog_name The program name of the message
      #
      # @return [void]
      # @yield The message
      #
      def fatal(...)
        write(::Logger::FATAL, ...)
      end

      private

      #
      # Writes the message with the given level into all outputs.
      #
      # @param [Integer] severity The severity level of the message
      # @param [String] message The message to write
      # @param [String] prog_name The program name of the message
      #
      # @return [void]
      #
      def write(severity, message = nil, prog_name = nil, &)
        return if severity < self.class.level

        message ||= yield

        called_method = "#{@prog_name}##{caller_locations(2, 1).first.label}"
        prog_name ||= @prog_name.is_a?(Class) ? called_method : @prog_name

        @outputs.each do |output|
          output.write severity, prog_name, message
        end
      end
    end
  end
end
# frozen_string_literal: true

require "fileutils"

module Ductr
  module Log
    #
    # An output to write logs in a file
    #
    class FileOutput < StandardOutput
      #
      # Creates the output with the given formatter, path and options
      #
      # @param [::Logger::Formatter] formatter The formatter to use when writing logs
      # @param [String] path The path to write the logs
      # @param [Hash] **options The options to write files
      #
      # @see The ruby's logger documentation to get options documentation
      #
      def initialize(formatter, path:, **options) # rubocop:disable Lint/MissingSuper
        FileUtils.mkdir_p(File.dirname(path))
        # `FileUtils.touch` creates the file when missing without leaking a file
        # descriptor: the previous `File.new(path, "w")` opened the file and
        # never closed it.
        FileUtils.touch(path)

        @formatter = formatter.new
        @log_device = ::Logger::LogDevice.new path, **options
      end
    end
  end
end
# frozen_string_literal: true

require "logger"

module Ductr
  module Log
    #
    # The STDOUT logger output
    #
    class StandardOutput
      # @return [Array<String>] The labels to associate to severity integers
      SEVERITY_LABELS = %w[DEBUG INFO WARN ERROR FATAL ANY].freeze

      #
      # Creates a logger output instance
      #
      # @param [::Logger::Formatter] formatter The formatter to use to write the logs in STDOUT
      # @param [Hash] **options The LogDevice options
      #
      def initialize(formatter, **options)
        @formatter = formatter.new
        @log_device = ::Logger::LogDevice.new($stdout, **options)
      end

      #
      # Writes the log to the STDOUT
      #
      # @param [Integer] severity The log's severity level
      # @param [Symbol] prog_name The "program" name, used to add job method name to the log
      # @param [String] message The log message
      #
      # @return [void]
      #
      def write(severity, prog_name, message)
        label = SEVERITY_LABELS[severity]
        formatted = @formatter.call(label, Time.now, prog_name, message)

        @log_device.write(formatted)
      end
    end
  end
end
# frozen_string_literal: true

module Ductr
  #
  # Pipelines allows to easily declare rich data pipelines.
  #
  # By using the `after` annotation, you can define steps execution hierarchy.
  #
  # `sync` and `async` are useful to define job sequences inside step methods.
  #
  # `Pipeline` inherits from `Job` which means that pipeline are enqueued as any other job.
  # Pipelines are enqueued in the :ductr_pipelines queue.
  #
  #   class MyPipeline < Ductr::Pipeline
  #     def first_step
  #       sync(MyJob, 1)
  #       async(SomeJob) # Executed when `MyJob` is done
  #     end
  #
  #     after :first_step
  #     def first_parallel_step # Returns when all three `HelloJob` are done
  #       async(HelloJob, :one)
  #       async(HelloJob, :two)
  #       async(HelloJob, :three)
  #     end
  #
  #     after :first_step
  #     def second_parallel_step # Executed concurrently with :first_parallel_step
  #       async(SomeJob)
  #       async(SomeOtherJob)
  #       sync(HelloJob, :one) # Executed when `SomeJob` and `SomeOtherJob` are done
  #     end
  #
  #     after :first_parallel_step, :second_parallel_step
  #     def last_step # Executed when `first_parallel_step` and `second_parallel_step` jobs are done
  #       sync(ByeJob)
  #     end
  #   end
  #
  # You can define pipelines with only one step by using `after` annotation without parameter:
  #
  #   class MonoStepPipeline < Ductr::Pipeline
  #     after
  #     def unique_step
  #       async(MyJob)
  #       async(MyJob)
  #     end
  #   end
  #
  # A pipeline can inherit from another, allowing you to overload and add steps to the parent pipeline:
  #
  #   class InheritPipeline < MonoStepPipeline
  #     after :unique_step
  #     def not_that_unique
  #       async(MyJob)
  #     end
  #   end
  #
  class Pipeline < Job
    #
    # @!method self.after
    #   Annotation to define preceding steps on a pipeline step method.
    #   @params *step_names [Array<Symbol>] The preceding steps methods names
    #   @example
    #     after :some_step_method, :some_other_step_method
    #     def my_step
    #       # ...
    #     end
    #
    #   @return [void]
    #
    annotable :after

    queue_as :ductr_pipelines

    # @return [PipelineRunner] The pipeline's runner instance
    attr_reader :runner

    #
    # @!method run
    #   Starts the pipeline runner.
    #   @return [void]
    #
    def_delegators :@runner, :run

    #
    # Initializes the pipeline runner
    #
    def initialize(...)
      super(...)

      @runner = PipelineRunner.new(self)
    end

    #
    # Puts the given job in the queue and waits for it to be done.
    #
    # @param [Class<Job>] job_class The job to enqueue
    # @param [Array<Object>] *params The job's params
    #
    # @return [void]
    #
    def sync(job_class, *params)
      step = @runner.current_step

      step.flush_jobs
      step.enqueue_job(job_class.new(*params))
      step.flush_jobs
    end

    #
    # Enqueues the given job.
    #
    # @param [Class<Job>] job_class The job to enqueue
    # @param [Array<Object>] *params The job's params
    #
    # @return [void]
    #
    def async(job_class, *params)
      @runner.current_step.enqueue_job(job_class.new(*params))
    end

    #
    # Writes the pipeline's status into the Ductr's store.
    #
    # @param [Symbol] status The status of the job
    #
    # @return [void]
    #
    def status=(status)
      @status = status
      Store.write_pipeline(self)
    end
  end
end
# frozen_string_literal: true

module Ductr
  #
  # In charge to parse pipeline annotations, initializing and running pipeline steps.
  #
  class PipelineRunner
    # @return [Float] Time to wait in second before resuming all alive steps
    TICK = 0.1

    # @return [Array<PipelineStep>] All the steps declared in the pipeline
    attr_reader :steps
    # @return [Array<PipelineStep>] The remaining steps to run
    attr_reader :remaining_steps

    #
    # Parses and initializes the given pipeline's steps.
    #
    # @param [Pipeline] pipeline The pipeline to parse and run.
    #
    def initialize(pipeline)
      annotated_methods = pipeline.class.annotated_methods

      @steps = step_names(annotated_methods).map do |name|
        PipelineStep.new(pipeline, name)
      end

      annotated_methods.each do |method|
        step_by(name: method.name).left = method.find_annotation(:after).params.map do |left_step_name|
          step_by(name: left_step_name)
        end
      end

      @remaining_steps = @steps.dup
    end

    #
    # Actually runs the pipeline.
    # Resumes step's fibers until they are all finished.
    #
    # @return [void]
    #
    def run
      until @remaining_steps.empty?
        # Prune finished steps before resuming: the previous implementation
        # called `@remaining_steps.delete(step)` inside `each`, and deleting
        # while iterating makes `each` skip the element right after each
        # deletion, delaying those steps by a tick.
        @remaining_steps.select!(&:alive?)
        break if @remaining_steps.empty?

        @remaining_steps.each(&:resume)

        sleep(TICK)
      end
    end

    #
    # Returns the current step based on fiber execution context.
    #
    # @return [PipelineStep] The currently running step.
    #
    def current_step
      step_by fiber: Fiber.current
    end

    #
    # Parses given annotated methods and extract all step names.
    #
    # @param [Array<Annotable::AnnotatedMethod>] annotated_methods The annotated method to parse
    #
    # @return [Array<Symbol>] The declared step's names
    #
    def step_names(annotated_methods)
      annotated_methods.flat_map do |method|
        [method.name, *method.find_annotation(:after).params]
      end.uniq
    end

    #
    # Finds a step corresponding to the given name and value.
    #
    # @example Finds a step named `my-step`
    #   step_by(name: :my_step)
    #
    # @param [Hash<Symbol: Object>] **name_and_val Step attribute's name and value
    #
    # @return [PipelineStep, Nil] Found step if any
    #
    def step_by(**name_and_val)
      name, value = *name_and_val.to_a.first

      steps.find do |step|
        step.send(name) == value
      end
    end
  end
end