ductr 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +14 -0
- data/.vscode/settings.json +18 -0
- data/COPYING +674 -0
- data/COPYING.LESSER +165 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +121 -0
- data/README.md +37 -0
- data/Rakefile +37 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ductr.gemspec +50 -0
- data/exe/ductr +24 -0
- data/lib/ductr/adapter.rb +94 -0
- data/lib/ductr/cli/default.rb +25 -0
- data/lib/ductr/cli/main.rb +60 -0
- data/lib/ductr/cli/new_project_generator.rb +72 -0
- data/lib/ductr/cli/templates/project/bin_ductr.rb +7 -0
- data/lib/ductr/cli/templates/project/config_app.rb +5 -0
- data/lib/ductr/cli/templates/project/config_development.yml +8 -0
- data/lib/ductr/cli/templates/project/config_environment_development.rb +18 -0
- data/lib/ductr/cli/templates/project/gemfile.rb +6 -0
- data/lib/ductr/cli/templates/project/rubocop.yml +14 -0
- data/lib/ductr/cli/templates/project/tool-versions +1 -0
- data/lib/ductr/configuration.rb +145 -0
- data/lib/ductr/etl/controls/buffered_destination.rb +65 -0
- data/lib/ductr/etl/controls/buffered_transform.rb +76 -0
- data/lib/ductr/etl/controls/control.rb +46 -0
- data/lib/ductr/etl/controls/destination.rb +28 -0
- data/lib/ductr/etl/controls/paginated_source.rb +47 -0
- data/lib/ductr/etl/controls/source.rb +21 -0
- data/lib/ductr/etl/controls/transform.rb +28 -0
- data/lib/ductr/etl/fiber_control.rb +136 -0
- data/lib/ductr/etl/fiber_runner.rb +68 -0
- data/lib/ductr/etl/kiba_runner.rb +26 -0
- data/lib/ductr/etl/parser.rb +115 -0
- data/lib/ductr/etl/runner.rb +37 -0
- data/lib/ductr/etl_job.rb +161 -0
- data/lib/ductr/job.rb +58 -0
- data/lib/ductr/job_etl_runner.rb +37 -0
- data/lib/ductr/job_status.rb +56 -0
- data/lib/ductr/kiba_job.rb +130 -0
- data/lib/ductr/log/formatters/color_formatter.rb +48 -0
- data/lib/ductr/log/logger.rb +169 -0
- data/lib/ductr/log/outputs/file_output.rb +30 -0
- data/lib/ductr/log/outputs/standard_output.rb +39 -0
- data/lib/ductr/pipeline.rb +133 -0
- data/lib/ductr/pipeline_runner.rb +95 -0
- data/lib/ductr/pipeline_step.rb +92 -0
- data/lib/ductr/registry.rb +55 -0
- data/lib/ductr/rufus_trigger.rb +106 -0
- data/lib/ductr/scheduler.rb +117 -0
- data/lib/ductr/store/job_serializer.rb +59 -0
- data/lib/ductr/store/job_store.rb +59 -0
- data/lib/ductr/store/pipeline_serializer.rb +106 -0
- data/lib/ductr/store/pipeline_store.rb +48 -0
- data/lib/ductr/store.rb +81 -0
- data/lib/ductr/trigger.rb +49 -0
- data/lib/ductr/version.rb +6 -0
- data/lib/ductr.rb +143 -0
- data/sig/ductr.rbs +1107 -0
- metadata +292 -0
data/lib/ductr/store.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
4
|
+
|
5
|
+
module Ductr
  #
  # Store interaction helpers for internal usage.
  #
  # Every helper goes through `Ductr.store` and builds cache keys of the form
  # `"<key_prefix>:<job_id>"`.
  #
  module Store
    extend JobStore
    extend PipelineStore

    class << self
      # NOTE: being assigned inside `class << self`, this constant lives on the
      # singleton class of Store; the methods below reach it lexically.
      #
      # @return [Integer] The cache expiration of job's status, default to one day
      EXPIRATION_INTERVAL = 86_400

      #
      # Get all known job instances for the given registry_key and job's key_prefix.
      #
      # @param [String] registry_key The registry key in which job keys will be read
      # @param [String] key_prefix The cache key prefix for the registry's job keys
      #
      # @return [Array<Job>] The job instances, empty when the registry is missing or empty
      #
      def all(registry_key, key_prefix)
        job_ids = Ductr.store.read(registry_key)
        # Nothing registered (or an empty registry): no need to query the store again.
        return [] if !job_ids || job_ids.empty?

        keys = job_ids.map { |job_id| "#{key_prefix}:#{job_id}" }
        Ductr.store.read_multi(*keys).values
      end

      #
      # Read all given jobs in the given key_prefix.
      #
      # @param [String] key_prefix The cache key prefix for the job_id
      # @param [Array<Job>] jobs The jobs to read, each one must respond to `#job_id`
      #
      # @return [Array<Job>] The read jobs
      #
      def read(key_prefix, *jobs)
        # Without any job there is no key to look up.
        return [] if jobs.empty?

        keys = jobs.map { |job| "#{key_prefix}:#{job.job_id}" }
        Ductr.store.read_multi(*keys).values
      end

      #
      # Update the given job in the given key_prefix.
      # The entry is written with an expiration of EXPIRATION_INTERVAL seconds.
      #
      # @param [String] key_prefix The cache key prefix for the job_id
      # @param [Job] job The job to update in the store
      #
      # @return [void]
      #
      def write(key_prefix, job)
        Ductr.store.write("#{key_prefix}:#{job.job_id}", job, expires_in: EXPIRATION_INTERVAL)
      end

      #
      # Add the given job to the store's job registry. This method is NOT thread-safe:
      # the registry is read, modified in memory and written back, so concurrent
      # registrations can overwrite each other.
      #
      # @param [String] registry_key The registry key in which the job id will be added
      # @param [Job] job The job to register
      #
      # @return [void]
      #
      def register(registry_key, job)
        job_ids = Ductr.store.read(registry_key) || Set.new

        job_ids.add(job.job_id)
        # Writing the registry back also renews its expiration.
        Ductr.store.write(registry_key, job_ids, expires_in: EXPIRATION_INTERVAL)
      end

      #
      # Determines whether all tracked jobs have either a completed or failed status.
      #
      # NOTE(review): `all_jobs` and `all_pipelines` are not defined here, presumably
      # they come from the extended JobStore and PipelineStore modules -- confirm.
      #
      # @return [Boolean] `true` when all jobs are done
      #
      def all_done?
        [*all_jobs, *all_pipelines].all?(&:stopped?)
      end
    end
  end
end
|
data/lib/ductr/trigger.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
  #
  # The base class for any trigger, can be initialized by passing it its adapter, if any.
  # A trigger must implement the #add method which is called for each trigger declaration.
  # Depending on what your trigger does, you may have to implement the #start and #stop methods.
  # #start is called when the scheduler relying on the trigger is started. #stop does the opposite:
  # it is called when the scheduler relying on the trigger is stopped.
  #
  class Trigger
    # @return [Adapter, nil] The adapter given at initialization, if any
    attr_reader :adapter

    #
    # Creates a new trigger instance, called by the scheduler.
    #
    # @param [Adapter, nil] adapter The trigger's adapter, if any
    #
    def initialize(adapter = nil)
      @adapter = adapter
    end

    #
    # Adds a new trigger, called by a scheduler when a trigger is declared.
    # This base implementation always raises, subclasses have to override it.
    #
    # @param [Method] _method The scheduler method to be called by the trigger
    # @param [Hash{Symbol => Object}] _options The options of the trigger declaration
    #
    # @raise [NotImplementedError] Always, when not overridden.
    #   Note that NotImplementedError is a ScriptError, a bare `rescue` does not catch it.
    # @return [void]
    #
    def add(_method, _options)
      raise NotImplementedError, "A trigger must implement the #add method"
    end

    #
    # Called when the scheduler relying on the trigger is started.
    # Does nothing by default.
    #
    # @return [void]
    #
    def start; end

    #
    # Called when the scheduler relying on the trigger is stopped.
    # Does nothing by default.
    #
    # @return [void]
    #
    def stop; end
  end
end
|
data/lib/ductr.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "active_job"
|
4
|
+
require "annotable"
|
5
|
+
require "forwardable"
|
6
|
+
require "zeitwerk"
|
7
|
+
|
8
|
+
#
|
9
|
+
# The main Ductr module.
|
10
|
+
#
|
11
|
+
module Ductr
  class AdapterNotFoundError < StandardError; end
  class ControlNotFoundError < StandardError; end
  class InconsistentPaginationError < StandardError; end

  class << self
    #
    # Contains all the Ductr configuration.
    #
    # @return [Configuration, nil] The configuration instance, `nil` when Ductr is not configured
    attr_reader :config

    #
    # The adapter classes registry, all declared adapters are in the registry.
    #
    # @return [Registry] The registry instance
    #
    def adapter_registry
      @adapter_registry ||= Registry.new(:adapter)
    end

    #
    # The trigger classes registry, all declared triggers are in the registry.
    #
    # @return [Registry] The registry instance
    #
    def trigger_registry
      @trigger_registry ||= Registry.new(:trigger)
    end

    #
    # The Ductr current environment, "development" by default.
    # You can change it by setting the `DUCTR_ENV` environment variable.
    # The value is down-cased and memoized on first access.
    #
    # @return [String] The Ductr environment
    #
    def env
      @env ||= ENV.fetch("DUCTR_ENV", "development").downcase
    end

    #
    # Determines if Ductr is in development mode.
    #
    # @return [Boolean] True if DUCTR_ENV is set to "development" or is unset
    #
    def development?
      env == "development"
    end

    #
    # Determines if Ductr is in production mode.
    #
    # @return [Boolean] True if DUCTR_ENV is set to "production"
    #
    def production?
      env == "production"
    end

    #
    # The configure block allows to configure Ductr internals.
    # You must call this method once, and only once, to use the framework.
    #
    # The configuration instance is readable through `Ductr.config` while the block runs.
    # When the configuration does not complete (the block raises, no block is given, ...),
    # the instance is discarded so that #configure can be called again.
    #
    # @raise [ScriptError] Raises when called more than one time
    # @return [void]
    # @yield [config] Configure the framework
    # @yieldparam [Configuration] config The configuration instance
    #
    def configure
      raise ScriptError, "Ductr::configure must be called only once" if @config

      @config = Configuration.new(env)
      completed = false

      begin
        yield(@config)
        result = @config.apply_active_job_config
        completed = true
        result
      ensure
        # Do not keep a half-built configuration around, it would lock out any retry.
        @config = nil unless completed
      end
    end

    #
    # The Ductr main logger instance, built once from `config.logging`.
    # Ductr must have been configured before the first call.
    #
    # @return [Log::Logger] The logger instance
    #
    def logger
      @logger ||= config.logging.new
    end

    #
    # The Ductr store, used to share information across different instances.
    # Ductr must have been configured before the first call.
    #
    # @return [ActiveSupport::Cache::Store] The store instance
    #
    def store
      @store ||=
        if config.store_adapter.is_a?(Class)
          # A store class was configured, instantiate it directly.
          config.store_adapter.new(*config.store_parameters)
        else
          # Anything else is resolved by ActiveSupport.
          ActiveSupport::Cache.lookup_store(config.store_adapter, *config.store_parameters)
        end
    end
  end
end
|
110
|
+
|
111
|
+
#
|
112
|
+
# Framework auto loading
|
113
|
+
#
|
114
|
+
Zeitwerk::Loader.for_gem.tap do |gem_loader|
  # File names whose constant is not the default camel-case form.
  gem_loader.inflector.inflect(
    "cli" => "CLI",
    "etl" => "ETL",
    "etl_job" => "ETLJob",
    "job_etl_runner" => "JobETLRunner"
  )

  # Collapsed directories (see Zeitwerk's `collapse` documentation).
  gem_loader.collapse(
    "#{__dir__}/ductr/etl/controls",
    "#{__dir__}/ductr/log/outputs",
    "#{__dir__}/ductr/log/formatters"
  )

  # The CLI templates directory is left out of auto loading.
  gem_loader.ignore("#{__dir__}/ductr/cli/templates")

  gem_loader.setup
  # NOTE(review): RufusTrigger is eager loaded right after setup, presumably so that
  # it registers itself at load time -- confirm against rufus_trigger.rb.
  gem_loader.eager_load_namespace(Ductr::RufusTrigger)
end
|
129
|
+
|
130
|
+
#
|
131
|
+
# Application auto loading
|
132
|
+
#
|
133
|
+
# Only set up application auto loading when the working directory has an `app` folder.
pwd = Dir.pwd

if File.directory?("#{pwd}/app")
  app_loader = Zeitwerk::Loader.new
  app_loader.push_dir("#{pwd}/app")

  # Collapsed directories (see Zeitwerk's `collapse` documentation).
  app_loader.collapse(
    "#{pwd}/app/jobs",
    "#{pwd}/app/pipelines",
    "#{pwd}/app/schedulers"
  )

  app_loader.setup
end
|