ductr 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +14 -0
- data/.vscode/settings.json +18 -0
- data/COPYING +674 -0
- data/COPYING.LESSER +165 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +121 -0
- data/README.md +37 -0
- data/Rakefile +37 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ductr.gemspec +50 -0
- data/exe/ductr +24 -0
- data/lib/ductr/adapter.rb +94 -0
- data/lib/ductr/cli/default.rb +25 -0
- data/lib/ductr/cli/main.rb +60 -0
- data/lib/ductr/cli/new_project_generator.rb +72 -0
- data/lib/ductr/cli/templates/project/bin_ductr.rb +7 -0
- data/lib/ductr/cli/templates/project/config_app.rb +5 -0
- data/lib/ductr/cli/templates/project/config_development.yml +8 -0
- data/lib/ductr/cli/templates/project/config_environment_development.rb +18 -0
- data/lib/ductr/cli/templates/project/gemfile.rb +6 -0
- data/lib/ductr/cli/templates/project/rubocop.yml +14 -0
- data/lib/ductr/cli/templates/project/tool-versions +1 -0
- data/lib/ductr/configuration.rb +145 -0
- data/lib/ductr/etl/controls/buffered_destination.rb +65 -0
- data/lib/ductr/etl/controls/buffered_transform.rb +76 -0
- data/lib/ductr/etl/controls/control.rb +46 -0
- data/lib/ductr/etl/controls/destination.rb +28 -0
- data/lib/ductr/etl/controls/paginated_source.rb +47 -0
- data/lib/ductr/etl/controls/source.rb +21 -0
- data/lib/ductr/etl/controls/transform.rb +28 -0
- data/lib/ductr/etl/fiber_control.rb +136 -0
- data/lib/ductr/etl/fiber_runner.rb +68 -0
- data/lib/ductr/etl/kiba_runner.rb +26 -0
- data/lib/ductr/etl/parser.rb +115 -0
- data/lib/ductr/etl/runner.rb +37 -0
- data/lib/ductr/etl_job.rb +161 -0
- data/lib/ductr/job.rb +58 -0
- data/lib/ductr/job_etl_runner.rb +37 -0
- data/lib/ductr/job_status.rb +56 -0
- data/lib/ductr/kiba_job.rb +130 -0
- data/lib/ductr/log/formatters/color_formatter.rb +48 -0
- data/lib/ductr/log/logger.rb +169 -0
- data/lib/ductr/log/outputs/file_output.rb +30 -0
- data/lib/ductr/log/outputs/standard_output.rb +39 -0
- data/lib/ductr/pipeline.rb +133 -0
- data/lib/ductr/pipeline_runner.rb +95 -0
- data/lib/ductr/pipeline_step.rb +92 -0
- data/lib/ductr/registry.rb +55 -0
- data/lib/ductr/rufus_trigger.rb +106 -0
- data/lib/ductr/scheduler.rb +117 -0
- data/lib/ductr/store/job_serializer.rb +59 -0
- data/lib/ductr/store/job_store.rb +59 -0
- data/lib/ductr/store/pipeline_serializer.rb +106 -0
- data/lib/ductr/store/pipeline_store.rb +48 -0
- data/lib/ductr/store.rb +81 -0
- data/lib/ductr/trigger.rb +49 -0
- data/lib/ductr/version.rb +6 -0
- data/lib/ductr.rb +143 -0
- data/sig/ductr.rbs +1107 -0
- metadata +292 -0
data/lib/ductr/store.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
4
|
+
|
5
|
+
module Ductr
|
6
|
+
#
|
7
|
+
# Store interaction helpers for internal usage.
|
8
|
+
#
|
9
|
+
module Store
|
10
|
+
extend JobStore
|
11
|
+
extend PipelineStore
|
12
|
+
|
13
|
+
class << self
|
14
|
+
# @return [Integer] The cache expiration of the jobs' status and registries in seconds, defaults to one day
|
15
|
+
EXPIRATION_INTERVAL = 86_400
|
16
|
+
|
17
|
+
#
|
18
|
+
# Get all known job instances for the given registry_key and job's key_prefix.
|
19
|
+
#
|
20
|
+
# @param [String] registry_key The registry key in which job keys will be read
|
21
|
+
# @param [String] key_prefix The cache key prefix for the registry's job keys
|
22
|
+
#
|
23
|
+
# @return [Array<Job>] The job instances, or an empty array when nothing is stored at registry_key
|
24
|
+
#
|
25
|
+
def all(registry_key, key_prefix)
|
26
|
+
job_ids = Ductr.store.read(registry_key)
|
27
|
+
return [] unless job_ids
|
28
|
+
|
29
|
+
# Each job is stored under "<key_prefix>:<job_id>".
keys = job_ids.map { |job_id| "#{key_prefix}:#{job_id}" }
|
30
|
+
Ductr.store.read_multi(*keys).values
|
31
|
+
end
|
32
|
+
|
33
|
+
#
|
34
|
+
# Read all given jobs in the given key_prefix.
|
35
|
+
#
|
36
|
+
# @param [String] key_prefix The cache key prefix for the job_id
|
37
|
+
# @param [Array<Job>] *jobs The jobs to read
|
38
|
+
#
|
39
|
+
# @return [Array<Job>] The read jobs
|
40
|
+
#
|
41
|
+
def read(key_prefix, *jobs)
|
42
|
+
keys = jobs.map { |job| "#{key_prefix}:#{job.job_id}" }
|
43
|
+
Ductr.store.read_multi(*keys).values
|
44
|
+
end
|
45
|
+
|
46
|
+
#
|
47
|
+
# Write the given job in the given key_prefix, the entry expires after EXPIRATION_INTERVAL.
|
48
|
+
#
|
49
|
+
# @param [String] key_prefix The cache key prefix for the job_id
# @param [Job] job The job to update in the store
|
50
|
+
#
|
51
|
+
# @return [void]
|
52
|
+
#
|
53
|
+
def write(key_prefix, job)
|
54
|
+
Ductr.store.write("#{key_prefix}:#{job.job_id}", job, expires_in: EXPIRATION_INTERVAL)
|
55
|
+
end
|
56
|
+
|
57
|
+
#
|
58
|
+
# Add the given job to the store's job registry. This method is NOT thread-safe:
# the registry is read, modified and written back without any locking.
|
59
|
+
#
|
60
|
+
# @param [String] registry_key The registry key in which the job's id will be added
# @param [Job] job The job to register
|
61
|
+
#
|
62
|
+
# @return [void]
|
63
|
+
#
|
64
|
+
def register(registry_key, job)
|
65
|
+
# Start from an empty set when nothing is stored at registry_key yet.
job_ids = Ductr.store.read(registry_key) || Set.new
|
66
|
+
|
67
|
+
job_ids.add(job.job_id)
|
68
|
+
Ductr.store.write(registry_key, job_ids, expires_in: EXPIRATION_INTERVAL)
|
69
|
+
end
|
70
|
+
|
71
|
+
#
|
72
|
+
# Determines whether all tracked jobs and pipelines have either a completed or failed status.
|
73
|
+
#
|
74
|
+
# @return [Boolean] `true` when all jobs and pipelines are stopped
|
75
|
+
#
|
76
|
+
def all_done?
|
77
|
+
# NOTE(review): #all_jobs and #all_pipelines are not defined here, presumably they come from JobStore and PipelineStore.
[*all_jobs, *all_pipelines].all?(&:stopped?)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/ductr/trigger.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ductr
|
4
|
+
#
|
5
|
+
# The base class for any trigger, can be initialized by passing it its adapter name if any.
|
6
|
+
# A trigger must implement the #add method which is called for each trigger declaration.
|
7
|
+
# Depending on what your trigger does, you may have to implement the #start and #stop methods.
|
8
|
+
# #start is called when the scheduler relying on the trigger is started. #stop does the opposite:
|
9
|
+
# it is called when the scheduler relying on the trigger is stopped.
|
10
|
+
#
|
11
|
+
class Trigger
|
12
|
+
# @return [Adapter, nil] The trigger's adapter, if any
attr_reader :adapter
|
13
|
+
|
14
|
+
#
|
15
|
+
# Creates a new trigger instance, called by the scheduler.
|
16
|
+
#
|
17
|
+
# @param [Adapter, nil] adapter The trigger's adapter, if any
|
18
|
+
#
|
19
|
+
def initialize(adapter = nil)
|
20
|
+
@adapter = adapter
|
21
|
+
end
|
22
|
+
|
23
|
+
#
|
24
|
+
# Adds a new trigger, called by a scheduler when a trigger is declared.
# This base implementation always raises, subclasses must override it.
|
25
|
+
#
|
26
|
+
# @param [Method] _method The scheduler method to be called by the trigger
|
27
|
+
# @param [Hash{Symbol => Object}] _options The options of the trigger declaration
|
28
|
+
#
|
29
|
+
# @raise [NotImplementedError] When the method is not overridden
# @return [void]
|
30
|
+
#
|
31
|
+
def add(_method, _options)
|
32
|
+
raise NotImplementedError, "A trigger must implement the #add method"
|
33
|
+
end
|
34
|
+
|
35
|
+
#
|
36
|
+
# Called when the scheduler relying on the trigger is started, does nothing by default.
|
37
|
+
#
|
38
|
+
# @return [void]
|
39
|
+
#
|
40
|
+
def start; end
|
41
|
+
|
42
|
+
#
|
43
|
+
# Called when the scheduler relying on the trigger is stopped, does nothing by default.
|
44
|
+
#
|
45
|
+
# @return [void]
|
46
|
+
#
|
47
|
+
def stop; end
|
48
|
+
end
|
49
|
+
end
|
data/lib/ductr.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "active_job"
|
4
|
+
require "annotable"
|
5
|
+
require "forwardable"
|
6
|
+
require "zeitwerk"
|
7
|
+
|
8
|
+
#
|
9
|
+
# The main Ductr module.
|
10
|
+
#
|
11
|
+
module Ductr
|
12
|
+
class AdapterNotFoundError < StandardError; end
|
13
|
+
class ControlNotFoundError < StandardError; end
|
14
|
+
class InconsistentPaginationError < StandardError; end
|
15
|
+
|
16
|
+
class << self
|
17
|
+
#
|
18
|
+
# Contains all the Ductr configuration, `nil` until #configure is called.
|
19
|
+
#
|
20
|
+
# @return [Configuration, nil] The configuration instance
|
21
|
+
attr_reader :config
|
22
|
+
|
23
|
+
#
|
24
|
+
# The adapter classes registry, all declared adapters are in the registry.
# The registry is created on the first call and reused afterwards.
|
25
|
+
#
|
26
|
+
# @return [Registry] The registry instance
|
27
|
+
#
|
28
|
+
def adapter_registry
|
29
|
+
@adapter_registry ||= Registry.new(:adapter)
|
30
|
+
end
|
31
|
+
|
32
|
+
#
|
33
|
+
# The trigger classes registry, all declared triggers are in the registry.
# The registry is created on the first call and reused afterwards.
|
34
|
+
#
|
35
|
+
# @return [Registry] The registry instance
|
36
|
+
#
|
37
|
+
def trigger_registry
|
38
|
+
@trigger_registry ||= Registry.new(:trigger)
|
39
|
+
end
|
40
|
+
|
41
|
+
#
|
42
|
+
# The Ductr current environment, "development" by default.
|
43
|
+
# You can change it by setting the `DUCTR_ENV` environment variable.
# The value is read once and memoized, later changes of `DUCTR_ENV` are not picked up.
|
44
|
+
#
|
45
|
+
# @return [String] The Ductr environment, always lowercase
|
46
|
+
#
|
47
|
+
def env
|
48
|
+
@env ||= ENV.fetch("DUCTR_ENV", "development").downcase
|
49
|
+
end
|
50
|
+
|
51
|
+
#
|
52
|
+
# Determines if Ductr is in development mode.
|
53
|
+
#
|
54
|
+
# @return [Boolean] True if DUCTR_ENV is set to "development" (case-insensitive) or is not set
|
55
|
+
#
|
56
|
+
def development?
|
57
|
+
env == "development"
|
58
|
+
end
|
59
|
+
|
60
|
+
#
|
61
|
+
# Determines if Ductr is in production mode.
|
62
|
+
#
|
63
|
+
# @return [Boolean] True if DUCTR_ENV is set to "production" (case-insensitive)
|
64
|
+
#
|
65
|
+
def production?
|
66
|
+
env == "production"
|
67
|
+
end
|
68
|
+
|
69
|
+
#
|
70
|
+
# The configure block allows you to configure Ductr internals.
|
71
|
+
# You must call this method once and only once to use the framework.
# The active job configuration is applied once the block has returned.
|
72
|
+
#
|
73
|
+
# @raise [ScriptError] Raises when called more than one time
|
74
|
+
# @return [void]
|
75
|
+
# @yield [config] Configure the framework
|
76
|
+
# @yieldparam [Configuration] config The configuration instance
|
77
|
+
#
|
78
|
+
def configure
|
79
|
+
raise ScriptError, "Ductr::configure must be called only once" if @config
|
80
|
+
|
81
|
+
# The configuration is bound to the current environment.
@config = Configuration.new(env)
|
82
|
+
yield(@config)
|
83
|
+
@config.apply_active_job_config
|
84
|
+
end
|
85
|
+
|
86
|
+
#
|
87
|
+
# The Ductr main logger instance, built with `config.logging.new` on the first call.
|
88
|
+
#
|
89
|
+
# @return [Log::Logger] The logger instance
|
90
|
+
#
|
91
|
+
def logger
|
92
|
+
@logger ||= config.logging.new
|
93
|
+
end
|
94
|
+
|
95
|
+
#
|
96
|
+
# The Ductr store, used to share information across different instances.
# When `config.store_adapter` is a class, it is instantiated with `config.store_parameters`,
# otherwise the adapter is resolved through `ActiveSupport::Cache.lookup_store`.
|
97
|
+
#
|
98
|
+
# @return [ActiveSupport::Cache::Store] The store instance
|
99
|
+
#
|
100
|
+
def store
|
101
|
+
@store ||= \
|
102
|
+
if config.store_adapter.is_a? Class
|
103
|
+
config.store_adapter.new(*config.store_parameters)
|
104
|
+
else
|
105
|
+
ActiveSupport::Cache.lookup_store(config.store_adapter, *config.store_parameters)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
#
|
112
|
+
# Framework auto loading: sets up the Zeitwerk gem loader with the acronym inflections,
# the collapsed directories and the ignored CLI templates, then eager loads the
# Ductr::RufusTrigger namespace.
|
113
|
+
#
|
114
|
+
Zeitwerk::Loader.for_gem.tap do |loader|
|
115
|
+
# File names mapped to constants that are not plain camel case.
loader.inflector.inflect "cli" => "CLI"
|
116
|
+
loader.inflector.inflect "etl" => "ETL"
|
117
|
+
loader.inflector.inflect "etl_job" => "ETLJob"
|
118
|
+
loader.inflector.inflect "job_etl_runner" => "JobETLRunner"
|
119
|
+
|
120
|
+
# Collapsed directories do not define a namespace of their own (see Zeitwerk's `collapse`).
loader.collapse "#{__dir__}/ductr/etl/controls"
|
121
|
+
loader.collapse "#{__dir__}/ductr/log/outputs"
|
122
|
+
loader.collapse "#{__dir__}/ductr/log/formatters"
|
123
|
+
|
124
|
+
# The project templates are excluded from auto loading.
loader.ignore "#{__dir__}/ductr/cli/templates"
|
125
|
+
|
126
|
+
loader.setup
|
127
|
+
# NOTE(review): presumably eager loaded so that the trigger gets declared at boot time, confirm.
loader.eager_load_namespace(Ductr::RufusTrigger)
|
128
|
+
end
|
129
|
+
|
130
|
+
#
|
131
|
+
# Application auto loading, only set up when an `app` directory exists
# in the current working directory.
|
132
|
+
#
|
133
|
+
# `pwd` is assigned inside the interpolation and reused in the block below.
if File.directory?("#{pwd = Dir.pwd}/app")
|
134
|
+
Zeitwerk::Loader.new.tap do |loader|
|
135
|
+
loader.push_dir "#{pwd}/app"
|
136
|
+
|
137
|
+
# Collapsed directories do not define a namespace of their own (see Zeitwerk's `collapse`).
loader.collapse "#{pwd}/app/jobs"
|
138
|
+
loader.collapse "#{pwd}/app/pipelines"
|
139
|
+
loader.collapse "#{pwd}/app/schedulers"
|
140
|
+
|
141
|
+
loader.setup
|
142
|
+
end
|
143
|
+
end
|