ductr 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +14 -0
  4. data/.vscode/settings.json +18 -0
  5. data/COPYING +674 -0
  6. data/COPYING.LESSER +165 -0
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +121 -0
  9. data/README.md +37 -0
  10. data/Rakefile +37 -0
  11. data/bin/console +15 -0
  12. data/bin/setup +8 -0
  13. data/ductr.gemspec +50 -0
  14. data/exe/ductr +24 -0
  15. data/lib/ductr/adapter.rb +94 -0
  16. data/lib/ductr/cli/default.rb +25 -0
  17. data/lib/ductr/cli/main.rb +60 -0
  18. data/lib/ductr/cli/new_project_generator.rb +72 -0
  19. data/lib/ductr/cli/templates/project/bin_ductr.rb +7 -0
  20. data/lib/ductr/cli/templates/project/config_app.rb +5 -0
  21. data/lib/ductr/cli/templates/project/config_development.yml +8 -0
  22. data/lib/ductr/cli/templates/project/config_environment_development.rb +18 -0
  23. data/lib/ductr/cli/templates/project/gemfile.rb +6 -0
  24. data/lib/ductr/cli/templates/project/rubocop.yml +14 -0
  25. data/lib/ductr/cli/templates/project/tool-versions +1 -0
  26. data/lib/ductr/configuration.rb +145 -0
  27. data/lib/ductr/etl/controls/buffered_destination.rb +65 -0
  28. data/lib/ductr/etl/controls/buffered_transform.rb +76 -0
  29. data/lib/ductr/etl/controls/control.rb +46 -0
  30. data/lib/ductr/etl/controls/destination.rb +28 -0
  31. data/lib/ductr/etl/controls/paginated_source.rb +47 -0
  32. data/lib/ductr/etl/controls/source.rb +21 -0
  33. data/lib/ductr/etl/controls/transform.rb +28 -0
  34. data/lib/ductr/etl/fiber_control.rb +136 -0
  35. data/lib/ductr/etl/fiber_runner.rb +68 -0
  36. data/lib/ductr/etl/kiba_runner.rb +26 -0
  37. data/lib/ductr/etl/parser.rb +115 -0
  38. data/lib/ductr/etl/runner.rb +37 -0
  39. data/lib/ductr/etl_job.rb +161 -0
  40. data/lib/ductr/job.rb +58 -0
  41. data/lib/ductr/job_etl_runner.rb +37 -0
  42. data/lib/ductr/job_status.rb +56 -0
  43. data/lib/ductr/kiba_job.rb +130 -0
  44. data/lib/ductr/log/formatters/color_formatter.rb +48 -0
  45. data/lib/ductr/log/logger.rb +169 -0
  46. data/lib/ductr/log/outputs/file_output.rb +30 -0
  47. data/lib/ductr/log/outputs/standard_output.rb +39 -0
  48. data/lib/ductr/pipeline.rb +133 -0
  49. data/lib/ductr/pipeline_runner.rb +95 -0
  50. data/lib/ductr/pipeline_step.rb +92 -0
  51. data/lib/ductr/registry.rb +55 -0
  52. data/lib/ductr/rufus_trigger.rb +106 -0
  53. data/lib/ductr/scheduler.rb +117 -0
  54. data/lib/ductr/store/job_serializer.rb +59 -0
  55. data/lib/ductr/store/job_store.rb +59 -0
  56. data/lib/ductr/store/pipeline_serializer.rb +106 -0
  57. data/lib/ductr/store/pipeline_store.rb +48 -0
  58. data/lib/ductr/store.rb +81 -0
  59. data/lib/ductr/trigger.rb +49 -0
  60. data/lib/ductr/version.rb +6 -0
  61. data/lib/ductr.rb +143 -0
  62. data/sig/ductr.rbs +1107 -0
  63. metadata +292 -0
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ module Ductr
6
+ #
7
+ # Store interaction helpers for internal usage.
8
+ #
9
+ module Store
10
+ extend JobStore
11
+ extend PipelineStore
12
+
13
+ class << self
14
+ # @return [Integer] The cache expiration of a job's status in seconds, defaults to one day
15
+ EXPIRATION_INTERVAL = 86_400
16
+
17
+ #
18
+ # Get all known job instances for the given registry_key and job's key_prefix.
19
+ #
20
+ # @param [String] registry_key The registry key in which job keys will be read
21
+ # @param [String] key_prefix The cache key prefix for the registry's job keys
22
+ #
23
+ # @return [Array<Job>] The job instances
24
+ #
25
+ def all(registry_key, key_prefix)
26
+ job_ids = Ductr.store.read(registry_key)
27
+ return [] unless job_ids
28
+
29
+ keys = job_ids.map { |job_id| "#{key_prefix}:#{job_id}" }
30
+ Ductr.store.read_multi(*keys).values
31
+ end
32
+
33
+ #
34
+ # Read all given jobs in the given key_prefix.
35
+ #
36
+ # @param [String] key_prefix The cache key prefix for the job_id
37
+ # @param [Array<Job>] *jobs The jobs to read
38
+ #
39
+ # @return [Array<Job>] The read jobs
40
+ #
41
+ def read(key_prefix, *jobs)
42
+ keys = jobs.map { |job| "#{key_prefix}:#{job.job_id}" }
43
+ Ductr.store.read_multi(*keys).values
44
+ end
45
+
46
+ #
47
+ # Update the given job in the store, under the cache key built from the given key_prefix and the job's id.
48
+ #
49
+ # @param [Job] job The job to update in the store
50
+ #
51
+ # @return [void]
52
+ #
53
+ def write(key_prefix, job)
54
+ Ductr.store.write("#{key_prefix}:#{job.job_id}", job, expires_in: EXPIRATION_INTERVAL)
55
+ end
56
+
57
+ #
58
+ # Add the given job's id to the job registry stored under the given registry_key. This method is NOT thread-safe.
59
+ #
60
+ # @param [Job] job The job to register
61
+ #
62
+ # @return [void]
63
+ #
64
+ def register(registry_key, job)
65
+ job_ids = Ductr.store.read(registry_key) || Set.new
66
+
67
+ job_ids.add(job.job_id)
68
+ Ductr.store.write(registry_key, job_ids, expires_in: EXPIRATION_INTERVAL)
69
+ end
70
+
71
+ #
72
+ # Determines whether all tracked jobs and pipelines are stopped, i.e. have either a completed or failed status.
73
+ #
74
+ # @return [Boolean] `true` when all jobs and pipelines are done
75
+ #
76
+ def all_done?
77
+ [*all_jobs, *all_pipelines].all?(&:stopped?)
78
+ end
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ #
5
+ # The base class for any trigger; it can be initialized by passing it its adapter name, if any.
6
+ # A trigger must implement the #add method which is called for each trigger declaration.
7
+ # Depending on what your trigger does, you may have to implement the #start and #stop methods.
8
+ # #start is called when the scheduler relying on the trigger is started. #stop does the opposite:
9
+ # it is called when the scheduler relying on the trigger is stopped.
10
+ #
11
+ class Trigger
12
+ attr_reader :adapter
13
+
14
+ #
15
+ # Creates a new trigger instance, called by the scheduler.
16
+ #
17
+ # @param [Adapter, nil] adapter The trigger's adapter, if any
18
+ #
19
+ def initialize(adapter = nil)
20
+ @adapter = adapter
21
+ end
22
+
23
+ #
24
+ # Adds a new trigger, called by a scheduler when a trigger is declared.
25
+ #
26
+ # @param [Method] _method The scheduler method to be called by the trigger
27
+ # @param [Hash<Symbol: Object>] _options The options of the trigger declaration
28
+ #
29
+ # @return [void]
30
+ #
31
+ def add(_method, _options)
32
+ raise NotImplementedError, "A trigger must implement the #add method"
33
+ end
34
+
35
+ #
36
+ # Called when the scheduler relying on the trigger is started.
37
+ #
38
+ # @return [void]
39
+ #
40
+ def start; end
41
+
42
+ #
43
+ # Called when the scheduler relying on the trigger is stopped.
44
+ #
45
+ # @return [void]
46
+ #
47
+ def stop; end
48
+ end
49
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ # @return [String] The Ductr version number
5
+ VERSION = "0.1.0"
6
+ end
data/lib/ductr.rb ADDED
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_job"
4
+ require "annotable"
5
+ require "forwardable"
6
+ require "zeitwerk"
7
+
8
+ #
9
+ # The main Ductr module.
10
+ #
11
+ module Ductr
12
+ class AdapterNotFoundError < StandardError; end
13
+ class ControlNotFoundError < StandardError; end
14
+ class InconsistentPaginationError < StandardError; end
15
+
16
+ class << self
17
+ #
18
+ # Contains all the Ductr configuration.
19
+ #
20
+ # @return [Configuration] The configuration instance
21
+ attr_reader :config
22
+
23
+ #
24
+ # The adapter classes registry, all declared adapters are in the registry.
25
+ #
26
+ # @return [Registry] The registry instance
27
+ #
28
+ def adapter_registry
29
+ @adapter_registry ||= Registry.new(:adapter)
30
+ end
31
+
32
+ #
33
+ # The trigger classes registry, all declared triggers are in the registry.
34
+ #
35
+ # @return [Registry] The registry instance
36
+ #
37
+ def trigger_registry
38
+ @trigger_registry ||= Registry.new(:trigger)
39
+ end
40
+
41
+ #
42
+ # The Ductr current environment, "development" by default.
43
+ # You can change it by setting the `DUCTR_ENV` environment variable.
44
+ #
45
+ # @return [String] The Ductr environment
46
+ #
47
+ def env
48
+ @env ||= ENV.fetch("DUCTR_ENV", "development").downcase
49
+ end
50
+
51
+ #
52
+ # Determines if Ductr is in development mode.
53
+ #
54
+ # @return [Boolean] True if DUCTR_ENV is set to "development" or is not set at all
55
+ #
56
+ def development?
57
+ env == "development"
58
+ end
59
+
60
+ #
61
+ # Determines if Ductr is in production mode.
62
+ #
63
+ # @return [Boolean] True if DUCTR_ENV is set to "production"
64
+ #
65
+ def production?
66
+ env == "production"
67
+ end
68
+
69
+ #
70
+ # The configure block allows you to configure Ductr internals.
71
+ # You must call this method once and only once to use the framework.
72
+ #
73
+ # @raise [ScriptError] Raised when called more than once
74
+ # @return [void]
75
+ # @yield [config] Configure the framework
76
+ # @yieldparam [Configuration] config The configuration instance
77
+ #
78
+ def configure
79
+ raise ScriptError, "Ductr::configure must be called only once" if @config
80
+
81
+ @config = Configuration.new(env)
82
+ yield(@config)
83
+ @config.apply_active_job_config
84
+ end
85
+
86
+ #
87
+ # The Ductr main logger instance.
88
+ #
89
+ # @return [Log::Logger] The logger instance
90
+ #
91
+ def logger
92
+ @logger ||= config.logging.new
93
+ end
94
+
95
+ #
96
+ # The Ductr store, used to share information across different instances.
97
+ #
98
+ # @return [ActiveSupport::Cache::Store] The store instance
99
+ #
100
+ def store
101
+ @store ||= \
102
+ if config.store_adapter.is_a? Class
103
+ config.store_adapter.new(*config.store_parameters)
104
+ else
105
+ ActiveSupport::Cache.lookup_store(config.store_adapter, *config.store_parameters)
106
+ end
107
+ end
108
+ end
109
+ end
110
+
111
+ #
112
+ # Framework auto loading
113
+ #
114
+ Zeitwerk::Loader.for_gem.tap do |loader|
115
+ loader.inflector.inflect "cli" => "CLI"
116
+ loader.inflector.inflect "etl" => "ETL"
117
+ loader.inflector.inflect "etl_job" => "ETLJob"
118
+ loader.inflector.inflect "job_etl_runner" => "JobETLRunner"
119
+
120
+ loader.collapse "#{__dir__}/ductr/etl/controls"
121
+ loader.collapse "#{__dir__}/ductr/log/outputs"
122
+ loader.collapse "#{__dir__}/ductr/log/formatters"
123
+
124
+ loader.ignore "#{__dir__}/ductr/cli/templates"
125
+
126
+ loader.setup
127
+ loader.eager_load_namespace(Ductr::RufusTrigger)
128
+ end
129
+
130
+ #
131
+ # Application auto loading
132
+ #
133
+ if File.directory?("#{pwd = Dir.pwd}/app")
134
+ Zeitwerk::Loader.new.tap do |loader|
135
+ loader.push_dir "#{pwd}/app"
136
+
137
+ loader.collapse "#{pwd}/app/jobs"
138
+ loader.collapse "#{pwd}/app/pipelines"
139
+ loader.collapse "#{pwd}/app/schedulers"
140
+
141
+ loader.setup
142
+ end
143
+ end