ductr 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +14 -0
  4. data/.vscode/settings.json +18 -0
  5. data/COPYING +674 -0
  6. data/COPYING.LESSER +165 -0
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +121 -0
  9. data/README.md +37 -0
  10. data/Rakefile +37 -0
  11. data/bin/console +15 -0
  12. data/bin/setup +8 -0
  13. data/ductr.gemspec +50 -0
  14. data/exe/ductr +24 -0
  15. data/lib/ductr/adapter.rb +94 -0
  16. data/lib/ductr/cli/default.rb +25 -0
  17. data/lib/ductr/cli/main.rb +60 -0
  18. data/lib/ductr/cli/new_project_generator.rb +72 -0
  19. data/lib/ductr/cli/templates/project/bin_ductr.rb +7 -0
  20. data/lib/ductr/cli/templates/project/config_app.rb +5 -0
  21. data/lib/ductr/cli/templates/project/config_development.yml +8 -0
  22. data/lib/ductr/cli/templates/project/config_environment_development.rb +18 -0
  23. data/lib/ductr/cli/templates/project/gemfile.rb +6 -0
  24. data/lib/ductr/cli/templates/project/rubocop.yml +14 -0
  25. data/lib/ductr/cli/templates/project/tool-versions +1 -0
  26. data/lib/ductr/configuration.rb +145 -0
  27. data/lib/ductr/etl/controls/buffered_destination.rb +65 -0
  28. data/lib/ductr/etl/controls/buffered_transform.rb +76 -0
  29. data/lib/ductr/etl/controls/control.rb +46 -0
  30. data/lib/ductr/etl/controls/destination.rb +28 -0
  31. data/lib/ductr/etl/controls/paginated_source.rb +47 -0
  32. data/lib/ductr/etl/controls/source.rb +21 -0
  33. data/lib/ductr/etl/controls/transform.rb +28 -0
  34. data/lib/ductr/etl/fiber_control.rb +136 -0
  35. data/lib/ductr/etl/fiber_runner.rb +68 -0
  36. data/lib/ductr/etl/kiba_runner.rb +26 -0
  37. data/lib/ductr/etl/parser.rb +115 -0
  38. data/lib/ductr/etl/runner.rb +37 -0
  39. data/lib/ductr/etl_job.rb +161 -0
  40. data/lib/ductr/job.rb +58 -0
  41. data/lib/ductr/job_etl_runner.rb +37 -0
  42. data/lib/ductr/job_status.rb +56 -0
  43. data/lib/ductr/kiba_job.rb +130 -0
  44. data/lib/ductr/log/formatters/color_formatter.rb +48 -0
  45. data/lib/ductr/log/logger.rb +169 -0
  46. data/lib/ductr/log/outputs/file_output.rb +30 -0
  47. data/lib/ductr/log/outputs/standard_output.rb +39 -0
  48. data/lib/ductr/pipeline.rb +133 -0
  49. data/lib/ductr/pipeline_runner.rb +95 -0
  50. data/lib/ductr/pipeline_step.rb +92 -0
  51. data/lib/ductr/registry.rb +55 -0
  52. data/lib/ductr/rufus_trigger.rb +106 -0
  53. data/lib/ductr/scheduler.rb +117 -0
  54. data/lib/ductr/store/job_serializer.rb +59 -0
  55. data/lib/ductr/store/job_store.rb +59 -0
  56. data/lib/ductr/store/pipeline_serializer.rb +106 -0
  57. data/lib/ductr/store/pipeline_store.rb +48 -0
  58. data/lib/ductr/store.rb +81 -0
  59. data/lib/ductr/trigger.rb +49 -0
  60. data/lib/ductr/version.rb +6 -0
  61. data/lib/ductr.rb +143 -0
  62. data/sig/ductr.rbs +1107 -0
  63. metadata +292 -0
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
module Ductr
  #
  # Internal helpers to read and write job data through `Ductr.store`.
  #
  module Store
    extend JobStore
    extend PipelineStore

    class << self
      # @return [Integer] Value passed as `expires_in` on every write, 86_400 seconds (one day)
      EXPIRATION_INTERVAL = 86_400

      #
      # Fetches every entry listed in the registry stored under `registry_key`.
      #
      # @param [String] registry_key The store key holding the collection of job ids
      # @param [String] key_prefix The prefix joined to each job id with a colon
      #
      # @return [Array<Job>] The values found in the store, empty when the registry is absent
      #
      def all(registry_key, key_prefix)
        known_ids = Ductr.store.read(registry_key)

        if known_ids
          cache_keys = known_ids.map { |id| "#{key_prefix}:#{id}" }
          Ductr.store.read_multi(*cache_keys).values
        else
          []
        end
      end

      #
      # Fetches the stored counterparts of the given jobs.
      #
      # @param [String] key_prefix The prefix joined to each job's `job_id` with a colon
      # @param [Array<Job>] *jobs The jobs to look up
      #
      # @return [Array<Job>] The values found in the store
      #
      def read(key_prefix, *jobs)
        cache_keys = jobs.map { |entry| "#{key_prefix}:#{entry.job_id}" }
        found = Ductr.store.read_multi(*cache_keys)
        found.values
      end

      #
      # Stores the given job under `"<key_prefix>:<job_id>"`.
      #
      # @param [String] key_prefix The prefix joined to the job's `job_id` with a colon
      # @param [Job] job The job to write in the store
      #
      # @return [void]
      #
      def write(key_prefix, job)
        cache_key = "#{key_prefix}:#{job.job_id}"
        Ductr.store.write(cache_key, job, expires_in: EXPIRATION_INTERVAL)
      end

      #
      # Appends the job's id to the registry stored under `registry_key`,
      # starting from an empty Set when nothing is stored yet.
      # This read-modify-write sequence is NOT thread-safe.
      #
      # @param [String] registry_key The store key holding the collection of job ids
      # @param [Job] job The job to register
      #
      # @return [void]
      #
      def register(registry_key, job)
        registered_ids = Ductr.store.read(registry_key) || Set.new
        registered_ids.add(job.job_id)

        Ductr.store.write(registry_key, registered_ids, expires_in: EXPIRATION_INTERVAL)
      end

      #
      # Tells whether every tracked job and pipeline reports itself as stopped.
      #
      # @return [Boolean] `true` when `stopped?` holds for all of them
      #
      def all_done?
        tracked = [*all_jobs, *all_pipelines]
        tracked.all? { |item| item.stopped? }
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module Ductr
  #
  # Base class for every trigger. An adapter may optionally be given at initialization.
  # Subclasses must override #add, which is invoked once per trigger declaration.
  # Depending on what the trigger does, #start and #stop may be overridden as well:
  # #start is called when the scheduler relying on the trigger is started,
  # #stop when that scheduler is stopped. Both do nothing by default.
  #
  class Trigger
    # @return [Adapter, nil] The adapter given at initialization, if any
    attr_reader :adapter

    #
    # Builds a trigger instance, called by the scheduler.
    #
    # @param [Adapter, nil] adapter The trigger's adapter, if any
    #
    def initialize(adapter = nil)
      @adapter = adapter
    end

    #
    # Registers a new trigger, called by a scheduler when a trigger is declared.
    # The base implementation always raises; subclasses provide the behavior.
    #
    # @param [Method] _method The scheduler method to be called by the trigger
    # @param [Hash<Symbol: Object>] _options The options of the trigger declaration
    #
    # @raise [NotImplementedError] Always, unless overridden
    # @return [void]
    #
    def add(_method, _options)
      raise NotImplementedError, "A trigger must implement the #add method"
    end

    #
    # Hook called when the scheduler relying on the trigger is started.
    #
    # @return [void]
    #
    def start
    end

    #
    # Hook called when the scheduler relying on the trigger is stopped.
    #
    # @return [void]
    #
    def stop
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module Ductr
  # @return [String] The version number of the ductr gem
  VERSION = "0.1.0"
end
data/lib/ductr.rb ADDED
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_job"
4
+ require "annotable"
5
+ require "forwardable"
6
+ require "zeitwerk"
7
+
8
+ #
9
+ # The main Ductr module.
10
+ #
11
module Ductr
  # Framework-specific error types, all plain StandardError subclasses.
  class AdapterNotFoundError < StandardError
  end

  class ControlNotFoundError < StandardError
  end

  class InconsistentPaginationError < StandardError
  end

  class << self
    #
    # The Ductr configuration, `nil` until #configure has been called.
    #
    # @return [Configuration] The configuration instance
    attr_reader :config

    #
    # Registry of adapter classes, created on first access.
    #
    # @return [Registry] The registry instance
    #
    def adapter_registry
      return @adapter_registry if @adapter_registry

      @adapter_registry = Registry.new(:adapter)
    end

    #
    # Registry of trigger classes, created on first access.
    #
    # @return [Registry] The registry instance
    #
    def trigger_registry
      return @trigger_registry if @trigger_registry

      @trigger_registry = Registry.new(:trigger)
    end

    #
    # The current Ductr environment, read once from the `DUCTR_ENV` environment
    # variable and lower-cased. Falls back to "development" when the variable is unset.
    #
    # @return [String] The Ductr environment
    #
    def env
      @env ||= ENV.fetch("DUCTR_ENV") { "development" }.downcase
    end

    #
    # Tells whether Ductr runs in development mode.
    #
    # @return [Boolean] True if DUCTR_ENV is "development" or unset
    #
    def development?
      env == "development"
    end

    #
    # Tells whether Ductr runs in production mode.
    #
    # @return [Boolean] True if DUCTR_ENV is "production"
    #
    def production?
      env == "production"
    end

    #
    # Configures Ductr internals through the given block.
    # This method must be called once, and only once, to use the framework.
    #
    # @raise [ScriptError] Raises when a configuration already exists
    # @return [void]
    # @yield [config] Configure the framework
    # @yieldparam [Configuration] config The configuration instance
    #
    def configure
      if @config
        raise ScriptError, "Ductr::configure must be called only once"
      end

      @config = Configuration.new(env)
      yield @config
      @config.apply_active_job_config
    end

    #
    # The main Ductr logger, instantiated from `config.logging` on first access.
    #
    # @return [Log::Logger] The logger instance
    #
    def logger
      return @logger if @logger

      @logger = config.logging.new
    end

    #
    # The Ductr store, used to share information across different instances.
    # Built on first access from `config.store_adapter` and `config.store_parameters`.
    #
    # @return [ActiveSupport::Cache::Store] The store instance
    #
    def store
      return @store if @store

      @store =
        if config.store_adapter.is_a?(Class)
          # A class is instantiated directly with the configured parameters.
          config.store_adapter.new(*config.store_parameters)
        else
          # Anything else is handed to ActiveSupport's store lookup.
          ActiveSupport::Cache.lookup_store(config.store_adapter, *config.store_parameters)
        end
    end
  end
end
110
+
111
+ #
112
+ # Framework auto loading
113
+ #
114
Zeitwerk::Loader.for_gem.tap do |loader|
  # File names whose constants use acronym casing.
  loader.inflector.inflect(
    "cli" => "CLI",
    "etl" => "ETL",
    "etl_job" => "ETLJob",
    "job_etl_runner" => "JobETLRunner"
  )

  # Collapsed directories: their files are loaded without adding a namespace level.
  %w[etl/controls log/outputs log/formatters].each do |subdir|
    loader.collapse "#{__dir__}/ductr/#{subdir}"
  end

  # Project templates are not meant to be autoloaded.
  loader.ignore "#{__dir__}/ductr/cli/templates"

  loader.setup
  loader.eager_load_namespace(Ductr::RufusTrigger)
end
129
+
130
+ #
131
+ # Application auto loading
132
+ #
133
# Autoloading is only set up when the working directory contains an "app" folder.
app_root = "#{Dir.pwd}/app"

if File.directory?(app_root)
  Zeitwerk::Loader.new.tap do |app_loader|
    app_loader.push_dir app_root

    # Collapsed directories: their files are loaded without adding a namespace level.
    %w[jobs pipelines schedulers].each do |subdir|
      app_loader.collapse "#{app_root}/#{subdir}"
    end

    app_loader.setup
  end
end