ductr 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +14 -0
  4. data/.vscode/settings.json +18 -0
  5. data/COPYING +674 -0
  6. data/COPYING.LESSER +165 -0
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +121 -0
  9. data/README.md +37 -0
  10. data/Rakefile +37 -0
  11. data/bin/console +15 -0
  12. data/bin/setup +8 -0
  13. data/ductr.gemspec +50 -0
  14. data/exe/ductr +24 -0
  15. data/lib/ductr/adapter.rb +94 -0
  16. data/lib/ductr/cli/default.rb +25 -0
  17. data/lib/ductr/cli/main.rb +60 -0
  18. data/lib/ductr/cli/new_project_generator.rb +72 -0
  19. data/lib/ductr/cli/templates/project/bin_ductr.rb +7 -0
  20. data/lib/ductr/cli/templates/project/config_app.rb +5 -0
  21. data/lib/ductr/cli/templates/project/config_development.yml +8 -0
  22. data/lib/ductr/cli/templates/project/config_environment_development.rb +18 -0
  23. data/lib/ductr/cli/templates/project/gemfile.rb +6 -0
  24. data/lib/ductr/cli/templates/project/rubocop.yml +14 -0
  25. data/lib/ductr/cli/templates/project/tool-versions +1 -0
  26. data/lib/ductr/configuration.rb +145 -0
  27. data/lib/ductr/etl/controls/buffered_destination.rb +65 -0
  28. data/lib/ductr/etl/controls/buffered_transform.rb +76 -0
  29. data/lib/ductr/etl/controls/control.rb +46 -0
  30. data/lib/ductr/etl/controls/destination.rb +28 -0
  31. data/lib/ductr/etl/controls/paginated_source.rb +47 -0
  32. data/lib/ductr/etl/controls/source.rb +21 -0
  33. data/lib/ductr/etl/controls/transform.rb +28 -0
  34. data/lib/ductr/etl/fiber_control.rb +136 -0
  35. data/lib/ductr/etl/fiber_runner.rb +68 -0
  36. data/lib/ductr/etl/kiba_runner.rb +26 -0
  37. data/lib/ductr/etl/parser.rb +115 -0
  38. data/lib/ductr/etl/runner.rb +37 -0
  39. data/lib/ductr/etl_job.rb +161 -0
  40. data/lib/ductr/job.rb +58 -0
  41. data/lib/ductr/job_etl_runner.rb +37 -0
  42. data/lib/ductr/job_status.rb +56 -0
  43. data/lib/ductr/kiba_job.rb +130 -0
  44. data/lib/ductr/log/formatters/color_formatter.rb +48 -0
  45. data/lib/ductr/log/logger.rb +169 -0
  46. data/lib/ductr/log/outputs/file_output.rb +30 -0
  47. data/lib/ductr/log/outputs/standard_output.rb +39 -0
  48. data/lib/ductr/pipeline.rb +133 -0
  49. data/lib/ductr/pipeline_runner.rb +95 -0
  50. data/lib/ductr/pipeline_step.rb +92 -0
  51. data/lib/ductr/registry.rb +55 -0
  52. data/lib/ductr/rufus_trigger.rb +106 -0
  53. data/lib/ductr/scheduler.rb +117 -0
  54. data/lib/ductr/store/job_serializer.rb +59 -0
  55. data/lib/ductr/store/job_store.rb +59 -0
  56. data/lib/ductr/store/pipeline_serializer.rb +106 -0
  57. data/lib/ductr/store/pipeline_store.rb +48 -0
  58. data/lib/ductr/store.rb +81 -0
  59. data/lib/ductr/trigger.rb +49 -0
  60. data/lib/ductr/version.rb +6 -0
  61. data/lib/ductr.rb +143 -0
  62. data/sig/ductr.rbs +1107 -0
  63. metadata +292 -0
data/sig/ductr.rbs ADDED
@@ -0,0 +1,1107 @@
+ #
+ # The main Ductr module.
+ module Ductr
+   VERSION: String
+
+   # The adapter classes registry, all declared adapters are in the registry.
+   #
+   # _@return_ — The registry instance
+   def self.adapter_registry: () -> Registry
+
+   # The trigger classes registry, all declared triggers are in the registry.
+   #
+   # _@return_ — The registry instance
+   def self.trigger_registry: () -> Registry
+
+   # The Ductr current environment, "development" by default.
+   # You can change it by setting the `DUCTR_ENV` environment variable.
+   #
+   # _@return_ — The Ductr environment
+   def self.env: () -> String
+
+   # Determines if Ductr is in development mode.
+   #
+   # _@return_ — True if DUCTR_ENV is set to "development" or nil
+   def self.development?: () -> bool
+
+   # Determines if Ductr is in production mode.
+   #
+   # _@return_ — True if DUCTR_ENV is set to "production"
+   def self.production?: () -> bool
+
+   # The configure block allows you to configure Ductr internals.
+   # You must call this method once, and only once, to use the framework.
+   def self.configure: () ?{ (Configuration config) -> void } -> void
+
+   # The Ductr main logger instance.
+   #
+   # _@return_ — The logger instance
+   def self.logger: () -> Log::Logger
+
+   # sord warn - ActiveSupport::Cache::Store wasn't able to be resolved to a constant in this project
+   # The Ductr store, used to share information across different instances.
+   #
+   # _@return_ — The store instance
+   def self.store: () -> ActiveSupport::Cache::Store
+
+   # Contains all the Ductr configuration.
+   #
+   # _@return_ — The configuration instance
+   def self.config: () -> Configuration
+
+   class AdapterNotFoundError < StandardError
+   end
+
+   class ControlNotFoundError < StandardError
+   end
+
+   class InconsistentPaginationError < StandardError
+   end
+
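For orientation, a minimal boot sketch implied by the signatures above; the `Configuration` accessors live in lib/ductr/configuration.rb and are not shown in this sig file, so the block body is left as a placeholder:

```ruby
require "ductr"

# Must be called once, and only once, before using the framework.
Ductr.configure do |config|
  # config is the Ductr::Configuration instance; its accessors are
  # defined in lib/ductr/configuration.rb (not shown here).
end

Ductr.env          # => "development" unless DUCTR_ENV says otherwise
Ductr.development? # => true when DUCTR_ENV is "development" or unset
Ductr.logger       # => the Ductr::Log::Logger instance
```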
+   #
+   # The base class for any job; you can use it directly if you don't need an ETL job.
+   class Job < ActiveJob::Base
+     include Ductr::JobStatus
+     extend Annotable
+     extend Forwardable
+
+     # sord omit - no YARD type given for "*_", using untyped
+     # The active job's perform method. DO NOT override it, implement the #run method instead.
+     def perform: (*untyped _) -> void
+
+     # The configured adapter instances.
+     #
+     # _@param_ `name` — The adapter name
+     #
+     # _@return_ — The adapter corresponding to the given name
+     def adapter: (Symbol name) -> Adapter
+
+     # The job's logger instance.
+     #
+     # _@return_ — The logger instance
+     def logger: () -> Ductr::Log::Logger
+
+     # The entry point of jobs.
+     def run: () -> void
+
+     # Writes the job's status into Ductr's store.
+     #
+     # _@param_ `status` — The status of the job
+     def status=: (Symbol status) -> void
+
+     # Determines whether the job has a `completed` or `failed` status.
+     #
+     # _@return_ — True when the status is `completed` or `failed`
+     def stopped?: () -> bool
+
+     # _@return_ — The error that occurred, if any
+     attr_reader error: Exception
+
+     # _@return_ — The job's status, one of `:queued`, `:working`, `:completed` and `:failed`
+     attr_reader status: Symbol
+   end
+
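A minimal job sketch based on the signatures above; the `:my_db` adapter name is an assumption standing in for whatever the project configuration declares:

```ruby
class SayHelloJob < Ductr::Job
  # Implement #run, never #perform (see Job#perform above).
  def run
    logger.trace("starting #{job_id}") # Ductr::Log::Logger
    db = adapter(:my_db)               # assumed adapter name
    # ... do the work ...
  end
end

SayHelloJob.perform_later # enqueued like any ActiveJob job
```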
+   #
+   # Store interaction helpers for internal usage.
+   module Store
+     extend Ductr::Store::JobStore
+     extend Ductr::Store::PipelineStore
+     EXPIRATION_INTERVAL: Integer
+
+     # Get all known job instances for the given registry_key and job's key_prefix.
+     #
+     # _@param_ `registry_key` — The registry key in which job keys will be read
+     #
+     # _@param_ `key_prefix` — The cache key prefix for the registry's job keys
+     #
+     # _@return_ — The job instances
+     def self.all: (String registry_key, String key_prefix) -> ::Array[Job]
+
+     # Read all given jobs in the given key_prefix.
+     #
+     # _@param_ `key_prefix` — The cache key prefix for the job_id
+     #
+     # _@param_ `*jobs` — The jobs to read
+     #
+     # _@return_ — The read jobs
+     def self.read: (String key_prefix, *::Array[Job] jobs) -> ::Array[Job]
+
+     # sord omit - no YARD type given for "key_prefix", using untyped
+     # Update the given job in the given key_prefix.
+     #
+     # _@param_ `job` — The job to update in the store
+     def self.write: (untyped key_prefix, Job job) -> void
+
+     # sord omit - no YARD type given for "registry_key", using untyped
+     # Add the given job to the store's job registry. This method is NOT thread-safe.
+     #
+     # _@param_ `job` — The job to register
+     def self.register: (untyped registry_key, Job job) -> void
+
+     # Determines whether all tracked jobs have either a completed or failed status.
+     #
+     # _@return_ — `true` when all jobs are done
+     def self.all_done?: () -> bool
+
+     # Get all known job instances.
+     #
+     # _@return_ — The job instances
+     def self.all_jobs: () -> ::Array[Job]
+
+     # Read all given jobs.
+     #
+     # _@param_ `*jobs` — The jobs to read
+     #
+     # _@return_ — The read jobs
+     def self.read_jobs: (*::Array[Job] jobs) -> ::Array[Job]
+
+     # Update the given job.
+     #
+     # _@param_ `job` — The job to update in the store
+     def self.write_job: (Job job) -> void
+
+     # Add the given job to the store's job registry. This method is NOT thread-safe.
+     #
+     # _@param_ `job` — The job to register
+     def self.register_job: (Job job) -> void
+
+     # Convert the given job into a `SerializedJob` struct.
+     #
+     # _@param_ `job` — The job to serialize
+     #
+     # _@return_ — The job converted into a struct
+     def self.serialize_job: (Job job) -> SerializedJob
+
+     # sord infer - SerializedPipeline was resolved to Ductr::Store::PipelineSerializer::SerializedPipeline
+     # Get all known pipeline instances.
+     #
+     # _@return_ — The pipeline instances
+     def self.all_pipelines: () -> ::Array[Ductr::Store::PipelineSerializer::SerializedPipeline]
+
+     # Update the given pipeline.
+     #
+     # _@param_ `pipeline` — The pipeline to update in the store
+     def self.write_pipeline: (Pipeline pipeline) -> void
+
+     # Add the given pipeline to the store's pipeline registry. This method is NOT thread-safe.
+     #
+     # _@param_ `pipeline` — The pipeline to register
+     def self.register_pipeline: (Pipeline pipeline) -> void
+
+     # Convert the given pipeline and its steps into
+     # `SerializedPipeline` and `SerializedPipelineStep` structs.
+     #
+     # _@param_ `pipeline` — The pipeline to serialize
+     #
+     # _@return_ — The pipeline converted into a struct
+     def self.serialize_pipeline: (Pipeline pipeline) -> SerializedPipeline
+
+     #
+     # Job-level store interactions.
+     module JobStore
+       include Ductr::Store::JobSerializer
+       JOB_KEY_PREFIX: String
+       JOB_REGISTRY_KEY: String
+
+       # Get all known job instances.
+       #
+       # _@return_ — The job instances
+       def all_jobs: () -> ::Array[Job]
+
+       # Read all given jobs.
+       #
+       # _@param_ `*jobs` — The jobs to read
+       #
+       # _@return_ — The read jobs
+       def read_jobs: (*::Array[Job] jobs) -> ::Array[Job]
+
+       # Update the given job.
+       #
+       # _@param_ `job` — The job to update in the store
+       def write_job: (Job job) -> void
+
+       # Add the given job to the store's job registry. This method is NOT thread-safe.
+       #
+       # _@param_ `job` — The job to register
+       def register_job: (Job job) -> void
+
+       # Convert the given job into a `SerializedJob` struct.
+       #
+       # _@param_ `job` — The job to serialize
+       #
+       # _@return_ — The job converted into a struct
+       def serialize_job: (Job job) -> SerializedJob
+     end
+
+     #
+     # Convert jobs into active job serializable structs.
+     module JobSerializer
+       # Convert the given job into a `SerializedJob` struct.
+       #
+       # _@param_ `job` — The job to serialize
+       #
+       # _@return_ — The job converted into a struct
+       def serialize_job: (Job job) -> SerializedJob
+
+       #
+       # @!parse
+       #   #
+       #   # The job representation as a struct.
+       #   #
+       #   # @!attribute [r] job_id
+       #   #   @return [String] The active job's job id
+       #   #
+       #   # @!attribute [r] status
+       #   #   @return [Symbol] The job's status
+       #   #
+       #   # @!attribute [r] error
+       #   #   @return [Exception, nil] The job's error if any
+       #   #
+       #   class SerializedJob < Struct
+       #     #
+       #     # @param [String] job_id Active job's job id
+       #     # @param [Symbol] status Job's status
+       #     # @param [Exception, nil] error Job's error
+       #     #
+       #     def initialize(job_id, status, error)
+       #       @job_id = job_id
+       #       @status = status
+       #       @error = error
+       #     end
+       #   end
+       class SerializedJob < Struct
+         # _@param_ `job_id` — Active job's job id
+         #
+         # _@param_ `status` — Job's status
+         #
+         # _@param_ `error` — Job's error
+         def initialize: (String job_id, Symbol status, Exception? error) -> void
+
+         # Determines whether the job has a `completed` or `failed` status.
+         #
+         # _@return_ — True when the status is `completed` or `failed`
+         def stopped?: () -> bool
+
+         # _@return_ — The active job's job id
+         attr_reader job_id: String
+
+         # _@return_ — The job's status
+         attr_reader status: Symbol
+
+         # _@return_ — The job's error if any
+         attr_reader error: Exception?
+       end
+     end
+
+     #
+     # Pipeline-level store interactions.
+     module PipelineStore
+       include Ductr::Store::PipelineSerializer
+       PIPELINE_KEY_PREFIX: String
+       PIPELINE_REGISTRY_KEY: String
+
+       # sord infer - SerializedPipeline was resolved to Ductr::Store::PipelineSerializer::SerializedPipeline
+       # Get all known pipeline instances.
+       #
+       # _@return_ — The pipeline instances
+       def all_pipelines: () -> ::Array[Ductr::Store::PipelineSerializer::SerializedPipeline]
+
+       # Update the given pipeline.
+       #
+       # _@param_ `pipeline` — The pipeline to update in the store
+       def write_pipeline: (Pipeline pipeline) -> void
+
+       # Add the given pipeline to the store's pipeline registry. This method is NOT thread-safe.
+       #
+       # _@param_ `pipeline` — The pipeline to register
+       def register_pipeline: (Pipeline pipeline) -> void
+
+       # Convert the given pipeline and its steps into
+       # `SerializedPipeline` and `SerializedPipelineStep` structs.
+       #
+       # _@param_ `pipeline` — The pipeline to serialize
+       #
+       # _@return_ — The pipeline converted into a struct
+       def serialize_pipeline: (Pipeline pipeline) -> SerializedPipeline
+
+       # Convert the given job into a `SerializedJob` struct.
+       #
+       # _@param_ `job` — The job to serialize
+       #
+       # _@return_ — The job converted into a struct
+       def serialize_job: (Job job) -> SerializedJob
+     end
+
+     #
+     # Convert pipelines and steps into active job serializable structs.
+     module PipelineSerializer
+       include Ductr::Store::JobSerializer
+
+       # Convert the given pipeline and its steps into
+       # `SerializedPipeline` and `SerializedPipelineStep` structs.
+       #
+       # _@param_ `pipeline` — The pipeline to serialize
+       #
+       # _@return_ — The pipeline converted into a struct
+       def serialize_pipeline: (Pipeline pipeline) -> SerializedPipeline
+
+       # Convert the given job into a `SerializedJob` struct.
+       #
+       # _@param_ `job` — The job to serialize
+       #
+       # _@return_ — The job converted into a struct
+       def serialize_job: (Job job) -> SerializedJob
+
+       #
+       # @!parse
+       #   #
+       #   # The pipeline representation as a struct.
+       #   #
+       #   # @!attribute [r] job_id
+       #   #   @return [String] The active job's job id
+       #   #
+       #   # @!attribute [r] status
+       #   #   @return [Symbol] The pipeline job status
+       #   #
+       #   # @!attribute [r] error
+       #   #   @return [Exception, nil] The pipeline job error if any
+       #   #
+       #   # @!attribute [r] steps
+       #   #   @return [Array<SerializedPipelineStep>] The pipeline steps as struct
+       #   #
+       #   class SerializedPipeline < Struct
+       #     #
+       #     # @param [String] job_id Pipeline job id
+       #     # @param [Symbol] status Pipeline status
+       #     # @param [Exception, nil] error Pipeline error
+       #     # @param [Array<SerializedPipelineStep>] steps Pipeline steps as struct
+       #     #
+       #     def initialize(job_id, status, error, steps)
+       #       @job_id = job_id
+       #       @status = status
+       #       @error = error
+       #       @steps = steps
+       #     end
+       #   end
+       class SerializedPipeline < Struct
+         # _@param_ `job_id` — Pipeline job id
+         #
+         # _@param_ `status` — Pipeline status
+         #
+         # _@param_ `error` — Pipeline error
+         #
+         # _@param_ `steps` — Pipeline steps as struct
+         def initialize: (
+           String job_id,
+           Symbol status,
+           Exception? error,
+           ::Array[SerializedPipelineStep] steps
+         ) -> void
+
+         # Determines whether the pipeline has a `completed` or `failed` status.
+         #
+         # _@return_ — True when the status is `completed` or `failed`
+         def stopped?: () -> bool
+
+         # _@return_ — The active job's job id
+         attr_reader job_id: String
+
+         # _@return_ — The pipeline job status
+         attr_reader status: Symbol
+
+         # _@return_ — The pipeline job error if any
+         attr_reader error: Exception?
+
+         # _@return_ — The pipeline steps as struct
+         attr_reader steps: ::Array[SerializedPipelineStep]
+       end
+
+       #
+       # @!parse
+       #   #
+       #   # The pipeline step representation as a struct.
+       #   #
+       #   # @!attribute [r] jobs
+       #   #   @return [Array<Job>] The step's jobs
+       #   #
+       #   # @!attribute [r] done
+       #   #   @return [Boolean] The step's fiber state
+       #   #
+       #   class SerializedPipelineStep < Struct
+       #     #
+       #     # @param [Array<Job>] jobs The step's jobs
+       #     # @param [Boolean] done The step's fiber state
+       #     #
+       #     def initialize(jobs, done)
+       #       @jobs = jobs
+       #       @done = done
+       #     end
+       #   end
+       class SerializedPipelineStep < Struct
+         # _@param_ `jobs` — The step's jobs
+         #
+         # _@param_ `done` — The step's fiber state
+         def initialize: (::Array[Job] jobs, bool done) -> void
+
+         # Check if the step is done.
+         #
+         # _@return_ — True if the step is done
+         def done?: () -> bool
+
+         # _@return_ — The step's jobs
+         attr_reader jobs: ::Array[Job]
+
+         # _@return_ — The step's fiber state
+         attr_reader done: bool
+       end
+     end
+   end
+
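These helpers are for internal usage, but a sketch of how they compose may help; `job` is assumed to be a running Ductr::Job instance:

```ruby
Ductr::Store.register_job(job) # NOT thread-safe, see above
Ductr::Store.write_job(job)    # persist the job's current status

Ductr::Store.all_jobs.each do |j|
  puts "#{j.job_id}: #{j.status}" # the serialized structs expose both
end

Ductr::Store.all_done? # => true once every job is :completed or :failed
```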
+   #
+   # Base adapter class, your adapter should inherit from this class.
+   class Adapter
+     # All the sources declared for this adapter go here.
+     #
+     # _@return_ — The registry instance
+     def self.source_registry: () -> Registry
+
+     # All the lookups declared for this adapter go here.
+     #
+     # _@return_ — The registry instance
+     def self.lookup_registry: () -> Registry
+
+     # All the destinations declared for this adapter go here.
+     #
+     # _@return_ — The registry instance
+     def self.destination_registry: () -> Registry
+
+     # All the triggers declared for this adapter go here.
+     #
+     # _@return_ — The registry instance
+     def self.trigger_registry: () -> Registry
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # Creates a new adapter instance.
+     #
+     # _@param_ `name` — The adapter instance name, mandatory, must be unique
+     #
+     # _@param_ `**config` — The adapter configuration hash
+     def initialize: (Symbol name, **SORD_ERROR_SORD_ERROR_SymbolObject config) -> void
+
+     # Allows using the adapter with block syntax; automatically closes on block exit.
+     def open: () -> void
+
+     # Opens the adapter before using it, e.g. open a connection, authenticate to an HTTP endpoint, open a file...
+     # This method may return something, such as a connection object.
+     def open!: () -> void
+
+     # Closes the adapter when finished, e.g. close the connection, drop the HTTP session, close the file...
+     def close!: () -> void
+
+     # _@return_ — the adapter instance name
+     attr_reader name: Symbol
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # _@return_ — the adapter configuration hash
+     attr_reader config: SORD_ERROR_SORD_ERROR_SymbolObject
+   end
+
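A sketch of the lifecycle this contract implies; `SomeClient` and the `url:` key are stand-ins for whatever library and configuration a concrete adapter wraps:

```ruby
class MyAdapter < Ductr::Adapter
  def open!
    @client = SomeClient.connect(config[:url]) # may return the connection
  end

  def close!
    @client&.close
  end
end

# Block syntax wraps open!/close! around the yielded section:
MyAdapter.new(:my_adapter, url: "sqlite://db.sqlite3").open do
  # use the adapter; it is closed automatically on block exit
end
```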
+   #
+   # Base class for ETL jobs using the experimental fiber runner.
+   # Usage example:
+   #
+   #   class MyETLJob < Ductr::ETLJob
+   #     source :first_db, :basic
+   #     send_to :the_transform, :the_other_transform
+   #     def the_source(db)
+   #       # ...
+   #     end
+   #
+   #     transform
+   #     send_to :the_destination
+   #     def the_transform(row)
+   #       # ...
+   #     end
+   #
+   #     destination :first_db, :basic
+   #     def the_destination(row, db)
+   #       # ...
+   #     end
+   #
+   #     transform
+   #     send_to :the_other_destination
+   #     def the_other_transform(row)
+   #       # ...
+   #     end
+   #
+   #     destination :second_db, :basic
+   #     def the_other_destination(row, db)
+   #       # ...
+   #     end
+   #   end
+   class ETLJob < Ductr::Job
+     include Ductr::JobETLRunner
+     include Ductr::ETL::Parser
+     ETL_RUNNER_CLASS: Class
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # Annotation to define a source method
+     #
+     # _@param_ `adapter_name` — The adapter the source is running on
+     #
+     # _@param_ `source_type` — The type of source to run
+     #
+     # _@param_ `**source_options` — The options to pass to the source
+     #
+     # Source with Sequel SQLite adapter
+     # ```ruby
+     # source :my_adapter, :paginated, page_size: 42
+     # def my_source(db, offset, limit)
+     #   db[:items].offset(offset).limit(limit)
+     # end
+     # ```
+     #
+     # _@see_ — The chosen adapter documentation for further information on sources usage.
+     def self.source: (Symbol adapter_name, Symbol source_type, **SORD_ERROR_SORD_ERROR_SymbolObject source_options) -> void
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # Annotation to define a transform method
+     #
+     # _@param_ `transform_class` — The class the transform is running on
+     #
+     # _@param_ `**transform_options` — The options to pass to the transform
+     #
+     # Transform without params
+     # ```ruby
+     # transform
+     # def rename_keys(row)
+     #   row[:new_name] = row.delete(:old_name)
+     #   row[:new_email] = row.delete(:old_email)
+     # end
+     # ```
+     #
+     # Transform with params
+     # ```ruby
+     # class RenameTransform < Ductr::ETL::Transform
+     #   def process(row)
+     #     call_method.each do |actual_name, new_name|
+     #       new_key = "#{options[:prefix]}#{new_name}".to_sym
+     #
+     #       row[new_key] = row.delete(actual_name)
+     #     end
+     #   end
+     # end
+     #
+     # transform RenameTransform, prefix: "some_"
+     # def rename
+     #   { old_name: :new_name, old_email: :new_email }
+     # end
+     # ```
+     def self.transform: (Class? transform_class, **SORD_ERROR_SORD_ERROR_SymbolObject transform_options) -> void
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # Annotation to define a lookup method
+     #
+     # _@param_ `adapter_name` — The adapter the lookup is running on
+     #
+     # _@param_ `lookup_type` — The type of lookup to run
+     #
+     # _@param_ `**lookup_options` — The options to pass to the lookup
+     #
+     # Lookup with Sequel SQLite adapter
+     # ```ruby
+     # lookup :my_other_adapter, :match, merge: [:id, :item], buffer_size: 4
+     # def joining_different_adapters(db, ids)
+     #   db[:items_bis].select(:id, :item, :name).where(item: ids)
+     # end
+     # ```
+     #
+     # _@see_ — The chosen adapter documentation for further information on lookups usage.
+     def self.lookup: (Symbol adapter_name, Symbol lookup_type, **SORD_ERROR_SORD_ERROR_SymbolObject lookup_options) -> void
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # Annotation to define a destination method
+     #
+     # _@param_ `adapter_name` — The adapter the destination is running on
+     #
+     # _@param_ `destination_type` — The type of destination to run
+     #
+     # _@param_ `**destination_options` — The options to pass to the destination
+     #
+     # Destination with Sequel SQLite adapter
+     # ```ruby
+     # destination :my_other_adapter, :basic
+     # def my_destination(row, db)
+     #   db[:new_items].insert(name: row[:name], new_name: row[:new_name])
+     # end
+     # ```
+     #
+     # _@see_ — The chosen adapter documentation for further information on destinations usage.
+     def self.destination: (Symbol adapter_name, Symbol destination_type, **SORD_ERROR_SORD_ERROR_SymbolObject destination_options) -> void
+
+     # Annotation to define which methods will follow the current one
+     #
+     # _@param_ `*methods` — The names of the following methods
+     #
+     # Source with Sequel SQLite adapter sending rows to two transforms
+     # ```ruby
+     # source :my_adapter, :paginated, page_size: 42
+     # send_to :my_first_transform, :my_second_transform
+     # def my_source(db, offset, limit)
+     #   db[:items].offset(offset).limit(limit)
+     # end
+     #
+     # transform
+     # def my_first_transform(row)
+     #   # ...
+     # end
+     #
+     # transform
+     # def my_second_transform(row)
+     #   # ...
+     # end
+     # ```
+     def self.send_to: (*::Array[Symbol] methods) -> void
+
+     # Handles sources, transforms and destinations controls.
+     # Handles send_to directives, used to do the plumbing between controls.
+     # Used for both kiba and fiber runners initialization.
+     #
+     # _@return_ — The job's controls
+     def parse_annotations: () -> ::Array[(Source | Transform | Destination | ::Hash[Symbol, ::Array[Symbol]])]
+
+     # Currently used adapters set.
+     #
+     # _@return_ — The current adapters
+     def adapters: () -> ::Set[untyped]
+
+     # sord warn - method is probably not a type, but using anyway
+     # sord warn - method wasn't able to be resolved to a constant in this project
+     # Finds the method(s) associated with the given annotation names in the job class.
+     #
+     # _@param_ `*annotation_names` — The annotation names of the searched methods
+     #
+     # _@return_ — The mapped array containing the block's returned values
+     def find_method: (*::Array[Symbol] annotation_names) ?{ (method A) -> void } -> ::Array[untyped]
+
+     # Initializes adapter controls for the given type.
+     #
+     # _@param_ `control_type` — The adapter control type, one of :source or :destination
+     #
+     # _@return_ — The initialized adapter controls
+     def init_adapter_controls: (Symbol control_type) -> ::Array[(Source | Destination)]
+
+     # Initializes transform controls for the given types.
+     #
+     # _@param_ `*control_types` — The transform control types, :transform and/or :lookup
+     #
+     # _@return_ — The initialized transform controls
+     def init_transform_controls: (*::Array[Symbol] control_types) -> ::Array[Transform]
+
+     # sord warn - Annotable::Method wasn't able to be resolved to a constant in this project
+     # Initializes an adapter control (source, lookup or destination) based on the given annotated method.
+     #
+     # _@param_ `annotated_method` — The control's method
+     #
+     # _@return_ — The adapter control instance
+     def adapter_control: (Annotable::Method annotated_method) -> Control
+
+     # sord warn - Annotable::Method wasn't able to be resolved to a constant in this project
+     # Initializes a transform control.
+     #
+     # _@param_ `annotated_method` — The transform's method
+     #
+     # _@return_ — The transform control instance
+     def transform_control: (Annotable::Method annotated_method) -> Transform
+
+     # Parses the job's annotations and creates the runner instance.
+     def initialize: () -> void
+
+     # Opens adapters, executes the runner, and then closes the adapters.
+     def run: () -> void
+   end
+
+   #
+   # The base class for any trigger; it can be initialized by passing it its adapter, if any.
+   # A trigger must implement the #add method, which is called for each trigger declaration.
+   # Depending on what your trigger does, you may have to implement the #start and #stop methods.
+   # #start is called when the scheduler relying on the trigger is started. #stop does the opposite:
+   # it is called when the scheduler relying on the trigger is stopped.
+   class Trigger
+     # sord warn - Nil wasn't able to be resolved to a constant in this project
+     # Creates a new trigger instance, called by the scheduler.
+     #
+     # _@param_ `adapter` — The trigger's adapter, if any
+     def initialize: (?(Adapter | Nil)? adapter) -> void
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # Adds a new trigger, called by a scheduler when a trigger is declared.
+     #
+     # _@param_ `_method` — The scheduler method to be called by the trigger
+     #
+     # _@param_ `_options` — The options of the trigger declaration
+     def add: (Method _method, SORD_ERROR_SORD_ERROR_SymbolObject _options) -> void
+
+     # Called when the scheduler relying on the trigger is started.
+     def start: () -> void
+
+     # Called when the scheduler relying on the trigger is stopped.
+     def stop: () -> void
+
+     # sord omit - no YARD type given for :adapter, using untyped
+     # Returns the value of attribute adapter.
+     attr_reader adapter: untyped
+   end
+
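A sketch of a custom trigger honoring this contract; the polling loop is purely illustrative, not part of the gem:

```ruby
class PollingTrigger < Ductr::Trigger
  def add(method, options)
    @callbacks ||= []
    @callbacks << [method, options] # one entry per trigger declaration
  end

  def start
    @thread = Thread.new do
      loop do
        @callbacks&.each { |method, _opts| method.call }
        sleep 60
      end
    end
  end

  def stop
    @thread&.kill
  end
end
```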
+   module CLI
+     #
+     # The main CLI is started when used inside a ductr project folder.
+     # It exposes scheduling and monitoring tasks.
+     class Main < Thor
+       # sord omit - no YARD type given for "job_name", using untyped
+       # sord omit - no YARD return type given, using untyped
+       def perform: (untyped job_name) -> untyped
+
+       # sord omit - no YARD type given for "*scheduler_names", using untyped
+       # sord omit - no YARD return type given, using untyped
+       def schedule: (*untyped scheduler_names) -> untyped
+
+       # Keeps the thread alive until Ctrl-C is pressed.
+       def sleep_until_interrupt: () -> void
+     end
+
+     #
+     # The default CLI is started when no project folder was found.
+     # It exposes project and adapter generation tasks.
+     class Default < Thor
+       # Generates a new project
+       #
+       # _@param_ `name` — The project's name
+       def new: (?String? name) -> void
+     end
+
+     #
+     # Thor generator to create a new project
+     class NewProjectGenerator < Thor::Group
+       include Thor::Actions
+
+       # The templates source used to create a new project
+       #
+       # _@return_ — the templates source absolute path
+       def self.source_root: () -> String
+
+       # Does some setup before generating files:
+       # creates the project directory and sets it as the destination for the generator.
+       def init: () -> void
+
+       # Creates files in the project's root
+       def gen_root: () -> void
+
+       # Creates the bin file for the project
+       def gen_bin: () -> void
+
+       # Creates files in the `config` folder
+       def gen_config: () -> void
+     end
+   end
+
+   #
+   # Base class for ETL jobs using kiba's streaming runner.
+   # Example using the SQLite adapter:
+   #
+   #   class MyKibaJob < Ductr::KibaJob
+   #     source :some_adapter, :paginated, page_size: 4
+   #     def select_some_stuff(db, offset, limit)
+   #       db[:items].offset(offset).limit(limit)
+   #     end
+   #
+   #     lookup :some_adapter, :match, merge: [:id, :item], buffer_size: 4
+   #     def merge_with_stuff(db, ids)
+   #       db[:items_bis].select(:id, Sequel.as(:name, :name_bis), :item).where(item: ids)
+   #     end
+   #
+   #     transform
+   #     def generate_more_stuff(row)
+   #       { name: "#{row[:name]}_#{row[:name_bis]}" }
+   #     end
+   #
+   #     destination :some_other_adapter, :basic
+   #     def my_destination(row, db)
+   #       logger.trace("Hello destination: #{row}")
+   #       db[:new_items].insert(name: row[:name])
+   #     end
+   #   end
+   #
+   # @see The chosen adapter documentation for further information on controls usage.
+   class KibaJob < Ductr::Job
+     include Ductr::JobETLRunner
+     include Ductr::ETL::Parser
+     ETL_RUNNER_CLASS: Class
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # Annotation to define a source method
+     #
+     # _@param_ `adapter_name` — The adapter the source is running on
+     #
+     # _@param_ `source_type` — The type of source to run
+     #
+     # _@param_ `**source_options` — The options to pass to the source
+     #
+     # Source with Sequel SQLite adapter
+     # ```ruby
+     # source :my_adapter, :paginated, page_size: 42
+     # def my_source(db, offset, limit)
+     #   db[:items].offset(offset).limit(limit)
+     # end
+     # ```
+     #
+     # _@see_ — The chosen adapter documentation for further information on sources usage.
+     def self.source: (Symbol adapter_name, Symbol source_type, **SORD_ERROR_SORD_ERROR_SymbolObject source_options) -> void
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # Annotation to define a transform method
+     #
+     # _@param_ `transform_class` — The class the transform is running on
+     #
+     # _@param_ `**transform_options` — The options to pass to the transform
+     #
+     # Transform without params
+     # ```ruby
+     # transform
+     # def rename_keys(row)
+     #   row[:new_name] = row.delete(:old_name)
+     #   row[:new_email] = row.delete(:old_email)
+     # end
+     # ```
+     #
+     # Transform with params
+     # ```ruby
+     # class RenameTransform < Ductr::ETL::Transform
+     #   def process(row)
+     #     call_method.each do |actual_name, new_name|
+     #       new_key = "#{options[:prefix]}#{new_name}".to_sym
+     #
+     #       row[new_key] = row.delete(actual_name)
+     #     end
+     #   end
+     # end
+     #
+     # transform RenameTransform, prefix: "some_"
+     # def rename
+     #   { old_name: :new_name, old_email: :new_email }
+     # end
+     # ```
+     def self.transform: (Class? transform_class, **SORD_ERROR_SORD_ERROR_SymbolObject transform_options) -> void
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # Annotation to define a lookup method
+     #
+     # _@param_ `adapter_name` — The adapter the lookup is running on
+     #
+     # _@param_ `lookup_type` — The type of lookup to run
+     #
+     # _@param_ `**lookup_options` — The options to pass to the lookup
+     #
+     # Lookup with Sequel SQLite adapter
+     # ```ruby
+     # lookup :my_other_adapter, :match, merge: [:id, :item], buffer_size: 4
+     # def joining_different_adapters(db, ids)
+     #   db[:items_bis].select(:id, :item, :name).where(item: ids)
+     # end
+     # ```
+     #
+     # _@see_ — The chosen adapter documentation for further information on lookups usage.
+     def self.lookup: (Symbol adapter_name, Symbol lookup_type, **SORD_ERROR_SORD_ERROR_SymbolObject lookup_options) -> void
+
+     # sord warn - "Symbol: Object" does not appear to be a type
+     # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
+     # Annotation to define a destination method
+     #
+     # _@param_ `adapter_name` — The adapter the destination is running on
+     #
+     # _@param_ `destination_type` — The type of destination to run
+     #
+     # _@param_ `**destination_options` — The options to pass to the destination
+     #
+     # Destination with Sequel SQLite adapter
+     # ```ruby
+     # destination :my_other_adapter, :basic
+     # def my_destination(row, db)
+     #   db[:new_items].insert(name: row[:name], new_name: row[:new_name])
+     # end
+     # ```
+     #
+     # _@see_ — The chosen adapter documentation for further information on destinations usage.
+     def self.destination: (Symbol adapter_name, Symbol destination_type, **SORD_ERROR_SORD_ERROR_SymbolObject destination_options) -> void
+
+     # Handles sources, transforms and destinations controls.
+     # Handles send_to directives, used to do the plumbing between controls.
+     # Used for both kiba and fiber runners initialization.
+     #
+     # _@return_ — The job's controls
+     def parse_annotations: () -> ::Array[(Source | Transform | Destination | ::Hash[Symbol, ::Array[Symbol]])]
+
+     # Currently used adapters set.
+     #
+     # _@return_ — The current adapters
+     def adapters: () -> ::Set[untyped]
+
+     # sord warn - method is probably not a type, but using anyway
+     # sord warn - method wasn't able to be resolved to a constant in this project
+     # Finds the method(s) associated with the given annotation names in the job class.
+     #
+     # _@param_ `*annotation_names` — The annotation names of the searched methods
+     #
+     # _@return_ — The mapped array containing the block's returned values
+     def find_method: (*::Array[Symbol] annotation_names) ?{ (method A) -> void } -> ::Array[untyped]
+
+     # Initializes adapter controls for the given type.
+     #
+     # _@param_ `control_type` — The adapter control type, one of :source or :destination
+     #
+     # _@return_ — The initialized adapter controls
+     def init_adapter_controls: (Symbol control_type) -> ::Array[(Source | Destination)]
+
+     # Initializes transform controls for the given types.
+     #
+     # _@param_ `*control_types` — The transform control types, :transform and/or :lookup
+     #
+     # _@return_ — The initialized transform controls
+     def init_transform_controls: (*::Array[Symbol] control_types) -> ::Array[Transform]
+
+     # sord warn - Annotable::Method wasn't able to be resolved to a constant in this project
+     # Initializes an adapter control (source, lookup or destination) based on the given annotated method.
+     #
+     # _@param_ `annotated_method` — The control's method
+     #
+     # _@return_ — The adapter control instance
+     def adapter_control: (Annotable::Method annotated_method) -> Control
+
+     # sord warn - Annotable::Method wasn't able to be resolved to a constant in this project
+     # Initializes a transform control.
+     #
+     # _@param_ `annotated_method` — The transform's method
+     #
+     # _@return_ — The transform control instance
+     def transform_control: (Annotable::Method annotated_method) -> Transform
+
+     # Parses the job's annotations and creates the runner instance.
+     def initialize: () -> void
+
+     # Opens adapters, executes the runner, and then closes the adapters.
+     def run: () -> void
+   end
+
+   #
+   # Pipelines allow you to declare rich data pipelines easily.
+   #
+   # By using the `after` annotation, you can define the steps' execution hierarchy.
+   #
+   # `sync` and `async` are useful to define job sequences inside step methods.
+   #
+   # `Pipeline` inherits from `Job`, which means that pipelines are enqueued like any other job.
+   # Pipelines are enqueued in the :ductr_pipelines queue.
+   #
+   #   class MyPipeline < Ductr::Pipeline
+   #     def first_step
+   #       sync(MyJob, 1)
+   #       async(SomeJob) # Executed when `MyJob` is done
+   #     end
+   #
+   #     after :first_step
+   #     def first_parallel_step # Returns when all three `HelloJob` are done
+   #       async(HelloJob, :one)
+   #       async(HelloJob, :two)
+   #       async(HelloJob, :three)
+   #     end
+   #
+   #     after :first_step
+   #     def second_parallel_step # Executed concurrently with :first_parallel_step
+   #       async(SomeJob)
+   #       async(SomeOtherJob)
+   #       sync(HelloJob, :one) # Executed when `SomeJob` and `SomeOtherJob` are done
+   #     end
+   #
+   #     after :first_parallel_step, :second_parallel_step
+   #     def last_step # Executed when `first_parallel_step` and `second_parallel_step` jobs are done
+   #       sync(ByeJob)
+   #     end
+   #   end
+   #
+   # You can define pipelines with only one step by using the `after` annotation without parameters:
+   #
+   #   class MonoStepPipeline < Ductr::Pipeline
+   #     after
+   #     def unique_step
+   #       async(MyJob)
+   #       async(MyJob)
+   #     end
+   #   end
+   #
+   # A pipeline can inherit from another, allowing you to overload and add steps to the parent pipeline:
+   #
+   #   class InheritPipeline < MonoStepPipeline
+   #     after :unique_step
+   #     def not_that_unique
+   #       async(MyJob)
+   #     end
+   #   end
+   class Pipeline < Ductr::Job
+     # Annotation to define preceding steps on a pipeline step method.
+     #
+     # ```ruby
+     # after :some_step_method, :some_other_step_method
+     # def my_step
+     #   # ...
+     # end
+     # ```
+     def self.after: () -> void
+
+     # Starts the pipeline runner.
+     def run: () -> void
+
+     # Initializes the pipeline runner
+     def initialize: () -> void
+
+     # Puts the given job in the queue and waits for it to be done.
+     #
+     # _@param_ `job_class` — The job to enqueue
+     #
+     # _@param_ `*params` — The job's params
+     def sync: (singleton(Job) job_class, *::Array[Object] params) -> void
+
+     # Enqueues the given job.
+     #
+     # _@param_ `job_class` — The job to enqueue
+     #
+     # _@param_ `*params` — The job's params
+     def async: (singleton(Job) job_class, *::Array[Object] params) -> void
+
+     # Writes the pipeline's status into Ductr's store.
+     #
+     # _@param_ `status` — The status of the job
+     def status=: (Symbol status) -> void
+
+     # _@return_ — The pipeline's runner instance
+     attr_reader runner: PipelineRunner
+   end
+
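Since `Pipeline` inherits from `Job` (and therefore from ActiveJob), enqueuing one is plain ActiveJob usage; `MyPipeline` refers to the example class in the comment above:

```ruby
MyPipeline.perform_later # queued in :ductr_pipelines
MyPipeline.perform_now   # or run inline, stepping through the runner
```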
+   class NotFoundInRegistryError < StandardError
+   end
+
+   #
+   # The registry pattern to store adapters, controls and triggers.
+   class Registry
+     extend Forwardable
+   end
+ end