ductr 0.1.0

Files changed (63)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +14 -0
  4. data/.vscode/settings.json +18 -0
  5. data/COPYING +674 -0
  6. data/COPYING.LESSER +165 -0
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +121 -0
  9. data/README.md +37 -0
  10. data/Rakefile +37 -0
  11. data/bin/console +15 -0
  12. data/bin/setup +8 -0
  13. data/ductr.gemspec +50 -0
  14. data/exe/ductr +24 -0
  15. data/lib/ductr/adapter.rb +94 -0
  16. data/lib/ductr/cli/default.rb +25 -0
  17. data/lib/ductr/cli/main.rb +60 -0
  18. data/lib/ductr/cli/new_project_generator.rb +72 -0
  19. data/lib/ductr/cli/templates/project/bin_ductr.rb +7 -0
  20. data/lib/ductr/cli/templates/project/config_app.rb +5 -0
  21. data/lib/ductr/cli/templates/project/config_development.yml +8 -0
  22. data/lib/ductr/cli/templates/project/config_environment_development.rb +18 -0
  23. data/lib/ductr/cli/templates/project/gemfile.rb +6 -0
  24. data/lib/ductr/cli/templates/project/rubocop.yml +14 -0
  25. data/lib/ductr/cli/templates/project/tool-versions +1 -0
  26. data/lib/ductr/configuration.rb +145 -0
  27. data/lib/ductr/etl/controls/buffered_destination.rb +65 -0
  28. data/lib/ductr/etl/controls/buffered_transform.rb +76 -0
  29. data/lib/ductr/etl/controls/control.rb +46 -0
  30. data/lib/ductr/etl/controls/destination.rb +28 -0
  31. data/lib/ductr/etl/controls/paginated_source.rb +47 -0
  32. data/lib/ductr/etl/controls/source.rb +21 -0
  33. data/lib/ductr/etl/controls/transform.rb +28 -0
  34. data/lib/ductr/etl/fiber_control.rb +136 -0
  35. data/lib/ductr/etl/fiber_runner.rb +68 -0
  36. data/lib/ductr/etl/kiba_runner.rb +26 -0
  37. data/lib/ductr/etl/parser.rb +115 -0
  38. data/lib/ductr/etl/runner.rb +37 -0
  39. data/lib/ductr/etl_job.rb +161 -0
  40. data/lib/ductr/job.rb +58 -0
  41. data/lib/ductr/job_etl_runner.rb +37 -0
  42. data/lib/ductr/job_status.rb +56 -0
  43. data/lib/ductr/kiba_job.rb +130 -0
  44. data/lib/ductr/log/formatters/color_formatter.rb +48 -0
  45. data/lib/ductr/log/logger.rb +169 -0
  46. data/lib/ductr/log/outputs/file_output.rb +30 -0
  47. data/lib/ductr/log/outputs/standard_output.rb +39 -0
  48. data/lib/ductr/pipeline.rb +133 -0
  49. data/lib/ductr/pipeline_runner.rb +95 -0
  50. data/lib/ductr/pipeline_step.rb +92 -0
  51. data/lib/ductr/registry.rb +55 -0
  52. data/lib/ductr/rufus_trigger.rb +106 -0
  53. data/lib/ductr/scheduler.rb +117 -0
  54. data/lib/ductr/store/job_serializer.rb +59 -0
  55. data/lib/ductr/store/job_store.rb +59 -0
  56. data/lib/ductr/store/pipeline_serializer.rb +106 -0
  57. data/lib/ductr/store/pipeline_store.rb +48 -0
  58. data/lib/ductr/store.rb +81 -0
  59. data/lib/ductr/trigger.rb +49 -0
  60. data/lib/ductr/version.rb +6 -0
  61. data/lib/ductr.rb +143 -0
  62. data/sig/ductr.rbs +1107 -0
  63. metadata +292 -0
data/sig/ductr.rbs ADDED
@@ -0,0 +1,1107 @@
1
+ #
2
+ # The main Ductr module.
3
+ module Ductr
4
+ VERSION: String
5
+
6
+ # The adapter classes registry; all declared adapters are in the registry.
7
+ #
8
+ # _@return_ — The registry instance
9
+ def self.adapter_registry: () -> Registry
10
+
11
+ # The trigger classes registry; all declared triggers are in the registry.
12
+ #
13
+ # _@return_ — The registry instance
14
+ def self.trigger_registry: () -> Registry
15
+
16
+ # The current Ductr environment, "development" by default.
17
+ # You can change it by setting the `DUCTR_ENV` environment variable.
18
+ #
19
+ # _@return_ — The Ductr environment
20
+ def self.env: () -> String
21
+
22
+ # Determines if Ductr is in development mode.
23
+ #
24
+ # _@return_ — True if DUCTR_ENV is set to "development" or nil
25
+ def self.development?: () -> bool
26
+
27
+ # Determines if Ductr is in production mode.
28
+ #
29
+ # _@return_ — True if DUCTR_ENV is set to "production"
30
+ def self.production?: () -> bool
31
+
32
+ # The configure block allows you to configure Ductr internals.
33
+ # You must call this method once, and only once, to use the framework.
34
+ def self.configure: () ?{ (Configuration config) -> void } -> void
35
+
36
+ # The Ductr main logger instance.
37
+ #
38
+ # _@return_ — The logger instance
39
+ def self.logger: () -> Log::Logger
40
+
41
+ # sord warn - ActiveSupport::Cache::Store wasn't able to be resolved to a constant in this project
42
+ # The Ductr store, used to share information across different instances.
43
+ #
44
+ # _@return_ — The store instance
45
+ def self.store: () -> ActiveSupport::Cache::Store
46
+
47
+ # Contains all the Ductr configuration.
48
+ #
49
+ # _@return_ — The configuration instance
50
+ def self.config: () -> Configuration
51
+
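As a quick illustration of the module-level API above, a project's boot code could look like the following sketch. The available `Configuration` accessors live in `lib/ductr/configuration.rb` and are not shown here, so the block body is left as a comment.

```ruby
require "ductr"

# Must be called once, and only once, before using the framework.
Ductr.configure do |config|
  # set adapters, logging and store options on `config` here
end

Ductr.env           # => "development" unless DUCTR_ENV says otherwise
Ductr.development?  # => true in that case
Ductr.logger        # => the shared Ductr::Log::Logger instance
```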
52
+ class AdapterNotFoundError < StandardError
53
+ end
54
+
55
+ class ControlNotFoundError < StandardError
56
+ end
57
+
58
+ class InconsistentPaginationError < StandardError
59
+ end
60
+
61
+ #
62
+ # The base class for any job; you can use it directly if you don't need an ETL job.
63
+ class Job < ActiveJob::Base
64
+ include Ductr::JobStatus
65
+ extend Annotable
66
+ extend Forwardable
67
+
68
+ # sord omit - no YARD type given for "*_", using untyped
69
+ # The active job's perform method. DO NOT override it; implement the #run method instead.
70
+ def perform: (*untyped _) -> void
71
+
72
+ # Fetches the configured adapter instance matching the given name.
73
+ #
74
+ # _@param_ `name` — The adapter name
75
+ #
76
+ # _@return_ — The adapter corresponding to the given name
77
+ def adapter: (Symbol name) -> Adapter
78
+
79
+ # The job's logger instance.
80
+ #
81
+ # _@return_ — The logger instance
82
+ def logger: () -> Ductr::Log::Logger
83
+
84
+ # The entry point of jobs.
85
+ def run: () -> void
86
+
87
+ # Writes the job's status into Ductr's store.
88
+ #
89
+ # _@param_ `status` — The status of the job
90
+ def status=: (Symbol status) -> void
91
+
92
+ # Determines whether the job has a `completed` or `failed` status.
93
+ #
94
+ # _@return_ — True when the status is `completed` or `failed`
95
+ def stopped?: () -> bool
96
+
97
+ # _@return_ — The occurred error if any
98
+ attr_reader error: Exception
99
+
100
+ # _@return_ — The job's status, one of `:queued`, `:working`, `:completed` and `:failed`
101
+ attr_reader status: Symbol
102
+ end
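For reference, a minimal plain job built on the class above could look like this sketch. The `:my_db` adapter name is an assumption and must match an adapter declared in the project's configuration.

```ruby
class SayHelloJob < Ductr::Job
  # #run is the job's entry point; #perform must not be overridden.
  def run
    logger.trace("Hello from #{self.class.name}")
    adapter(:my_db) # => the configured adapter registered as :my_db (assumed name)
  end
end

# Jobs are enqueued through ActiveJob like any other job.
SayHelloJob.perform_later
```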
103
+
104
+ #
105
+ # Store interaction helpers for internal usage.
106
+ module Store
107
+ extend Ductr::Store::JobStore
108
+ extend Ductr::Store::PipelineStore
109
+ EXPIRATION_INTERVAL: Integer
110
+
111
+ # Get all known job instances for the given registry_key and job's key_prefix.
112
+ #
113
+ # _@param_ `registry_key` — The registry key in which job keys will be read
114
+ #
115
+ # _@param_ `key_prefix` — The cache key prefix for the registry's job keys
116
+ #
117
+ # _@return_ — The job instances
118
+ def self.all: (String registry_key, String key_prefix) -> ::Array[Job]
119
+
120
+ # Read all given jobs in the given key_prefix.
121
+ #
122
+ # _@param_ `key_prefix` — The cache key prefix for the job_id
123
+ #
124
+ # _@param_ `*jobs` — The jobs to read
125
+ #
126
+ # _@return_ — The read jobs
127
+ def self.read: (String key_prefix, *::Array[Job] jobs) -> ::Array[Job]
128
+
129
+ # sord omit - no YARD type given for "key_prefix", using untyped
130
+ # Update the given job in the given key_prefix.
131
+ #
132
+ # _@param_ `job` — The job to update in the store
133
+ def self.write: (untyped key_prefix, Job job) -> void
134
+
135
+ # sord omit - no YARD type given for "registry_key", using untyped
136
+ # Add the given job to the store's job registry. This method is NOT thread-safe.
137
+ #
138
+ # _@param_ `job` — The job to register
139
+ def self.register: (untyped registry_key, Job job) -> void
140
+
141
+ # Determines whether all tracked jobs have either a completed or failed status.
142
+ #
143
+ # _@return_ — `true` when all jobs are done
144
+ def self.all_done?: () -> bool
145
+
146
+ # Get all known job instances.
147
+ #
148
+ # _@return_ — The job instances
149
+ def self.all_jobs: () -> ::Array[Job]
150
+
151
+ # Read all given jobs.
152
+ #
153
+ # _@param_ `*jobs` — The jobs to read
154
+ #
155
+ # _@return_ — The read jobs
156
+ def self.read_jobs: (*::Array[Job] jobs) -> ::Array[Job]
157
+
158
+ # Update the given job.
159
+ #
160
+ # _@param_ `job` — The job to update in the store
161
+ def self.write_job: (Job job) -> void
162
+
163
+ # Add the given job to the store's job registry. This method is NOT thread-safe.
164
+ #
165
+ # _@param_ `job` — The job to register
166
+ def self.register_job: (Job job) -> void
167
+
168
+ # Convert the given job into a `SerializedJob` struct.
169
+ #
170
+ # _@param_ `job` — The job to serialize
171
+ #
172
+ # _@return_ — The job converted into struct
173
+ def self.serialize_job: (Job job) -> SerializedJob
174
+
175
+ # sord infer - SerializedPipeline was resolved to Ductr::Store::PipelineSerializer::SerializedPipeline
176
+ # Get all known pipeline instances.
177
+ #
178
+ # _@return_ — The pipeline instances
179
+ def self.all_pipelines: () -> ::Array[Ductr::Store::PipelineSerializer::SerializedPipeline]
180
+
181
+ # Update the given pipeline.
182
+ #
183
+ # _@param_ `pipeline` — The pipeline to update in the store
184
+ def self.write_pipeline: (Pipeline pipeline) -> void
185
+
186
+ # Add the given pipeline to the store's pipeline registry. This method is NOT thread-safe.
187
+ #
188
+ # _@param_ `pipeline` — The pipeline to register
189
+ def self.register_pipeline: (Pipeline pipeline) -> void
190
+
191
+ # Convert the given pipeline and its steps into
192
+ # `SerializedPipeline` and `SerializedPipelineStep` structs.
193
+ #
194
+ # _@param_ `pipeline` — The pipeline to serialize
195
+ #
196
+ # _@return_ — The pipeline converted into struct
197
+ def self.serialize_pipeline: (Pipeline pipeline) -> SerializedPipeline
198
+
199
+ #
200
+ # Job's level store interactions.
201
+ module JobStore
202
+ include Ductr::Store::JobSerializer
203
+ JOB_KEY_PREFIX: String
204
+ JOB_REGISTRY_KEY: String
205
+
206
+ # Get all known job instances.
207
+ #
208
+ # _@return_ — The job instances
209
+ def all_jobs: () -> ::Array[Job]
210
+
211
+ # Read all given jobs.
212
+ #
213
+ # _@param_ `*jobs` — The jobs to read
214
+ #
215
+ # _@return_ — The read jobs
216
+ def read_jobs: (*::Array[Job] jobs) -> ::Array[Job]
217
+
218
+ # Update the given job.
219
+ #
220
+ # _@param_ `job` — The job to update in the store
221
+ def write_job: (Job job) -> void
222
+
223
+ # Add the given job to the store's job registry. This method is NOT thread-safe.
224
+ #
225
+ # _@param_ `job` — The job to register
226
+ def register_job: (Job job) -> void
227
+
228
+ # Convert the given job into a `SerializedJob` struct.
229
+ #
230
+ # _@param_ `job` — The job to serialize
231
+ #
232
+ # _@return_ — The job converted into struct
233
+ def serialize_job: (Job job) -> SerializedJob
234
+ end
235
+
236
+ #
237
+ # Convert jobs into active job serializable structs.
238
+ module JobSerializer
239
+ # Convert the given job into a `SerializedJob` struct.
240
+ #
241
+ # _@param_ `job` — The job to serialize
242
+ #
243
+ # _@return_ — The job converted into struct
244
+ def serialize_job: (Job job) -> SerializedJob
245
+
246
+ #
247
+ # @!parse
248
+ # #
249
+ # # The job representation as a struct.
250
+ # #
251
+ # # @!attribute [r] job_id
252
+ # # @return [String] The active job's job id
253
+ # #
254
+ # # @!attribute [r] status
255
+ # # @return [Symbol] The job's status
256
+ # #
257
+ # # @!attribute [r] error
258
+ # # @return [Exception, nil] The job's error if any
259
+ # #
260
+ # class SerializedJob < Struct
261
+ # #
262
+ # # @param [String] job_id Active job's job id
263
+ # # @param [Symbol] status Job's status
264
+ # # @param [Exception, nil] error Job's error
265
+ # #
266
+ # def initialize(job_id, status, error)
267
+ # @job_id = job_id
268
+ # @status = status
269
+ # @error = error
270
+ # end
271
+ # end
272
+ class SerializedJob < Struct
273
+ # _@param_ `job_id` — Active job's job id
274
+ #
275
+ # _@param_ `status` — Job's status
276
+ #
277
+ # _@param_ `error` — Job's error
278
+ def initialize: (String job_id, Symbol status, Exception? error) -> void
279
+
280
+ # Determines whether the job has a `completed` or `failed` status.
281
+ #
282
+ # _@return_ — True when the status is `completed` or `failed`
283
+ def stopped?: () -> bool
284
+
285
+ # _@return_ — The active job's job id
286
+ attr_reader job_id: String
287
+
288
+ # _@return_ — The job's status
289
+ attr_reader status: Symbol
290
+
291
+ # _@return_ — The job's error if any
292
+ attr_reader error: Exception?
293
+ end
294
+ end
295
+
296
+ #
297
+ # Pipeline's level store interactions.
298
+ module PipelineStore
299
+ include Ductr::Store::PipelineSerializer
300
+ PIPELINE_KEY_PREFIX: String
301
+ PIPELINE_REGISTRY_KEY: String
302
+
303
+ # sord infer - SerializedPipeline was resolved to Ductr::Store::PipelineSerializer::SerializedPipeline
304
+ # Get all known pipeline instances.
305
+ #
306
+ # _@return_ — The pipeline instances
307
+ def all_pipelines: () -> ::Array[Ductr::Store::PipelineSerializer::SerializedPipeline]
308
+
309
+ # Update the given pipeline.
310
+ #
311
+ # _@param_ `pipeline` — The pipeline to update in the store
312
+ def write_pipeline: (Pipeline pipeline) -> void
313
+
314
+ # Add the given pipeline to the store's pipeline registry. This method is NOT thread-safe.
315
+ #
316
+ # _@param_ `pipeline` — The pipeline to register
317
+ def register_pipeline: (Pipeline pipeline) -> void
318
+
319
+ # Convert the given pipeline and its steps into
320
+ # `SerializedPipeline` and `SerializedPipelineStep` structs.
321
+ #
322
+ # _@param_ `pipeline` — The pipeline to serialize
323
+ #
324
+ # _@return_ — The pipeline converted into struct
325
+ def serialize_pipeline: (Pipeline pipeline) -> SerializedPipeline
326
+
327
+ # Convert the given job into a `SerializedJob` struct.
328
+ #
329
+ # _@param_ `job` — The job to serialize
330
+ #
331
+ # _@return_ — The job converted into struct
332
+ def serialize_job: (Job job) -> SerializedJob
333
+ end
334
+
335
+ #
336
+ # Convert pipelines and steps into active job serializable structs.
337
+ module PipelineSerializer
338
+ include Ductr::Store::JobSerializer
339
+
340
+ # Convert the given pipeline and its steps into
341
+ # `SerializedPipeline` and `SerializedPipelineStep` structs.
342
+ #
343
+ # _@param_ `pipeline` — The pipeline to serialize
344
+ #
345
+ # _@return_ — The pipeline converted into struct
346
+ def serialize_pipeline: (Pipeline pipeline) -> SerializedPipeline
347
+
348
+ # Convert the given job into a `SerializedJob` struct.
349
+ #
350
+ # _@param_ `job` — The job to serialize
351
+ #
352
+ # _@return_ — The job converted into struct
353
+ def serialize_job: (Job job) -> SerializedJob
354
+
355
+ #
356
+ # @!parse
357
+ # #
358
+ # # The pipeline representation as a struct.
359
+ # #
360
+ # # @!attribute [r] job_id
361
+ # # @return [String] The active job's job id
362
+ # #
363
+ # # @!attribute [r] status
364
+ # # @return [Symbol] The pipeline job status
365
+ # #
366
+ # # @!attribute [r] error
367
+ # # @return [Exception, nil] The pipeline job error if any
368
+ # #
369
+ # # @!attribute [r] steps
370
+ # # @return [Array<SerializedPipelineStep>] The pipeline steps as struct
371
+ # #
372
+ # class SerializedPipeline < Struct
373
+ # #
374
+ # # @param [String] job_id Pipeline job id
375
+ # # @param [Symbol] status Pipeline status
376
+ # # @param [Exception, nil] error Pipeline error
377
+ # # @param [Array<SerializedPipelineStep>] steps Pipeline steps as struct
378
+ # #
379
+ # def initialize(job_id, status, error, steps)
380
+ # @job_id = job_id
381
+ # @status = status
382
+ # @error = error
383
+ # @steps = steps
384
+ # end
385
+ # end
386
+ class SerializedPipeline < Struct
387
+ # _@param_ `job_id` — Pipeline job id
388
+ #
389
+ # _@param_ `status` — Pipeline status
390
+ #
391
+ # _@param_ `error` — Pipeline error
392
+ #
393
+ # _@param_ `steps` — Pipeline steps as struct
394
+ def initialize: (
395
+ String job_id,
396
+ Symbol status,
397
+ Exception? error,
398
+ ::Array[SerializedPipelineStep] steps
399
+ ) -> void
400
+
401
+ # Determines whether the pipeline has a `completed` or `failed` status.
402
+ #
403
+ # _@return_ — True when the status is `completed` or `failed`
404
+ def stopped?: () -> bool
405
+
406
+ # _@return_ — The active job's job id
407
+ attr_reader job_id: String
408
+
409
+ # _@return_ — The pipeline job status
410
+ attr_reader status: Symbol
411
+
412
+ # _@return_ — The pipeline job error if any
413
+ attr_reader error: Exception?
414
+
415
+ # _@return_ — The pipeline steps as struct
416
+ attr_reader steps: ::Array[SerializedPipelineStep]
417
+ end
418
+
419
+ #
420
+ # @!parse
421
+ # #
422
+ # # The pipeline step representation as a struct.
423
+ # #
424
+ # # @!attribute [r] jobs
425
+ # # @return [Array<Job>] The step's jobs
426
+ # #
427
+ # # @!attribute [r] done
428
+ # # @return [Boolean] The step's fiber state
429
+ # #
430
+ # class SerializedPipelineStep < Struct
431
+ # #
432
+ # # @param [Array<Job>] jobs The step's jobs
433
+ # # @param [Boolean] done The step's fiber state
434
+ # #
435
+ # def initialize(jobs, done)
436
+ # @jobs = jobs
437
+ # @done = done
438
+ # end
439
+ # end
440
+ class SerializedPipelineStep < Struct
441
+ # _@param_ `jobs` — The step's jobs
442
+ #
443
+ # _@param_ `done` — The step's fiber state
444
+ def initialize: (::Array[Job] jobs, bool done) -> void
445
+
446
+ # Check if the step is done.
447
+ #
448
+ # _@return_ — True if the step is done
449
+ def done?: () -> bool
450
+
451
+ # _@return_ — The step's jobs
452
+ attr_reader jobs: ::Array[Job]
453
+
454
+ # _@return_ — The step's fiber state
455
+ attr_reader done: bool
456
+ end
457
+ end
458
+ end
459
+
460
+ #
461
+ # Base adapter class; your adapter should inherit from this class.
462
+ class Adapter
463
+ # All the sources declared for this adapter go here.
464
+ #
465
+ # _@return_ — The registry instance
466
+ def self.source_registry: () -> Registry
467
+
468
+ # All the lookups declared for this adapter go here.
469
+ #
470
+ # _@return_ — The registry instance
471
+ def self.lookup_registry: () -> Registry
472
+
473
+ # All the destinations declared for this adapter go here.
474
+ #
475
+ # _@return_ — The registry instance
476
+ def self.destination_registry: () -> Registry
477
+
478
+ # All the triggers declared for this adapter go here.
479
+ #
480
+ # _@return_ — The registry instance
481
+ def self.trigger_registry: () -> Registry
482
+
483
+ # sord warn - "Symbol: Object" does not appear to be a type
484
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
485
+ # Creates a new adapter instance.
486
+ #
487
+ # _@param_ `name` — The adapter instance name, mandatory, must be unique
488
+ #
489
+ # _@param_ `**config` — The adapter configuration hash
490
+ def initialize: (Symbol name, **SORD_ERROR_SORD_ERROR_SymbolObject config) -> void
491
+
492
+ # Allows use of the adapter with block syntax; automatically closes on block exit.
493
+ def open: () -> void
494
+
495
+ # Opens the adapter before using it, e.g. open a connection, authenticate to an HTTP endpoint, open a file...
496
+ # This method may return something, such as a connection object.
497
+ def open!: () -> void
498
+
499
+ # Closes the adapter when finished, e.g. close the connection, drop the HTTP session, close the file...
500
+ def close!: () -> void
501
+
502
+ # _@return_ — the adapter instance name
503
+ attr_reader name: Symbol
504
+
505
+ # sord warn - "Symbol: Object" does not appear to be a type
506
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
507
+ # _@return_ — the adapter configuration hash
508
+ attr_reader config: SORD_ERROR_SORD_ERROR_SymbolObject
509
+ end
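A bare-bones custom adapter following the contract above might be sketched as below. `SomeClient` and the `:url` configuration key are purely illustrative assumptions.

```ruby
class MyAdapter < Ductr::Adapter
  # Called before the adapter is used, e.g. to open a connection.
  def open!
    @client = SomeClient.connect(config[:url]) # hypothetical client and config key
  end

  # Called once the adapter is no longer needed.
  def close!
    @client&.disconnect
    @client = nil
  end
end
```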
510
+
511
+ #
512
+ # Base class for ETL jobs using the experimental fiber runner.
513
+ # Usage example:
514
+ #
515
+ # class MyETLJob < Ductr::ETLJob
516
+ # source :first_db, :basic
517
+ # send_to :the_transform, :the_other_transform
518
+ # def the_source(db)
519
+ # # ...
520
+ # end
521
+ #
522
+ # transform
523
+ # send_to :the_destination
524
+ # def the_transform(row)
525
+ # # ...
526
+ # end
527
+ #
528
+ # destination :first_db, :basic
529
+ # def the_destination(row, db)
530
+ # # ...
531
+ # end
532
+ #
533
+ # transform
534
+ # send_to :the_other_destination
535
+ # def the_other_transform(row)
536
+ # # ...
537
+ # end
538
+ #
539
+ # destination :second_db, :basic
540
+ # def the_other_destination(row, db)
541
+ # # ...
542
+ # end
543
+ # end
544
+ class ETLJob < Ductr::Job
545
+ include Ductr::JobETLRunner
546
+ include Ductr::ETL::Parser
547
+ ETL_RUNNER_CLASS: Class
548
+
549
+ # sord warn - "Symbol: Object" does not appear to be a type
550
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
551
+ # Annotation to define a source method
552
+ #
553
+ # _@param_ `adapter_name` — The adapter the source is running on
554
+ #
555
+ # _@param_ `source_type` — The type of source to run
556
+ #
557
+ # _@param_ `**source_options` — The options to pass to the source
558
+ #
559
+ # Source with Sequel SQLite adapter
560
+ # ```ruby
561
+ # source :my_adapter, :paginated, page_size: 42
562
+ # def my_source(db, offset, limit)
563
+ # db[:items].offset(offset).limit(limit)
564
+ # end
565
+ # ```
566
+ #
567
+ # _@see_ `The` — chosen adapter documentation for further information on sources usage.
568
+ def self.source: (Symbol adapter_name, Symbol source_type, **SORD_ERROR_SORD_ERROR_SymbolObject source_options) -> void
569
+
570
+ # sord warn - "Symbol: Object" does not appear to be a type
571
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
572
+ # Annotation to define a transform method
573
+ #
574
+ # _@param_ `transform_class` — The class the transform is running on
575
+ #
576
+ # _@param_ `**transform_options` — The options to pass to the transform
577
+ #
578
+ # Transform without params
579
+ # ```ruby
580
+ # transform
581
+ # def rename_keys(row)
582
+ # row[:new_name] = row.delete(:old_name)
583
+ # row[:new_email] = row.delete(:old_email)
584
+ # end
585
+ # ```
586
+ #
587
+ # Transform with params
588
+ # ```ruby
589
+ # class RenameTransform < Ductr::ETL::Transform
590
+ # def process(row)
591
+ # call_method.each do |actual_name, new_name|
592
+ # new_key = "#{options[:prefix]}#{new_name}".to_sym
593
+ #
594
+ # row[new_key] = row.delete(actual_name)
595
+ # end
596
+ # end
597
+ # end
598
+ #
599
+ # transform RenameTransform, prefix: "some_"
600
+ # def rename
601
+ # { old_name: :new_name, old_email: :new_email }
602
+ # end
603
+ # ```
604
+ def self.transform: (Class? transform_class, **SORD_ERROR_SORD_ERROR_SymbolObject transform_options) -> void
605
+
606
+ # sord warn - "Symbol: Object" does not appear to be a type
607
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
608
+ # Annotation to define a lookup method
609
+ #
610
+ # _@param_ `adapter_name` — The adapter the lookup is running on
611
+ #
612
+ # _@param_ `lookup_type` — The type of lookup to run
613
+ #
614
+ # _@param_ `**lookup_options` — The options to pass to the lookup
615
+ #
616
+ # Lookup with Sequel SQLite adapter
617
+ # ```ruby
618
+ # lookup :my_other_adapter, :match, merge: [:id, :item], buffer_size: 4
619
+ # def joining_different_adapters(db, ids)
620
+ # db[:items_bis].select(:id, :item, :name).where(item: ids)
621
+ # end
622
+ # ```
623
+ #
624
+ # _@see_ `The` — chosen adapter documentation for further information on lookups usage.
625
+ def self.lookup: (Symbol adapter_name, Symbol lookup_type, **SORD_ERROR_SORD_ERROR_SymbolObject lookup_options) -> void
626
+
627
+ # sord warn - "Symbol: Object" does not appear to be a type
628
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
629
+ # Annotation to define a destination method
630
+ #
631
+ # _@param_ `adapter_name` — The adapter the destination is running on
632
+ #
633
+ # _@param_ `destination_type` — The type of destination to run
634
+ #
635
+ # _@param_ `**destination_options` — The options to pass to the destination
636
+ #
637
+ # Destination with Sequel SQLite adapter
638
+ # ```ruby
639
+ # destination :my_other_adapter, :basic
640
+ # def my_destination(row, db)
641
+ # db[:new_items].insert(name: row[:name], new_name: row[:new_name])
642
+ # end
643
+ # ```
644
+ #
645
+ # _@see_ `The` — chosen adapter documentation for further information on destinations usage.
646
+ def self.destination: (Symbol adapter_name, Symbol destination_type, **SORD_ERROR_SORD_ERROR_SymbolObject destination_options) -> void
647
+
648
+ # Annotation to define which methods will follow the current one
649
+ #
650
+ # _@param_ `*methods` — The names of the following methods
651
+ #
652
+ # Source with Sequel SQLite adapter sending rows to two transforms
653
+ # ```ruby
654
+ # source :my_adapter, :paginated, page_size: 42
655
+ # send_to :my_first_transform, :my_second_transform
656
+ # def my_source(db, offset, limit)
657
+ # db[:items].offset(offset).limit(limit)
658
+ # end
659
+ #
660
+ # transform
661
+ # def my_first_transform(row)
662
+ # # ...
663
+ # end
664
+ #
665
+ # transform
666
+ # def my_second_transform(row)
667
+ # # ...
668
+ # end
669
+ # ```
670
+ def self.send_to: (*::Array[Symbol] methods) -> void
671
+
672
+ # Handles source, transform and destination controls.
673
+ # Handles send_to directives, used to do the plumbing between controls.
674
+ # Used for both kiba and fiber runners initialization.
675
+ #
676
+ # _@return_ — The job's controls
677
+ def parse_annotations: () -> ::Array[(Source | Transform | Destination | ::Hash[Symbol, ::Array[Symbol]])]
678
+
679
+ # Currently used adapters set.
680
+ #
681
+ # _@return_ — The current adapters
682
+ def adapters: () -> ::Set[untyped]
683
+
684
+ # sord warn - method is probably not a type, but using anyway
685
+ # sord warn - method wasn't able to be resolved to a constant in this project
686
+ # Finds the method(s) associated with the given annotation names in the job class.
687
+ #
688
+ # _@param_ `*annotation_names` — The annotation names of the searched methods
689
+ #
690
+ # _@return_ — The mapped array containing the block's returned values
691
+ def find_method: (*::Array[Symbol] annotation_names) ?{ (method A) -> void } -> ::Array[untyped]
692
+
693
+ # Initializes adapter controls for the given type.
694
+ #
695
+ # _@param_ `control_type` — The adapter control type, one of :source or :destination
696
+ #
697
+ # _@return_ — The initialized adapter controls
698
+ def init_adapter_controls: (Symbol control_type) -> ::Array[(Source | Destination)]
699
+
700
+ # Initializes transform controls for the given types.
701
+ #
702
+ # _@param_ `*control_types` — The transform control types, :transform and/or :lookup
703
+ #
704
+ # _@return_ — The initialized transform controls
705
+ def init_transform_controls: (*::Array[Symbol] control_types) -> ::Array[Transform]
706
+
707
+ # sord warn - Annotable::Method wasn't able to be resolved to a constant in this project
708
+ # Initializes an adapter control (source, lookup or destination) based on the given annotated method.
709
+ #
710
+ # _@param_ `annotated_method` — The control's method
711
+ #
712
+ # _@return_ — The adapter control instance
713
+ def adapter_control: (Annotable::Method annotated_method) -> Control
714
+
715
+ # sord warn - Annotable::Method wasn't able to be resolved to a constant in this project
716
+ # Initializes a transform control.
717
+ #
718
+ # _@param_ `annotated_method` — The transform's method
719
+ #
720
+ # _@return_ — The transform control instance
721
+ def transform_control: (Annotable::Method annotated_method) -> Transform
722
+
723
+ # Parse job's annotations and create the runner instance.
724
+ def initialize: () -> void
725
+
726
+ # Opens adapters, executes the runner and then closes back adapters.
727
+ def run: () -> void
728
+ end
729
+
730
+ #
731
+ # The base class for any trigger; it can be initialized by passing it its adapter name, if any.
732
+ # A trigger must implement the #add method which is called for each trigger declaration.
733
+ # Depending on what your trigger does, you may have to implement the #start and #stop methods.
734
+ # #start is called when the scheduler relying on the trigger is started. #stop does the opposite:
735
+ # it is called when the scheduler relying on the trigger is stopped.
736
+ class Trigger
737
+ # sord warn - Nil wasn't able to be resolved to a constant in this project
738
+ # Creates a new trigger instance, called by the scheduler.
739
+ #
740
+ # _@param_ `adapter` — The trigger's adapter, if any
741
+ def initialize: (?(Adapter | Nil)? adapter) -> void
742
+
743
+ # sord warn - "Symbol: Object" does not appear to be a type
744
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
745
+ # Adds a new trigger, called by a scheduler when a trigger is declared.
746
+ #
747
+ # _@param_ `_method` — The scheduler method to be called by the trigger
748
+ #
749
+ # _@param_ `_options` — The options of the trigger declaration
750
+ def add: (Method _method, SORD_ERROR_SORD_ERROR_SymbolObject _options) -> void
751
+
752
+ # Called when the scheduler relying on the trigger is started.
753
+ def start: () -> void
754
+
755
+ # Called when the scheduler relying on the trigger is stopped.
756
+ def stop: () -> void
757
+
758
+ # sord omit - no YARD type given for :adapter, using untyped
759
+ # Returns the value of attribute adapter.
760
+ attr_reader adapter: untyped
761
+ end
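As a rough sketch of the trigger contract described above, a naive polling trigger could look like the following. The `:interval` option and the threading details are illustrative assumptions; only `#add`, `#start` and `#stop` come from the documented contract.

```ruby
class PollingTrigger < Ductr::Trigger
  # Called by the scheduler for each trigger declaration.
  def add(method, options)
    (@entries ||= []) << [method, options.fetch(:interval, 60)]
  end

  # Called when the scheduler is started.
  def start
    @threads = (@entries || []).map do |method, interval|
      Thread.new do
        loop do
          sleep(interval)
          method.call # the scheduler method registered through #add
        end
      end
    end
  end

  # Called when the scheduler is stopped.
  def stop
    @threads&.each(&:kill)
  end
end
```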
762
+
763
+ module CLI
764
+ #
765
+ # The main CLI is started when used inside a ductr project folder.
766
+ # It exposes scheduling and monitoring tasks.
767
+ class Main < Thor
768
+ # sord omit - no YARD type given for "job_name", using untyped
769
+ # sord omit - no YARD return type given, using untyped
770
+ def perform: (untyped job_name) -> untyped
771
+
772
+ # sord omit - no YARD type given for "*scheduler_names", using untyped
773
+ # sord omit - no YARD return type given, using untyped
774
+ def schedule: (*untyped scheduler_names) -> untyped
775
+
776
+ # Keeps the thread alive until Ctrl-C is pressed.
777
+ def sleep_until_interrupt: () -> void
778
+ end
779
+
780
+ #
781
+ # The default CLI is started when no project folder was found.
782
+ # It exposes project and adapter generation tasks.
783
+ class Default < Thor
784
+ # Generates a new project
785
+ #
786
+ # _@param_ `name` — The project's name
787
+ def new: (?String? name) -> void
788
+ end
789
+
790
+ #
791
+ # Thor generator to create a new project
792
+ class NewProjectGenerator < Thor::Group
793
+ include Thor::Actions
794
+
795
+ # The templates source used to create a new project
796
+ #
797
+ # _@return_ — the templates source absolute path
798
+ def self.source_root: () -> String
799
+
800
+ # Does some setup before generating files;
801
+ # creates the project directory and sets it as the destination for the generator
802
+ def init: () -> void
803
+
804
+ # Creates files in the project's root
805
+ def gen_root: () -> void
806
+
807
+ # Creates the bin file for the project
808
+ def gen_bin: () -> void
809
+
810
+ # Creates files in the `config` folder
811
+ def gen_config: () -> void
812
+ end
813
+ end
814
+
815
+ #
816
+ # Base class for ETL jobs using kiba's streaming runner.
817
+ # Example using the SQLite adapter:
818
+ #
819
+ # class MyKibaJob < Ductr::KibaJob
820
+ # source :some_adapter, :paginated, page_size: 4
821
+ # def select_some_stuff(db, offset, limit)
822
+ # db[:items].offset(offset).limit(limit)
823
+ # end
824
+ #
825
+ # lookup :some_adapter, :match, merge: [:id, :item], buffer_size: 4
826
+ # def merge_with_stuff(db, ids)
827
+ # db[:items_bis].select(:id, Sequel.as(:name, :name_bis), :item).where(item: ids)
828
+ # end
829
+ #
830
+ # transform
831
+ # def generate_more_stuff(row)
832
+ # { name: "#{row[:name]}_#{row[:name_bis]}" }
833
+ # end
834
+ #
835
+ # destination :some_other_adapter, :basic
836
+ # def my_destination(row, db)
837
+ # logger.trace("Hello destination: #{row}")
838
+ # db[:new_items].insert(name: row[:name])
839
+ # end
840
+ # end
841
+ #
842
+ # @see The chosen adapter documentation for further information on controls usage.
843
+ class KibaJob < Ductr::Job
844
+ include Ductr::JobETLRunner
845
+ include Ductr::ETL::Parser
846
+ ETL_RUNNER_CLASS: Class
847
+
848
+ # sord warn - "Symbol: Object" does not appear to be a type
849
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
850
+ # Annotation to define a source method
851
+ #
852
+ # _@param_ `adapter_name` — The adapter the source is running on
853
+ #
854
+ # _@param_ `source_type` — The type of source to run
855
+ #
856
+ # _@param_ `**source_options` — The options to pass to the source
857
+ #
858
+ # Source with Sequel SQLite adapter
859
+ # ```ruby
860
+ # source :my_adapter, :paginated, page_size: 42
861
+ # def my_source(db, offset, limit)
862
+ # db[:items].offset(offset).limit(limit)
863
+ # end
864
+ # ```
865
+ #
866
+ # _@see_ `The` — chosen adapter documentation for further information on sources usage.
867
+ def self.source: (Symbol adapter_name, Symbol source_type, **SORD_ERROR_SORD_ERROR_SymbolObject source_options) -> void
868
+
869
+ # sord warn - "Symbol: Object" does not appear to be a type
870
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
871
+ # Annotation to define a transform method
872
+ #
873
+ # _@param_ `transform_class` — The class the transform is running on
874
+ #
875
+ # _@param_ `**transform_options` — The options to pass to the transform
876
+ #
877
+ # Transform without params
878
+ # ```ruby
879
+ # transform
880
+ # def rename_keys(row)
881
+ # row[:new_name] = row.delete(:old_name)
882
+ # row[:new_email] = row.delete(:old_email)
883
+ # end
884
+ # ```
885
+ #
886
+ # Transform with params
887
+ # ```ruby
888
+ # class RenameTransform < Ductr::ETL::Transform
889
+ # def process(row)
890
+ # call_method.each do |actual_name, new_name|
891
+ # new_key = "#{options[:prefix]}#{new_name}".to_sym
892
+ #
893
+ # row[new_key] = row.delete(actual_name)
894
+ # end
895
+ # end
896
+ # end
897
+ #
898
+ # transform RenameTransform, prefix: "some_"
899
+ # def rename
900
+ # { old_name: :new_name, old_email: :new_email }
901
+ # end
902
+ # ```
903
+ def self.transform: (Class? transform_class, **SORD_ERROR_SORD_ERROR_SymbolObject transform_options) -> void
904
+
905
+ # sord warn - "Symbol: Object" does not appear to be a type
906
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
907
+ # Annotation to define a lookup method
908
+ #
909
+ # _@param_ `adapter_name` — The adapter the lookup is running on
910
+ #
911
+ # _@param_ `lookup_type` — The type of lookup to run
912
+ #
913
+ # _@param_ `**lookup_options` — The options to pass to the lookup
914
+ #
915
+ # Lookup with Sequel SQLite adapter
916
+ # ```ruby
917
+ # lookup :my_other_adapter, :match, merge: [:id, :item], buffer_size: 4
918
+ # def joining_different_adapters(db, ids)
919
+ # db[:items_bis].select(:id, :item, :name).where(item: ids)
920
+ # end
921
+ # ```
922
+ #
923
+ # _@see_ `The` — chosen adapter documentation for further information on lookups usage.
924
+ def self.lookup: (Symbol adapter_name, Symbol lookup_type, **SORD_ERROR_SORD_ERROR_SymbolObject lookup_options) -> void
925
+
926
+ # sord warn - "Symbol: Object" does not appear to be a type
927
+ # sord warn - Invalid hash, must have exactly two types: "Hash<Symbol: Object>".
928
+ # Annotation to define a destination method
929
+ #
930
+ # _@param_ `adapter_name` — The adapter the destination is running on
931
+ #
932
+ # _@param_ `destination_type` — The type of destination to run
933
+ #
934
+ # _@param_ `**destination_options` — The options to pass to the destination
935
+ #
936
+ # Destination with Sequel SQLite adapter
937
+ # ```ruby
938
+ # destination :my_other_adapter, :basic
939
+ # def my_destination(row, db)
940
+ # db[:new_items].insert(name: row[:name], new_name: row[:new_name])
941
+ # end
942
+ # ```
943
+ #
944
+ # _@see_ `The` — chosen adapter documentation for further information on destinations usage.
945
+ def self.destination: (Symbol adapter_name, Symbol destination_type, **SORD_ERROR_SORD_ERROR_SymbolObject destination_options) -> void
946
+
947
+ # Handles source, transform and destination controls.
948
+ # Handles send_to directives, used to do the plumbing between controls.
949
+ # Used for both kiba and fiber runners initialization.
950
+ #
951
+ # _@return_ — The job's controls
952
+ def parse_annotations: () -> ::Array[(Source | Transform | Destination | ::Hash[Symbol, ::Array[Symbol]])]
953
+
954
+ # Currently used adapters set.
955
+ #
956
+ # _@return_ — The current adapters
957
+ def adapters: () -> ::Set[untyped]
958
+
959
+ # sord warn - method is probably not a type, but using anyway
960
+ # sord warn - method wasn't able to be resolved to a constant in this project
961
+ # Finds the method(s) associated with the given annotation names in the job class.
962
+ #
963
+ # _@param_ `*annotation_names` — The annotation names of the searched methods
964
+ #
965
+ # _@return_ — The mapped array containing the block's returned values
966
+ def find_method: (*::Array[Symbol] annotation_names) ?{ (method A) -> void } -> ::Array[untyped]
967
+
968
+ # Initializes adapter controls for the given type.
969
+ #
970
+ # _@param_ `control_type` — The adapter control type, one of :source or :destination
971
+ #
972
+ # _@return_ — The initialized adapter controls
973
+ def init_adapter_controls: (Symbol control_type) -> ::Array[(Source | Destination)]
974
+
975
+ # Initializes transform controls for the given types.
976
+ #
977
+ # _@param_ `*control_types` — The transform control types, :transform and/or :lookup
978
+ #
979
+ # _@return_ — The initialized transform controls
980
+ def init_transform_controls: (*::Array[Symbol] control_types) -> ::Array[Transform]
981
+
982
+ # sord warn - Annotable::Method wasn't able to be resolved to a constant in this project
983
+ # Initializes an adapter control (source, lookup or destination) based on the given annotated method.
984
+ #
985
+ # _@param_ `annotated_method` — The control's method
986
+ #
987
+ # _@return_ — The adapter control instance
988
+ def adapter_control: (Annotable::Method annotated_method) -> Control
989
+
990
+ # sord warn - Annotable::Method wasn't able to be resolved to a constant in this project
991
+ # Initializes a transform control.
992
+ #
993
+ # _@param_ `annotated_method` — The transform's method
994
+ #
995
+ # _@return_ — The transform control instance
996
+ def transform_control: (Annotable::Method annotated_method) -> Transform
997
+
998
+ # Parse job's annotations and create the runner instance.
999
+ def initialize: () -> void
1000
+
1001
+ # Opens adapters, executes the runner and then closes back adapters.
1002
+ def run: () -> void
1003
+ end
1004
+
1005
+ #
1006
+ # Pipelines allow you to easily declare rich data pipelines.
1007
+ #
1008
+ # By using the `after` annotation, you can define the steps' execution hierarchy.
1009
+ #
1010
+ # `sync` and `async` are useful to define job sequences inside step methods.
1011
+ #
1012
+ # `Pipeline` inherits from `Job`, which means that pipelines are enqueued like any other job.
1013
+ # Pipelines are enqueued in the :ductr_pipelines queue.
1014
+ #
1015
+ # class MyPipeline < Ductr::Pipeline
1016
+ # def first_step
1017
+ # sync(MyJob, 1)
1018
+ # async(SomeJob) # Executed when `MyJob` is done
1019
+ # end
1020
+ #
1021
+ # after :first_step
1022
+ # def first_parallel_step # Returns when all three `HelloJob` are done
1023
+ # async(HelloJob, :one)
1024
+ # async(HelloJob, :two)
1025
+ # async(HelloJob, :three)
1026
+ # end
1027
+ #
1028
+ # after :first_step
1029
+ # def second_parallel_step # Executed concurrently with :first_parallel_step
1030
+ # async(SomeJob)
1031
+ # async(SomeOtherJob)
1032
+ # sync(HelloJob, :one) # Executed when `SomeJob` and `SomeOtherJob` are done
1033
+ # end
1034
+ #
1035
+ # after :first_parallel_step, :second_parallel_step
1036
+ # def last_step # Executed when `first_parallel_step` and `second_parallel_step` jobs are done
1037
+ # sync(ByeJob)
1038
+ # end
1039
+ # end
1040
+ #
1041
+ # You can define pipelines with only one step by using the `after` annotation without parameters:
1042
+ #
1043
+ # class MonoStepPipeline < Ductr::Pipeline
1044
+ # after
1045
+ # def unique_step
1046
+ # async(MyJob)
1047
+ # async(MyJob)
1048
+ # end
1049
+ # end
1050
+ #
1051
+ # A pipeline can inherit from another, allowing you to overload and add steps to the parent pipeline:
1052
+ #
1053
+ # class InheritPipeline < MonoStepPipeline
1054
+ # after :unique_step
1055
+ # def not_that_unique
1056
+ # async(MyJob)
1057
+ # end
1058
+ # end
1059
+ class Pipeline < Ductr::Job
1060
+ # Annotation to define preceding steps on a pipeline step method.
1061
+ #
1062
+ # ```ruby
1063
+ # after :some_step_method, :some_other_step_method
1064
+ # def my_step
1065
+ # # ...
1066
+ # end
1067
+ # ```
1068
+ def self.after: () -> void
1069
+
1070
+ # Starts the pipeline runner.
1071
+ def run: () -> void
1072
+
1073
+ # Initializes the pipeline runner
1074
+ def initialize: () -> void
1075
+
1076
+ # Puts the given job in the queue and waits for it to be done.
1077
+ #
1078
+ # _@param_ `job_class` — The job to enqueue
1079
+ #
1080
+ # _@param_ `*params` — The job's params
1081
+ def sync: (singleton(Job) job_class, *::Array[Object] params) -> void
1082
+
1083
+ # Enqueues the given job.
1084
+ #
1085
+ # _@param_ `job_class` — The job to enqueue
1086
+ #
1087
+ # _@param_ `*params` — The job's params
1088
+ def async: (singleton(Job) job_class, *::Array[Object] params) -> void
1089
+
1090
+ # Writes the pipeline's status into Ductr's store.
1091
+ #
1092
+ # _@param_ `status` — The status of the job
1093
+ def status=: (Symbol status) -> void
1094
+
1095
+ # _@return_ — The pipeline's runner instance
1096
+ attr_reader runner: PipelineRunner
1097
+ end
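Since `Pipeline` inherits from `Job` (and therefore from `ActiveJob::Base`), a pipeline such as the `MyPipeline` example above is started by enqueuing it like any other job:

```ruby
MyPipeline.perform_later # queued in :ductr_pipelines
```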
1098
+
1099
+ class NotFoundInRegistryError < StandardError
1100
+ end
1101
+
1102
+ #
1103
+ # The registry pattern to store adapters, controls and triggers.
1104
+ class Registry
1105
+ extend Forwardable
1106
+ end
1107
+ end