tasker-rb 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78):
  1. checksums.yaml +7 -0
  2. data/DEVELOPMENT.md +548 -0
  3. data/README.md +87 -0
  4. data/ext/tasker_core/Cargo.lock +4720 -0
  5. data/ext/tasker_core/Cargo.toml +76 -0
  6. data/ext/tasker_core/extconf.rb +38 -0
  7. data/ext/tasker_core/src/CLAUDE.md +7 -0
  8. data/ext/tasker_core/src/bootstrap.rs +320 -0
  9. data/ext/tasker_core/src/bridge.rs +400 -0
  10. data/ext/tasker_core/src/client_ffi.rs +173 -0
  11. data/ext/tasker_core/src/conversions.rs +131 -0
  12. data/ext/tasker_core/src/diagnostics.rs +57 -0
  13. data/ext/tasker_core/src/event_handler.rs +179 -0
  14. data/ext/tasker_core/src/event_publisher_ffi.rs +239 -0
  15. data/ext/tasker_core/src/ffi_logging.rs +245 -0
  16. data/ext/tasker_core/src/global_event_system.rs +16 -0
  17. data/ext/tasker_core/src/in_process_event_ffi.rs +319 -0
  18. data/ext/tasker_core/src/lib.rs +41 -0
  19. data/ext/tasker_core/src/observability_ffi.rs +339 -0
  20. data/lib/tasker_core/batch_processing/batch_aggregation_scenario.rb +85 -0
  21. data/lib/tasker_core/batch_processing/batch_worker_context.rb +238 -0
  22. data/lib/tasker_core/bootstrap.rb +394 -0
  23. data/lib/tasker_core/domain_events/base_publisher.rb +220 -0
  24. data/lib/tasker_core/domain_events/base_subscriber.rb +178 -0
  25. data/lib/tasker_core/domain_events/publisher_registry.rb +253 -0
  26. data/lib/tasker_core/domain_events/subscriber_registry.rb +152 -0
  27. data/lib/tasker_core/domain_events.rb +43 -0
  28. data/lib/tasker_core/errors/CLAUDE.md +7 -0
  29. data/lib/tasker_core/errors/common.rb +305 -0
  30. data/lib/tasker_core/errors/error_classifier.rb +61 -0
  31. data/lib/tasker_core/errors.rb +4 -0
  32. data/lib/tasker_core/event_bridge.rb +330 -0
  33. data/lib/tasker_core/handlers.rb +159 -0
  34. data/lib/tasker_core/internal.rb +31 -0
  35. data/lib/tasker_core/logger.rb +234 -0
  36. data/lib/tasker_core/models.rb +337 -0
  37. data/lib/tasker_core/observability/types.rb +158 -0
  38. data/lib/tasker_core/observability.rb +292 -0
  39. data/lib/tasker_core/registry/handler_registry.rb +453 -0
  40. data/lib/tasker_core/registry/resolver_chain.rb +258 -0
  41. data/lib/tasker_core/registry/resolvers/base_resolver.rb +90 -0
  42. data/lib/tasker_core/registry/resolvers/class_constant_resolver.rb +156 -0
  43. data/lib/tasker_core/registry/resolvers/explicit_mapping_resolver.rb +146 -0
  44. data/lib/tasker_core/registry/resolvers/method_dispatch_wrapper.rb +144 -0
  45. data/lib/tasker_core/registry/resolvers/registry_resolver.rb +229 -0
  46. data/lib/tasker_core/registry/resolvers.rb +42 -0
  47. data/lib/tasker_core/registry.rb +12 -0
  48. data/lib/tasker_core/step_handler/api.rb +48 -0
  49. data/lib/tasker_core/step_handler/base.rb +354 -0
  50. data/lib/tasker_core/step_handler/batchable.rb +50 -0
  51. data/lib/tasker_core/step_handler/decision.rb +53 -0
  52. data/lib/tasker_core/step_handler/mixins/api.rb +452 -0
  53. data/lib/tasker_core/step_handler/mixins/batchable.rb +465 -0
  54. data/lib/tasker_core/step_handler/mixins/decision.rb +252 -0
  55. data/lib/tasker_core/step_handler/mixins.rb +66 -0
  56. data/lib/tasker_core/subscriber.rb +212 -0
  57. data/lib/tasker_core/task_handler/base.rb +254 -0
  58. data/lib/tasker_core/tasker_rb.so +0 -0
  59. data/lib/tasker_core/template_discovery.rb +181 -0
  60. data/lib/tasker_core/tracing.rb +166 -0
  61. data/lib/tasker_core/types/batch_processing_outcome.rb +301 -0
  62. data/lib/tasker_core/types/client_types.rb +145 -0
  63. data/lib/tasker_core/types/decision_point_outcome.rb +177 -0
  64. data/lib/tasker_core/types/error_types.rb +72 -0
  65. data/lib/tasker_core/types/simple_message.rb +151 -0
  66. data/lib/tasker_core/types/step_context.rb +328 -0
  67. data/lib/tasker_core/types/step_handler_call_result.rb +307 -0
  68. data/lib/tasker_core/types/step_message.rb +112 -0
  69. data/lib/tasker_core/types/step_types.rb +207 -0
  70. data/lib/tasker_core/types/task_template.rb +240 -0
  71. data/lib/tasker_core/types/task_types.rb +148 -0
  72. data/lib/tasker_core/types.rb +132 -0
  73. data/lib/tasker_core/version.rb +13 -0
  74. data/lib/tasker_core/worker/CLAUDE.md +7 -0
  75. data/lib/tasker_core/worker/event_poller.rb +224 -0
  76. data/lib/tasker_core/worker/in_process_domain_event_poller.rb +271 -0
  77. data/lib/tasker_core.rb +160 -0
  78. metadata +322 -0
# frozen_string_literal: true

module TaskerCore
  module StepHandler
    module Mixins
      # Batchable mixin for batch processing handlers
      #
      # ## TAS-112: Composition Pattern
      #
      # This module follows the composition-over-inheritance pattern. Instead of
      # inheriting from a specialized Batchable handler class, include this mixin
      # in your Base handler.
      #
      # ## TAS-112: 0-Indexed Cursors (BREAKING CHANGE)
      #
      # As of TAS-112, cursor indexing is 0-based to match Python, TypeScript,
      # and Rust. Previously Ruby used 1-based indexing.
      #
      # ## Usage
      #
      # ```ruby
      # class CsvBatchProcessorHandler < TaskerCore::StepHandler::Base
      #   include TaskerCore::StepHandler::Mixins::Batchable
      #
      #   def call(context)
      #     batch_ctx = get_batch_context(context)
      #
      #     # Handle no-op placeholder
      #     no_op_result = handle_no_op_worker(batch_ctx)
      #     return no_op_result if no_op_result
      #
      #     # Get dependency results
      #     csv_file = context.get_dependency_result('analyze_csv')&.dig('csv_file_path')
      #
      #     # Handler-specific processing...
      #   end
      # end
      # ```
      #
      # ## IMPORTANT: Outcome Helper Methods Return Success Objects
      #
      # The outcome helper methods return fully-wrapped Success objects:
      #
      # ```ruby
      # def call(context)
      #   if dataset_empty?
      #     return no_batches_outcome(reason: 'empty_dataset') # Returns Success
      #   end
      # end
      # ```
      module Batchable
        # Hook called when module is included; adds ClassMethods to the host class.
        def self.included(base)
          base.extend(ClassMethods)
        end

        # Class methods added to including class
        module ClassMethods
          # No class methods needed for now
        end

        # Override capabilities to include batch-specific features.
        # Relies on the including handler's `capabilities` returning an Array.
        def capabilities
          super + %w[batchable batch_processing parallel_execution cursor_based deferred_convergence]
        end

        # ========================================================================
        # Category 1: Context Extraction Helpers
        # ========================================================================

        # Cross-language standard: Extract batch context from step context
        #
        # @param context [TaskerCore::Types::StepContext] Step execution context
        # @return [BatchWorkerContext] Extracted batch context
        #
        # @example
        #   batch_ctx = get_batch_context(context)
        #   batch_id = batch_ctx.batch_id
        #   start = batch_ctx.start_cursor
        def get_batch_context(context)
          BatchProcessing::BatchWorkerContext.from_step_data(context.workflow_step)
        end

        # Detect batch aggregation scenario from dependency results
        #
        # @param sequence [DependencyResultsWrapper] Dependency results to analyze
        # @param analyzer_step_name [String] Name of the analyzer step
        # @param batch_worker_prefix [String] Prefix for batch worker step names
        # @return [BatchAggregationScenario] Detected scenario (NoBatches or WithBatches)
        #
        # @example
        #   scenario = detect_aggregation_scenario(sequence, 'analyze_csv', 'process_csv_batch_')
        #   if scenario.no_batches?
        #     return no_batches_aggregation_result
        #   end
        def detect_aggregation_scenario(sequence, analyzer_step_name, batch_worker_prefix)
          BatchProcessing::BatchAggregationScenario.detect(
            sequence,
            analyzer_step_name,
            batch_worker_prefix
          )
        end

        # Extract dependency result with safe navigation
        #
        # @param sequence [DependencyResultsWrapper] Dependency results
        # @param step_name [String] Name of the dependency step
        # @param keys [Array<String>] Optional keys to dig into result hash
        # @return [Object, nil] Extracted result or nil if not found
        #
        # @example
        #   result = get_dependency_result(sequence, 'analyze_csv')
        #   csv_path = get_dependency_result(sequence, 'analyze_csv', 'csv_file_path')
        def get_dependency_result(sequence, step_name, *keys)
          result = sequence.get_results(step_name)
          keys.empty? ? result : result&.dig(*keys)
        end

        # ========================================================================
        # Category 2: No-Op Worker Handling
        # ========================================================================

        # Handle no-op placeholder worker scenario
        #
        # Returns a success result if the worker is a no-op placeholder,
        # otherwise returns nil to allow normal processing to continue.
        #
        # @param context [BatchWorkerContext] Cursor context
        # @return [StepHandlerCallResult::Success, nil] Success result if no-op, nil otherwise
        #
        # @example
        #   context = get_batch_context(step)
        #   no_op_result = handle_no_op_worker(context)
        #   return no_op_result if no_op_result
        def handle_no_op_worker(context)
          return nil unless context.no_op?

          success(
            result: {
              'batch_id' => context.batch_id,
              'no_op' => true,
              'processed_count' => 0
            }
          )
        end

        # ========================================================================
        # Category 3: Cursor Config Creation
        # ========================================================================

        # Create standard cursor configurations for batch workers
        #
        # Divides total items into roughly equal ranges for each worker.
        # Supports optional customization via block.
        #
        # ## TAS-112: 0-Indexed Cursors (BREAKING CHANGE)
        #
        # As of TAS-112, cursor indexing is 0-based to match Python, TypeScript, and Rust.
        #
        # ## Cursor Boundary Math
        #
        # The method divides total_items into worker_count roughly equal ranges using
        # ceiling division to ensure all items are covered:
        #
        # 1. items_per_worker = ceil(total_items / worker_count)
        # 2. For worker i (0-indexed):
        #    - start_cursor = i * items_per_worker (0-indexed)
        #    - end_cursor = min(start_cursor + items_per_worker, total_items) (exclusive)
        #    - batch_size = end_cursor - start_cursor
        #
        # Example: 1000 items, 3 workers
        # - items_per_worker = ceil(1000/3) = 334
        # - Worker 0: start=0, end=334, size=334
        # - Worker 1: start=334, end=668, size=334
        # - Worker 2: start=668, end=1000, size=332
        #
        # @param total_items [Integer] Total number of items to process
        # @param worker_count [Integer] Number of workers to create configs for (must be > 0)
        # @yield [config, index] Optional block to customize each config
        # @yieldparam config [Hash] Cursor config being created
        # @yieldparam index [Integer] Worker index (0-based)
        # @return [Array<Hash>] Array of cursor configurations
        # @raise [ArgumentError] If worker_count is not positive
        #
        # @example Basic usage (numeric cursors)
        #   configs = create_cursor_configs(1000, 5)
        #   # => [
        #   #   { 'batch_id' => '001', 'start_cursor' => 0, 'end_cursor' => 200, 'batch_size' => 200 },
        #   #   { 'batch_id' => '002', 'start_cursor' => 200, 'end_cursor' => 400, 'batch_size' => 200 },
        #   #   ...
        #   # ]
        #
        # @example With metadata customization
        #   configs = create_cursor_configs(1000, 5) do |config, i|
        #     config['worker_name'] = "worker_#{i + 1}"
        #   end
        #
        # @example Alphanumeric cursors (alphabetical ranges)
        #   alphabet_ranges = [['A', 'F'], ['G', 'M'], ['N', 'S'], ['T', 'Z']]
        #   configs = create_cursor_configs(alphabet_ranges.size, alphabet_ranges.size) do |config, i|
        #     config['start_cursor'] = alphabet_ranges[i][0]
        #     config['end_cursor'] = alphabet_ranges[i][1]
        #     config.delete('batch_size')
        #   end
        def create_cursor_configs(total_items, worker_count)
          raise ArgumentError, 'worker_count must be > 0' if worker_count <= 0

          # Exact integer ceiling division. The previous Float-based
          # `(total_items.to_f / worker_count).ceil` loses precision for totals
          # above 2**53 and could leave trailing items uncovered by any worker.
          items_per_worker = (total_items + worker_count - 1) / worker_count

          (0...worker_count).map do |i|
            # TAS-112: 0-indexed cursors (BREAKING CHANGE from 1-indexed)
            start_position = i * items_per_worker
            end_position = [start_position + items_per_worker, total_items].min

            config = {
              # batch_id stays 1-based for human-readable display ('001', '002', ...)
              'batch_id' => format('%03d', i + 1),
              'start_cursor' => start_position,
              'end_cursor' => end_position,
              'batch_size' => end_position - start_position
            }

            # Allow customization via block
            yield(config, i) if block_given?
            config
          end
        end

        # ========================================================================
        # Category 4: Standard Outcome Builders
        # ========================================================================

        # Create NoBatches outcome for analyzer steps
        #
        # @param reason [String] Reason why batching is not needed
        # @param metadata [Hash] Additional metadata to include in result
        # @return [StepHandlerCallResult::Success] Success result with NoBatches outcome
        #
        # @example
        #   return no_batches_outcome(
        #     reason: 'dataset_too_small',
        #     metadata: { 'total_rows' => 0 }
        #   )
        def no_batches_outcome(reason:, metadata: {})
          outcome = TaskerCore::Types::BatchProcessingOutcome.no_batches

          success(
            result: {
              'batch_processing_outcome' => outcome.to_h,
              'reason' => reason
            }.merge(metadata)
          )
        end

        # Create CreateBatches outcome for analyzer steps
        #
        # @param worker_template_name [String] Name of worker template to use
        # @param cursor_configs [Array<Hash>] Array of cursor configurations
        # @param total_items [Integer] Total number of items to process
        # @param metadata [Hash] Additional metadata to include in result
        # @return [StepHandlerCallResult::Success] Success result with CreateBatches outcome
        #
        # @example
        #   cursor_configs = create_cursor_configs(1000, 5)
        #   return create_batches_outcome(
        #     worker_template_name: 'process_csv_batch',
        #     cursor_configs: cursor_configs,
        #     total_items: 1000,
        #     metadata: { 'csv_file_path' => '/path/to/file.csv' }
        #   )
        def create_batches_outcome(worker_template_name:, cursor_configs:, total_items:, metadata: {})
          outcome = TaskerCore::Types::BatchProcessingOutcome.create_batches(
            worker_template_name: worker_template_name,
            worker_count: cursor_configs.size,
            cursor_configs: cursor_configs,
            total_items: total_items
          )

          success(
            result: {
              'batch_processing_outcome' => outcome.to_h,
              'worker_count' => cursor_configs.size,
              'total_items' => total_items
            }.merge(metadata)
          )
        end

        # Cross-language standard: Return success result for batch worker
        #
        # @param items_processed [Integer] Number of items processed
        # @param items_succeeded [Integer] Number of items that succeeded
        # @param items_failed [Integer] Number of items that failed (default 0)
        # @param items_skipped [Integer] Number of items skipped (default 0)
        # @param last_cursor [Object, nil] Last cursor position processed
        # @param results [Array, nil] Optional array of result items
        # @param errors [Array, nil] Optional array of error items
        # @param metadata [Hash] Additional metadata
        # @return [StepHandlerCallResult::Success] Success result with batch worker outcome
        #
        # @example
        #   batch_worker_success(
        #     items_processed: 100,
        #     items_succeeded: 98,
        #     items_failed: 2,
        #     last_cursor: 500,
        #     metadata: { batch_id: '001' }
        #   )
        def batch_worker_success(
          items_processed:,
          items_succeeded:,
          items_failed: 0,
          items_skipped: 0,
          last_cursor: nil,
          results: nil,
          errors: nil,
          metadata: {}
        )
          result_data = {
            'items_processed' => items_processed,
            'items_succeeded' => items_succeeded,
            'items_failed' => items_failed,
            'items_skipped' => items_skipped
          }

          # Optional fields are only included when supplied (nil/false omitted;
          # note cursor 0 is truthy in Ruby and IS included).
          result_data['last_cursor'] = last_cursor if last_cursor
          result_data['results'] = results if results
          result_data['errors'] = errors if errors

          success(
            result: result_data.merge(metadata),
            metadata: { batch_worker: true }
          )
        end

        # TAS-125: Yield checkpoint for batch processing
        #
        # Use this method when your handler needs to persist progress and be
        # re-dispatched for continued processing. This is useful for:
        # - Processing very large datasets that exceed memory limits
        # - Providing progress visibility for long-running batch jobs
        # - Enabling graceful shutdown with resumption capability
        #
        # Unlike batch_worker_success, this does NOT complete the step.
        # Instead, it persists the checkpoint and causes the step to be
        # re-dispatched with the updated checkpoint context.
        #
        # @param cursor [Integer, String, Hash] Position to resume from
        #   - Integer: For offset-based pagination (row number)
        #   - String: For cursor-based pagination (opaque token)
        #   - Hash: For complex cursors (e.g., { page_token: "..." })
        # @param items_processed [Integer] Total items processed so far (cumulative)
        # @param accumulated_results [Hash, nil] Partial aggregations to carry forward
        # @return [StepHandlerCallResult::CheckpointYield] Checkpoint yield result
        #
        # @example Simple offset checkpoint
        #   def call(context)
        #     batch_ctx = get_batch_context(context)
        #     start = batch_ctx.checkpoint_cursor || batch_ctx.start_cursor
        #     accumulated = batch_ctx.accumulated_results || { 'total' => 0 }
        #
        #     # Process a chunk
        #     chunk_size = 1000
        #     items.each_with_index do |item, idx|
        #       break if idx >= chunk_size
        #       process(item)
        #       accumulated['total'] += item.value
        #     end
        #
        #     new_cursor = start + chunk_size
        #     if new_cursor < batch_ctx.end_cursor
        #       # More work to do - yield checkpoint
        #       return checkpoint_yield(
        #         cursor: new_cursor,
        #         items_processed: new_cursor,
        #         accumulated_results: accumulated
        #       )
        #     end
        #
        #     # Done - return final success
        #     batch_worker_success(
        #       items_processed: batch_ctx.batch_size,
        #       items_succeeded: batch_ctx.batch_size,
        #       metadata: accumulated
        #     )
        #   end
        def checkpoint_yield(cursor:, items_processed:, accumulated_results: nil)
          TaskerCore::Types::StepHandlerCallResult.checkpoint_yield(
            cursor: cursor,
            items_processed: items_processed,
            accumulated_results: accumulated_results
          )
        end

        # ========================================================================
        # Category 5: Aggregation Helpers
        # ========================================================================

        # Create no-batches aggregation result
        #
        # Used by aggregator steps when no batch processing occurred.
        #
        # @param metadata [Hash] Additional metadata (typically zero metrics)
        # @return [StepHandlerCallResult::Success] Success result for NoBatches scenario
        #
        # @example
        #   return no_batches_aggregation_result(
        #     metadata: {
        #       'total_processed' => 0,
        #       'total_value' => 0.0
        #     }
        #   )
        def no_batches_aggregation_result(metadata: {})
          success(
            result: {
              'worker_count' => 0,
              'scenario' => 'no_batches'
            }.merge(metadata)
          )
        end

        # Aggregate batch worker results
        #
        # Handles both NoBatches and WithBatches scenarios.
        #
        # @param scenario [BatchAggregationScenario] Detected scenario
        # @param zero_metrics [Hash] Metrics to return for NoBatches scenario
        # @yield [batch_results] Block to perform custom aggregation
        # @yieldparam batch_results [Hash] Hash of worker results
        # @yieldreturn [Hash] Aggregated metrics
        # @return [StepHandlerCallResult::Success] Success result with aggregated data
        # @raise [ArgumentError] If no block is given for a WithBatches scenario
        #
        # @example Sum aggregation
        #   scenario = detect_aggregation_scenario(sequence, 'analyze_csv', 'process_csv_batch_')
        #
        #   aggregate_batch_worker_results(
        #     scenario,
        #     zero_metrics: { 'total_processed' => 0, 'total_value' => 0.0 }
        #   ) do |batch_results|
        #     total_processed = 0
        #     total_value = 0.0
        #
        #     batch_results.each_value do |result|
        #       total_processed += result['processed_count'] || 0
        #       total_value += result['total_value'] || 0.0
        #     end
        #
        #     {
        #       'total_processed' => total_processed,
        #       'total_value' => total_value
        #     }
        #   end
        def aggregate_batch_worker_results(scenario, zero_metrics: {})
          return no_batches_aggregation_result(metadata: zero_metrics) if scenario.no_batches?

          # Fail fast with a clear message rather than a LocalJumpError from `yield`.
          raise ArgumentError, 'aggregate_batch_worker_results requires a block for WithBatches scenarios' unless block_given?

          aggregated = yield(scenario.batch_results)

          success(
            result: aggregated.merge(
              'worker_count' => scenario.worker_count,
              'scenario' => 'with_batches'
            )
          )
        end
      end
    end
  end
end