tasker-rb 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/DEVELOPMENT.md +548 -0
- data/README.md +87 -0
- data/ext/tasker_core/Cargo.lock +4720 -0
- data/ext/tasker_core/Cargo.toml +76 -0
- data/ext/tasker_core/extconf.rb +38 -0
- data/ext/tasker_core/src/CLAUDE.md +7 -0
- data/ext/tasker_core/src/bootstrap.rs +320 -0
- data/ext/tasker_core/src/bridge.rs +400 -0
- data/ext/tasker_core/src/client_ffi.rs +173 -0
- data/ext/tasker_core/src/conversions.rs +131 -0
- data/ext/tasker_core/src/diagnostics.rs +57 -0
- data/ext/tasker_core/src/event_handler.rs +179 -0
- data/ext/tasker_core/src/event_publisher_ffi.rs +239 -0
- data/ext/tasker_core/src/ffi_logging.rs +245 -0
- data/ext/tasker_core/src/global_event_system.rs +16 -0
- data/ext/tasker_core/src/in_process_event_ffi.rs +319 -0
- data/ext/tasker_core/src/lib.rs +41 -0
- data/ext/tasker_core/src/observability_ffi.rs +339 -0
- data/lib/tasker_core/batch_processing/batch_aggregation_scenario.rb +85 -0
- data/lib/tasker_core/batch_processing/batch_worker_context.rb +238 -0
- data/lib/tasker_core/bootstrap.rb +394 -0
- data/lib/tasker_core/domain_events/base_publisher.rb +220 -0
- data/lib/tasker_core/domain_events/base_subscriber.rb +178 -0
- data/lib/tasker_core/domain_events/publisher_registry.rb +253 -0
- data/lib/tasker_core/domain_events/subscriber_registry.rb +152 -0
- data/lib/tasker_core/domain_events.rb +43 -0
- data/lib/tasker_core/errors/CLAUDE.md +7 -0
- data/lib/tasker_core/errors/common.rb +305 -0
- data/lib/tasker_core/errors/error_classifier.rb +61 -0
- data/lib/tasker_core/errors.rb +4 -0
- data/lib/tasker_core/event_bridge.rb +330 -0
- data/lib/tasker_core/handlers.rb +159 -0
- data/lib/tasker_core/internal.rb +31 -0
- data/lib/tasker_core/logger.rb +234 -0
- data/lib/tasker_core/models.rb +337 -0
- data/lib/tasker_core/observability/types.rb +158 -0
- data/lib/tasker_core/observability.rb +292 -0
- data/lib/tasker_core/registry/handler_registry.rb +453 -0
- data/lib/tasker_core/registry/resolver_chain.rb +258 -0
- data/lib/tasker_core/registry/resolvers/base_resolver.rb +90 -0
- data/lib/tasker_core/registry/resolvers/class_constant_resolver.rb +156 -0
- data/lib/tasker_core/registry/resolvers/explicit_mapping_resolver.rb +146 -0
- data/lib/tasker_core/registry/resolvers/method_dispatch_wrapper.rb +144 -0
- data/lib/tasker_core/registry/resolvers/registry_resolver.rb +229 -0
- data/lib/tasker_core/registry/resolvers.rb +42 -0
- data/lib/tasker_core/registry.rb +12 -0
- data/lib/tasker_core/step_handler/api.rb +48 -0
- data/lib/tasker_core/step_handler/base.rb +354 -0
- data/lib/tasker_core/step_handler/batchable.rb +50 -0
- data/lib/tasker_core/step_handler/decision.rb +53 -0
- data/lib/tasker_core/step_handler/mixins/api.rb +452 -0
- data/lib/tasker_core/step_handler/mixins/batchable.rb +465 -0
- data/lib/tasker_core/step_handler/mixins/decision.rb +252 -0
- data/lib/tasker_core/step_handler/mixins.rb +66 -0
- data/lib/tasker_core/subscriber.rb +212 -0
- data/lib/tasker_core/task_handler/base.rb +254 -0
- data/lib/tasker_core/tasker_rb.so +0 -0
- data/lib/tasker_core/template_discovery.rb +181 -0
- data/lib/tasker_core/tracing.rb +166 -0
- data/lib/tasker_core/types/batch_processing_outcome.rb +301 -0
- data/lib/tasker_core/types/client_types.rb +145 -0
- data/lib/tasker_core/types/decision_point_outcome.rb +177 -0
- data/lib/tasker_core/types/error_types.rb +72 -0
- data/lib/tasker_core/types/simple_message.rb +151 -0
- data/lib/tasker_core/types/step_context.rb +328 -0
- data/lib/tasker_core/types/step_handler_call_result.rb +307 -0
- data/lib/tasker_core/types/step_message.rb +112 -0
- data/lib/tasker_core/types/step_types.rb +207 -0
- data/lib/tasker_core/types/task_template.rb +240 -0
- data/lib/tasker_core/types/task_types.rb +148 -0
- data/lib/tasker_core/types.rb +132 -0
- data/lib/tasker_core/version.rb +13 -0
- data/lib/tasker_core/worker/CLAUDE.md +7 -0
- data/lib/tasker_core/worker/event_poller.rb +224 -0
- data/lib/tasker_core/worker/in_process_domain_event_poller.rb +271 -0
- data/lib/tasker_core.rb +160 -0
- metadata +322 -0
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TaskerCore
|
|
4
|
+
module StepHandler
|
|
5
|
+
module Mixins
|
|
6
|
+
# Batchable mixin for batch processing handlers
|
|
7
|
+
#
|
|
8
|
+
# ## TAS-112: Composition Pattern
|
|
9
|
+
#
|
|
10
|
+
# This module follows the composition-over-inheritance pattern. Instead of
|
|
11
|
+
# inheriting from a specialized Batchable handler class, include this mixin
|
|
12
|
+
# in your Base handler.
|
|
13
|
+
#
|
|
14
|
+
# ## TAS-112: 0-Indexed Cursors (BREAKING CHANGE)
|
|
15
|
+
#
|
|
16
|
+
# As of TAS-112, cursor indexing is 0-based to match Python, TypeScript, and Rust.
|
|
17
|
+
# Previously Ruby used 1-based indexing.
|
|
18
|
+
#
|
|
19
|
+
# ## Usage
|
|
20
|
+
#
|
|
21
|
+
# ```ruby
|
|
22
|
+
# class CsvBatchProcessorHandler < TaskerCore::StepHandler::Base
|
|
23
|
+
# include TaskerCore::StepHandler::Mixins::Batchable
|
|
24
|
+
#
|
|
25
|
+
# def call(context)
|
|
26
|
+
# batch_ctx = get_batch_context(context)
|
|
27
|
+
#
|
|
28
|
+
# # Handle no-op placeholder
|
|
29
|
+
# no_op_result = handle_no_op_worker(batch_ctx)
|
|
30
|
+
# return no_op_result if no_op_result
|
|
31
|
+
#
|
|
32
|
+
# # Get dependency results
|
|
33
|
+
# csv_file = context.get_dependency_result('analyze_csv')&.dig('csv_file_path')
|
|
34
|
+
#
|
|
35
|
+
# # Handler-specific processing...
|
|
36
|
+
# end
|
|
37
|
+
# end
|
|
38
|
+
# ```
|
|
39
|
+
#
|
|
40
|
+
# ## IMPORTANT: Outcome Helper Methods Return Success Objects
|
|
41
|
+
#
|
|
42
|
+
# The outcome helper methods return fully-wrapped Success objects:
|
|
43
|
+
#
|
|
44
|
+
# ```ruby
|
|
45
|
+
# def call(context)
|
|
46
|
+
# if dataset_empty?
|
|
47
|
+
# return no_batches_outcome(reason: 'empty_dataset') # Returns Success
|
|
48
|
+
# end
|
|
49
|
+
# end
|
|
50
|
+
# ```
|
|
51
|
+
module Batchable
|
|
52
|
+
# Hook called when module is included
|
|
53
|
+
# Ruby include hook invoked when this mixin is mixed into a handler class.
# Extends the host class with ClassMethods so class-level macros become
# available alongside the instance-level helpers defined below.
#
# @param base [Class, Module] the class/module that included Batchable
def self.included(base)
  base.extend(ClassMethods)
end
|
|
56
|
+
|
|
57
|
+
# Class methods added to including class
|
|
58
|
+
# Class-level extensions added to every class that includes Batchable.
# Intentionally empty for now; reserved for future batch-related macros.
module ClassMethods
  # No class methods needed for now
end
|
|
61
|
+
|
|
62
|
+
# Override capabilities to include batch-specific features
|
|
63
|
+
# Advertise batch-processing capabilities in addition to whatever the
# base handler already reports.
#
# @return [Array<String>] base capabilities plus batch-specific ones
def capabilities
  batch_capabilities = %w[batchable batch_processing parallel_execution cursor_based deferred_convergence]
  super + batch_capabilities
end
|
|
66
|
+
|
|
67
|
+
# ========================================================================
|
|
68
|
+
# Category 1: Context Extraction Helpers
|
|
69
|
+
# ========================================================================
|
|
70
|
+
|
|
71
|
+
# Cross-language standard: Extract batch context from step context
|
|
72
|
+
#
|
|
73
|
+
# @param context [TaskerCore::Types::StepContext] Step execution context
|
|
74
|
+
# @return [BatchWorkerContext] Extracted batch context
|
|
75
|
+
#
|
|
76
|
+
# @example
|
|
77
|
+
# batch_ctx = get_batch_context(context)
|
|
78
|
+
# batch_id = batch_ctx.batch_id
|
|
79
|
+
# start = batch_ctx.start_cursor
|
|
80
|
+
# Cross-language standard: extract the batch worker context (batch id,
# cursor range, no-op flag, …) from the step execution context.
#
# @param context [TaskerCore::Types::StepContext] step execution context
# @return [BatchWorkerContext] batch context parsed from the workflow step
#
# @example
#   batch_ctx = get_batch_context(context)
#   batch_ctx.batch_id      # => "001"
#   batch_ctx.start_cursor  # => 0
def get_batch_context(context)
  step_data = context.workflow_step
  BatchProcessing::BatchWorkerContext.from_step_data(step_data)
end
|
|
83
|
+
|
|
84
|
+
# Detect batch aggregation scenario from dependency results
|
|
85
|
+
#
|
|
86
|
+
# @param sequence [DependencyResultsWrapper] Dependency results to analyze
|
|
87
|
+
# @param analyzer_step_name [String] Name of the analyzer step
|
|
88
|
+
# @param batch_worker_prefix [String] Prefix for batch worker step names
|
|
89
|
+
# @return [BatchAggregationScenario] Detected scenario (NoBatches or WithBatches)
|
|
90
|
+
#
|
|
91
|
+
# @example
|
|
92
|
+
# scenario = detect_aggregation_scenario(sequence, 'analyze_csv', 'process_csv_batch_')
|
|
93
|
+
# if scenario.no_batches?
|
|
94
|
+
# return no_batches_aggregation_result
|
|
95
|
+
# end
|
|
96
|
+
# Detect the batch aggregation scenario (NoBatches vs WithBatches) from
# dependency results. Thin delegation to BatchAggregationScenario.detect.
#
# @param sequence [DependencyResultsWrapper] dependency results to analyze
# @param analyzer_step_name [String] name of the analyzer step
# @param batch_worker_prefix [String] prefix shared by batch worker step names
# @return [BatchAggregationScenario] detected scenario
#
# @example
#   scenario = detect_aggregation_scenario(sequence, 'analyze_csv', 'process_csv_batch_')
#   return no_batches_aggregation_result if scenario.no_batches?
def detect_aggregation_scenario(sequence, analyzer_step_name, batch_worker_prefix)
  BatchProcessing::BatchAggregationScenario.detect(sequence, analyzer_step_name, batch_worker_prefix)
end
|
|
103
|
+
|
|
104
|
+
# Extract dependency result with safe navigation
|
|
105
|
+
#
|
|
106
|
+
# @param sequence [DependencyResultsWrapper] Dependency results
|
|
107
|
+
# @param step_name [String] Name of the dependency step
|
|
108
|
+
# @param keys [Array<String>] Optional keys to dig into result hash
|
|
109
|
+
# @return [Object, nil] Extracted result or nil if not found
|
|
110
|
+
#
|
|
111
|
+
# @example
|
|
112
|
+
# result = get_dependency_result(sequence, 'analyze_csv')
|
|
113
|
+
# csv_path = get_dependency_result(sequence, 'analyze_csv', 'csv_file_path')
|
|
114
|
+
# Fetch a dependency step's results, optionally digging into nested keys
# with safe navigation (returns nil rather than raising when absent).
#
# @param sequence [DependencyResultsWrapper] dependency results
# @param step_name [String] name of the dependency step
# @param keys [Array<String>] optional keys to dig into the result hash
# @return [Object, nil] full result, nested value, or nil when not found
#
# @example
#   whole    = get_dependency_result(sequence, 'analyze_csv')
#   csv_path = get_dependency_result(sequence, 'analyze_csv', 'csv_file_path')
def get_dependency_result(sequence, step_name, *keys)
  step_results = sequence.get_results(step_name)
  return step_results if keys.empty?

  step_results&.dig(*keys)
end
|
|
118
|
+
|
|
119
|
+
# ========================================================================
|
|
120
|
+
# Category 2: No-Op Worker Handling
|
|
121
|
+
# ========================================================================
|
|
122
|
+
|
|
123
|
+
# Handle no-op placeholder worker scenario
|
|
124
|
+
#
|
|
125
|
+
# Returns a success result if the worker is a no-op placeholder,
|
|
126
|
+
# otherwise returns nil to allow normal processing to continue.
|
|
127
|
+
#
|
|
128
|
+
# @param context [BatchWorkerContext] Cursor context
|
|
129
|
+
# @return [StepHandlerCallResult::Success, nil] Success result if no-op, nil otherwise
|
|
130
|
+
#
|
|
131
|
+
# @example
|
|
132
|
+
# context = get_batch_context(step)
|
|
133
|
+
# no_op_result = handle_no_op_worker(context)
|
|
134
|
+
# return no_op_result if no_op_result
|
|
135
|
+
# Short-circuit handling for no-op placeholder workers.
#
# Returns a Success result when the worker is a no-op placeholder so the
# caller can return immediately; returns nil otherwise so normal
# processing continues.
#
# @param context [BatchWorkerContext] batch cursor context
# @return [StepHandlerCallResult::Success, nil] Success when no-op, else nil
#
# @example
#   no_op_result = handle_no_op_worker(batch_ctx)
#   return no_op_result if no_op_result
def handle_no_op_worker(context)
  return nil unless context.no_op?

  no_op_payload = {
    'batch_id' => context.batch_id,
    'no_op' => true,
    'processed_count' => 0
  }
  success(result: no_op_payload)
end
|
|
146
|
+
|
|
147
|
+
# ========================================================================
|
|
148
|
+
# Category 3: Cursor Config Creation
|
|
149
|
+
# ========================================================================
|
|
150
|
+
|
|
151
|
+
# Create standard cursor configurations for batch workers
|
|
152
|
+
#
|
|
153
|
+
# Divides total items into roughly equal ranges for each worker.
|
|
154
|
+
# Supports optional customization via block.
|
|
155
|
+
#
|
|
156
|
+
# ## TAS-112: 0-Indexed Cursors (BREAKING CHANGE)
|
|
157
|
+
#
|
|
158
|
+
# As of TAS-112, cursor indexing is 0-based to match Python, TypeScript, and Rust.
|
|
159
|
+
#
|
|
160
|
+
# ## Cursor Boundary Math
|
|
161
|
+
#
|
|
162
|
+
# The method divides total_items into worker_count roughly equal ranges using
|
|
163
|
+
# ceiling division to ensure all items are covered:
|
|
164
|
+
#
|
|
165
|
+
# 1. items_per_worker = ceil(total_items / worker_count)
|
|
166
|
+
# 2. For worker i (0-indexed):
|
|
167
|
+
# - start_cursor = i * items_per_worker (0-indexed)
|
|
168
|
+
# - end_cursor = min(start_cursor + items_per_worker, total_items) (exclusive)
|
|
169
|
+
# - batch_size = end_cursor - start_cursor
|
|
170
|
+
#
|
|
171
|
+
# Example: 1000 items, 3 workers
|
|
172
|
+
# - items_per_worker = ceil(1000/3) = 334
|
|
173
|
+
# - Worker 0: start=0, end=334, size=334
|
|
174
|
+
# - Worker 1: start=334, end=668, size=334
|
|
175
|
+
# - Worker 2: start=668, end=1000, size=332
|
|
176
|
+
#
|
|
177
|
+
# @param total_items [Integer] Total number of items to process
|
|
178
|
+
# @param worker_count [Integer] Number of workers to create configs for (must be > 0)
|
|
179
|
+
# @yield [config, index] Optional block to customize each config
|
|
180
|
+
# @yieldparam config [Hash] Cursor config being created
|
|
181
|
+
# @yieldparam index [Integer] Worker index (0-based)
|
|
182
|
+
# @return [Array<Hash>] Array of cursor configurations
|
|
183
|
+
#
|
|
184
|
+
# @example Basic usage (numeric cursors)
|
|
185
|
+
# configs = create_cursor_configs(1000, 5)
|
|
186
|
+
# # => [
|
|
187
|
+
# # { 'batch_id' => '001', 'start_cursor' => 0, 'end_cursor' => 200, 'batch_size' => 200 },
|
|
188
|
+
# # { 'batch_id' => '002', 'start_cursor' => 200, 'end_cursor' => 400, 'batch_size' => 200 },
|
|
189
|
+
# # ...
|
|
190
|
+
# # ]
|
|
191
|
+
#
|
|
192
|
+
# @example With metadata customization
|
|
193
|
+
# configs = create_cursor_configs(1000, 5) do |config, i|
|
|
194
|
+
# config['worker_name'] = "worker_#{i + 1}"
|
|
195
|
+
# end
|
|
196
|
+
#
|
|
197
|
+
# @example Alphanumeric cursors (alphabetical ranges)
|
|
198
|
+
# alphabet_ranges = [['A', 'F'], ['G', 'M'], ['N', 'S'], ['T', 'Z']]
|
|
199
|
+
# configs = create_cursor_configs(alphabet_ranges.size, alphabet_ranges.size) do |config, i|
|
|
200
|
+
# config['start_cursor'] = alphabet_ranges[i][0]
|
|
201
|
+
# config['end_cursor'] = alphabet_ranges[i][1]
|
|
202
|
+
# config.delete('batch_size')
|
|
203
|
+
# end
|
|
204
|
+
# Create standard cursor configurations for batch workers.
#
# Divides total_items into worker_count roughly equal, contiguous ranges
# using ceiling division so every item is covered exactly once.
#
# TAS-112: cursors are 0-indexed (matching Python, TypeScript, Rust).
# start_cursor is inclusive, end_cursor is exclusive, and
# batch_size == end_cursor - start_cursor.
#
# Example: 1000 items, 3 workers -> items_per_worker = ceil(1000/3) = 334
#   Worker 0: start=0,   end=334,  size=334
#   Worker 1: start=334, end=668,  size=334
#   Worker 2: start=668, end=1000, size=332
#
# @param total_items [Integer] total number of items to process
# @param worker_count [Integer] number of workers to create configs for (must be > 0)
# @yield [config, index] optional block to customize each config in place
# @yieldparam config [Hash] cursor config being created
# @yieldparam index [Integer] worker index (0-based)
# @return [Array<Hash>] one cursor configuration per worker
# @raise [ArgumentError] when worker_count is not positive
#
# @example With metadata customization
#   configs = create_cursor_configs(1000, 5) do |config, i|
#     config['worker_name'] = "worker_#{i + 1}"
#   end
def create_cursor_configs(total_items, worker_count)
  raise ArgumentError, 'worker_count must be > 0' if worker_count <= 0

  # Exact integer ceiling division. The previous float-based form
  # `(total_items.to_f / worker_count).ceil` can round incorrectly once
  # total_items exceeds Float's 53-bit integer precision.
  items_per_worker = (total_items + worker_count - 1) / worker_count

  (0...worker_count).map do |i|
    # TAS-112: 0-indexed cursors (BREAKING CHANGE from 1-indexed)
    start_position = i * items_per_worker
    end_position = [start_position + items_per_worker, total_items].min

    config = {
      'batch_id' => format('%03d', i + 1),
      'start_cursor' => start_position,
      'end_cursor' => end_position,
      'batch_size' => end_position - start_position
    }

    # Allow customization via block
    yield(config, i) if block_given?
    config
  end
end
|
|
226
|
+
|
|
227
|
+
# ========================================================================
|
|
228
|
+
# Category 4: Standard Outcome Builders
|
|
229
|
+
# ========================================================================
|
|
230
|
+
|
|
231
|
+
# Create NoBatches outcome for analyzer steps
|
|
232
|
+
#
|
|
233
|
+
# @param reason [String] Reason why batching is not needed
|
|
234
|
+
# @param metadata [Hash] Additional metadata to include in result
|
|
235
|
+
# @return [StepHandlerCallResult::Success] Success result with NoBatches outcome
|
|
236
|
+
#
|
|
237
|
+
# @example
|
|
238
|
+
# return no_batches_outcome(
|
|
239
|
+
# reason: 'dataset_too_small',
|
|
240
|
+
# metadata: { 'total_rows' => 0 }
|
|
241
|
+
# )
|
|
242
|
+
# Build the NoBatches outcome for analyzer steps.
#
# @param reason [String] why batching is not needed
# @param metadata [Hash] additional keys merged into the result
#   (note: metadata keys override the base keys on collision)
# @return [StepHandlerCallResult::Success] Success result carrying the
#   NoBatches batch_processing_outcome
#
# @example
#   return no_batches_outcome(reason: 'dataset_too_small', metadata: { 'total_rows' => 0 })
def no_batches_outcome(reason:, metadata: {})
  base_result = {
    'batch_processing_outcome' => TaskerCore::Types::BatchProcessingOutcome.no_batches.to_h,
    'reason' => reason
  }
  success(result: base_result.merge(metadata))
end
|
|
252
|
+
|
|
253
|
+
# Create CreateBatches outcome for analyzer steps
|
|
254
|
+
#
|
|
255
|
+
# @param worker_template_name [String] Name of worker template to use
|
|
256
|
+
# @param cursor_configs [Array<Hash>] Array of cursor configurations
|
|
257
|
+
# @param total_items [Integer] Total number of items to process
|
|
258
|
+
# @param metadata [Hash] Additional metadata to include in result
|
|
259
|
+
# @return [StepHandlerCallResult::Success] Success result with CreateBatches outcome
|
|
260
|
+
#
|
|
261
|
+
# @example
|
|
262
|
+
# cursor_configs = create_cursor_configs(1000, 5)
|
|
263
|
+
# return create_batches_outcome(
|
|
264
|
+
# worker_template_name: 'process_csv_batch',
|
|
265
|
+
# cursor_configs: cursor_configs,
|
|
266
|
+
# total_items: 1000,
|
|
267
|
+
# metadata: { 'csv_file_path' => '/path/to/file.csv' }
|
|
268
|
+
# )
|
|
269
|
+
# Build the CreateBatches outcome for analyzer steps.
#
# @param worker_template_name [String] worker template to instantiate
# @param cursor_configs [Array<Hash>] one cursor configuration per worker
#   (worker_count is derived from its size)
# @param total_items [Integer] total number of items to process
# @param metadata [Hash] additional keys merged into the result
#   (note: metadata keys override the base keys on collision)
# @return [StepHandlerCallResult::Success] Success result carrying the
#   CreateBatches batch_processing_outcome
#
# @example
#   create_batches_outcome(
#     worker_template_name: 'process_csv_batch',
#     cursor_configs: create_cursor_configs(1000, 5),
#     total_items: 1000,
#     metadata: { 'csv_file_path' => '/path/to/file.csv' }
#   )
def create_batches_outcome(worker_template_name:, cursor_configs:, total_items:, metadata: {})
  worker_count = cursor_configs.size
  outcome = TaskerCore::Types::BatchProcessingOutcome.create_batches(
    worker_template_name: worker_template_name,
    worker_count: worker_count,
    cursor_configs: cursor_configs,
    total_items: total_items
  )

  base_result = {
    'batch_processing_outcome' => outcome.to_h,
    'worker_count' => worker_count,
    'total_items' => total_items
  }
  success(result: base_result.merge(metadata))
end
|
|
285
|
+
|
|
286
|
+
# Cross-language standard: Return success result for batch worker
|
|
287
|
+
#
|
|
288
|
+
# @param items_processed [Integer] Number of items processed
|
|
289
|
+
# @param items_succeeded [Integer] Number of items that succeeded
|
|
290
|
+
# @param items_failed [Integer] Number of items that failed (default 0)
|
|
291
|
+
# @param items_skipped [Integer] Number of items skipped (default 0)
|
|
292
|
+
# @param last_cursor [Object, nil] Last cursor position processed
|
|
293
|
+
# @param results [Array, nil] Optional array of result items
|
|
294
|
+
# @param errors [Array, nil] Optional array of error items
|
|
295
|
+
# @param metadata [Hash] Additional metadata
|
|
296
|
+
# @return [StepHandlerCallResult::Success] Success result with batch worker outcome
|
|
297
|
+
#
|
|
298
|
+
# @example
|
|
299
|
+
# batch_worker_success(
|
|
300
|
+
# items_processed: 100,
|
|
301
|
+
# items_succeeded: 98,
|
|
302
|
+
# items_failed: 2,
|
|
303
|
+
# last_cursor: 500,
|
|
304
|
+
# metadata: { batch_id: '001' }
|
|
305
|
+
# )
|
|
306
|
+
# Cross-language standard: build a success result for a batch worker.
#
# @param items_processed [Integer] number of items processed
# @param items_succeeded [Integer] number of items that succeeded
# @param items_failed [Integer] number of items that failed (default 0)
# @param items_skipped [Integer] number of items skipped (default 0)
# @param last_cursor [Object, nil] last cursor position processed (omitted when nil/false)
# @param results [Array, nil] optional array of result items (omitted when nil/false)
# @param errors [Array, nil] optional array of error items (omitted when nil/false)
# @param metadata [Hash] additional keys merged into the result
#   (note: metadata keys override the counter keys on collision)
# @return [StepHandlerCallResult::Success] Success tagged with batch_worker: true
#
# @example
#   batch_worker_success(
#     items_processed: 100,
#     items_succeeded: 98,
#     items_failed: 2,
#     last_cursor: 500,
#     metadata: { batch_id: '001' }
#   )
def batch_worker_success(
  items_processed:,
  items_succeeded:,
  items_failed: 0,
  items_skipped: 0,
  last_cursor: nil,
  results: nil,
  errors: nil,
  metadata: {}
)
  payload = {
    'items_processed' => items_processed,
    'items_succeeded' => items_succeeded,
    'items_failed' => items_failed,
    'items_skipped' => items_skipped
  }

  # Optional fields are only included when truthy (preserves the original
  # truthiness check: nil/false are omitted, 0 is kept).
  optional_fields = { 'last_cursor' => last_cursor, 'results' => results, 'errors' => errors }
  optional_fields.each { |key, value| payload[key] = value if value }

  success(result: payload.merge(metadata), metadata: { batch_worker: true })
end
|
|
332
|
+
|
|
333
|
+
# TAS-125: Yield checkpoint for batch processing
|
|
334
|
+
#
|
|
335
|
+
# Use this method when your handler needs to persist progress and be
|
|
336
|
+
# re-dispatched for continued processing. This is useful for:
|
|
337
|
+
# - Processing very large datasets that exceed memory limits
|
|
338
|
+
# - Providing progress visibility for long-running batch jobs
|
|
339
|
+
# - Enabling graceful shutdown with resumption capability
|
|
340
|
+
#
|
|
341
|
+
# Unlike batch_worker_success, this does NOT complete the step.
|
|
342
|
+
# Instead, it persists the checkpoint and causes the step to be
|
|
343
|
+
# re-dispatched with the updated checkpoint context.
|
|
344
|
+
#
|
|
345
|
+
# @param cursor [Integer, String, Hash] Position to resume from
|
|
346
|
+
# - Integer: For offset-based pagination (row number)
|
|
347
|
+
# - String: For cursor-based pagination (opaque token)
|
|
348
|
+
# - Hash: For complex cursors (e.g., { page_token: "..." })
|
|
349
|
+
# @param items_processed [Integer] Total items processed so far (cumulative)
|
|
350
|
+
# @param accumulated_results [Hash, nil] Partial aggregations to carry forward
|
|
351
|
+
# @return [StepHandlerCallResult::CheckpointYield] Checkpoint yield result
|
|
352
|
+
#
|
|
353
|
+
# @example Simple offset checkpoint
|
|
354
|
+
# def call(context)
|
|
355
|
+
# batch_ctx = get_batch_context(context)
|
|
356
|
+
# start = batch_ctx.checkpoint_cursor || batch_ctx.start_cursor
|
|
357
|
+
# accumulated = batch_ctx.accumulated_results || { 'total' => 0 }
|
|
358
|
+
#
|
|
359
|
+
# # Process a chunk
|
|
360
|
+
# chunk_size = 1000
|
|
361
|
+
# items.each_with_index do |item, idx|
|
|
362
|
+
# break if idx >= chunk_size
|
|
363
|
+
# process(item)
|
|
364
|
+
# accumulated['total'] += item.value
|
|
365
|
+
# end
|
|
366
|
+
#
|
|
367
|
+
# new_cursor = start + chunk_size
|
|
368
|
+
# if new_cursor < batch_ctx.end_cursor
|
|
369
|
+
# # More work to do - yield checkpoint
|
|
370
|
+
# return checkpoint_yield(
|
|
371
|
+
# cursor: new_cursor,
|
|
372
|
+
# items_processed: new_cursor,
|
|
373
|
+
# accumulated_results: accumulated
|
|
374
|
+
# )
|
|
375
|
+
# end
|
|
376
|
+
#
|
|
377
|
+
# # Done - return final success
|
|
378
|
+
# batch_worker_success(
|
|
379
|
+
# items_processed: batch_ctx.batch_size,
|
|
380
|
+
# items_succeeded: batch_ctx.batch_size,
|
|
381
|
+
# metadata: accumulated
|
|
382
|
+
# )
|
|
383
|
+
# end
|
|
384
|
+
# TAS-125: Yield a checkpoint during batch processing.
#
# Unlike batch_worker_success, this does NOT complete the step: the
# checkpoint is persisted and the step is re-dispatched with the updated
# checkpoint context. Use it to chunk very large datasets, expose
# progress, or enable graceful shutdown with resumption.
#
# @param cursor [Integer, String, Hash] position to resume from
#   (Integer for offsets, String for opaque tokens, Hash for complex cursors)
# @param items_processed [Integer] cumulative items processed so far
# @param accumulated_results [Hash, nil] partial aggregations to carry forward
# @return [StepHandlerCallResult::CheckpointYield] checkpoint yield result
#
# @example
#   return checkpoint_yield(cursor: new_cursor, items_processed: new_cursor, accumulated_results: acc)
def checkpoint_yield(cursor:, items_processed:, accumulated_results: nil)
  TaskerCore::Types::StepHandlerCallResult.checkpoint_yield(
    cursor: cursor, items_processed: items_processed, accumulated_results: accumulated_results
  )
end
|
|
391
|
+
|
|
392
|
+
# ========================================================================
|
|
393
|
+
# Category 5: Aggregation Helpers
|
|
394
|
+
# ========================================================================
|
|
395
|
+
|
|
396
|
+
# Create no-batches aggregation result
|
|
397
|
+
#
|
|
398
|
+
# Used by aggregator steps when no batch processing occurred.
|
|
399
|
+
#
|
|
400
|
+
# @param metadata [Hash] Additional metadata (typically zero metrics)
|
|
401
|
+
# @return [StepHandlerCallResult::Success] Success result for NoBatches scenario
|
|
402
|
+
#
|
|
403
|
+
# @example
|
|
404
|
+
# return no_batches_aggregation_result(
|
|
405
|
+
# metadata: {
|
|
406
|
+
# 'total_processed' => 0,
|
|
407
|
+
# 'total_value' => 0.0
|
|
408
|
+
# }
|
|
409
|
+
# )
|
|
410
|
+
# Build the aggregation result for the NoBatches scenario.
#
# Used by aggregator steps when no batch processing occurred.
#
# @param metadata [Hash] additional keys merged into the result, typically
#   zero-valued metrics (note: metadata keys override the base keys)
# @return [StepHandlerCallResult::Success] Success with worker_count 0
#
# @example
#   return no_batches_aggregation_result(metadata: { 'total_processed' => 0 })
def no_batches_aggregation_result(metadata: {})
  zero_result = { 'worker_count' => 0, 'scenario' => 'no_batches' }
  success(result: zero_result.merge(metadata))
end
|
|
418
|
+
|
|
419
|
+
# Aggregate batch worker results
|
|
420
|
+
#
|
|
421
|
+
# Handles both NoBatches and WithBatches scenarios.
|
|
422
|
+
#
|
|
423
|
+
# @param scenario [BatchAggregationScenario] Detected scenario
|
|
424
|
+
# @param zero_metrics [Hash] Metrics to return for NoBatches scenario
|
|
425
|
+
# @yield [batch_results] Block to perform custom aggregation
|
|
426
|
+
# @yieldparam batch_results [Hash] Hash of worker results
|
|
427
|
+
# @yieldreturn [Hash] Aggregated metrics
|
|
428
|
+
# @return [StepHandlerCallResult::Success] Success result with aggregated data
|
|
429
|
+
#
|
|
430
|
+
# @example Sum aggregation
|
|
431
|
+
# scenario = detect_aggregation_scenario(sequence, 'analyze_csv', 'process_csv_batch_')
|
|
432
|
+
#
|
|
433
|
+
# aggregate_batch_worker_results(
|
|
434
|
+
# scenario,
|
|
435
|
+
# zero_metrics: { 'total_processed' => 0, 'total_value' => 0.0 }
|
|
436
|
+
# ) do |batch_results|
|
|
437
|
+
# total_processed = 0
|
|
438
|
+
# total_value = 0.0
|
|
439
|
+
#
|
|
440
|
+
# batch_results.each_value do |result|
|
|
441
|
+
# total_processed += result['processed_count'] || 0
|
|
442
|
+
# total_value += result['total_value'] || 0.0
|
|
443
|
+
# end
|
|
444
|
+
#
|
|
445
|
+
# {
|
|
446
|
+
# 'total_processed' => total_processed,
|
|
447
|
+
# 'total_value' => total_value
|
|
448
|
+
# }
|
|
449
|
+
# end
|
|
450
|
+
# Aggregate batch worker results, handling both NoBatches and WithBatches
# scenarios. For NoBatches, returns zero_metrics; for WithBatches, yields
# the per-worker results to the caller's block for custom aggregation.
#
# @param scenario [BatchAggregationScenario] detected scenario
# @param zero_metrics [Hash] metrics returned for the NoBatches scenario
# @yield [batch_results] block performing the custom aggregation
#   (required when the scenario has batches)
# @yieldparam batch_results [Hash] hash of worker results
# @yieldreturn [Hash] aggregated metrics
# @return [StepHandlerCallResult::Success] Success with aggregated data
#
# @example Sum aggregation
#   aggregate_batch_worker_results(scenario, zero_metrics: { 'total' => 0 }) do |batch_results|
#     { 'total' => batch_results.each_value.sum { |r| r['processed_count'] || 0 } }
#   end
def aggregate_batch_worker_results(scenario, zero_metrics: {})
  return no_batches_aggregation_result(metadata: zero_metrics) if scenario.no_batches?

  custom_metrics = yield(scenario.batch_results)
  summary = { 'worker_count' => scenario.worker_count, 'scenario' => 'with_batches' }
  success(result: custom_metrics.merge(summary))
end
|
|
462
|
+
end
|
|
463
|
+
end
|
|
464
|
+
end
|
|
465
|
+
end
|