ruby_reactor 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/README.md +132 -0
  3. data/Rakefile +2 -2
  4. data/documentation/data_pipelines.md +90 -84
  5. data/documentation/testing.md +812 -0
  6. data/lib/ruby_reactor/configuration.rb +1 -1
  7. data/lib/ruby_reactor/context.rb +13 -5
  8. data/lib/ruby_reactor/context_serializer.rb +70 -4
  9. data/lib/ruby_reactor/dsl/map_builder.rb +6 -2
  10. data/lib/ruby_reactor/dsl/reactor.rb +3 -2
  11. data/lib/ruby_reactor/error/step_failure_error.rb +5 -2
  12. data/lib/ruby_reactor/executor/result_handler.rb +9 -2
  13. data/lib/ruby_reactor/executor/retry_manager.rb +26 -8
  14. data/lib/ruby_reactor/executor/step_executor.rb +24 -99
  15. data/lib/ruby_reactor/executor.rb +3 -13
  16. data/lib/ruby_reactor/map/collector.rb +72 -33
  17. data/lib/ruby_reactor/map/dispatcher.rb +162 -0
  18. data/lib/ruby_reactor/map/element_executor.rb +103 -114
  19. data/lib/ruby_reactor/map/execution.rb +18 -4
  20. data/lib/ruby_reactor/map/helpers.rb +4 -3
  21. data/lib/ruby_reactor/map/result_enumerator.rb +105 -0
  22. data/lib/ruby_reactor/reactor.rb +174 -16
  23. data/lib/ruby_reactor/rspec/helpers.rb +17 -0
  24. data/lib/ruby_reactor/rspec/matchers.rb +256 -0
  25. data/lib/ruby_reactor/rspec/step_executor_patch.rb +85 -0
  26. data/lib/ruby_reactor/rspec/test_subject.rb +625 -0
  27. data/lib/ruby_reactor/rspec.rb +18 -0
  28. data/lib/ruby_reactor/{async_router.rb → sidekiq_adapter.rb} +15 -10
  29. data/lib/ruby_reactor/sidekiq_workers/worker.rb +1 -3
  30. data/lib/ruby_reactor/step/compose_step.rb +0 -1
  31. data/lib/ruby_reactor/step/map_step.rb +52 -27
  32. data/lib/ruby_reactor/storage/redis_adapter.rb +59 -0
  33. data/lib/ruby_reactor/template/dynamic_source.rb +32 -0
  34. data/lib/ruby_reactor/version.rb +1 -1
  35. data/lib/ruby_reactor/web/api.rb +32 -24
  36. data/lib/ruby_reactor.rb +70 -10
  37. metadata +12 -3
@@ -22,7 +22,7 @@ module RubyReactor
22
22
  end
23
23
 
24
24
  def async_router
25
- @async_router ||= RubyReactor::AsyncRouter
25
+ @async_router ||= RubyReactor::SidekiqAdapter
26
26
  end
27
27
 
28
28
  def storage
@@ -3,7 +3,7 @@
3
3
  module RubyReactor
4
4
  class Context
5
5
  attr_accessor :inputs, :intermediate_results, :private_data, :current_step, :retry_count, :concurrency_key,
6
- :retry_context, :reactor_class, :execution_trace, :inline_async_execution, :undo_stack, :test_mode,
6
+ :retry_context, :reactor_class, :execution_trace, :inline_async_execution, :undo_stack,
7
7
  :parent_context, :root_context, :composed_contexts, :context_id, :map_operations, :map_metadata,
8
8
  :cancelled, :cancellation_reason, :parent_context_id, :status, :failure_reason
9
9
 
@@ -23,7 +23,6 @@ module RubyReactor
23
23
  @execution_trace = []
24
24
  @inline_async_execution = false # Flag to prevent nested async calls
25
25
  @undo_stack = [] # Initialize the undo stack
26
- @test_mode = false
27
26
  @cancelled = false
28
27
  @cancellation_reason = nil
29
28
  @status = "pending"
@@ -33,6 +32,14 @@ module RubyReactor
33
32
  @root_context = nil
34
33
  end
35
34
 
35
+ def finished?
36
+ %w[completed failed cancelled].include?(@status.to_s)
37
+ end
38
+
39
+ def failed?
40
+ @status.to_s == "failed"
41
+ end
42
+
36
43
  def get_input(name, path = nil)
37
44
  value = @inputs[name.to_sym] || @inputs[name.to_s]
38
45
  return nil if value.nil?
@@ -57,6 +64,10 @@ module RubyReactor
57
64
  end
58
65
  alias result get_result
59
66
 
67
+ def has_result?(step_name)
68
+ @intermediate_results.key?(step_name.to_sym) || @intermediate_results.key?(step_name.to_s)
69
+ end
70
+
60
71
  def set_result(step_name, value)
61
72
  @intermediate_results[step_name.to_sym] = value
62
73
  end
@@ -81,7 +92,6 @@ module RubyReactor
81
92
  retry_context: @retry_context,
82
93
  reactor_class: @reactor_class,
83
94
  execution_trace: @execution_trace,
84
- test_mode: @test_mode,
85
95
  status: @status,
86
96
  failure_reason: @failure_reason
87
97
  }
@@ -105,7 +115,6 @@ module RubyReactor
105
115
  retry_context: @retry_context.serialize_for_retry,
106
116
  execution_trace: ContextSerializer.serialize_value(@execution_trace),
107
117
  undo_stack: serialize_undo_stack,
108
- test_mode: @test_mode,
109
118
  cancelled: @cancelled,
110
119
  cancellation_reason: @cancellation_reason,
111
120
  status: @status,
@@ -130,7 +139,6 @@ module RubyReactor
130
139
  context.retry_context = RetryContext.deserialize_from_retry(data["retry_context"] || {})
131
140
  context.execution_trace = ContextSerializer.deserialize_value(data["execution_trace"]) || []
132
141
  context.undo_stack = deserialize_undo_stack(data["undo_stack"] || [], context.reactor_class)
133
- context.test_mode = data["test_mode"] || false
134
142
  context.cancelled = data["cancelled"] || false
135
143
  context.cancellation_reason = data["cancellation_reason"]
136
144
  context.status = data["status"] || "pending"
@@ -34,9 +34,25 @@ module RubyReactor
34
34
  when RubyReactor::Success
35
35
  { "_type" => "Success", "value" => serialize_value(value.value) }
36
36
  when RubyReactor::Failure
37
- { "_type" => "Failure", "error" => serialize_value(value.error), "retryable" => value.retryable }
37
+ {
38
+ "_type" => "Failure",
39
+ "error" => serialize_value(value.error),
40
+ "retryable" => value.retryable,
41
+ "step_name" => value.step_name,
42
+ "inputs" => serialize_value(value.inputs),
43
+ "backtrace" => value.backtrace,
44
+ "reactor_name" => value.reactor_name,
45
+ "step_arguments" => serialize_value(value.step_arguments),
46
+ "exception_class" => value.exception_class,
47
+ "file_path" => value.file_path,
48
+ "line_number" => value.line_number,
49
+ "code_snippet" => serialize_value(value.code_snippet),
50
+ "validation_errors" => serialize_value(value.validation_errors)
51
+ }
38
52
  when RubyReactor::Context
39
53
  { "_type" => "Context", "value" => value.serialize_for_retry }
54
+ when Symbol
55
+ { "_type" => "Symbol", "value" => value.to_s }
40
56
  when Time
41
57
  { "_type" => "Time", "value" => value.iso8601 }
42
58
  when BigDecimal
@@ -68,6 +84,14 @@ module RubyReactor
68
84
  { "_type" => "Template::Value", "value" => serialize_value(value.instance_variable_get(:@value)) }
69
85
  when RubyReactor::Template::Result
70
86
  { "_type" => "Template::Result", "step_name" => value.step_name.to_s, "path" => value.path }
87
+ when RubyReactor::Map::ResultEnumerator
88
+ {
89
+ "_type" => "Map::ResultEnumerator",
90
+ "map_id" => value.map_id,
91
+ "reactor_class_name" => value.reactor_class_name,
92
+ "strict_ordering" => value.strict_ordering,
93
+ "batch_size" => value.batch_size
94
+ }
71
95
  when Hash
72
96
  value.transform_keys(&:to_s).transform_values { |v| serialize_value(v) }
73
97
  when Array
@@ -86,9 +110,24 @@ module RubyReactor
86
110
  when "Success"
87
111
  RubyReactor::Success(deserialize_value(value["value"]))
88
112
  when "Failure"
89
- RubyReactor::Failure(deserialize_value(value["error"]), retryable: value["retryable"])
113
+ RubyReactor::Failure.new(
114
+ deserialize_value(value["error"]),
115
+ retryable: value["retryable"],
116
+ step_name: value["step_name"],
117
+ inputs: deserialize_value(value["inputs"]),
118
+ backtrace: value["backtrace"],
119
+ reactor_name: value["reactor_name"],
120
+ step_arguments: deserialize_value(value["step_arguments"]),
121
+ exception_class: value["exception_class"],
122
+ file_path: value["file_path"],
123
+ line_number: value["line_number"],
124
+ code_snippet: deserialize_value(value["code_snippet"]),
125
+ validation_errors: deserialize_value(value["validation_errors"])
126
+ )
90
127
  when "Context"
91
128
  Context.deserialize_from_retry(value["value"])
129
+ when "Symbol"
130
+ value["value"].to_sym
92
131
  when "Time"
93
132
  Time.iso8601(value["value"])
94
133
  when "BigDecimal"
@@ -115,11 +154,20 @@ module RubyReactor
115
154
  RubyReactor::Template::Value.new(deserialize_value(value["value"]))
116
155
  when "Template::Result"
117
156
  RubyReactor::Template::Result.new(value["step_name"], value["path"])
157
+ when "Map::ResultEnumerator"
158
+ RubyReactor::Map::ResultEnumerator.new(
159
+ value["map_id"],
160
+ value["reactor_class_name"],
161
+ strict_ordering: value["strict_ordering"],
162
+ batch_size: value["batch_size"]
163
+ )
164
+
118
165
  else
119
- value
166
+ # Unknown type wrapper, return as is (but deserialize values)
167
+ value.transform_values { |v| deserialize_value(v) }
120
168
  end
121
169
  else
122
- # Regular hash - symbolize all keys recursively
170
+ # Regular Hash
123
171
  value.transform_keys(&:to_sym).transform_values { |v| deserialize_value(v) }
124
172
  end
125
173
  when Array
@@ -128,6 +176,24 @@ module RubyReactor
128
176
  value
129
177
  end
130
178
  end
179
+
180
+ # Simplifies data for public API usage (removes wrappers, flattens types)
181
+ def simplify_for_api(value)
182
+ case value
183
+ when Hash
184
+ value.each_with_object({}) do |(k, v), hash|
185
+ hash[k.to_s] = simplify_for_api(v)
186
+ end
187
+ when Array
188
+ value.map { |v| simplify_for_api(v) }
189
+ when Success, Failure, Context
190
+ simplify_for_api(value.to_h)
191
+ when Symbol
192
+ value.to_s
193
+ else
194
+ value
195
+ end
196
+ end
131
197
  # rubocop:enable Metrics/CyclomaticComplexity, Metrics/MethodLength
132
198
 
133
199
  private
@@ -32,8 +32,12 @@ module RubyReactor
32
32
  @argument_mappings[mapped_input_name] = source
33
33
  end
34
34
 
35
- def source(enumerable)
36
- @source_enumerable = enumerable
35
+ def source(enumerable = nil, &block)
36
+ @source_enumerable = if block
37
+ RubyReactor::Template::DynamicSource.new(@argument_mappings, &block)
38
+ else
39
+ enumerable
40
+ end
37
41
  end
38
42
 
39
43
  def async(async = true, batch_size: nil)
@@ -118,8 +118,9 @@ module RubyReactor
118
118
  step_config
119
119
  end
120
120
 
121
- def returns(step_name)
122
- @return_step = step_name
121
+ def returns(step_name = nil)
122
+ @return_step = step_name if step_name
123
+ @return_step
123
124
  end
124
125
 
125
126
  def middleware(middleware_class)
@@ -3,11 +3,14 @@
3
3
  module RubyReactor
4
4
  module Error
5
5
  class StepFailureError < Base
6
- attr_reader :step_arguments
6
+ attr_reader :step_arguments, :exception_class
7
7
 
8
- def initialize(message, step: nil, context: nil, original_error: nil, step_arguments: {})
8
+ # rubocop:disable Metrics/ParameterLists
9
+ def initialize(message, step: nil, context: nil, original_error: nil, step_arguments: {}, exception_class: nil)
10
+ # rubocop:enable Metrics/ParameterLists
9
11
  super(message, step: step, context: context, original_error: original_error)
10
12
  @step_arguments = step_arguments
13
+ @exception_class = exception_class
11
14
  end
12
15
 
13
16
  def retryable?
@@ -31,7 +31,7 @@ module RubyReactor
31
31
  handle_step_failure_error(error)
32
32
  when Error::InputValidationError
33
33
  # Preserve validation errors as-is for proper error handling
34
- RubyReactor.Failure(error)
34
+ RubyReactor.Failure(error, validation_errors: error.field_errors)
35
35
  when Error::Base
36
36
  # Other errors need rollback
37
37
  @compensation_manager.rollback_completed_steps
@@ -117,6 +117,7 @@ module RubyReactor
117
117
 
118
118
  def store_failed_map_context(context)
119
119
  return unless context.map_metadata && context.map_metadata[:map_id]
120
+ return unless context.map_metadata[:fail_fast]
120
121
 
121
122
  storage = RubyReactor.configuration.storage_adapter
122
123
  storage.store_map_failed_context_id(
@@ -128,7 +129,7 @@ module RubyReactor
128
129
 
129
130
  def create_failure_from_error(error, redact_inputs)
130
131
  original_error = error.original_error
131
- exception_class = original_error&.class&.name
132
+ exception_class = resolve_exception_class(original_error, error)
132
133
  backtrace = original_error&.backtrace || error.backtrace
133
134
  file_path, line_number = extract_location(backtrace)
134
135
  code_snippet = RubyReactor::Utils::CodeExtractor.extract(file_path, line_number) if file_path
@@ -148,6 +149,12 @@ module RubyReactor
148
149
  )
149
150
  end
150
151
 
152
+ def resolve_exception_class(original_error, error)
153
+ return original_error.class.name if original_error
154
+
155
+ error.respond_to?(:exception_class) ? error.exception_class : nil
156
+ end
157
+
151
158
  def validate_step_output(step_config, value, resolved_arguments = {})
152
159
  return unless step_config.output_validator
153
160
 
@@ -39,7 +39,17 @@ module RubyReactor
39
39
 
40
40
  # Serialize context and requeue the job
41
41
  # Use root context if available to ensure we serialize the full tree
42
- context_to_serialize = @context.root_context || @context
42
+ # BUT for map elements (which have map_metadata), we must serialize the element context itself
43
+
44
+ context_to_serialize = if @context.map_metadata
45
+ @context
46
+ else
47
+ @context.root_context || @context
48
+ end
49
+
50
+ puts "SERIALIZING CONTEXT: #{context_to_serialize.reactor_class.name}"
51
+ puts "INPUTS KEYS: #{context_to_serialize.inputs.keys}" if context_to_serialize.respond_to?(:inputs)
52
+
43
53
  reactor_class_name = context_to_serialize.reactor_class.name
44
54
 
45
55
  serialized_context = ContextSerializer.serialize(context_to_serialize)
@@ -106,7 +116,8 @@ module RubyReactor
106
116
  @context.root_context&.reactor_class&.async? ||
107
117
  @context.inline_async_execution
108
118
 
109
- if is_async && !@context.test_mode
119
+ # Always try async retry if configured
120
+ if is_async
110
121
  handle_async_retry(step_config, reactor_class, result)
111
122
  else
112
123
  handle_sync_retry(step_config, reactor_class, result)
@@ -114,12 +125,19 @@ module RubyReactor
114
125
  end
115
126
 
116
127
  def handle_async_retry(step_config, reactor_class, result)
117
- requeue_job_for_step_retry(step_config, result.error, reactor_class)
118
- RetryQueuedResult.new(
119
- step_config.name,
120
- @context.retry_context.attempts_for_step(step_config.name),
121
- @context.retry_context.next_retry_at
122
- )
128
+ requeue_result = requeue_job_for_step_retry(step_config, result.error, reactor_class)
129
+
130
+ # If it returned an AsyncResult, we are truly async.
131
+ # Otherwise, it ran inline and we should return the result of that execution.
132
+ if requeue_result.is_a?(RubyReactor::AsyncResult)
133
+ RetryQueuedResult.new(
134
+ step_config.name,
135
+ @context.retry_context.attempts_for_step(step_config.name),
136
+ @context.retry_context.next_retry_at
137
+ )
138
+ else
139
+ requeue_result
140
+ end
123
141
  end
124
142
 
125
143
  def handle_sync_retry(step_config, reactor_class, result)
@@ -13,7 +13,7 @@ module RubyReactor
13
13
  end
14
14
 
15
15
  def execute_all_steps
16
- until @dependency_graph.all_completed?
16
+ until @dependency_graph.all_completed? || @context.finished?
17
17
  ready_steps = @dependency_graph.ready_steps
18
18
 
19
19
  if ready_steps.empty?
@@ -65,53 +65,29 @@ module RubyReactor
65
65
  end
66
66
  end
67
67
 
68
- def merge_executor_state(other_executor)
69
- # Merge the state from the async-executed executor back into ours
70
- # We need to update our context IN PLACE, not replace the reference,
71
- # because the Executor also holds a reference to the same context object
72
-
73
- # Update intermediate results
74
- other_executor.context.intermediate_results.each do |step_name, value|
75
- @context.set_result(step_name, value)
76
- end
77
-
78
- # Append execution trace from the async execution
79
- # The Worker's execution will have ALL steps including ones we already executed,
80
- # but we only want to add the NEW entries (from current_step onwards)
81
- current_trace_length = @context.execution_trace.length
82
- new_trace_entries = other_executor.context.execution_trace[current_trace_length..] || []
83
-
84
- @context.execution_trace.concat(new_trace_entries)
85
-
86
- # Update retry context
87
- @context.retry_context = other_executor.context.retry_context
88
-
89
- # Update current_step:
90
- # If the other executor has a current_step, it means it paused/interrupted there. We should adopt it.
91
- # If it's nil, it means it completed successfully, so we clear our current_step (which was the async step).
92
- @context.current_step = other_executor.context.current_step
93
-
94
- # Update our dependency graph to reflect completed steps
95
- other_executor.context.intermediate_results.each_key do |step_name|
96
- @dependency_graph.complete_step(step_name)
97
- end
98
-
99
- # Also mark the current_step as completed if it exists (for failed steps that don't have results)
100
- @dependency_graph.complete_step(other_executor.context.current_step) if other_executor.context.current_step
101
-
102
- # Merge any undo stack items
103
- other_executor.undo_stack.each do |item|
104
- # Avoid duplicates by checking if this step is already in the undo stack
105
- # Use string comparison for step names to avoid symbol/string mismatch issues
106
- unless @compensation_manager.undo_stack.any? { |existing| existing[:step].name.to_s == item[:step].name.to_s }
107
- @compensation_manager.add_to_undo_stack(item)
108
- end
109
- end
68
+ private
110
69
 
111
- # Merge undo trace from the other executor
112
- other_executor.undo_trace.each do |trace_entry|
113
- @compensation_manager.undo_trace << trace_entry
114
- end
70
+ def reconstruct_failure(data)
71
+ return data if data.is_a?(RubyReactor::Failure)
72
+ return nil unless data.is_a?(Hash)
73
+
74
+ # Helper for hash access with string/symbol keys
75
+ get = ->(key) { data[key] || data[key.to_s] }
76
+
77
+ RubyReactor::Failure.new(
78
+ get.call(:message),
79
+ step_name: get.call(:step_name),
80
+ inputs: get.call(:inputs),
81
+ redact_inputs: get.call(:redact_inputs) || [],
82
+ backtrace: get.call(:backtrace),
83
+ reactor_name: get.call(:reactor_name),
84
+ step_arguments: get.call(:step_arguments),
85
+ exception_class: get.call(:exception_class),
86
+ file_path: get.call(:file_path),
87
+ line_number: get.call(:line_number),
88
+ code_snippet: get.call(:code_snippet),
89
+ validation_errors: get.call(:validation_errors)
90
+ )
115
91
  end
116
92
 
117
93
  def execute_step_with_retry(step_config)
@@ -190,8 +166,6 @@ module RubyReactor
190
166
  end
191
167
  end
192
168
 
193
- private
194
-
195
169
  def handle_async_step(step_config)
196
170
  # Step-level async: hand off execution to worker
197
171
 
@@ -204,60 +178,11 @@ module RubyReactor
204
178
 
205
179
  serialized_context = ContextSerializer.serialize(context_to_serialize)
206
180
 
207
- result = configuration.async_router.perform_async(
181
+ configuration.async_router.perform_async(
208
182
  serialized_context,
209
183
  reactor_class_name,
210
184
  intermediate_results: @context.intermediate_results
211
185
  )
212
-
213
- # Handle different result types from async router
214
- case result
215
- when RubyReactor::AsyncResult
216
- # Production behavior: return async result to caller
217
-
218
- result
219
- when Executor
220
- handle_inline_executor_result(result)
221
- else
222
- # Unexpected result type, treat as error
223
- raise Error::ValidationError.new(
224
- "Unexpected result type from async router: #{result.class}",
225
- context: @context
226
- )
227
- end
228
- end
229
-
230
- def handle_inline_executor_result(result)
231
- # Worker executed inline and returned an executor.
232
- # This happens when running in test mode or when perform_async returns an executor.
233
- # We need to merge the state back into our current executor.
234
- #
235
- # If we are a child reactor, the worker executed the root reactor, so the result
236
- # will be a Root executor. We handle this mismatch below by finding our
237
- # corresponding child context within the root result.
238
- if @context.root_context && (result.context.reactor_class != @reactor_class)
239
- # We are a child, and result is root.
240
- # We need to find ourselves in the root result using context_id.
241
- matching_context = find_context_by_id(result.context, @context.context_id)
242
-
243
- if matching_context
244
- # Replace the result's context with the matching child context
245
- # so merge_executor_state works correctly
246
- result.instance_variable_set(:@context, matching_context)
247
- else
248
- # Fallback: if we can't find it (shouldn't happen), we might be in trouble.
249
- # But let's try to proceed, maybe it's not nested?
250
- # For now, raise an error to be explicit
251
- raise Error::ValidationError.new(
252
- "Could not find child context with ID #{@context.context_id} in root result",
253
- context: @context
254
- )
255
- end
256
- end
257
-
258
- merge_executor_state(result)
259
-
260
- result.result
261
186
  end
262
187
 
263
188
  def handle_interrupt_step(step_config)
@@ -40,6 +40,7 @@ module RubyReactor
40
40
  input_validator = InputValidator.new(@reactor_class, @context)
41
41
  input_validator.validate!
42
42
 
43
+ @context.status = :running
43
44
  save_context
44
45
 
45
46
  graph_manager = GraphManager.new(@reactor_class, @dependency_graph, @context)
@@ -59,6 +60,7 @@ module RubyReactor
59
60
  end
60
61
 
61
62
  def resume_execution
63
+ @context.status = :running
62
64
  prepare_for_resume
63
65
  save_context
64
66
 
@@ -118,19 +120,7 @@ module RubyReactor
118
120
  @context.status = :completed
119
121
  when RubyReactor::Failure
120
122
  @context.status = :failed
121
- @context.failure_reason = {
122
- message: result.error.is_a?(Exception) ? result.error.message : result.error.to_s,
123
- step_name: result.step_name,
124
- inputs: result.inputs,
125
- backtrace: result.backtrace,
126
- reactor_name: result.reactor_name,
127
- step_arguments: result.step_arguments,
128
- exception_class: result.exception_class,
129
- file_path: result.file_path,
130
- line_number: result.line_number,
131
- code_snippet: result.code_snippet,
132
- validation_errors: result.validation_errors
133
- }
123
+ @context.failure_reason = result
134
124
  when RubyReactor::InterruptResult
135
125
  @context.status = :paused
136
126
  end
@@ -7,56 +7,95 @@ module RubyReactor
7
7
 
8
8
  def self.perform(arguments)
9
9
  arguments = arguments.transform_keys(&:to_sym)
10
- parent_context_id = arguments[:parent_context_id]
11
10
  map_id = arguments[:map_id]
11
+ parent_context_id = arguments[:parent_context_id]
12
12
  parent_reactor_class_name = arguments[:parent_reactor_class_name]
13
13
  step_name = arguments[:step_name]
14
14
  strict_ordering = arguments[:strict_ordering]
15
+ # timeout = arguments[:timeout]
15
16
 
16
17
  storage = RubyReactor.configuration.storage_adapter
18
+ parent_context_data = storage.retrieve_context(parent_context_id, parent_reactor_class_name)
19
+ parent_context = RubyReactor::Context.deserialize_from_retry(parent_context_data)
20
+
21
+ # Check if all tasks are completed
22
+ metadata = storage.retrieve_map_metadata(map_id, parent_reactor_class_name)
23
+ total_count = metadata ? metadata["count"].to_i : 0
17
24
 
18
- # Retrieve parent context
19
- parent_context = load_parent_context_from_storage(
20
- parent_context_id,
25
+ results_count = storage.count_map_results(map_id, parent_reactor_class_name)
26
+
27
+ # Not done yet, requeue or wait?
28
+ # Actually Collector currently assumes we only call it when we expect completion or check progress
29
+ # Since map_offset tracks dispatching progress and might exceed count due to batching reservation,
30
+ # we must strictly check against the total count of elements.
31
+ # Check for fail_fast failure FIRST
32
+ if (failed_context_id = storage.retrieve_map_failed_context_id(map_id, parent_reactor_class_name))
33
+ handle_failure(failed_context_id, metadata, storage, parent_context, step_name)
34
+ return
35
+ end
36
+
37
+ return if results_count < total_count
38
+
39
+ # Retrieve results lazily
40
+ results = RubyReactor::Map::ResultEnumerator.new(
41
+ map_id,
21
42
  parent_reactor_class_name,
22
- storage
43
+ strict_ordering: strict_ordering
23
44
  )
24
45
 
25
- # Retrieve results
26
- serialized_results = storage.retrieve_map_results(map_id, parent_reactor_class_name,
27
- strict_ordering: strict_ordering)
46
+ # Apply collect block (or default collection)
47
+ step_config = parent_context.reactor_class.steps[step_name.to_sym]
28
48
 
29
- results = serialized_results.map do |r|
30
- if r.is_a?(Hash) && r.key?("_error")
31
- RubyReactor::Failure(r["_error"])
32
- else
33
- RubyReactor::Success(r)
49
+ begin
50
+ final_result = apply_collect_block(results, step_config)
51
+
52
+ if final_result.failure?
53
+ # Optionally log failure internally or just rely on context status update
34
54
  end
55
+ rescue StandardError => e
56
+ final_result = RubyReactor::Failure(e)
35
57
  end
36
58
 
37
- # Get step config to check for collect block and other options
38
- parent_class = Object.const_get(parent_reactor_class_name)
39
- step_config = parent_class.steps[step_name.to_sym]
59
+ # Resume parent execution
60
+ resume_parent_execution(parent_context, step_name, final_result, storage)
61
+ rescue StandardError => e
62
+ puts "COLLECTOR CRASH: #{e.message}"
63
+ puts e.backtrace
64
+ raise e
65
+ end
40
66
 
41
- collect_block = step_config.arguments[:collect_block][:source].value
67
+ def self.apply_collect_block(results, step_config)
68
+ collect_block = step_config.arguments[:collect_block][:source].value if step_config.arguments[:collect_block]
42
69
  # TODO: Check allow_partial_failure option
43
70
 
44
- final_result = if collect_block
45
- begin
46
- # Pass all results (Success and Failure) to collect block
47
- collected = collect_block.call(results)
48
- RubyReactor::Success(collected)
49
- rescue StandardError => e
50
- RubyReactor::Failure(e)
51
- end
52
- else
53
- # Default behavior: fail if any failure
54
- first_failure = results.find(&:failure?)
55
- first_failure || RubyReactor::Success(results.map(&:value))
56
- end
57
-
58
- # Resume execution
59
- resume_parent_execution(parent_context, step_name, final_result, storage)
71
+ if collect_block
72
+ begin
73
+ # Pass Enumerator to collect block
74
+ collected = collect_block.call(results)
75
+ RubyReactor::Success(collected)
76
+ rescue StandardError => e
77
+ puts "COLLECTOR INNER EXCEPTION: #{e.message}"
78
+ puts e.backtrace
79
+ RubyReactor::Failure(e)
80
+ end
81
+ else
82
+ # Default behavior: Return Success(Enumerator).
83
+ # Logic for checking failures is deferred to the consumer of the enumerator.
84
+ RubyReactor::Success(results)
85
+ end
86
+ end
87
+
88
+ def self.handle_failure(failed_context_id, metadata, storage, parent_context, step_name)
89
+ # Resolve the class of the mapped reactor to retrieve its context
90
+ reactor_class = resolve_reactor_class(metadata["reactor_class_info"])
91
+ failed_context_data = storage.retrieve_context(failed_context_id, reactor_class.name)
92
+
93
+ return unless failed_context_data
94
+
95
+ failed_context = RubyReactor::Context.deserialize_from_retry(failed_context_data)
96
+ reason = failed_context.failure_reason
97
+ result = reason.is_a?(RubyReactor::Failure) ? reason : RubyReactor::Failure(reason)
98
+ resume_parent_execution(parent_context, step_name, result, storage)
60
99
  end
61
100
  end
62
101
  end