ruby_reactor 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +105 -0
- data/Rakefile +2 -2
- data/documentation/data_pipelines.md +90 -84
- data/lib/ruby_reactor/async_router.rb +6 -6
- data/lib/ruby_reactor/context_serializer.rb +15 -0
- data/lib/ruby_reactor/dsl/map_builder.rb +6 -2
- data/lib/ruby_reactor/executor/result_handler.rb +1 -0
- data/lib/ruby_reactor/executor/retry_manager.rb +11 -1
- data/lib/ruby_reactor/map/collector.rb +71 -33
- data/lib/ruby_reactor/map/dispatcher.rb +162 -0
- data/lib/ruby_reactor/map/element_executor.rb +59 -56
- data/lib/ruby_reactor/map/execution.rb +16 -3
- data/lib/ruby_reactor/map/helpers.rb +2 -2
- data/lib/ruby_reactor/map/result_enumerator.rb +105 -0
- data/lib/ruby_reactor/step/map_step.rb +48 -16
- data/lib/ruby_reactor/storage/redis_adapter.rb +59 -0
- data/lib/ruby_reactor/template/dynamic_source.rb +32 -0
- data/lib/ruby_reactor/version.rb +1 -1
- metadata +5 -2
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RubyReactor
  module Map
    # Queues the element jobs of a map step one batch at a time.
    #
    # Every call to {.perform} reserves the next window of the source by
    # advancing an offset held in the storage adapter, then hands each element
    # of that window to the configured async router.
    class Dispatcher
      extend Helpers

      # Loads the parent context, initialises the offset on the first run and
      # dispatches the next batch.
      #
      # @param arguments [Hash] dispatcher options; keys may be strings or
      #   symbols (they are symbolized here). Keys read by this class:
      #   :map_id, :parent_context_id, :parent_reactor_class_name, :step_name,
      #   :source, :argument_mappings, :mapped_reactor_class, :batch_size,
      #   :strict_ordering, :fail_fast and :continuation.
      def self.perform(arguments)
        arguments = arguments.transform_keys(&:to_sym)
        parent_reactor_class_name = arguments[:parent_reactor_class_name]

        storage = RubyReactor.configuration.storage_adapter

        # Load parent context to resolve source
        parent_context = load_parent_context_from_storage(
          arguments[:parent_context_id],
          parent_reactor_class_name,
          storage
        )

        # Initialize metadata if first run; continuations reuse the stored offset.
        initialize_map_metadata(arguments, storage) unless arguments[:continuation]

        # The source must be resolved on every run to know what is being iterated.
        source = resolve_source(arguments, parent_context)

        # Dispatch next batch
        dispatch_batch(source, arguments, parent_context, storage)
      end

      # Writes the initial offset (0) for the map unless one is already stored,
      # so a duplicate first run does not reset the offset.
      #
      # @param arguments [Hash] symbolized dispatcher options
      # @param storage [Object] storage adapter responding to
      #   +set_map_offset_if_not_exists+
      def self.initialize_map_metadata(arguments, storage)
        map_id = arguments[:map_id]
        reactor_class_name = arguments[:parent_reactor_class_name]

        storage.set_map_offset_if_not_exists(map_id, 0, reactor_class_name)
      end

      # Returns the collection to iterate.
      #
      # Uses +arguments[:source]+; when that is nil and a context is available,
      # falls back to the step configuration of the context's reactor class.
      # Arrays are returned as they are, objects responding to +resolve+ are
      # resolved against the context, anything else is returned unchanged.
      #
      # @param arguments [Hash] symbolized dispatcher options
      # @param context [Object, nil] parent context
      def self.resolve_source(arguments, context)
        source_template = arguments[:source]

        # Fallback: look up from step config if missing (e.g. called from ElementExecutor)
        if source_template.nil? && context
          step_name = arguments[:step_name]
          step_config = context.reactor_class.steps[step_name.to_sym]
          source_template = step_config.arguments[:source][:source]
        end

        # If source is packaged in arguments as a value (deserialized)
        return source_template if source_template.is_a?(Array)

        # Resolve template
        return source_template.resolve(context) if source_template.respond_to?(:resolve)

        source_template
      end

      # Reserves the next window of +source+ and queues one job per element.
      #
      # Returns early (queuing nothing) when +fail_fast+ is set and a failed
      # context id is already recorded for the map, or when the reserved
      # window contains no elements.
      #
      # @param source [Array, #offset, #drop] collection being mapped
      # @param arguments [Hash] symbolized dispatcher options
      # @param parent_context [Object] context of the reactor owning the map step
      # @param storage [Object] storage adapter
      def self.dispatch_batch(source, arguments, parent_context, storage)
        map_id = arguments[:map_id]
        reactor_class_name = arguments[:parent_reactor_class_name]

        # Fail Fast Check
        if arguments[:fail_fast]
          failed_context_id = storage.retrieve_map_failed_context_id(map_id, reactor_class_name)
          return if failed_context_id
        end

        batch_size = arguments[:batch_size] || source.size # Default to all if no batch_size (async=true only)

        # Reserve a batch: the adapter returns the offset *after* the increment,
        # so the window starts batch_size earlier.
        new_offset = storage.increment_map_offset(map_id, batch_size, reactor_class_name)
        current_offset = new_offset - batch_size

        batch_elements = if source.is_a?(Array)
                           source.slice(current_offset, batch_size) || []
                         elsif source.respond_to?(:offset) && source.respond_to?(:limit)
                           # Optimized for ActiveRecord and similar query builders
                           source.offset(current_offset).limit(batch_size).to_a
                         else
                           # Fallback for generic Enumerable
                           # This is inefficient for huge sets if not Array, but compliant
                           source.drop(current_offset).take(batch_size)
                         end

        return if batch_elements.empty?

        # Queue Jobs
        queue_options = {
          map_id: map_id,
          arguments: arguments,
          context: parent_context,
          reactor_class_info: resolve_reactor_class_info(arguments, parent_context),
          step_name: arguments[:step_name]
        }

        batch_elements.each_with_index do |element, i|
          absolute_index = current_offset + i
          queue_element_job(element, absolute_index, queue_options)
        end
      end

      # Builds the inputs for one element and enqueues its job through the
      # configured async router.
      #
      # @param element [Object] element taken from the source
      # @param index [Integer] absolute position of the element in the source
      # @param options [Hash] :map_id, :arguments, :context,
      #   :reactor_class_info and :step_name as built by {.dispatch_batch}
      def self.queue_element_job(element, index, options)
        arguments = options[:arguments]
        context = options[:context]

        # Resolve mappings
        mappings_template = arguments[:argument_mappings]

        # Fallback: look up from step config if missing (e.g. called from ElementExecutor)
        if mappings_template.nil? && context
          step_name = options[:step_name] || arguments[:step_name]
          step_config = context.reactor_class.steps[step_name.to_sym]
          mappings_template = step_config.arguments[:argument_mappings]
        end

        resolved = if mappings_template.respond_to?(:resolve)
                     mappings_template.resolve(context)
                   else
                     mappings_template
                   end
        # A template may resolve to nil; fall back to an empty mapping so the
        # `key?` check below cannot raise NoMethodError.
        mappings = resolved || {}

        # Fix for weird structure observed in fallback (wrapped in :source -> Template::Value)
        if mappings.key?(:source) && mappings[:source].respond_to?(:value) && mappings[:source].value.is_a?(Hash)
          mappings = mappings[:source].value
        end

        mapped_inputs = build_element_inputs(mappings, context, element)
        serialized_inputs = ContextSerializer.serialize_value(mapped_inputs)

        RubyReactor.configuration.async_router.perform_map_element_async(
          map_id: options[:map_id],
          element_id: "#{options[:map_id]}:#{index}",
          index: index,
          serialized_inputs: serialized_inputs,
          reactor_class_info: options[:reactor_class_info],
          strict_ordering: arguments[:strict_ordering],
          parent_context_id: context.context_id,
          parent_reactor_class_name: context.reactor_class.name,
          step_name: options[:step_name].to_s,
          batch_size: arguments[:batch_size], # forwarded to the element job together with the other map options
          fail_fast: arguments[:fail_fast]
        )
      end

      # Describes which reactor runs for each element.
      #
      # @return [Hash] +{"type" => "class", "name" => ...}+ when
      #   +arguments[:mapped_reactor_class]+ responds to +name+, otherwise
      #   +{"type" => "inline", "parent" => ..., "step" => ...}+
      def self.resolve_reactor_class_info(arguments, context)
        mapped_reactor_class = arguments[:mapped_reactor_class]
        step_name = arguments[:step_name]

        if mapped_reactor_class.respond_to?(:name)
          { "type" => "class", "name" => mapped_reactor_class.name }
        else
          { "type" => "inline", "parent" => context.reactor_class.name, "step" => step_name.to_s }
        end
      end
    end
  end
end
|
|
@@ -25,6 +25,11 @@ module RubyReactor
|
|
|
25
25
|
context = ContextSerializer.deserialize(serialized_context)
|
|
26
26
|
context.map_metadata = arguments
|
|
27
27
|
reactor_class = context.reactor_class
|
|
28
|
+
|
|
29
|
+
# Ensure inputs are present (fallback to serialized_inputs if missing from context)
|
|
30
|
+
if context.inputs.empty? && serialized_inputs
|
|
31
|
+
context.inputs = ContextSerializer.deserialize_value(serialized_inputs)
|
|
32
|
+
end
|
|
28
33
|
else
|
|
29
34
|
# Deserialize inputs
|
|
30
35
|
inputs = ContextSerializer.deserialize_value(serialized_inputs)
|
|
@@ -41,6 +46,27 @@ module RubyReactor
|
|
|
41
46
|
storage = RubyReactor.configuration.storage_adapter
|
|
42
47
|
storage.store_map_element_context_id(map_id, context.context_id, parent_reactor_class_name)
|
|
43
48
|
|
|
49
|
+
# Fail Fast Check
|
|
50
|
+
if arguments[:fail_fast]
|
|
51
|
+
failed_context_id = storage.retrieve_map_failed_context_id(map_id, parent_reactor_class_name)
|
|
52
|
+
if failed_context_id
|
|
53
|
+
# Decrement counter as we are skipping execution
|
|
54
|
+
new_count = storage.decrement_map_counter(map_id, parent_reactor_class_name)
|
|
55
|
+
return unless new_count.zero?
|
|
56
|
+
|
|
57
|
+
# Trigger collection if we are the last one (skipped or otherwise)
|
|
58
|
+
RubyReactor.configuration.async_router.perform_map_collection_async(
|
|
59
|
+
parent_context_id: parent_context_id,
|
|
60
|
+
map_id: map_id,
|
|
61
|
+
parent_reactor_class_name: parent_reactor_class_name,
|
|
62
|
+
step_name: step_name,
|
|
63
|
+
strict_ordering: strict_ordering,
|
|
64
|
+
timeout: 3600
|
|
65
|
+
)
|
|
66
|
+
return
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
44
70
|
# Execute
|
|
45
71
|
executor = Executor.new(reactor_class, {}, context)
|
|
46
72
|
|
|
@@ -53,27 +79,35 @@ module RubyReactor
|
|
|
53
79
|
result = executor.result
|
|
54
80
|
|
|
55
81
|
if result.is_a?(RetryQueuedResult)
|
|
56
|
-
|
|
82
|
+
trigger_next_batch_if_needed(arguments, index, batch_size)
|
|
57
83
|
return
|
|
58
84
|
end
|
|
59
85
|
|
|
60
86
|
# Store result
|
|
61
87
|
|
|
62
|
-
# Store result
|
|
63
|
-
|
|
64
88
|
if result.success?
|
|
65
|
-
storage.store_map_result(map_id, index,
|
|
89
|
+
storage.store_map_result(map_id, index,
|
|
90
|
+
ContextSerializer.serialize_value(result.value),
|
|
91
|
+
parent_reactor_class_name,
|
|
66
92
|
strict_ordering: strict_ordering)
|
|
67
93
|
else
|
|
94
|
+
# Trigger Compensation Logic
|
|
95
|
+
executor.undo_all
|
|
96
|
+
|
|
68
97
|
# Store error
|
|
69
98
|
storage.store_map_result(map_id, index, { _error: result.error }, parent_reactor_class_name,
|
|
70
99
|
strict_ordering: strict_ordering)
|
|
100
|
+
|
|
101
|
+
if arguments[:fail_fast]
|
|
102
|
+
storage.store_map_failed_context_id(map_id, context.context_id, parent_reactor_class_name)
|
|
103
|
+
end
|
|
71
104
|
end
|
|
72
105
|
|
|
73
106
|
# Decrement counter
|
|
74
107
|
new_count = storage.decrement_map_counter(map_id, parent_reactor_class_name)
|
|
75
108
|
|
|
76
|
-
|
|
109
|
+
# Trigger next batch if it's the last element of the current batch
|
|
110
|
+
trigger_next_batch_if_needed(arguments, index, batch_size)
|
|
77
111
|
|
|
78
112
|
return unless new_count.zero?
|
|
79
113
|
|
|
@@ -88,23 +122,6 @@ module RubyReactor
|
|
|
88
122
|
)
|
|
89
123
|
end
|
|
90
124
|
|
|
91
|
-
def self.queue_next_batch(arguments)
|
|
92
|
-
storage = RubyReactor.configuration.storage_adapter
|
|
93
|
-
map_id = arguments[:map_id]
|
|
94
|
-
reactor_class_name = arguments[:parent_reactor_class_name]
|
|
95
|
-
|
|
96
|
-
next_index = storage.increment_last_queued_index(map_id, reactor_class_name)
|
|
97
|
-
total_count = storage.retrieve_map_metadata(map_id, reactor_class_name)["count"]
|
|
98
|
-
|
|
99
|
-
return unless next_index < total_count
|
|
100
|
-
|
|
101
|
-
parent_context = load_parent_context(arguments, reactor_class_name, storage)
|
|
102
|
-
element = resolve_next_element(arguments, parent_context, next_index)
|
|
103
|
-
serialized_inputs = build_serialized_inputs(arguments, parent_context, element)
|
|
104
|
-
|
|
105
|
-
queue_element_job(arguments, map_id, next_index, serialized_inputs, reactor_class_name)
|
|
106
|
-
end
|
|
107
|
-
|
|
108
125
|
def self.load_parent_context(arguments, reactor_class_name, storage)
|
|
109
126
|
parent_context_data = storage.retrieve_context(arguments[:parent_context_id], reactor_class_name)
|
|
110
127
|
parent_reactor_class = Object.const_get(reactor_class_name)
|
|
@@ -116,42 +133,28 @@ module RubyReactor
|
|
|
116
133
|
parent_context
|
|
117
134
|
end
|
|
118
135
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
136
|
+
# Legacy helpers resolve_next_element, build_serialized_inputs, queue_element_job
|
|
137
|
+
# are REMOVED as they are no longer used for self-queuing.
|
|
138
|
+
|
|
139
|
+
# Basic helper to build inputs for the CURRENT element (still needed for perform)
|
|
140
|
+
# Wait, perform uses `serialized_inputs` passed to it.
|
|
141
|
+
# We don't need `build_element_inputs` here?
|
|
142
|
+
# `perform` uses `params[:serialized_inputs]`.
|
|
143
|
+
# So we can remove input building helpers too?
|
|
144
|
+
# Let's check if they are used elsewhere.
|
|
145
|
+
# `resolve_reactor_class` is used in `perform`.
|
|
146
|
+
# `build_element_inputs` is likely in Helpers or mixed in?
|
|
147
|
+
|
|
148
|
+
# Asks the dispatcher for the next batch once the element at +index+ is the
# last slot of its batch, i.e. when (index + 1) is a multiple of +batch_size+.
#
# @param arguments [Hash] map arguments; passed on with :continuation set to true
# @param index [Integer] zero-based index of the element that just ran
# @param batch_size [Integer, nil] batch size; nil disables batching
# @return [Object, nil] the dispatcher's return value, or nil when nothing was triggered
def self.trigger_next_batch_if_needed(arguments, index, batch_size)
  # A zero batch size would raise ZeroDivisionError in the modulo below;
  # treat any non-positive size like "no batching".
  return unless batch_size && batch_size.positive?
  return unless ((index + 1) % batch_size).zero?

  # Trigger Dispatcher for next batch without mutating the caller's hash.
  next_batch_args = arguments.merge(continuation: true)
  RubyReactor::Map::Dispatcher.perform(next_batch_args)
end
|
|
127
156
|
|
|
128
|
-
|
|
129
|
-
parent_reactor_class = parent_context.reactor_class
|
|
130
|
-
step_config = parent_reactor_class.steps[arguments[:step_name].to_sym]
|
|
131
|
-
|
|
132
|
-
mappings_template = step_config.arguments[:argument_mappings][:source]
|
|
133
|
-
mappings = mappings_template.resolve(parent_context) || {}
|
|
134
|
-
|
|
135
|
-
mapped_inputs = build_element_inputs(mappings, parent_context, element)
|
|
136
|
-
ContextSerializer.serialize_value(mapped_inputs)
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
def self.queue_element_job(arguments, map_id, next_index, serialized_inputs, reactor_class_name)
|
|
140
|
-
RubyReactor.configuration.async_router.perform_map_element_async(
|
|
141
|
-
map_id: map_id,
|
|
142
|
-
element_id: "#{map_id}:#{next_index}",
|
|
143
|
-
index: next_index,
|
|
144
|
-
serialized_inputs: serialized_inputs,
|
|
145
|
-
reactor_class_info: arguments[:reactor_class_info],
|
|
146
|
-
strict_ordering: arguments[:strict_ordering],
|
|
147
|
-
parent_context_id: arguments[:parent_context_id],
|
|
148
|
-
parent_reactor_class_name: reactor_class_name,
|
|
149
|
-
step_name: arguments[:step_name],
|
|
150
|
-
batch_size: arguments[:batch_size]
|
|
151
|
-
)
|
|
152
|
-
end
|
|
153
|
-
private_class_method :queue_next_batch, :load_parent_context,
|
|
154
|
-
:resolve_next_element, :build_serialized_inputs, :queue_element_job
|
|
157
|
+
private_class_method :load_parent_context, :trigger_next_batch_if_needed
|
|
155
158
|
end
|
|
156
159
|
end
|
|
157
160
|
end
|
|
@@ -21,7 +21,8 @@ module RubyReactor
|
|
|
21
21
|
storage_options: {
|
|
22
22
|
map_id: arguments[:map_id], storage: storage,
|
|
23
23
|
parent_reactor_class_name: arguments[:parent_reactor_class_name],
|
|
24
|
-
strict_ordering: arguments[:strict_ordering]
|
|
24
|
+
strict_ordering: arguments[:strict_ordering],
|
|
25
|
+
fail_fast: arguments[:fail_fast]
|
|
25
26
|
}
|
|
26
27
|
)
|
|
27
28
|
|
|
@@ -31,6 +32,12 @@ module RubyReactor
|
|
|
31
32
|
|
|
32
33
|
def self.execute_all_elements(source:, mappings:, reactor_class:, parent_context:, storage_options:)
|
|
33
34
|
source.map.with_index do |element, index|
|
|
35
|
+
if storage_options[:fail_fast]
|
|
36
|
+
failed_context_id = storage_options[:storage].retrieve_map_failed_context_id(
|
|
37
|
+
storage_options[:map_id], storage_options[:parent_reactor_class_name]
|
|
38
|
+
)
|
|
39
|
+
next if failed_context_id
|
|
40
|
+
end
|
|
34
41
|
element_inputs = build_element_inputs(mappings, parent_context, element)
|
|
35
42
|
|
|
36
43
|
# Manually create and link context to ensure parent_context_id is set
|
|
@@ -56,8 +63,14 @@ module RubyReactor
|
|
|
56
63
|
|
|
57
64
|
store_result(result, index, storage_options)
|
|
58
65
|
|
|
66
|
+
if result.failure? && storage_options[:fail_fast]
|
|
67
|
+
storage_options[:storage].store_map_failed_context_id(
|
|
68
|
+
storage_options[:map_id], child_context.context_id, storage_options[:parent_reactor_class_name]
|
|
69
|
+
)
|
|
70
|
+
end
|
|
71
|
+
|
|
59
72
|
result
|
|
60
|
-
end
|
|
73
|
+
end.compact
|
|
61
74
|
end
|
|
62
75
|
|
|
63
76
|
def self.link_contexts(child_context, parent_context)
|
|
@@ -70,7 +83,7 @@ module RubyReactor
|
|
|
70
83
|
def self.store_result(result, index, options)
|
|
71
84
|
value = result.success? ? result.value : { _error: result.error }
|
|
72
85
|
options[:storage].store_map_result(
|
|
73
|
-
options[:map_id], index, value, options[:parent_reactor_class_name],
|
|
86
|
+
options[:map_id], index, ContextSerializer.serialize_value(value), options[:parent_reactor_class_name],
|
|
74
87
|
strict_ordering: options[:strict_ordering]
|
|
75
88
|
)
|
|
76
89
|
end
|
|
@@ -54,9 +54,9 @@ module RubyReactor
|
|
|
54
54
|
# Resumes parent reactor execution after map completion
|
|
55
55
|
def resume_parent_execution(parent_context, step_name, final_result, storage)
|
|
56
56
|
executor = RubyReactor::Executor.new(parent_context.reactor_class, {}, parent_context)
|
|
57
|
+
step_name_sym = step_name.to_sym
|
|
57
58
|
|
|
58
59
|
if final_result.failure?
|
|
59
|
-
step_name_sym = step_name.to_sym
|
|
60
60
|
parent_context.current_step = step_name_sym
|
|
61
61
|
|
|
62
62
|
error = RubyReactor::Error::StepFailureError.new(
|
|
@@ -77,7 +77,7 @@ module RubyReactor
|
|
|
77
77
|
# Manually update context status since we're not running executor loop
|
|
78
78
|
executor.send(:update_context_status, failure_response)
|
|
79
79
|
else
|
|
80
|
-
parent_context.set_result(
|
|
80
|
+
parent_context.set_result(step_name_sym, final_result.value)
|
|
81
81
|
|
|
82
82
|
# Manually update execution trace to reflect completion
|
|
83
83
|
# This is necessary because resume_execution continues from the NEXT step
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RubyReactor
  module Map
    # Enumerable view over the stored results of a map operation.
    #
    # Results are read from the configured storage adapter on demand and
    # wrapped as RubyReactor::Success / RubyReactor::Failure objects.
    class ResultEnumerator
      include Enumerable

      DEFAULT_BATCH_SIZE = 1000

      attr_reader :map_id, :reactor_class_name, :strict_ordering, :batch_size

      # @param map_id [String] identifier of the map operation
      # @param reactor_class_name [String] name of the reactor class owning the map
      # @param strict_ordering [Boolean] passed through to the storage adapter
      #   and selects the iteration strategy in {#each}
      # @param batch_size [Integer] page size used by {#each} when
      #   +strict_ordering+ is false
      def initialize(map_id, reactor_class_name, strict_ordering: true, batch_size: DEFAULT_BATCH_SIZE)
        @map_id = map_id
        @reactor_class_name = reactor_class_name
        @strict_ordering = strict_ordering
        @batch_size = batch_size
        @storage = RubyReactor.configuration.storage_adapter
      end

      # Yields every stored result, wrapped.
      #
      # @return [Enumerator] when no block is given
      def each
        return enum_for(:each) unless block_given?

        if @strict_ordering
          # NOTE(review): this issues one storage call per element (via #[]).
          size.times do |i|
            yield self[i]
          end
        else
          offset = 0
          loop do
            results = @storage.retrieve_map_results_batch(
              @map_id,
              @reactor_class_name,
              offset: offset,
              limit: @batch_size,
              strict_ordering: @strict_ordering
            )

            break if results.empty?

            results.each { |result| yield wrap_result(result) }

            offset += results.size
            # A short page means the storage has no further results.
            break if results.size < @batch_size
          end
        end
      end

      # Number of stored results, as reported by the storage adapter (memoized).
      #
      # @return [Integer]
      def size
        @count ||= @storage.count_map_results(@map_id, @reactor_class_name)
      end
      alias length size

      # Without arguments, returns {#size}. With an item or a block it behaves
      # like Enumerable#count instead of silently ignoring them.
      #
      # @return [Integer]
      def count(*args, &block)
        return super if block || !args.empty?

        size
      end

      # @return [Boolean] true when no results are stored
      def empty?
        size.zero?
      end

      # Without arguments, true when at least one result is stored. With a
      # pattern or a block it behaves like Enumerable#any?.
      #
      # @return [Boolean]
      def any?(*args, &block)
        return super if block || !args.empty?

        !empty?
      end

      # Fetches the wrapped result at +index+.
      #
      # @param index [Integer] zero-based position
      # @return [RubyReactor::Success, RubyReactor::Failure, nil] nil when
      #   +index+ is out of range or the storage returns nothing for it
      def [](index)
        return nil if index.negative? || index >= size

        results = @storage.retrieve_map_results_batch(
          @map_id,
          @reactor_class_name,
          offset: index,
          limit: 1,
          strict_ordering: @strict_ordering
        )

        return nil if results.empty?

        wrap_result(results.first)
      end

      # Without an argument, returns the first result (or nil). With +n+,
      # returns an Array of the first +n+ results, like Enumerable#first.
      def first(n = nil)
        return self[0] if n.nil?

        super(n)
      end

      # @return [RubyReactor::Success, RubyReactor::Failure, nil] the last result
      def last
        self[size - 1]
      end

      # @return [Enumerator::Lazy] values of the successful results
      def successes
        lazy.select { |result| result.is_a?(RubyReactor::Success) }.map(&:value)
      end

      # @return [Enumerator::Lazy] errors of the failed results
      def failures
        lazy.select { |result| result.is_a?(RubyReactor::Failure) }.map(&:error)
      end

      private

      # Turns a raw stored result into a Failure (hash carrying "_error") or a
      # Success holding the deserialized value.
      def wrap_result(result)
        if result.is_a?(Hash) && result.key?("_error")
          RubyReactor::Failure.new(result["_error"])
        else
          RubyReactor::Success.new(ContextSerializer.deserialize_value(result))
        end
      end
    end
  end
end
|
|
@@ -28,6 +28,9 @@ module RubyReactor
|
|
|
28
28
|
inputs = {}
|
|
29
29
|
|
|
30
30
|
mappings.each do |mapped_input_name, source|
|
|
31
|
+
# Handle serialized template objects (Hashes from Sidekiq)
|
|
32
|
+
source = ContextSerializer.deserialize_value(source) if source.is_a?(Hash) && source["_type"]
|
|
33
|
+
|
|
31
34
|
value = if source.is_a?(RubyReactor::Template::Element)
|
|
32
35
|
# Handle element reference
|
|
33
36
|
# For now assuming element() refers to the current map's element
|
|
@@ -133,8 +136,11 @@ module RubyReactor
|
|
|
133
136
|
RubyReactor::Success(results)
|
|
134
137
|
else
|
|
135
138
|
# New behavior: extract successful values only
|
|
136
|
-
|
|
137
|
-
|
|
139
|
+
# New behavior: extract successful values only IF fail_fast is true behavior implies only values
|
|
140
|
+
# However, if fail_fast is false, we want to return results as is, or if logic dictates otherwise.
|
|
141
|
+
# wait, if fail_fast=false, we expect Result objects so we can check if success/failure.
|
|
142
|
+
# If we return only successes, we hide failures.
|
|
143
|
+
RubyReactor::Success(results)
|
|
138
144
|
end
|
|
139
145
|
end
|
|
140
146
|
|
|
@@ -157,18 +163,9 @@ module RubyReactor
|
|
|
157
163
|
|
|
158
164
|
reactor_class_info = build_reactor_class_info(arguments[:mapped_reactor_class], context, step_name)
|
|
159
165
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
queue_fan_out(
|
|
164
|
-
map_id: map_id, arguments: arguments, context: context,
|
|
165
|
-
reactor_class_info: reactor_class_info, step_name: step_name,
|
|
166
|
-
limit: arguments[:batch_size]
|
|
167
|
-
)
|
|
168
|
-
else
|
|
169
|
-
queue_single_worker(map_id: map_id, arguments: arguments, context: context,
|
|
170
|
-
reactor_class_info: reactor_class_info, step_name: step_name)
|
|
171
|
-
end
|
|
166
|
+
initialize_map_metadata(map_id, arguments, context, reactor_class_info)
|
|
167
|
+
|
|
168
|
+
job_id = dispatch_async_map(map_id, arguments, context, reactor_class_info, step_name)
|
|
172
169
|
|
|
173
170
|
# Store reference in composed_contexts so the UI knows where to find elements
|
|
174
171
|
context.composed_contexts[step_name.to_s] = {
|
|
@@ -178,7 +175,42 @@ module RubyReactor
|
|
|
178
175
|
element_reactor_class: arguments[:mapped_reactor_class].name
|
|
179
176
|
}
|
|
180
177
|
|
|
181
|
-
RubyReactor::AsyncResult.new(
|
|
178
|
+
RubyReactor::AsyncResult.new(
|
|
179
|
+
job_id: job_id,
|
|
180
|
+
intermediate_results: context.intermediate_results,
|
|
181
|
+
execution_id: context.context_id
|
|
182
|
+
)
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
# Registers the map operation with the storage adapter, passing the size of
# the source, the parent reactor class name, the ordering flag and the
# reactor class info.
def initialize_map_metadata(map_id, arguments, context, reactor_class_info)
  adapter = RubyReactor.configuration.storage_adapter
  element_count = arguments[:source].size
  parent_class_name = context.reactor_class.name

  adapter.initialize_map_operation(
    map_id,
    element_count,
    parent_class_name,
    strict_ordering: arguments[:strict_ordering],
    reactor_class_info: reactor_class_info
  )
end
|
|
192
|
+
|
|
193
|
+
# Starts asynchronous processing of the map.
#
# Without a :batch_size the work goes to a single worker and that call's
# result is returned. With a :batch_size the Dispatcher is invoked for the
# first batch, the collector is queued, and "map:<map_id>" is returned.
def dispatch_async_map(map_id, arguments, context, reactor_class_info, step_name)
  unless arguments[:batch_size]
    return queue_single_worker(map_id: map_id, arguments: arguments, context: context,
                               reactor_class_info: reactor_class_info, step_name: step_name)
  end

  # Use new Dispatcher with Backpressure
  dispatcher_options = {
    map_id: map_id,
    parent_context_id: context.context_id,
    parent_reactor_class_name: context.reactor_class.name,
    source: arguments[:source],
    batch_size: arguments[:batch_size],
    step_name: step_name,
    argument_mappings: arguments[:argument_mappings],
    strict_ordering: arguments[:strict_ordering],
    mapped_reactor_class: arguments[:mapped_reactor_class],
    fail_fast: arguments[:fail_fast]
  }
  RubyReactor::Map::Dispatcher.perform(dispatcher_options)

  queue_collector(map_id, context, step_name, arguments[:strict_ordering])
  "map:#{map_id}"
end
|
|
183
215
|
|
|
184
216
|
def prepare_async_execution(context, map_id, count)
|
|
@@ -252,7 +284,7 @@ module RubyReactor
|
|
|
252
284
|
map_id: map_id, serialized_inputs: serialized_inputs,
|
|
253
285
|
reactor_class_info: reactor_class_info, strict_ordering: arguments[:strict_ordering],
|
|
254
286
|
parent_context_id: context.context_id, parent_reactor_class_name: context.reactor_class.name,
|
|
255
|
-
step_name: step_name.to_s
|
|
287
|
+
step_name: step_name.to_s, fail_fast: arguments[:fail_fast]
|
|
256
288
|
)
|
|
257
289
|
end
|
|
258
290
|
end
|
|
@@ -230,6 +230,61 @@ module RubyReactor
|
|
|
230
230
|
@redis.get(key)
|
|
231
231
|
end
|
|
232
232
|
|
|
233
|
+
# Writes +offset+ under the map's offset key, overwriting any previous value.
# The key is stored with an expiry of 86_400 seconds.
def set_map_offset(map_id, offset, reactor_class_name)
  @redis.set(map_offset_key(map_id, reactor_class_name), offset, ex: 86_400)
end
|
|
237
|
+
|
|
238
|
+
# Writes +offset+ under the map's offset key only when the key does not exist
# yet (SET with nx: true). The key is stored with an expiry of 86_400 seconds.
def set_map_offset_if_not_exists(map_id, offset, reactor_class_name)
  @redis.set(map_offset_key(map_id, reactor_class_name), offset, nx: true, ex: 86_400)
end
|
|
242
|
+
|
|
243
|
+
# Reads the value stored under the map's offset key, exactly as returned by
# the Redis client's GET.
def retrieve_map_offset(map_id, reactor_class_name)
  @redis.get(map_offset_key(map_id, reactor_class_name))
end
|
|
247
|
+
|
|
248
|
+
# Adds +increment+ to the map's offset key with INCRBY and returns the
# client's result for that call.
def increment_map_offset(map_id, increment, reactor_class_name)
  @redis.incrby(map_offset_key(map_id, reactor_class_name), increment)
end
|
|
252
|
+
|
|
253
|
+
# Reads a window of stored map results and parses each entry as JSON.
#
# @param map_id [String] identifier of the map operation
# @param reactor_class_name [String] name of the owning reactor class
# @param offset [Integer] zero-based index of the first result
# @param limit [Integer] maximum number of results to return
# @param strict_ordering [Boolean] true reads the results as a hash keyed by
#   index (HMGET), false reads them as a list (LRANGE)
# @return [Array] parsed results; missing hash fields are dropped
def retrieve_map_results_batch(map_id, reactor_class_name, offset:, limit:, strict_ordering: true)
  # An empty window must short-circuit: HMGET needs at least one field, and
  # LRANGE(offset, offset + limit - 1) turns into LRANGE(0, -1), the whole
  # list, when offset and limit are both 0.
  return [] unless limit.positive?

  key = map_results_key(map_id, reactor_class_name)

  raw_results = if strict_ordering
                  # Results are stored under 0-based index fields, so the
                  # field names for the window can be generated directly.
                  fields = (offset...(offset + limit)).map(&:to_s)
                  # HMGET returns nil for missing fields; those are skipped.
                  @redis.hmget(key, *fields).compact
                else
                  # LRANGE uses an inclusive ending index.
                  @redis.lrange(key, offset, offset + limit - 1)
                end

  raw_results.map { |r| JSON.parse(r) }
end
|
|
274
|
+
|
|
275
|
+
# Counts the results stored for a map. The Redis type of the results key
# decides how: HLEN for a hash, LLEN for a list, 0 for anything else.
def count_map_results(map_id, reactor_class_name)
  results_key = map_results_key(map_id, reactor_class_name)

  case @redis.type(results_key)
  when "hash" then @redis.hlen(results_key)
  when "list" then @redis.llen(results_key)
  else 0
  end
end
|
|
287
|
+
|
|
233
288
|
private
|
|
234
289
|
|
|
235
290
|
def fetch_and_filter_reactors(keys)
|
|
@@ -267,6 +322,10 @@ module RubyReactor
|
|
|
267
322
|
"reactor:#{reactor_class_name}:map:#{map_id}:last_queued_index"
|
|
268
323
|
end
|
|
269
324
|
|
|
325
|
+
# Builds the Redis key under which the dispatch offset of a map is stored.
def map_offset_key(map_id, reactor_class_name)
  namespace = "reactor:#{reactor_class_name}"
  "#{namespace}:map:#{map_id}:offset"
end
|
|
328
|
+
|
|
270
329
|
def correlation_id_key(correlation_id, reactor_class_name)
|
|
271
330
|
"reactor:#{reactor_class_name}:correlation:#{correlation_id}"
|
|
272
331
|
end
|