conductor_ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +142 -0
- data/LICENSE +190 -0
- data/README.md +517 -0
- data/examples/agentic_workflows/llm_chat.rb +106 -0
- data/examples/dynamic_workflow.rb +177 -0
- data/examples/event_handler.rb +94 -0
- data/examples/event_listener_examples.rb +430 -0
- data/examples/helloworld/greetings_worker.rb +24 -0
- data/examples/helloworld/helloworld.rb +99 -0
- data/examples/kitchensink.rb +213 -0
- data/examples/metadata_journey.rb +189 -0
- data/examples/metrics_example.rb +284 -0
- data/examples/new_dsl_demo.rb +141 -0
- data/examples/orkes/http_poll.rb +83 -0
- data/examples/orkes/secrets_example.rb +69 -0
- data/examples/orkes/wait_for_webhook.rb +90 -0
- data/examples/prompt_journey.rb +245 -0
- data/examples/rag_workflow.rb +167 -0
- data/examples/schedule_journey.rb +244 -0
- data/examples/simple_worker.rb +125 -0
- data/examples/simple_workflow.rb +89 -0
- data/examples/task_context_example.rb +257 -0
- data/examples/task_listener_example.rb +192 -0
- data/examples/worker_configuration_example.rb +282 -0
- data/examples/workflow_dsl.rb +316 -0
- data/examples/workflow_ops.rb +305 -0
- data/lib/conductor/client/authorization_client.rb +238 -0
- data/lib/conductor/client/integration_client.rb +108 -0
- data/lib/conductor/client/metadata_client.rb +139 -0
- data/lib/conductor/client/prompt_client.rb +58 -0
- data/lib/conductor/client/scheduler_client.rb +132 -0
- data/lib/conductor/client/schema_client.rb +32 -0
- data/lib/conductor/client/secret_client.rb +48 -0
- data/lib/conductor/client/task_client.rb +168 -0
- data/lib/conductor/client/workflow_client.rb +242 -0
- data/lib/conductor/configuration/authentication_settings.rb +17 -0
- data/lib/conductor/configuration.rb +103 -0
- data/lib/conductor/exceptions.rb +86 -0
- data/lib/conductor/http/api/application_resource_api.rb +107 -0
- data/lib/conductor/http/api/authorization_resource_api.rb +56 -0
- data/lib/conductor/http/api/event_resource_api.rb +133 -0
- data/lib/conductor/http/api/gateway_auth_resource_api.rb +48 -0
- data/lib/conductor/http/api/group_resource_api.rb +76 -0
- data/lib/conductor/http/api/integration_resource_api.rb +145 -0
- data/lib/conductor/http/api/metadata_resource_api.rb +231 -0
- data/lib/conductor/http/api/prompt_resource_api.rb +81 -0
- data/lib/conductor/http/api/role_resource_api.rb +60 -0
- data/lib/conductor/http/api/scheduler_resource_api.rb +211 -0
- data/lib/conductor/http/api/schema_resource_api.rb +82 -0
- data/lib/conductor/http/api/secret_resource_api.rb +134 -0
- data/lib/conductor/http/api/task_resource_api.rb +321 -0
- data/lib/conductor/http/api/token_resource_api.rb +42 -0
- data/lib/conductor/http/api/user_resource_api.rb +59 -0
- data/lib/conductor/http/api/workflow_bulk_resource_api.rb +91 -0
- data/lib/conductor/http/api/workflow_resource_api.rb +451 -0
- data/lib/conductor/http/api_client.rb +437 -0
- data/lib/conductor/http/models/authentication_config.rb +67 -0
- data/lib/conductor/http/models/authorization_request.rb +39 -0
- data/lib/conductor/http/models/base_model.rb +162 -0
- data/lib/conductor/http/models/bulk_response.rb +39 -0
- data/lib/conductor/http/models/conductor_application.rb +39 -0
- data/lib/conductor/http/models/conductor_user.rb +53 -0
- data/lib/conductor/http/models/create_or_update_application_request.rb +24 -0
- data/lib/conductor/http/models/create_or_update_role_request.rb +27 -0
- data/lib/conductor/http/models/event_handler.rb +130 -0
- data/lib/conductor/http/models/generate_token_request.rb +27 -0
- data/lib/conductor/http/models/group.rb +36 -0
- data/lib/conductor/http/models/integration.rb +70 -0
- data/lib/conductor/http/models/integration_api.rb +53 -0
- data/lib/conductor/http/models/integration_api_update.rb +43 -0
- data/lib/conductor/http/models/integration_update.rb +36 -0
- data/lib/conductor/http/models/permission.rb +24 -0
- data/lib/conductor/http/models/poll_data.rb +33 -0
- data/lib/conductor/http/models/prompt_template.rb +59 -0
- data/lib/conductor/http/models/prompt_template_test_request.rb +43 -0
- data/lib/conductor/http/models/rerun_workflow_request.rb +37 -0
- data/lib/conductor/http/models/role.rb +27 -0
- data/lib/conductor/http/models/schema_def.rb +59 -0
- data/lib/conductor/http/models/search_result.rb +187 -0
- data/lib/conductor/http/models/skip_task_request.rb +27 -0
- data/lib/conductor/http/models/start_workflow_request.rb +68 -0
- data/lib/conductor/http/models/subject_ref.rb +35 -0
- data/lib/conductor/http/models/tag_object.rb +36 -0
- data/lib/conductor/http/models/target_ref.rb +39 -0
- data/lib/conductor/http/models/task.rb +156 -0
- data/lib/conductor/http/models/task_def.rb +95 -0
- data/lib/conductor/http/models/task_exec_log.rb +30 -0
- data/lib/conductor/http/models/task_result.rb +115 -0
- data/lib/conductor/http/models/task_result_status.rb +24 -0
- data/lib/conductor/http/models/token.rb +33 -0
- data/lib/conductor/http/models/upsert_group_request.rb +30 -0
- data/lib/conductor/http/models/upsert_user_request.rb +39 -0
- data/lib/conductor/http/models/workflow.rb +202 -0
- data/lib/conductor/http/models/workflow_def.rb +73 -0
- data/lib/conductor/http/models/workflow_schedule.rb +100 -0
- data/lib/conductor/http/models/workflow_state_update.rb +30 -0
- data/lib/conductor/http/models/workflow_status_constants.rb +57 -0
- data/lib/conductor/http/models/workflow_task.rb +169 -0
- data/lib/conductor/http/models/workflow_test_request.rb +67 -0
- data/lib/conductor/http/rest_client.rb +211 -0
- data/lib/conductor/orkes/models/access_key.rb +56 -0
- data/lib/conductor/orkes/models/granted_permission.rb +27 -0
- data/lib/conductor/orkes/models/metadata_tag.rb +15 -0
- data/lib/conductor/orkes/models/rate_limit_tag.rb +15 -0
- data/lib/conductor/orkes/orkes_clients.rb +69 -0
- data/lib/conductor/version.rb +5 -0
- data/lib/conductor/worker/events/conductor_event.rb +40 -0
- data/lib/conductor/worker/events/global_dispatcher.rb +37 -0
- data/lib/conductor/worker/events/http_events.rb +25 -0
- data/lib/conductor/worker/events/listener_registry.rb +40 -0
- data/lib/conductor/worker/events/listeners.rb +34 -0
- data/lib/conductor/worker/events/sync_event_dispatcher.rb +78 -0
- data/lib/conductor/worker/events/task_runner_events.rb +271 -0
- data/lib/conductor/worker/events/workflow_events.rb +49 -0
- data/lib/conductor/worker/fiber_executor.rb +532 -0
- data/lib/conductor/worker/ractor_task_runner.rb +501 -0
- data/lib/conductor/worker/task_context.rb +114 -0
- data/lib/conductor/worker/task_definition_registrar.rb +322 -0
- data/lib/conductor/worker/task_handler.rb +360 -0
- data/lib/conductor/worker/task_in_progress.rb +60 -0
- data/lib/conductor/worker/task_runner.rb +538 -0
- data/lib/conductor/worker/telemetry/metrics_collector.rb +196 -0
- data/lib/conductor/worker/telemetry/prometheus_backend.rb +224 -0
- data/lib/conductor/worker/worker.rb +355 -0
- data/lib/conductor/worker/worker_config.rb +154 -0
- data/lib/conductor/worker/worker_registry.rb +71 -0
- data/lib/conductor/workflow/dsl/input_ref.rb +37 -0
- data/lib/conductor/workflow/dsl/output_ref.rb +44 -0
- data/lib/conductor/workflow/dsl/parallel_builder.rb +49 -0
- data/lib/conductor/workflow/dsl/switch_builder.rb +74 -0
- data/lib/conductor/workflow/dsl/task_ref.rb +178 -0
- data/lib/conductor/workflow/dsl/workflow_builder.rb +1016 -0
- data/lib/conductor/workflow/dsl/workflow_definition.rb +150 -0
- data/lib/conductor/workflow/llm/chat_message.rb +47 -0
- data/lib/conductor/workflow/llm/embedding_model.rb +19 -0
- data/lib/conductor/workflow/llm/tool_call.rb +43 -0
- data/lib/conductor/workflow/llm/tool_spec.rb +46 -0
- data/lib/conductor/workflow/task_type.rb +68 -0
- data/lib/conductor/workflow/timeout_policy.rb +31 -0
- data/lib/conductor/workflow/workflow_executor.rb +373 -0
- data/lib/conductor.rb +192 -0
- metadata +359 -0
|
@@ -0,0 +1,532 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Conductor
|
|
4
|
+
module Worker
|
|
5
|
+
# FiberExecutor - Fiber-based executor using the async gem
|
|
6
|
+
# Provides lightweight cooperative concurrency for high I/O workloads
|
|
7
|
+
#
|
|
8
|
+
# Unlike ThreadPoolExecutor which uses OS threads (~8KB each),
|
|
9
|
+
# FiberExecutor uses fibers (~400 bytes each), enabling thousands
|
|
10
|
+
# of concurrent tasks within a single thread.
|
|
11
|
+
#
|
|
12
|
+
# Requirements:
|
|
13
|
+
# - async gem must be installed (optional dependency)
|
|
14
|
+
# - All I/O must be non-blocking (use async-compatible libraries)
|
|
15
|
+
#
|
|
16
|
+
# @example
|
|
17
|
+
# worker = Worker.new('io_task', executor: :fiber, thread_count: 100) { |t| async_http_call(t) }
|
|
18
|
+
# handler = TaskHandler.new(workers: [worker])
|
|
19
|
+
# handler.start
|
|
20
|
+
class FiberExecutor
  # Seconds slept between completion checks when waiting with a timeout.
  WAIT_POLL_INTERVAL = 0.01

  attr_reader :max_concurrency

  # Build an executor. The async gem is required here, so a missing
  # dependency surfaces at construction time rather than on first submit.
  # @param max_concurrency [Integer] Maximum concurrent fibers (semaphore limit)
  # @raise [ConfigurationError] when the async gem cannot be loaded
  def initialize(max_concurrency)
    @max_concurrency = max_concurrency
    @running_fibers = []
    @semaphore = nil
    @scheduler = nil
    @shutdown = false

    load_async_gem
  end

  # Schedule a block on the scheduler captured by #start and track the
  # resulting task handle.
  # @param block [Proc] Block to execute in a fiber
  # @return [Object] Task handle returned by the scheduler's #async
  # @raise [RuntimeError] when #start has not set a scheduler yet
  def submit(&block)
    raise 'FiberExecutor not started' unless @scheduler

    # The semaphore is acquired inside the fiber, so a submitted block may
    # sit waiting for a permit; it still counts as "running" for tracking.
    fiber_task = @scheduler.async do
      @semaphore.acquire
      begin
        block.call
      ensure
        @semaphore.release
      end
    end

    @running_fibers << fiber_task
    cleanup_completed_fibers
    fiber_task
  end

  # Number of tracked fibers that are neither finished nor stopped.
  # @return [Integer]
  def running_count
    cleanup_completed_fibers
    @running_fibers.size
  end

  # Whether the number of tracked fibers has reached max_concurrency.
  # @return [Boolean]
  def at_capacity?
    running_count >= @max_concurrency
  end

  # Wait for tracked fibers to complete.
  #
  # Without a timeout every tracked fiber is waited on in turn (errors
  # raised by a fiber's #wait are ignored) and tracking is cleared.
  #
  # With a timeout the tracked fibers are polled until all have finished
  # or the deadline passes; fibers still running at the deadline stay
  # tracked. Polling uses Kernel#sleep, which this file's run loop also
  # relies on to yield to other fibers.
  #
  # @param timeout [Float, nil] Maximum seconds to wait; nil waits indefinitely
  # @return [Array] Fibers still running when the wait ended (empty when all completed)
  def wait_for_completion(timeout: nil)
    cleanup_completed_fibers

    if timeout.nil?
      @running_fibers.each do |fiber|
        fiber.wait
      rescue StandardError
        # Ignore errors during wait
      end
      return @running_fibers.clear
    end

    deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
    loop do
      cleanup_completed_fibers
      remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
      break if @running_fibers.empty? || remaining <= 0

      # Never sleep past the deadline.
      sleep([WAIT_POLL_INTERVAL, remaining].min)
    end
    @running_fibers
  end

  # Open an Async block, record its task as the scheduler, create the
  # semaphore, and hand this executor to the given block.
  # Must be called before submitting tasks.
  # @yield [executor] Block to execute within the scheduler
  # @yieldparam executor [FiberExecutor] this executor
  def start(&block)
    Async do |task|
      @scheduler = task
      @semaphore = Async::Semaphore.new(@max_concurrency)
      block&.call(self)
    end
  end

  # Flag the executor as shut down and stop every tracked fiber.
  # Errors raised while stopping a fiber are ignored.
  def shutdown
    @shutdown = true
    @running_fibers.each do |fiber|
      fiber.stop
    rescue StandardError
      # Ignore errors during shutdown
    end
    @running_fibers.clear
  end

  # Whether #shutdown has been called.
  # @return [Boolean]
  def shutdown?
    @shutdown
  end

  private

  # Require the async gem and its semaphore.
  # @raise [ConfigurationError] when either require fails with LoadError
  def load_async_gem
    require 'async'
    require 'async/semaphore'
  rescue LoadError
    raise ConfigurationError,
          "The 'async' gem is required for fiber executor. " \
          "Add `gem 'async'` to your Gemfile."
  end

  # Drop fibers that report finished? or stopped? from tracking.
  def cleanup_completed_fibers
    @running_fibers.reject! { |fiber| fiber.finished? || fiber.stopped? }
  end
end
|
|
129
|
+
|
|
130
|
+
# FiberTaskRunner - TaskRunner variant that uses FiberExecutor
|
|
131
|
+
# Runs within an async event loop for fiber-based concurrency
|
|
132
|
+
class FiberTaskRunner
  # Retry backoffs for task update (in seconds)
  RETRY_BACKOFFS = [0, 10, 20, 30].freeze

  # Maximum exponent for adaptive backoff
  MAX_BACKOFF_EXPONENT = 10

  # Maximum auth failure backoff in seconds
  MAX_AUTH_BACKOFF_SECONDS = 60

  attr_reader :worker

  # Initialize FiberTaskRunner
  # @param worker [Worker] The worker instance
  # @param configuration [Configuration] Conductor configuration (a new Configuration is built when nil)
  # @param event_dispatcher [SyncEventDispatcher] Event dispatcher (a new one is built when nil)
  # @param logger [Logger] Logger instance (defaults to an INFO logger on $stdout)
  def initialize(worker, configuration:, event_dispatcher: nil, logger: nil)
    @worker = worker
    @configuration = configuration || Configuration.new
    @event_dispatcher = event_dispatcher || Events::SyncEventDispatcher.new
    @logger = logger || create_default_logger

    # Resolve worker configuration
    resolved = WorkerConfig.resolve(
      worker.task_definition_name,
      extract_worker_options(worker)
    )
    @poll_interval = resolved[:poll_interval]
    @max_workers = resolved[:thread_count] # thread_count becomes fiber concurrency
    @worker_id = resolved[:worker_id]
    @domain = resolved[:domain]
    @poll_timeout = resolved[:poll_timeout]

    # State tracking. The two timestamps hold monotonic clock readings and
    # are only ever compared with other monotonic readings.
    @consecutive_empty_polls = 0
    @auth_failures = 0
    @last_auth_failure_time = nil
    @last_poll_time = nil
    @poll_count = 0
    @shutdown = false
  end

  # Main run loop: creates the task client and fiber executor, then polls
  # inside the executor's event loop until #shutdown is signalled.
  # Cleanup runs even when the event loop raises.
  def run
    @logger.info("Starting FiberTaskRunner for '#{@worker.task_definition_name}' " \
                 "(fiber_concurrency=#{@max_workers})")

    @task_client = Client::TaskClient.new(@configuration)
    @executor = FiberExecutor.new(@max_workers)

    begin
      @executor.start do |executor|
        until @shutdown
          begin
            run_once(executor)
            # Small sleep to prevent tight loop (async-friendly)
            sleep(0.001)
          rescue StandardError => e
            @logger.error("Error in fiber polling loop: #{e.message}")
            sleep(1)
          end
        end
      end
    ensure
      # Release the executor and dispatcher on every exit path.
      cleanup
    end

    @logger.info('FiberTaskRunner stopped')
  end

  # Single polling iteration: skip when at capacity or still backing off,
  # otherwise poll for as many tasks as there are free slots and submit
  # each one to the executor.
  # @param executor [FiberExecutor] The fiber executor
  def run_once(executor)
    return if executor.at_capacity?

    available_slots = @max_workers - executor.running_count

    # Adaptive backoff: after empty polls, wait until the computed backoff
    # has elapsed since the previous poll.
    if @consecutive_empty_polls.positive?
      backoff_ms = calculate_adaptive_backoff
      elapsed_ms = @last_poll_time ? (monotonic_now - @last_poll_time) * 1000 : backoff_ms
      return if elapsed_ms < backoff_ms
    end

    @last_poll_time = monotonic_now
    tasks = batch_poll(available_slots)

    if tasks.empty?
      @consecutive_empty_polls += 1
    else
      @consecutive_empty_polls = 0
      tasks.each do |task|
        executor.submit { execute_and_update(task) }
      end
      publish_active_workers(executor)
    end
  end

  # Signal shutdown: stops the polling loop and shuts the executor down.
  def shutdown
    @shutdown = true
    @executor&.shutdown
  end

  private

  # Monotonic clock reading in seconds, used for durations and backoff
  # windows so wall-clock adjustments cannot skew them.
  # @return [Float]
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # @return [Logger] INFO-level logger writing to $stdout
  def create_default_logger
    logger = Logger.new($stdout)
    logger.level = Logger::INFO
    logger
  end

  # Collect the worker's values for every key in Worker::DEFAULTS that the
  # worker responds to.
  # @param worker [Worker]
  # @return [Hash]
  def extract_worker_options(worker)
    options = {}
    Worker::DEFAULTS.each_key do |key|
      options[key] = worker.send(key) if worker.respond_to?(key)
    end
    options
  end

  # Backoff after empty polls: 2**n (n capped at MAX_BACKOFF_EXPONENT),
  # itself capped at the poll interval. The caller compares the result
  # against elapsed milliseconds.
  # @return [Numeric]
  def calculate_adaptive_backoff
    exponent = [@consecutive_empty_polls, MAX_BACKOFF_EXPONENT].min
    [1.0 * (2**exponent), @poll_interval].min
  end

  # Poll the server for up to +count+ tasks, publishing poll events.
  # Returns an empty array when the worker is paused, while backing off
  # after authorization failures, or when the poll raises.
  # @param count [Integer] Maximum number of tasks to request
  # @return [Array]
  def batch_poll(count)
    if @worker.paused
      @event_dispatcher.publish(Events::TaskPaused.new(task_type: @worker.task_definition_name))
      return []
    end

    # Exponential backoff (capped) after authorization failures.
    if @auth_failures.positive? && @last_auth_failure_time
      backoff_seconds = [2**@auth_failures, MAX_AUTH_BACKOFF_SECONDS].min
      elapsed = monotonic_now - @last_auth_failure_time
      return [] if elapsed < backoff_seconds
    end

    @event_dispatcher.publish(Events::PollStarted.new(
      task_type: @worker.task_definition_name,
      worker_id: @worker_id,
      poll_count: @poll_count
    ))

    start_time = monotonic_now

    begin
      # An empty domain is sent as nil.
      domain_param = @domain.to_s.empty? ? nil : @domain

      tasks = @task_client.batch_poll(
        @worker.task_definition_name,
        count: count,
        timeout: @poll_timeout,
        worker_id: @worker_id,
        domain: domain_param
      )

      tasks ||= []
      duration_ms = (monotonic_now - start_time) * 1000
      @poll_count += 1

      @event_dispatcher.publish(Events::PollCompleted.new(
        task_type: @worker.task_definition_name,
        duration_ms: duration_ms,
        tasks_received: tasks.size
      ))

      @auth_failures = 0
      tasks
    rescue AuthorizationError => e
      @auth_failures += 1
      @last_auth_failure_time = monotonic_now
      publish_poll_failure(e, start_time)
      []
    rescue StandardError => e
      publish_poll_failure(e, start_time)
      []
    end
  end

  # Publish a PollFailure event for a poll that started at +started_at+.
  # @param error [StandardError] The failure cause
  # @param started_at [Float] Monotonic reading taken before the poll
  def publish_poll_failure(error, started_at)
    @event_dispatcher.publish(Events::PollFailure.new(
      task_type: @worker.task_definition_name,
      duration_ms: (monotonic_now - started_at) * 1000,
      cause: error
    ))
  end

  # Execute a polled task and report its result. No update is sent when
  # there is no result, or when the result is IN_PROGRESS with a positive
  # callback_after_seconds.
  # @param task [Object] Raw task as returned by the poll
  def execute_and_update(task)
    task_result = execute_task(task)
    return if task_result.nil?
    return if task_result.status == Http::Models::TaskResultStatus::IN_PROGRESS &&
              task_result.callback_after_seconds&.positive?

    update_task_with_retry(task_result)
  end

  # Run the worker against one task, publishing execution events and
  # merging logs and callback delay recorded on the task context.
  # Worker errors are converted into failed results.
  # @param task [Object] Raw task as returned by the poll
  # @return [Object] The task result (from the worker or from error handling)
  def execute_task(task)
    task_obj = Http::Models::Task.from_hash(task)

    initial_result = Http::Models::TaskResult.new
    initial_result.task_id = task_obj.task_id
    initial_result.workflow_instance_id = task_obj.workflow_instance_id
    initial_result.worker_id = @worker_id

    set_fiber_context(task_obj, initial_result)

    start_time = monotonic_now

    @event_dispatcher.publish(Events::TaskExecutionStarted.new(
      task_type: @worker.task_definition_name,
      task_id: task_obj.task_id,
      worker_id: @worker_id,
      workflow_instance_id: task_obj.workflow_instance_id
    ))

    begin
      task_result = @worker.execute(task_obj)
      duration_ms = (monotonic_now - start_time) * 1000

      # Fold anything the worker recorded on the context into the result.
      ctx = get_fiber_context
      if ctx&.task_result&.logs && !ctx.task_result.logs.empty?
        task_result.logs ||= []
        task_result.logs.concat(ctx.task_result.logs)
      end
      task_result.callback_after_seconds ||= ctx&.callback_after_seconds

      # Output size is best-effort telemetry; serialization errors count as 0.
      output_size = begin
        task_result.output_data.to_json.bytesize
      rescue StandardError
        0
      end

      @event_dispatcher.publish(Events::TaskExecutionCompleted.new(
        task_type: @worker.task_definition_name,
        task_id: task_obj.task_id,
        worker_id: @worker_id,
        workflow_instance_id: task_obj.workflow_instance_id,
        duration_ms: duration_ms,
        output_size_bytes: output_size
      ))

      task_result
    rescue NonRetryableError => e
      handle_execution_error(task_obj, e, start_time, retryable: false)
    rescue StandardError => e
      handle_execution_error(task_obj, e, start_time, retryable: true)
    ensure
      clear_fiber_context
    end
  end

  # Store a TaskContext for the current execution.
  # NOTE(review): core Ruby exposes fiber storage through Fiber[] class
  # methods; confirm whether Fiber.current ever responds to :[]= here. If it
  # does not, the Thread.current branch is always the one taken.
  def set_fiber_context(task, task_result)
    if Fiber.current.respond_to?(:[]=)
      Fiber.current[:conductor_task_context] = TaskContext.new(task, task_result)
    else
      Thread.current[:conductor_task_context] = TaskContext.new(task, task_result)
    end
  end

  # Read the TaskContext stored by #set_fiber_context.
  def get_fiber_context
    if Fiber.current.respond_to?(:[])
      Fiber.current[:conductor_task_context]
    else
      Thread.current[:conductor_task_context]
    end
  end

  # Remove the TaskContext stored by #set_fiber_context.
  def clear_fiber_context
    if Fiber.current.respond_to?(:[]=)
      Fiber.current[:conductor_task_context] = nil
    else
      Thread.current[:conductor_task_context] = nil
    end
  end

  # Build a failed result for a worker error and publish the failure event.
  # @param task [Object] The task model being executed
  # @param error [StandardError] The error raised by the worker
  # @param start_time [Float] Monotonic reading taken before execution
  # @param retryable [Boolean] false produces a terminal-error result
  # @return [Object] The failed task result
  def handle_execution_error(task, error, start_time, retryable:)
    duration_ms = (monotonic_now - start_time) * 1000

    task_result = if retryable
                    Http::Models::TaskResult.failed(error.message)
                  else
                    Http::Models::TaskResult.failed_with_terminal_error(error.message)
                  end

    task_result.task_id = task.task_id
    task_result.workflow_instance_id = task.workflow_instance_id
    task_result.worker_id = @worker_id
    task_result.log("Error: #{error.class}: #{error.message}")

    @event_dispatcher.publish(Events::TaskExecutionFailure.new(
      task_type: @worker.task_definition_name,
      task_id: task.task_id,
      worker_id: @worker_id,
      workflow_instance_id: task.workflow_instance_id,
      duration_ms: duration_ms,
      cause: error,
      is_retryable: retryable
    ))

    task_result
  end

  # Send the result to the server, retrying after each delay in
  # RETRY_BACKOFFS. A failure event is published only after the last attempt.
  # @param task_result [Object] The result to report
  def update_task_with_retry(task_result)
    RETRY_BACKOFFS.each_with_index do |backoff, attempt|
      sleep(backoff) if backoff.positive?

      start_time = monotonic_now
      begin
        @task_client.update_task(task_result)
        duration_ms = (monotonic_now - start_time) * 1000

        publish_task_update_completed(task_result, duration_ms)
        return
      rescue StandardError => e
        duration_ms = (monotonic_now - start_time) * 1000
        @logger.error("Update failed (attempt #{attempt + 1}): #{e.message}")

        publish_task_update_failure(task_result, e, duration_ms) if attempt == RETRY_BACKOFFS.size - 1
      end
    end
  end

  # Publish a TaskUpdateCompleted event for a successful update.
  def publish_task_update_completed(task_result, duration_ms)
    @event_dispatcher.publish(Events::TaskUpdateCompleted.new(
      task_type: @worker.task_definition_name,
      task_id: task_result.task_id,
      worker_id: @worker_id,
      workflow_instance_id: task_result.workflow_instance_id,
      duration_ms: duration_ms
    ))
  end

  # Publish a TaskUpdateFailure event once all update attempts have failed.
  def publish_task_update_failure(task_result, error, duration_ms)
    @event_dispatcher.publish(Events::TaskUpdateFailure.new(
      task_type: @worker.task_definition_name,
      task_id: task_result.task_id,
      worker_id: @worker_id,
      workflow_instance_id: task_result.workflow_instance_id,
      cause: error,
      retry_count: RETRY_BACKOFFS.size,
      task_result: task_result,
      duration_ms: duration_ms
    ))
  end

  # Publish the executor's current running count; telemetry errors are
  # logged at debug level and never raised.
  def publish_active_workers(executor)
    @event_dispatcher.publish(Events::ActiveWorkersChanged.new(
      task_type: @worker.task_definition_name,
      count: executor.running_count
    ))
  rescue StandardError => e
    @logger.debug { "Telemetry error (non-fatal): #{e.class}: #{e.message}" }
  end

  # Publish a ThreadUncaughtException event; telemetry errors are logged
  # at debug level and never raised.
  def publish_uncaught_exception(error)
    @event_dispatcher.publish(Events::ThreadUncaughtException.new(
      cause: error,
      task_type: @worker&.task_definition_name
    ))
  rescue StandardError => e
    @logger.debug { "Telemetry error (non-fatal): #{e.class}: #{e.message}" }
  end

  # Shut the executor down and clear the event dispatcher.
  def cleanup
    @executor&.shutdown
    @event_dispatcher.clear
  end
end
|
|
507
|
+
|
|
508
|
+
# Helper to check async gem availability
|
|
509
|
+
module AsyncSupport
  # Whether the async gem can be required. The answer is computed on the
  # first call and memoized on the module afterwards.
  # @return [Boolean]
  def self.available?
    unless instance_variable_defined?(:@available)
      @available =
        begin
          require 'async'
          true
        rescue LoadError
          false
        end
    end

    @available
  end

  # Raise unless the async gem is available.
  # @return [nil] when the gem is available
  # @raise [ConfigurationError] when the async gem cannot be loaded
  def self.require_async!
    unless available?
      raise ConfigurationError,
            "The 'async' gem is required for fiber executor. " \
            "Add `gem 'async'` to your Gemfile."
    end
  end
end
|
|
531
|
+
end
|
|
532
|
+
end
|