conductor_ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +142 -0
- data/LICENSE +190 -0
- data/README.md +517 -0
- data/examples/agentic_workflows/llm_chat.rb +106 -0
- data/examples/dynamic_workflow.rb +177 -0
- data/examples/event_handler.rb +94 -0
- data/examples/event_listener_examples.rb +430 -0
- data/examples/helloworld/greetings_worker.rb +24 -0
- data/examples/helloworld/helloworld.rb +99 -0
- data/examples/kitchensink.rb +213 -0
- data/examples/metadata_journey.rb +189 -0
- data/examples/metrics_example.rb +284 -0
- data/examples/new_dsl_demo.rb +141 -0
- data/examples/orkes/http_poll.rb +83 -0
- data/examples/orkes/secrets_example.rb +69 -0
- data/examples/orkes/wait_for_webhook.rb +90 -0
- data/examples/prompt_journey.rb +245 -0
- data/examples/rag_workflow.rb +167 -0
- data/examples/schedule_journey.rb +244 -0
- data/examples/simple_worker.rb +125 -0
- data/examples/simple_workflow.rb +89 -0
- data/examples/task_context_example.rb +257 -0
- data/examples/task_listener_example.rb +192 -0
- data/examples/worker_configuration_example.rb +282 -0
- data/examples/workflow_dsl.rb +316 -0
- data/examples/workflow_ops.rb +305 -0
- data/lib/conductor/client/authorization_client.rb +238 -0
- data/lib/conductor/client/integration_client.rb +108 -0
- data/lib/conductor/client/metadata_client.rb +139 -0
- data/lib/conductor/client/prompt_client.rb +58 -0
- data/lib/conductor/client/scheduler_client.rb +132 -0
- data/lib/conductor/client/schema_client.rb +32 -0
- data/lib/conductor/client/secret_client.rb +48 -0
- data/lib/conductor/client/task_client.rb +168 -0
- data/lib/conductor/client/workflow_client.rb +242 -0
- data/lib/conductor/configuration/authentication_settings.rb +17 -0
- data/lib/conductor/configuration.rb +103 -0
- data/lib/conductor/exceptions.rb +86 -0
- data/lib/conductor/http/api/application_resource_api.rb +107 -0
- data/lib/conductor/http/api/authorization_resource_api.rb +56 -0
- data/lib/conductor/http/api/event_resource_api.rb +133 -0
- data/lib/conductor/http/api/gateway_auth_resource_api.rb +48 -0
- data/lib/conductor/http/api/group_resource_api.rb +76 -0
- data/lib/conductor/http/api/integration_resource_api.rb +145 -0
- data/lib/conductor/http/api/metadata_resource_api.rb +231 -0
- data/lib/conductor/http/api/prompt_resource_api.rb +81 -0
- data/lib/conductor/http/api/role_resource_api.rb +60 -0
- data/lib/conductor/http/api/scheduler_resource_api.rb +211 -0
- data/lib/conductor/http/api/schema_resource_api.rb +82 -0
- data/lib/conductor/http/api/secret_resource_api.rb +134 -0
- data/lib/conductor/http/api/task_resource_api.rb +321 -0
- data/lib/conductor/http/api/token_resource_api.rb +42 -0
- data/lib/conductor/http/api/user_resource_api.rb +59 -0
- data/lib/conductor/http/api/workflow_bulk_resource_api.rb +91 -0
- data/lib/conductor/http/api/workflow_resource_api.rb +451 -0
- data/lib/conductor/http/api_client.rb +437 -0
- data/lib/conductor/http/models/authentication_config.rb +67 -0
- data/lib/conductor/http/models/authorization_request.rb +39 -0
- data/lib/conductor/http/models/base_model.rb +162 -0
- data/lib/conductor/http/models/bulk_response.rb +39 -0
- data/lib/conductor/http/models/conductor_application.rb +39 -0
- data/lib/conductor/http/models/conductor_user.rb +53 -0
- data/lib/conductor/http/models/create_or_update_application_request.rb +24 -0
- data/lib/conductor/http/models/create_or_update_role_request.rb +27 -0
- data/lib/conductor/http/models/event_handler.rb +130 -0
- data/lib/conductor/http/models/generate_token_request.rb +27 -0
- data/lib/conductor/http/models/group.rb +36 -0
- data/lib/conductor/http/models/integration.rb +70 -0
- data/lib/conductor/http/models/integration_api.rb +53 -0
- data/lib/conductor/http/models/integration_api_update.rb +43 -0
- data/lib/conductor/http/models/integration_update.rb +36 -0
- data/lib/conductor/http/models/permission.rb +24 -0
- data/lib/conductor/http/models/poll_data.rb +33 -0
- data/lib/conductor/http/models/prompt_template.rb +59 -0
- data/lib/conductor/http/models/prompt_template_test_request.rb +43 -0
- data/lib/conductor/http/models/rerun_workflow_request.rb +37 -0
- data/lib/conductor/http/models/role.rb +27 -0
- data/lib/conductor/http/models/schema_def.rb +59 -0
- data/lib/conductor/http/models/search_result.rb +187 -0
- data/lib/conductor/http/models/skip_task_request.rb +27 -0
- data/lib/conductor/http/models/start_workflow_request.rb +68 -0
- data/lib/conductor/http/models/subject_ref.rb +35 -0
- data/lib/conductor/http/models/tag_object.rb +36 -0
- data/lib/conductor/http/models/target_ref.rb +39 -0
- data/lib/conductor/http/models/task.rb +156 -0
- data/lib/conductor/http/models/task_def.rb +95 -0
- data/lib/conductor/http/models/task_exec_log.rb +30 -0
- data/lib/conductor/http/models/task_result.rb +115 -0
- data/lib/conductor/http/models/task_result_status.rb +24 -0
- data/lib/conductor/http/models/token.rb +33 -0
- data/lib/conductor/http/models/upsert_group_request.rb +30 -0
- data/lib/conductor/http/models/upsert_user_request.rb +39 -0
- data/lib/conductor/http/models/workflow.rb +202 -0
- data/lib/conductor/http/models/workflow_def.rb +73 -0
- data/lib/conductor/http/models/workflow_schedule.rb +100 -0
- data/lib/conductor/http/models/workflow_state_update.rb +30 -0
- data/lib/conductor/http/models/workflow_status_constants.rb +57 -0
- data/lib/conductor/http/models/workflow_task.rb +169 -0
- data/lib/conductor/http/models/workflow_test_request.rb +67 -0
- data/lib/conductor/http/rest_client.rb +211 -0
- data/lib/conductor/orkes/models/access_key.rb +56 -0
- data/lib/conductor/orkes/models/granted_permission.rb +27 -0
- data/lib/conductor/orkes/models/metadata_tag.rb +15 -0
- data/lib/conductor/orkes/models/rate_limit_tag.rb +15 -0
- data/lib/conductor/orkes/orkes_clients.rb +69 -0
- data/lib/conductor/version.rb +5 -0
- data/lib/conductor/worker/events/conductor_event.rb +40 -0
- data/lib/conductor/worker/events/global_dispatcher.rb +37 -0
- data/lib/conductor/worker/events/http_events.rb +25 -0
- data/lib/conductor/worker/events/listener_registry.rb +40 -0
- data/lib/conductor/worker/events/listeners.rb +34 -0
- data/lib/conductor/worker/events/sync_event_dispatcher.rb +78 -0
- data/lib/conductor/worker/events/task_runner_events.rb +271 -0
- data/lib/conductor/worker/events/workflow_events.rb +49 -0
- data/lib/conductor/worker/fiber_executor.rb +532 -0
- data/lib/conductor/worker/ractor_task_runner.rb +501 -0
- data/lib/conductor/worker/task_context.rb +114 -0
- data/lib/conductor/worker/task_definition_registrar.rb +322 -0
- data/lib/conductor/worker/task_handler.rb +360 -0
- data/lib/conductor/worker/task_in_progress.rb +60 -0
- data/lib/conductor/worker/task_runner.rb +538 -0
- data/lib/conductor/worker/telemetry/metrics_collector.rb +196 -0
- data/lib/conductor/worker/telemetry/prometheus_backend.rb +224 -0
- data/lib/conductor/worker/worker.rb +355 -0
- data/lib/conductor/worker/worker_config.rb +154 -0
- data/lib/conductor/worker/worker_registry.rb +71 -0
- data/lib/conductor/workflow/dsl/input_ref.rb +37 -0
- data/lib/conductor/workflow/dsl/output_ref.rb +44 -0
- data/lib/conductor/workflow/dsl/parallel_builder.rb +49 -0
- data/lib/conductor/workflow/dsl/switch_builder.rb +74 -0
- data/lib/conductor/workflow/dsl/task_ref.rb +178 -0
- data/lib/conductor/workflow/dsl/workflow_builder.rb +1016 -0
- data/lib/conductor/workflow/dsl/workflow_definition.rb +150 -0
- data/lib/conductor/workflow/llm/chat_message.rb +47 -0
- data/lib/conductor/workflow/llm/embedding_model.rb +19 -0
- data/lib/conductor/workflow/llm/tool_call.rb +43 -0
- data/lib/conductor/workflow/llm/tool_spec.rb +46 -0
- data/lib/conductor/workflow/task_type.rb +68 -0
- data/lib/conductor/workflow/timeout_policy.rb +31 -0
- data/lib/conductor/workflow/workflow_executor.rb +373 -0
- data/lib/conductor.rb +192 -0
- metadata +359 -0
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'logger'
|
|
4
|
+
require_relative '../http/models/task'
|
|
5
|
+
require_relative '../http/models/task_result'
|
|
6
|
+
require_relative '../http/models/task_result_status'
|
|
7
|
+
require_relative '../exceptions'
|
|
8
|
+
require_relative 'task_context'
|
|
9
|
+
require_relative 'task_in_progress'
|
|
10
|
+
require_relative 'worker_config'
|
|
11
|
+
require_relative 'events/task_runner_events'
|
|
12
|
+
|
|
13
|
+
module Conductor
|
|
14
|
+
module Worker
|
|
15
|
+
# RactorTaskRunner - Ractor-based runner for CPU-bound workers
|
|
16
|
+
# Provides true parallelism by running in isolated Ractors (no GVL sharing)
|
|
17
|
+
#
|
|
18
|
+
# Key differences from TaskRunner:
|
|
19
|
+
# - Creates HTTP client INSIDE the Ractor (can't be shared)
|
|
20
|
+
# - Sequential task execution within each Ractor
|
|
21
|
+
# - Events sent to main thread via Ractor messaging
|
|
22
|
+
# - Parallelism comes from multiple Ractors (thread_count = Ractor count)
|
|
23
|
+
# - Requires Ruby 3.1+
|
|
24
|
+
#
|
|
25
|
+
# @example
|
|
26
|
+
# worker = Worker.new('cpu_task', isolation: :ractor, thread_count: 4) { |t| heavy_computation(t) }
|
|
27
|
+
# handler = TaskHandler.new(workers: [worker])
|
|
28
|
+
# handler.start
|
|
29
|
+
class RactorTaskRunner
|
|
30
|
+
# Pause, in seconds, taken before each successive attempt to send a task
# result to the server; the leading 0 makes the first attempt immediate.
RETRY_BACKOFFS = [0, 10, 20, 30].freeze

# Cap on the exponent used when computing the empty-poll backoff.
MAX_BACKOFF_EXPONENT = 10

# Cap, in seconds, on the wait applied after repeated authorization failures.
MAX_AUTH_BACKOFF_SECONDS = 60
|
|
38
|
+
|
|
39
|
+
attr_reader :worker, :ractor_id
|
|
40
|
+
|
|
41
|
+
# Build a runner for a single Ractor.
#
# Only plain state is prepared here. The logger and the HTTP task client stay
# nil until #run calls setup_ractor_resources.
#
# @param worker [Worker] worker whose tasks this runner polls and executes
# @param configuration [Configuration] source of the server URL and credentials;
#   reduced to a plain Hash by serialize_configuration
# @param ractor_id [Integer] identifier used in log lines and in the worker id
# @param event_queue [#send, nil] receiver for published events; events are
#   dropped when this is nil
def initialize(worker, configuration:, ractor_id: 0, event_queue: nil)
  @worker = worker
  @ractor_id = ractor_id
  @event_queue = event_queue
  @configuration_hash = serialize_configuration(configuration)

  # Populated by setup_ractor_resources once #run starts.
  @logger = nil
  @task_client = nil

  # Polling and backoff bookkeeping.
  @shutdown = false
  @poll_count = 0
  @last_poll_time = nil
  @consecutive_empty_polls = 0
  @auth_failures = 0
  @last_auth_failure_time = nil
end
|
|
65
|
+
|
|
66
|
+
# Main polling loop; intended to run inside a Ractor.
#
# Creates the logger and HTTP client first (setup_ractor_resources), then
# calls #run_once until the shutdown flag is set. An error that escapes an
# iteration is logged, reported to event listeners as a
# ThreadUncaughtException, and followed by a one-second pause so a persistent
# failure does not spin the loop.
#
# @return [void]
def run
  setup_ractor_resources
  @logger.info("[Ractor #{@ractor_id}] Starting RactorTaskRunner for '#{@worker.task_definition_name}'")

  until @shutdown
    begin
      run_once
    rescue StandardError => e
      @logger.error("[Ractor #{@ractor_id}] Error in polling loop: #{e.message}")
      # Surface the failure through the event channel as well as the log.
      # The helper rescues its own errors, so it cannot break the loop.
      publish_uncaught_exception(e)
      sleep(1)
    end
  end

  cleanup
  @logger.info("[Ractor #{@ractor_id}] RactorTaskRunner stopped")
end
|
|
84
|
+
|
|
85
|
+
# One iteration of the polling loop.
#
# After one or more empty polls, waits out the remainder of the adaptive
# backoff window (milliseconds) and returns without polling. Otherwise polls
# for a single task and either executes it or records another empty poll.
def run_once
  if @consecutive_empty_polls.positive?
    window_ms = calculate_adaptive_backoff
    # With no previous poll recorded, treat the window as already elapsed.
    waited_ms = @last_poll_time.nil? ? window_ms : (Time.now - @last_poll_time) * 1000

    if waited_ms < window_ms
      sleep((window_ms - waited_ms) / 1000.0)
      return
    end
  end

  # Tasks are handled one at a time, so a single task is requested.
  @last_poll_time = Time.now
  polled = poll_task

  if polled.nil?
    @consecutive_empty_polls += 1
  else
    @consecutive_empty_polls = 0
    execute_and_update(polled)
  end
end
|
|
109
|
+
|
|
110
|
+
# Request that the polling loop stop.
#
# Sets the flag that #run checks before each iteration.
# NOTE(review): if #run executes on a copy of this object inside another
# Ractor, a flag set on the caller's copy may not be visible there -- confirm
# how shutdown is expected to be signalled.
#
# @return [Boolean] true
def shutdown
  @shutdown = true
end
|
|
114
|
+
|
|
115
|
+
private
|
|
116
|
+
|
|
117
|
+
# Reduce a configuration object to a plain Hash for transfer into a Ractor.
#
# @param config [Configuration] object responding to server_api_url and
#   authentication_settings
# @return [Hash] :server_api_url plus :authentication_settings, which is a
#   Hash of :key_id / :key_secret or nil when no settings are present
def serialize_configuration(config)
  snapshot = { server_api_url: config.server_api_url }

  auth = config.authentication_settings
  snapshot[:authentication_settings] =
    auth ? { key_id: auth.key_id, key_secret: auth.key_secret } : nil

  snapshot
end
|
|
131
|
+
|
|
132
|
+
# Create the resources that are built after the runner has started:
# a stdout logger, a Configuration rebuilt from the serialized hash, the HTTP
# task client, and the resolved polling settings for this worker.
def setup_ractor_resources
  @logger = Logger.new($stdout).tap do |log|
    log.level = Logger::INFO
    log.formatter = proc do |severity, datetime, _progname, msg|
      "[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] #{severity} [R#{@ractor_id}] -- #{msg}\n"
    end
  end

  # Rebuild the configuration from the plain hash captured in #initialize.
  settings = @configuration_hash
  config = Configuration.new(server_api_url: settings[:server_api_url])
  credentials = settings[:authentication_settings]
  if credentials
    config.authentication_settings = Configuration::AuthenticationSettings.new(
      key_id: credentials[:key_id],
      key_secret: credentials[:key_secret]
    )
  end

  @task_client = Client::TaskClient.new(config)

  # Resolve the effective worker settings; the worker id is suffixed with the
  # Ractor id so each runner reports a distinct id.
  resolved = WorkerConfig.resolve(@worker.task_definition_name, extract_worker_options)
  @poll_interval = resolved[:poll_interval]
  @worker_id = "#{resolved[:worker_id]}-ractor-#{@ractor_id}"
  @domain = resolved[:domain]
  @poll_timeout = resolved[:poll_timeout]
end
|
|
165
|
+
|
|
166
|
+
# Collect the worker's value for every key of Worker::DEFAULTS that the
# worker exposes as a public method.
#
# @return [Hash] option name => value read from the worker
def extract_worker_options
  Worker::DEFAULTS.keys.each_with_object({}) do |option, collected|
    collected[option] = @worker.send(option) if @worker.respond_to?(option)
  end
end
|
|
175
|
+
|
|
176
|
+
# Backoff to apply after empty polls: 2**n milliseconds, where n is the
# number of consecutive empty polls capped at MAX_BACKOFF_EXPONENT, and the
# result is limited to @poll_interval.
#
# @return [Numeric] backoff in milliseconds
def calculate_adaptive_backoff
  capped_exponent = [@consecutive_empty_polls, MAX_BACKOFF_EXPONENT].min
  exponential_ms = 1.0 * (2**capped_exponent)
  [exponential_ms, @poll_interval].min
end
|
|
182
|
+
|
|
183
|
+
# Poll the server for at most one task.
#
# Returns nil without polling while the worker is paused or while the
# authorization-failure backoff window is still open. Errors raised by the
# poll are passed to handle_auth_failure / handle_poll_failure and also
# produce nil.
#
# @return [Object, nil] first entry of the batch_poll response (later passed
#   to Http::Models::Task.from_hash by execute_task), or nil
def poll_task
  if @worker.paused
    publish_event(Events::TaskPaused.new(task_type: @worker.task_definition_name))
    return nil
  end

  # After auth failures, wait 2**failures seconds (capped) before retrying.
  if @auth_failures.positive? && @last_auth_failure_time
    wait_seconds = [2**@auth_failures, MAX_AUTH_BACKOFF_SECONDS].min
    return nil if Time.now - @last_auth_failure_time < wait_seconds
  end

  task_type = @worker.task_definition_name
  publish_event(Events::PollStarted.new(task_type: task_type, worker_id: @worker_id, poll_count: @poll_count))

  started_at = Time.now

  begin
    # An empty domain is sent as nil.
    polled = @task_client.batch_poll(
      task_type,
      count: 1,
      timeout: @poll_timeout,
      worker_id: @worker_id,
      domain: (@domain.to_s.empty? ? nil : @domain)
    ) || []

    elapsed_ms = (Time.now - started_at) * 1000
    @poll_count += 1

    publish_event(Events::PollCompleted.new(
                    task_type: task_type,
                    duration_ms: elapsed_ms,
                    tasks_received: polled.size
                  ))

    @auth_failures = 0
    polled.first
  rescue AuthorizationError => e
    handle_auth_failure(e, started_at)
    nil
  rescue StandardError => e
    handle_poll_failure(e, started_at)
    nil
  end
end
|
|
238
|
+
|
|
239
|
+
# Record an authorization failure raised while polling: bump the failure
# counter, stamp the failure time (both drive the backoff in poll_task),
# publish a PollFailure event and log a warning.
#
# @param error [Exception] the authorization error
# @param start_time [Time] when the failed poll started
def handle_auth_failure(error, start_time)
  @auth_failures += 1
  @last_auth_failure_time = Time.now

  failure_event = Events::PollFailure.new(
    task_type: @worker.task_definition_name,
    duration_ms: (Time.now - start_time) * 1000,
    cause: error
  )
  publish_event(failure_event)

  @logger.warn("[Ractor #{@ractor_id}] Auth failure ##{@auth_failures}: #{error.message}")
end
|
|
253
|
+
|
|
254
|
+
# Report a non-authorization poll error: publish a PollFailure event carrying
# the poll duration and log the error.
#
# @param error [Exception] the error raised by the poll
# @param start_time [Time] when the failed poll started
def handle_poll_failure(error, start_time)
  failure_event = Events::PollFailure.new(
    task_type: @worker.task_definition_name,
    duration_ms: (Time.now - start_time) * 1000,
    cause: error
  )
  publish_event(failure_event)

  @logger.error("[Ractor #{@ractor_id}] Poll failed: #{error.message}")
end
|
|
266
|
+
|
|
267
|
+
# Execute a polled task and send its result to the server.
#
# Nothing is sent when execution yields no result, or when the result is
# IN_PROGRESS with a positive callback_after_seconds.
# NOTE(review): skipping the update for IN_PROGRESS results means the
# callback interval is never reported by this method -- confirm that is the
# intended contract.
#
# @param task [Hash] task data as returned by poll_task
def execute_and_update(task)
  result = execute_task(task)
  return if result.nil?

  deferred = result.status == Http::Models::TaskResultStatus::IN_PROGRESS &&
             result.callback_after_seconds&.positive?
  return if deferred

  update_task_with_retry(result)
end
|
|
277
|
+
|
|
278
|
+
# Execute one task with the worker and build its result.
#
# A task context holding a fresh TaskResult is installed for the duration of
# the worker call and always cleared afterwards. Logs and the callback
# interval recorded on that context are merged into the result the worker
# returned. Errors raised during execution are converted into failed results
# by handle_non_retryable_error / handle_retryable_error.
#
# @param task [Hash] task data, converted with Http::Models::Task.from_hash
# @return [TaskResult, nil] result returned by the worker or an error handler
def execute_task(task)
  task_obj = Http::Models::Task.from_hash(task)

  initial_result = Http::Models::TaskResult.new
  initial_result.task_id = task_obj.task_id
  initial_result.workflow_instance_id = task_obj.workflow_instance_id
  initial_result.worker_id = @worker_id

  # Make the context visible to worker code while it runs.
  set_ractor_context(task_obj, initial_result)

  start_time = Time.now

  publish_event(Events::TaskExecutionStarted.new(
                  task_type: @worker.task_definition_name,
                  task_id: task_obj.task_id,
                  worker_id: @worker_id,
                  workflow_instance_id: task_obj.workflow_instance_id
                ))

  begin
    task_result = @worker.execute(task_obj)
    duration_ms = (Time.now - start_time) * 1000

    # Merge what the worker recorded on the context into the returned result.
    ctx = get_ractor_context
    ctx_logs = ctx&.task_result&.logs
    # Skip the merge when both results share one logs array (e.g. the worker
    # returned the context's own result): concatenating an array onto itself
    # would duplicate every entry.
    if ctx_logs && !ctx_logs.empty? && !ctx_logs.equal?(task_result.logs)
      task_result.logs ||= []
      task_result.logs.concat(ctx_logs)
    end
    task_result.callback_after_seconds ||= ctx&.callback_after_seconds

    output_size = calculate_output_size(task_result)

    publish_event(Events::TaskExecutionCompleted.new(
                    task_type: @worker.task_definition_name,
                    task_id: task_obj.task_id,
                    worker_id: @worker_id,
                    workflow_instance_id: task_obj.workflow_instance_id,
                    duration_ms: duration_ms,
                    output_size_bytes: output_size
                  ))

    task_result
  rescue NonRetryableError => e
    handle_non_retryable_error(task_obj, e, start_time)
  rescue StandardError => e
    handle_retryable_error(task_obj, e, start_time)
  ensure
    clear_ractor_context
  end
end
|
|
333
|
+
|
|
334
|
+
# Install a new TaskContext for the given task and result as the current
# context (kept under Thread.current[:conductor_task_context]).
#
# @return [TaskContext] the context that was installed
def set_ractor_context(task, task_result)
  TaskContext.current = TaskContext.new(task, task_result)
end
|
|
338
|
+
|
|
339
|
+
# Fetch the context installed by set_ractor_context, if any.
#
# @return [TaskContext, nil]
def get_ractor_context
  TaskContext.current
end
|
|
342
|
+
|
|
343
|
+
# Remove the current task context.
#
# @return [nil]
def clear_ractor_context
  TaskContext.clear
end
|
|
346
|
+
|
|
347
|
+
# Size in bytes of the result's output data once rendered with to_json.
#
# @param task_result [#output_data]
# @return [Integer] byte size; 0 when there is no output data or when
#   serialization raises
def calculate_output_size(task_result)
  payload = task_result.output_data
  payload ? payload.to_json.bytesize : 0
rescue StandardError
  0
end
|
|
355
|
+
|
|
356
|
+
# Turn a NonRetryableError raised by the worker into a terminal-failure
# result and publish a TaskExecutionFailure event marked as not retryable.
#
# @param task [Task] the task that was being executed
# @param error [Exception] the error raised by the worker
# @param start_time [Time] when execution started
# @return [TaskResult] result built by TaskResult.failed_with_terminal_error
def handle_non_retryable_error(task, error, start_time)
  elapsed_ms = (Time.now - start_time) * 1000

  failure = Http::Models::TaskResult.failed_with_terminal_error(error.message).tap do |result|
    result.task_id = task.task_id
    result.workflow_instance_id = task.workflow_instance_id
    result.worker_id = @worker_id
    result.log("NonRetryableError: #{error.class}: #{error.message}")
  end

  failure_event = Events::TaskExecutionFailure.new(
    task_type: @worker.task_definition_name,
    task_id: task.task_id,
    worker_id: @worker_id,
    workflow_instance_id: task.workflow_instance_id,
    duration_ms: elapsed_ms,
    cause: error,
    is_retryable: false
  )
  publish_event(failure_event)

  failure
end
|
|
378
|
+
|
|
379
|
+
# Turn any other error raised by the worker into a failed result and publish
# a TaskExecutionFailure event marked as retryable.
#
# @param task [Task] the task that was being executed
# @param error [Exception] the error raised by the worker
# @param start_time [Time] when execution started
# @return [TaskResult] result built by TaskResult.failed
def handle_retryable_error(task, error, start_time)
  elapsed_ms = (Time.now - start_time) * 1000

  failure = Http::Models::TaskResult.failed(error.message).tap do |result|
    result.task_id = task.task_id
    result.workflow_instance_id = task.workflow_instance_id
    result.worker_id = @worker_id
    result.log("Error: #{error.class}: #{error.message}")
  end

  failure_event = Events::TaskExecutionFailure.new(
    task_type: @worker.task_definition_name,
    task_id: task.task_id,
    worker_id: @worker_id,
    workflow_instance_id: task.workflow_instance_id,
    duration_ms: elapsed_ms,
    cause: error,
    is_retryable: true
  )
  publish_event(failure_event)

  failure
end
|
|
401
|
+
|
|
402
|
+
# Send a task result to the server, retrying on failure.
#
# One attempt is made per entry of RETRY_BACKOFFS, sleeping that many seconds
# beforehand. The first success publishes a completion event and stops. If
# the final attempt also fails, the loss is logged as fatal and a failure
# event is published.
#
# @param task_result [TaskResult] result to send
def update_task_with_retry(task_result)
  final_attempt = RETRY_BACKOFFS.size - 1

  RETRY_BACKOFFS.each_with_index do |pause_seconds, attempt_index|
    sleep(pause_seconds) if pause_seconds.positive?

    attempt_started = Time.now
    begin
      @task_client.update_task(task_result)
      publish_task_update_completed(task_result, (Time.now - attempt_started) * 1000)
      return
    rescue StandardError => e
      elapsed_ms = (Time.now - attempt_started) * 1000
      @logger.error("[Ractor #{@ractor_id}] Update failed (attempt #{attempt_index + 1}): #{e.message}")

      if attempt_index == final_attempt
        @logger.fatal("[Ractor #{@ractor_id}] CRITICAL: Task #{task_result.task_id} result LOST")
        publish_task_update_failure(task_result, e, elapsed_ms)
      end
    end
  end
end
|
|
425
|
+
|
|
426
|
+
# Publish a TaskUpdateCompleted event for a result that was sent successfully.
#
# @param task_result [TaskResult] the result that was sent
# @param duration_ms [Numeric] duration of the update call in milliseconds
def publish_task_update_completed(task_result, duration_ms)
  details = {
    task_type: @worker.task_definition_name,
    task_id: task_result.task_id,
    worker_id: @worker_id,
    workflow_instance_id: task_result.workflow_instance_id,
    duration_ms: duration_ms
  }
  publish_event(Events::TaskUpdateCompleted.new(**details))
end
|
|
435
|
+
|
|
436
|
+
# Publish a TaskUpdateFailure event for a result that could not be sent.
# The event carries the result itself and reports RETRY_BACKOFFS.size as the
# retry count.
#
# @param task_result [TaskResult] the result that could not be sent
# @param error [Exception] error raised by the last attempt
# @param duration_ms [Numeric] duration of the last attempt in milliseconds
def publish_task_update_failure(task_result, error, duration_ms)
  details = {
    task_type: @worker.task_definition_name,
    task_id: task_result.task_id,
    worker_id: @worker_id,
    workflow_instance_id: task_result.workflow_instance_id,
    cause: error,
    retry_count: RETRY_BACKOFFS.size,
    task_result: task_result,
    duration_ms: duration_ms
  }
  publish_event(Events::TaskUpdateFailure.new(**details))
end
|
|
448
|
+
|
|
449
|
+
# Publish a ThreadUncaughtException event for the given error.
# Any error raised while publishing is swallowed and logged at debug level
# so that reporting a failure can never raise itself.
#
# @param error [Exception] the uncaught error to report
def publish_uncaught_exception(error)
  begin
    report = Events::ThreadUncaughtException.new(
      cause: error,
      task_type: @worker&.task_definition_name
    )
    publish_event(report)
  rescue StandardError => e
    @logger&.debug { "Telemetry error (non-fatal): #{e.class}: #{e.message}" }
  end
end
|
|
457
|
+
|
|
458
|
+
# Publish an event to the configured event queue.
#
# Does nothing when no queue was supplied. Delivery is best-effort: a closed
# queue is ignored, and any other delivery error is logged at debug level
# instead of being raised, so a telemetry problem cannot abort the poll, task
# execution or result update that triggered the event.
#
# @param event [ConductorEvent] event to publish
# @return [void]
def publish_event(event)
  return unless @event_queue

  begin
    @event_queue.send(event)
  rescue Ractor::ClosedError
    # The receiving side has gone away; there is nothing to deliver to.
  rescue StandardError => e
    @logger&.debug { "Telemetry error (non-fatal): #{e.class}: #{e.message}" }
  end
end
|
|
469
|
+
|
|
470
|
+
# Hook invoked by #run after the polling loop exits.
# Intentionally empty: the runner holds nothing that needs explicit release.
#
# @return [nil]
def cleanup
  # No explicit teardown; the HTTP client is left to the garbage collector.
end
|
|
474
|
+
end
|
|
475
|
+
|
|
476
|
+
# Helper module to check whether Ractor-based workers can be used.
module RactorSupport
  # Oldest Ruby release, as [major, minor], on which Ractor workers are allowed.
  MINIMUM_RUBY = [3, 1].freeze
  private_constant :MINIMUM_RUBY

  class << self
    # Check if Ractors are available: the running Ruby is 3.1 or later and
    # the Ractor constant is defined. The answer is computed once and cached.
    #
    # @return [Boolean]
    def available?
      return @available if defined?(@available)

      @available = ruby_supported? && (defined?(Ractor) ? true : false)
    end

    # Raise unless Ractors are available.
    #
    # @raise [ConfigurationError] when #available? is false
    # @return [nil]
    def require_ractors!
      return if available?

      raise ConfigurationError,
            "Ractors require Ruby 3.1 or later. Current version: #{RUBY_VERSION}"
    end

    private

    # Compare major/minor numerically. A plain string comparison would order
    # versions lexicographically (for example "10.0" sorts before "3.1").
    #
    # @param version [String] dotted version string, defaults to RUBY_VERSION
    # @return [Boolean] true when version is at least MINIMUM_RUBY
    def ruby_supported?(version = RUBY_VERSION)
      major_minor = version.split('.').first(2).map(&:to_i)
      (major_minor <=> MINIMUM_RUBY) >= 0
    end
  end
end
|
|
500
|
+
end
|
|
501
|
+
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Conductor
|
|
4
|
+
module Worker
|
|
5
|
+
# Execution context handed to worker code.
#
# The active context is kept under Thread.current[:conductor_task_context]
# and can be reached from anywhere in worker code via TaskContext.current.
# Note that Thread#[] storage is fiber-local, so each Fiber sees its own
# context.
class TaskContext
  # @return [Task] the task being executed
  # @return [TaskResult] the task result being built
  attr_reader :task, :task_result

  class << self
    # The context of the task currently being executed.
    # @return [TaskContext, nil] nil when no task is executing
    def current
      Thread.current[:conductor_task_context]
    end

    # Install a context as the current one (internal use by the task runners).
    # @param context [TaskContext, nil]
    # @return [void]
    def current=(context)
      Thread.current[:conductor_task_context] = context
    end

    # Remove the current context (internal use by the task runners).
    # @return [void]
    def clear
      self.current = nil
    end
  end

  # @param task [Task] the task being executed
  # @param task_result [TaskResult] the task result being built
  def initialize(task, task_result)
    @task = task
    @task_result = task_result
  end

  # @return [String] id of the task
  def task_id
    task.task_id
  end

  # @return [String] id of the workflow instance the task belongs to
  def workflow_instance_id
    task.workflow_instance_id
  end

  # @return [Integer] the task's retry count, 0 when unset
  def retry_count
    task.retry_count || 0
  end

  # @return [Integer] the task's poll count, 0 when unset
  def poll_count
    task.poll_count || 0
  end

  # @return [Hash] the task's input data, an empty Hash when unset
  def input
    task.input_data || {}
  end

  # @return [String] the task definition name, falling back to the task type
  def task_def_name
    task.task_def_name || task.task_type
  end

  # @return [String] type of the workflow task, falling back to the task type
  def workflow_task_type
    task.workflow_task&.type || task.task_type
  end

  # Append a log message to the task result (shown in the Conductor UI).
  # @param message [String] log message
  # @return [void]
  def add_log(message)
    task_result.log(message)
  end

  # Set callback_after_seconds on the task result, used for long-running
  # tasks to say when the task should be polled again.
  # @param seconds [Integer] seconds to wait before polling again
  # @return [void]
  def set_callback_after(seconds)
    task_result.callback_after_seconds = seconds
  end

  # @return [Integer, nil] callback_after_seconds currently set on the result
  def callback_after_seconds
    task_result.callback_after_seconds
  end

  # Replace the output data on the task result.
  # @param output_data [Hash] output data
  # @return [void]
  def set_output(output_data)
    task_result.output_data = output_data
  end
end
|
|
113
|
+
end
|
|
114
|
+
end
|