conductor_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +142 -0
  3. data/LICENSE +190 -0
  4. data/README.md +517 -0
  5. data/examples/agentic_workflows/llm_chat.rb +106 -0
  6. data/examples/dynamic_workflow.rb +177 -0
  7. data/examples/event_handler.rb +94 -0
  8. data/examples/event_listener_examples.rb +430 -0
  9. data/examples/helloworld/greetings_worker.rb +24 -0
  10. data/examples/helloworld/helloworld.rb +99 -0
  11. data/examples/kitchensink.rb +213 -0
  12. data/examples/metadata_journey.rb +189 -0
  13. data/examples/metrics_example.rb +284 -0
  14. data/examples/new_dsl_demo.rb +141 -0
  15. data/examples/orkes/http_poll.rb +83 -0
  16. data/examples/orkes/secrets_example.rb +69 -0
  17. data/examples/orkes/wait_for_webhook.rb +90 -0
  18. data/examples/prompt_journey.rb +245 -0
  19. data/examples/rag_workflow.rb +167 -0
  20. data/examples/schedule_journey.rb +244 -0
  21. data/examples/simple_worker.rb +125 -0
  22. data/examples/simple_workflow.rb +89 -0
  23. data/examples/task_context_example.rb +257 -0
  24. data/examples/task_listener_example.rb +192 -0
  25. data/examples/worker_configuration_example.rb +282 -0
  26. data/examples/workflow_dsl.rb +316 -0
  27. data/examples/workflow_ops.rb +305 -0
  28. data/lib/conductor/client/authorization_client.rb +238 -0
  29. data/lib/conductor/client/integration_client.rb +108 -0
  30. data/lib/conductor/client/metadata_client.rb +139 -0
  31. data/lib/conductor/client/prompt_client.rb +58 -0
  32. data/lib/conductor/client/scheduler_client.rb +132 -0
  33. data/lib/conductor/client/schema_client.rb +32 -0
  34. data/lib/conductor/client/secret_client.rb +48 -0
  35. data/lib/conductor/client/task_client.rb +168 -0
  36. data/lib/conductor/client/workflow_client.rb +242 -0
  37. data/lib/conductor/configuration/authentication_settings.rb +17 -0
  38. data/lib/conductor/configuration.rb +103 -0
  39. data/lib/conductor/exceptions.rb +86 -0
  40. data/lib/conductor/http/api/application_resource_api.rb +107 -0
  41. data/lib/conductor/http/api/authorization_resource_api.rb +56 -0
  42. data/lib/conductor/http/api/event_resource_api.rb +133 -0
  43. data/lib/conductor/http/api/gateway_auth_resource_api.rb +48 -0
  44. data/lib/conductor/http/api/group_resource_api.rb +76 -0
  45. data/lib/conductor/http/api/integration_resource_api.rb +145 -0
  46. data/lib/conductor/http/api/metadata_resource_api.rb +231 -0
  47. data/lib/conductor/http/api/prompt_resource_api.rb +81 -0
  48. data/lib/conductor/http/api/role_resource_api.rb +60 -0
  49. data/lib/conductor/http/api/scheduler_resource_api.rb +211 -0
  50. data/lib/conductor/http/api/schema_resource_api.rb +82 -0
  51. data/lib/conductor/http/api/secret_resource_api.rb +134 -0
  52. data/lib/conductor/http/api/task_resource_api.rb +321 -0
  53. data/lib/conductor/http/api/token_resource_api.rb +42 -0
  54. data/lib/conductor/http/api/user_resource_api.rb +59 -0
  55. data/lib/conductor/http/api/workflow_bulk_resource_api.rb +91 -0
  56. data/lib/conductor/http/api/workflow_resource_api.rb +451 -0
  57. data/lib/conductor/http/api_client.rb +437 -0
  58. data/lib/conductor/http/models/authentication_config.rb +67 -0
  59. data/lib/conductor/http/models/authorization_request.rb +39 -0
  60. data/lib/conductor/http/models/base_model.rb +162 -0
  61. data/lib/conductor/http/models/bulk_response.rb +39 -0
  62. data/lib/conductor/http/models/conductor_application.rb +39 -0
  63. data/lib/conductor/http/models/conductor_user.rb +53 -0
  64. data/lib/conductor/http/models/create_or_update_application_request.rb +24 -0
  65. data/lib/conductor/http/models/create_or_update_role_request.rb +27 -0
  66. data/lib/conductor/http/models/event_handler.rb +130 -0
  67. data/lib/conductor/http/models/generate_token_request.rb +27 -0
  68. data/lib/conductor/http/models/group.rb +36 -0
  69. data/lib/conductor/http/models/integration.rb +70 -0
  70. data/lib/conductor/http/models/integration_api.rb +53 -0
  71. data/lib/conductor/http/models/integration_api_update.rb +43 -0
  72. data/lib/conductor/http/models/integration_update.rb +36 -0
  73. data/lib/conductor/http/models/permission.rb +24 -0
  74. data/lib/conductor/http/models/poll_data.rb +33 -0
  75. data/lib/conductor/http/models/prompt_template.rb +59 -0
  76. data/lib/conductor/http/models/prompt_template_test_request.rb +43 -0
  77. data/lib/conductor/http/models/rerun_workflow_request.rb +37 -0
  78. data/lib/conductor/http/models/role.rb +27 -0
  79. data/lib/conductor/http/models/schema_def.rb +59 -0
  80. data/lib/conductor/http/models/search_result.rb +187 -0
  81. data/lib/conductor/http/models/skip_task_request.rb +27 -0
  82. data/lib/conductor/http/models/start_workflow_request.rb +68 -0
  83. data/lib/conductor/http/models/subject_ref.rb +35 -0
  84. data/lib/conductor/http/models/tag_object.rb +36 -0
  85. data/lib/conductor/http/models/target_ref.rb +39 -0
  86. data/lib/conductor/http/models/task.rb +156 -0
  87. data/lib/conductor/http/models/task_def.rb +95 -0
  88. data/lib/conductor/http/models/task_exec_log.rb +30 -0
  89. data/lib/conductor/http/models/task_result.rb +115 -0
  90. data/lib/conductor/http/models/task_result_status.rb +24 -0
  91. data/lib/conductor/http/models/token.rb +33 -0
  92. data/lib/conductor/http/models/upsert_group_request.rb +30 -0
  93. data/lib/conductor/http/models/upsert_user_request.rb +39 -0
  94. data/lib/conductor/http/models/workflow.rb +202 -0
  95. data/lib/conductor/http/models/workflow_def.rb +73 -0
  96. data/lib/conductor/http/models/workflow_schedule.rb +100 -0
  97. data/lib/conductor/http/models/workflow_state_update.rb +30 -0
  98. data/lib/conductor/http/models/workflow_status_constants.rb +57 -0
  99. data/lib/conductor/http/models/workflow_task.rb +169 -0
  100. data/lib/conductor/http/models/workflow_test_request.rb +67 -0
  101. data/lib/conductor/http/rest_client.rb +211 -0
  102. data/lib/conductor/orkes/models/access_key.rb +56 -0
  103. data/lib/conductor/orkes/models/granted_permission.rb +27 -0
  104. data/lib/conductor/orkes/models/metadata_tag.rb +15 -0
  105. data/lib/conductor/orkes/models/rate_limit_tag.rb +15 -0
  106. data/lib/conductor/orkes/orkes_clients.rb +69 -0
  107. data/lib/conductor/version.rb +5 -0
  108. data/lib/conductor/worker/events/conductor_event.rb +40 -0
  109. data/lib/conductor/worker/events/global_dispatcher.rb +37 -0
  110. data/lib/conductor/worker/events/http_events.rb +25 -0
  111. data/lib/conductor/worker/events/listener_registry.rb +40 -0
  112. data/lib/conductor/worker/events/listeners.rb +34 -0
  113. data/lib/conductor/worker/events/sync_event_dispatcher.rb +78 -0
  114. data/lib/conductor/worker/events/task_runner_events.rb +271 -0
  115. data/lib/conductor/worker/events/workflow_events.rb +49 -0
  116. data/lib/conductor/worker/fiber_executor.rb +532 -0
  117. data/lib/conductor/worker/ractor_task_runner.rb +501 -0
  118. data/lib/conductor/worker/task_context.rb +114 -0
  119. data/lib/conductor/worker/task_definition_registrar.rb +322 -0
  120. data/lib/conductor/worker/task_handler.rb +360 -0
  121. data/lib/conductor/worker/task_in_progress.rb +60 -0
  122. data/lib/conductor/worker/task_runner.rb +538 -0
  123. data/lib/conductor/worker/telemetry/metrics_collector.rb +196 -0
  124. data/lib/conductor/worker/telemetry/prometheus_backend.rb +224 -0
  125. data/lib/conductor/worker/worker.rb +355 -0
  126. data/lib/conductor/worker/worker_config.rb +154 -0
  127. data/lib/conductor/worker/worker_registry.rb +71 -0
  128. data/lib/conductor/workflow/dsl/input_ref.rb +37 -0
  129. data/lib/conductor/workflow/dsl/output_ref.rb +44 -0
  130. data/lib/conductor/workflow/dsl/parallel_builder.rb +49 -0
  131. data/lib/conductor/workflow/dsl/switch_builder.rb +74 -0
  132. data/lib/conductor/workflow/dsl/task_ref.rb +178 -0
  133. data/lib/conductor/workflow/dsl/workflow_builder.rb +1016 -0
  134. data/lib/conductor/workflow/dsl/workflow_definition.rb +150 -0
  135. data/lib/conductor/workflow/llm/chat_message.rb +47 -0
  136. data/lib/conductor/workflow/llm/embedding_model.rb +19 -0
  137. data/lib/conductor/workflow/llm/tool_call.rb +43 -0
  138. data/lib/conductor/workflow/llm/tool_spec.rb +46 -0
  139. data/lib/conductor/workflow/task_type.rb +68 -0
  140. data/lib/conductor/workflow/timeout_policy.rb +31 -0
  141. data/lib/conductor/workflow/workflow_executor.rb +373 -0
  142. data/lib/conductor.rb +192 -0
  143. metadata +359 -0
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Conductor
4
+ module Worker
5
# Result object a worker hands back while its task is still being worked on.
# Returning a TaskInProgress keeps the task in the IN_PROGRESS state, and
# Conductor polls again once callback_after_seconds has passed.
#
# @example Long-running task that reports progress between polls
#   def execute(task)
#     ctx = TaskContext.current
#
#     # A poll_count above zero means this task was handed out before
#     if ctx.poll_count > 0
#       # Finished in the meantime? Return the final result.
#       if processing_complete?(task.input_data['job_id'])
#         return { status: 'completed', result: get_result() }
#       end
#
#       # Not done yet - ask to be called back later
#       return TaskInProgress.new(
#         callback_after_seconds: 30,
#         output: { status: 'processing', progress: get_progress() }
#       )
#     end
#
#     # First poll - kick off the long-running job
#     job_id = start_long_running_job(task.input_data)
#
#     TaskInProgress.new(
#       callback_after_seconds: 60,
#       output: { status: 'started', job_id: job_id }
#     )
#   end
class TaskInProgress
  # @!attribute callback_after_seconds
  #   @return [Integer] Seconds to wait before Conductor polls again
  # @!attribute output
  #   @return [Hash, nil] Intermediate output data
  attr_accessor :callback_after_seconds, :output

  # Build a TaskInProgress response
  # @param callback_after_seconds [Integer] Delay before the next poll, in seconds (default: 60)
  # @param output [Hash, nil] Optional intermediate output data
  def initialize(callback_after_seconds: 60, output: nil)
    @output = output
    @callback_after_seconds = callback_after_seconds
  end

  # Hash representation holding both attributes under symbol keys
  # @return [Hash]
  def to_h
    { callback_after_seconds: @callback_after_seconds, output: @output }
  end
end
59
+ end
60
+ end
@@ -0,0 +1,538 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent'
4
+ require 'logger'
5
+ require_relative '../client/task_client'
6
+ require_relative '../http/models/task'
7
+ require_relative '../http/models/task_result'
8
+ require_relative '../http/models/task_result_status'
9
+ require_relative '../exceptions'
10
+ require_relative 'task_context'
11
+ require_relative 'task_in_progress'
12
+ require_relative 'worker_config'
13
+ require_relative 'events/task_runner_events'
14
+ require_relative 'events/sync_event_dispatcher'
15
+ require_relative 'events/listener_registry'
16
+
17
+ module Conductor
18
+ module Worker
19
+ # TaskRunner - The core polling loop that runs in a dedicated thread
20
+ # Implements batch polling, adaptive backoff, capacity management, and event publishing
21
+ class TaskRunner
22
# Delay before each task-update attempt, in seconds. The number of
# entries is also the total number of attempts.
RETRY_BACKOFFS = [0, 10, 20, 30].freeze

# Maximum exponent for adaptive backoff to prevent overflow
MAX_BACKOFF_EXPONENT = 10

# Maximum auth failure backoff in seconds
MAX_AUTH_BACKOFF_SECONDS = 60

# @return [Worker] The worker instance this runner was created for
attr_reader :worker

# Attribute-style counterpart of {#running?}.
# This used to be a plain attr_reader over @running, an instance variable
# the class never assigns, so it always returned nil. It now reports the
# same state as {#running?}.
# @return [Boolean]
def running
  running?
end
32
+
33
# Build a TaskRunner bound to one worker
# @param worker [Worker] The worker instance
# @param configuration [Configuration] Conductor configuration; a new Configuration is created when nil
# @param event_dispatcher [SyncEventDispatcher] Shared event dispatcher; a new one is created when nil
# @param logger [Logger] Logger instance; a default $stdout logger is created when nil
def initialize(worker, configuration:, event_dispatcher: nil, logger: nil)
  @worker = worker
  @configuration = configuration || Configuration.new
  @event_dispatcher = event_dispatcher || Events::SyncEventDispatcher.new
  @logger = logger || create_default_logger

  # Client used for every poll and task update
  @task_client = Client::TaskClient.new(@configuration)

  # Resolve the effective settings for this task type and store them
  task_name = worker.task_definition_name
  worker_options = extract_worker_options(worker)
  apply_resolved_config(WorkerConfig.resolve(task_name, worker_options))

  # Pool that executes polled tasks; sized from the resolved thread count
  pool_settings = {
    min_threads: 1,
    max_threads: @max_workers,
    max_queue: @max_workers * 2,
    fallback_policy: :caller_runs
  }
  @executor = Concurrent::ThreadPoolExecutor.new(**pool_settings)

  # Bookkeeping for in-flight work, backoff and shutdown
  @mutex = Mutex.new
  @shutdown = Concurrent::AtomicBoolean.new(false)
  @running_tasks = Concurrent::Set.new
  @poll_count = Concurrent::AtomicFixnum.new(0)
  @consecutive_empty_polls = Concurrent::AtomicFixnum.new(0)
  @auth_failures = Concurrent::AtomicFixnum.new(0)
  @last_auth_failure_time = nil
  @last_poll_time = nil
end
72
+
73
# Main polling loop (runs until shutdown)
#
# Calls {#run_once} repeatedly until the shutdown flag is set, then releases
# resources through cleanup. An error escaping an iteration is logged,
# reported to listeners via publish_uncaught_exception, and followed by a
# one-second pause so a persistent failure cannot spin the loop.
def run
  @logger.info("Starting TaskRunner for '#{@worker.task_definition_name}' " \
               "(thread_count=#{@max_workers}, poll_interval=#{@poll_interval}ms)")

  # Register task definition if configured
  register_task_definition if @worker.register_task_def

  until @shutdown.true?
    begin
      run_once
    rescue StandardError => e
      @logger.error("Error in polling loop: #{e.message}")
      @logger.debug(e.backtrace.join("\n")) if e.backtrace
      # Surface the failure to event listeners as well as the log;
      # previously this helper was never invoked.
      publish_uncaught_exception(e)
      sleep(1) # Brief pause before retrying
    end
  end

  cleanup
  @logger.info("TaskRunner for '#{@worker.task_definition_name}' stopped")
end
94
+
95
# Single iteration of the polling loop
#
# Steps: drop finished futures, return early when every slot is busy, honour
# the adaptive backoff after empty polls, then batch-poll for as many tasks
# as there are free slots and submit each one for execution.
#
# Elapsed time is measured with the monotonic clock. With wall-clock time a
# backward clock adjustment made the elapsed value negative, which could
# stretch the backoff sleep by the size of the adjustment.
def run_once
  # 1. Cleanup completed tasks
  cleanup_completed_tasks

  # 2. Check capacity
  in_flight = @running_tasks.size
  if in_flight >= @max_workers
    sleep(0.001) # 1ms sleep to prevent busy-waiting
    return
  end

  available_slots = @max_workers - in_flight

  # 3. Adaptive backoff for empty polls
  if @consecutive_empty_polls.value.positive?
    backoff_ms = calculate_adaptive_backoff
    elapsed_ms = if @last_poll_time
                   (Process.clock_gettime(Process::CLOCK_MONOTONIC) - @last_poll_time) * 1000
                 else
                   backoff_ms # never polled yet: treat the backoff as already served
                 end

    if elapsed_ms < backoff_ms
      sleep_time = (backoff_ms - elapsed_ms) / 1000.0
      sleep([sleep_time, 0.001].max)
      return
    end
  end

  # 4. Batch poll for tasks (timestamp is monotonic seconds, not a Time)
  @last_poll_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  tasks = batch_poll(available_slots)

  # 5. Submit tasks for execution
  if tasks.empty?
    @consecutive_empty_polls.increment
  else
    @consecutive_empty_polls.value = 0
    tasks.each { |task| submit_task(task) }
  end
end
135
+
136
# Request a stop by setting the shutdown flag that the polling loop checks
def shutdown
  stop_flag = @shutdown
  stop_flag.make_true
end
140
+
141
# Whether the runner has not been asked to shut down
# @return [Boolean] false once the shutdown flag is set, true otherwise
def running?
  @shutdown.true? ? false : true
end
146
+
147
+ private
148
+
149
# Build the fallback logger: INFO level, written to $stdout, one line per
# entry in the form "[YYYY-MM-DD HH:MM:SS] SEVERITY -- message"
# @return [Logger]
def create_default_logger
  Logger.new($stdout).tap do |log|
    log.level = Logger::INFO
    log.formatter = proc do |severity, datetime, _progname, msg|
      timestamp = datetime.strftime('%Y-%m-%d %H:%M:%S')
      "[#{timestamp}] #{severity} -- #{msg}\n"
    end
  end
end
159
+
160
# Collect the worker's settings into a hash. Only keys listed in
# Worker::DEFAULTS that the worker responds to are included.
# @param worker [Worker] Worker instance
# @return [Hash] Option name => value reported by the worker
def extract_worker_options(worker)
  pairs = Worker::DEFAULTS.each_key.filter_map do |option|
    [option, worker.send(option)] if worker.respond_to?(option)
  end
  pairs.to_h
end
170
+
171
# Copy the resolved settings into the runner's instance variables.
# Note that :thread_count is stored as @max_workers.
# @param config [Hash] Resolved configuration
def apply_resolved_config(config)
  {
    poll_interval: :@poll_interval,
    thread_count: :@max_workers,
    worker_id: :@worker_id,
    domain: :@domain,
    poll_timeout: :@poll_timeout
  }.each do |config_key, ivar_name|
    instance_variable_set(ivar_name, config[config_key])
  end

  # Keep the value of the last assignment as the return value
  @poll_timeout
end
180
+
181
# Drop every fulfilled or rejected future from the in-flight set and, when
# at least one was dropped, publish the new active-worker count
def cleanup_completed_tasks
  finished = @running_tasks.select { |future| future.fulfilled? || future.rejected? }
  return if finished.empty?

  finished.each { |future| @running_tasks.delete(future) }
  publish_active_workers
end
192
+
193
# Backoff to apply after empty polls: 2**n milliseconds, where n is the
# number of consecutive empty polls capped at MAX_BACKOFF_EXPONENT, and the
# result is never larger than the poll interval
# @return [Float] Backoff in milliseconds
def calculate_adaptive_backoff
  empty_streak = @consecutive_empty_polls.value
  capped = empty_streak < MAX_BACKOFF_EXPONENT ? empty_streak : MAX_BACKOFF_EXPONENT
  doubling_ms = 1.0 * (2**capped)
  [doubling_ms, @poll_interval].min
end
199
+
200
# Ask the server for up to +count+ tasks of this worker's type.
#
# Nothing is requested while the worker is paused (a TaskPaused event is
# published instead) or while an authorization-failure backoff is still
# running. Poll errors are passed to the failure handlers and produce an
# empty result rather than an exception.
# @param count [Integer] Number of tasks to poll for
# @return [Array<Hash>] Array of task hashes (empty when nothing was polled)
def batch_poll(count)
  if @worker.paused
    @event_dispatcher.publish(Events::TaskPaused.new(task_type: @worker.task_definition_name))
    return []
  end

  # Exponential backoff after authorization failures, capped at the maximum
  failure_streak = @auth_failures.value
  if failure_streak.positive? && @last_auth_failure_time
    wait_seconds = [2**failure_streak, MAX_AUTH_BACKOFF_SECONDS].min
    return [] if Time.now - @last_auth_failure_time < wait_seconds
  end

  poll_started = Events::PollStarted.new(
    task_type: @worker.task_definition_name,
    worker_id: @worker_id,
    poll_count: @poll_count.value
  )
  @event_dispatcher.publish(poll_started)

  start_time = Time.now

  begin
    polled = @task_client.batch_poll_tasks(
      @worker.task_definition_name,
      count: count,
      timeout: @poll_timeout,
      worker_id: @worker_id,
      # Send the domain only when it is a non-empty string
      domain: (@domain unless @domain.to_s.empty?)
    ) || []

    elapsed_ms = (Time.now - start_time) * 1000
    @poll_count.increment

    poll_completed = Events::PollCompleted.new(
      task_type: @worker.task_definition_name,
      duration_ms: elapsed_ms,
      tasks_received: polled.size
    )
    @event_dispatcher.publish(poll_completed)

    # A successful poll ends any authorization backoff
    @auth_failures.value = 0

    polled
  rescue AuthorizationError => e
    handle_auth_failure(e, start_time)
    []
  rescue StandardError => e
    handle_poll_failure(e, start_time)
    []
  end
end
260
+
261
# Record an authorization failure: bump the failure counter, remember when
# it happened, publish a PollFailure event and log the resulting backoff
# @param error [AuthorizationError] The error
# @param start_time [Time] When the poll started
def handle_auth_failure(error, start_time)
  @auth_failures.increment
  @last_auth_failure_time = Time.now
  elapsed_ms = (Time.now - start_time) * 1000

  failure_event = Events::PollFailure.new(
    task_type: @worker.task_definition_name,
    duration_ms: elapsed_ms,
    cause: error
  )
  @event_dispatcher.publish(failure_event)

  failure_count = @auth_failures.value
  backoff = [2**failure_count, MAX_AUTH_BACKOFF_SECONDS].min
  @logger.warn("Auth failure ##{failure_count} for '#{@worker.task_definition_name}', " \
               "backing off #{backoff}s: #{error.message}")
end
279
+
280
# Report a failed poll: publish a PollFailure event carrying the elapsed
# time and the error, then log the error message
# @param error [StandardError] The error
# @param start_time [Time] When the poll started
def handle_poll_failure(error, start_time)
  elapsed_ms = (Time.now - start_time) * 1000

  failure_event = Events::PollFailure.new(
    task_type: @worker.task_definition_name,
    duration_ms: elapsed_ms,
    cause: error
  )
  @event_dispatcher.publish(failure_event)

  @logger.error("Poll failed for '#{@worker.task_definition_name}': #{error.message}")
end
294
+
295
# Schedule a polled task on the executor, track its future in the in-flight
# set and publish the updated active-worker count
# @param task [Hash] Task data from API
def submit_task(task)
  @running_tasks << Concurrent::Future.execute(executor: @executor) { execute_and_update(task) }
  publish_active_workers
end
304
+
305
# Run a task through the worker and send the outcome to the server.
#
# No update is sent when execution produced no result, or when the result is
# IN_PROGRESS with a positive callback delay.
# NOTE(review): in the second case the intermediate output and the callback
# delay never reach the server from this method - confirm that is intended.
# @param task [Hash] Task data from API
def execute_and_update(task)
  outcome = execute_task(task)
  return if outcome.nil?

  awaiting_callback = outcome.status == Http::Models::TaskResultStatus::IN_PROGRESS &&
                      outcome.callback_after_seconds&.positive?

  update_task_with_retry(outcome) unless awaiting_callback
end
319
+
320
# Execute a task through the worker and return its result.
#
# A TaskContext is installed for the duration of the call and always cleared
# afterwards. Logs and the callback delay recorded on the context are merged
# into the worker's result. Errors raised by the worker are converted into
# failed results by the error handlers instead of propagating.
#
# A nil result from the worker is passed through untouched. Previously the
# merge step dereferenced nil, and the resulting NoMethodError was reported
# as a retryable task failure.
# @param task [Task] Task object from API (already deserialized)
# @return [TaskResult, nil]
def execute_task(task)
  # Ensure we have a Task object (may be Hash if deserialization was skipped)
  task_obj = task.is_a?(Http::Models::Task) ? task : Http::Models::Task.from_hash(task)

  # Create initial TaskResult for context
  initial_result = Http::Models::TaskResult.new
  initial_result.task_id = task_obj.task_id
  initial_result.workflow_instance_id = task_obj.workflow_instance_id
  initial_result.worker_id = @worker_id

  # Make the context available to the worker while it runs
  TaskContext.current = TaskContext.new(task_obj, initial_result)

  start_time = Time.now

  @event_dispatcher.publish(Events::TaskExecutionStarted.new(
                              task_type: @worker.task_definition_name,
                              task_id: task_obj.task_id,
                              worker_id: @worker_id,
                              workflow_instance_id: task_obj.workflow_instance_id
                            ))

  begin
    task_result = @worker.execute(task_obj)

    duration_ms = (Time.now - start_time) * 1000

    ctx = TaskContext.current
    if task_result
      # Merge logs from context
      context_logs = ctx&.task_result&.logs
      if context_logs && !context_logs.empty?
        task_result.logs ||= []
        task_result.logs.concat(context_logs)
      end

      # Merge callback_after from context (the worker's own value wins)
      task_result.callback_after_seconds ||= ctx&.callback_after_seconds
    end

    # There is no output to measure when the worker returned nil
    output_size = task_result ? calculate_output_size(task_result) : 0

    @event_dispatcher.publish(Events::TaskExecutionCompleted.new(
                                task_type: @worker.task_definition_name,
                                task_id: task_obj.task_id,
                                worker_id: @worker_id,
                                workflow_instance_id: task_obj.workflow_instance_id,
                                duration_ms: duration_ms,
                                output_size_bytes: output_size
                              ))

    task_result
  rescue NonRetryableError => e
    handle_non_retryable_error(task_obj, e, start_time)
  rescue StandardError => e
    handle_retryable_error(task_obj, e, start_time)
  ensure
    TaskContext.clear
  end
end
383
+
384
# Size in bytes of the result's output data once serialized with to_json.
# Returns 0 when there is no output data or when any error is raised while
# reading or serializing it.
# @param task_result [TaskResult]
# @return [Integer]
def calculate_output_size(task_result)
  payload = task_result.output_data
  payload ? payload.to_json.bytesize : 0
rescue StandardError
  0
end
394
+
395
# Turn a NonRetryableError into a terminal-failure result: build the result,
# publish a TaskExecutionFailure event flagged as not retryable, and log a
# warning
# @param task [Task] Task object
# @param error [NonRetryableError] The error
# @param start_time [Time] When execution started
# @return [TaskResult]
def handle_non_retryable_error(task, error, start_time)
  elapsed_ms = (Time.now - start_time) * 1000

  failed_result = Http::Models::TaskResult.failed_with_terminal_error(error.message).tap do |result|
    result.task_id = task.task_id
    result.workflow_instance_id = task.workflow_instance_id
    result.worker_id = @worker_id
    result.log("NonRetryableError: #{error.class}: #{error.message}")
  end

  failure_event = Events::TaskExecutionFailure.new(
    task_type: @worker.task_definition_name,
    task_id: task.task_id,
    worker_id: @worker_id,
    workflow_instance_id: task.workflow_instance_id,
    duration_ms: elapsed_ms,
    cause: error,
    is_retryable: false
  )
  @event_dispatcher.publish(failure_event)

  @logger.warn("Task #{task.task_id} failed with terminal error: #{error.message}")
  failed_result
end
422
+
423
# Turn an unexpected worker error into a failed result: build the result
# with up to five backtrace lines in its log, publish a TaskExecutionFailure
# event flagged as retryable, and log the error
# @param task [Task] Task object
# @param error [StandardError] The error
# @param start_time [Time] When execution started
# @return [TaskResult]
def handle_retryable_error(task, error, start_time)
  elapsed_ms = (Time.now - start_time) * 1000

  # An error without a backtrace contributes an empty trace section
  trace_excerpt = Array(error.backtrace).first(5).join("\n")

  failed_result = Http::Models::TaskResult.failed(error.message).tap do |result|
    result.task_id = task.task_id
    result.workflow_instance_id = task.workflow_instance_id
    result.worker_id = @worker_id
    result.log("Error: #{error.class}: #{error.message}\n#{trace_excerpt}")
  end

  failure_event = Events::TaskExecutionFailure.new(
    task_type: @worker.task_definition_name,
    task_id: task.task_id,
    worker_id: @worker_id,
    workflow_instance_id: task.workflow_instance_id,
    duration_ms: elapsed_ms,
    cause: error,
    is_retryable: true
  )
  @event_dispatcher.publish(failure_event)

  @logger.error("Task #{task.task_id} failed: #{error.message}")
  failed_result
end
452
+
453
# Send a task result to the server, making one attempt per RETRY_BACKOFFS
# entry and sleeping for that entry's delay first. Stops at the first
# success. After the last failed attempt the loss is logged at fatal level
# and a task-update-failure event is published.
# @param task_result [TaskResult] The result to send
def update_task_with_retry(task_result)
  total_attempts = RETRY_BACKOFFS.size

  RETRY_BACKOFFS.each.with_index(1) do |delay, attempt_no|
    sleep(delay) if delay.positive?

    started_at = Time.now
    begin
      @task_client.update_task(task_result)
      publish_task_update_completed(task_result, (Time.now - started_at) * 1000)
      return # Success
    rescue StandardError => e
      elapsed_ms = (Time.now - started_at) * 1000
      @logger.error("Task update failed (attempt #{attempt_no}/#{total_attempts}): #{e.message}")
      next unless attempt_no == total_attempts

      @logger.fatal("CRITICAL: Task update failed after #{total_attempts} attempts. " \
                    "Task #{task_result.task_id} result is LOST.")
      publish_task_update_failure(task_result, e, elapsed_ms)
    end
  end
end
478
+
479
# Publish a TaskUpdateCompleted event for a result that was sent
# successfully.
#
# Errors raised while publishing are logged at debug level and swallowed,
# matching publish_active_workers. This method is called inside the retry
# block of update_task_with_retry, so a propagated error would be treated as
# a failed update and the result would be sent again.
# @param task_result [TaskResult] The result that was sent
# @param duration_ms [Float] Duration of the update call in milliseconds
def publish_task_update_completed(task_result, duration_ms)
  @event_dispatcher.publish(Events::TaskUpdateCompleted.new(
                              task_type: @worker.task_definition_name,
                              task_id: task_result.task_id,
                              worker_id: @worker_id,
                              workflow_instance_id: task_result.workflow_instance_id,
                              duration_ms: duration_ms
                            ))
rescue StandardError => e
  @logger.debug { "Telemetry error (non-fatal): #{e.class}: #{e.message}" }
end
488
+
489
# Publish a TaskUpdateFailure event once every update attempt has failed.
#
# Errors raised while publishing are logged at debug level and swallowed,
# matching publish_active_workers. This method is called from a rescue
# clause, so a propagated error would escape the retry loop.
# @param task_result [TaskResult] The result that could not be sent
# @param error [StandardError] The error raised by the last attempt
# @param duration_ms [Float] Duration of the last attempt in milliseconds
def publish_task_update_failure(task_result, error, duration_ms)
  @event_dispatcher.publish(Events::TaskUpdateFailure.new(
                              task_type: @worker.task_definition_name,
                              task_id: task_result.task_id,
                              worker_id: @worker_id,
                              workflow_instance_id: task_result.workflow_instance_id,
                              cause: error,
                              retry_count: RETRY_BACKOFFS.size,
                              task_result: task_result,
                              duration_ms: duration_ms
                            ))
rescue StandardError => e
  @logger.debug { "Telemetry error (non-fatal): #{e.class}: #{e.message}" }
end
501
+
502
# Publish an ActiveWorkersChanged event carrying the current number of
# tracked futures. Any error raised while doing so is logged at debug level
# and otherwise ignored.
def publish_active_workers
  changed = Events::ActiveWorkersChanged.new(
    task_type: @worker.task_definition_name,
    count: @running_tasks.size
  )
  @event_dispatcher.publish(changed)
rescue StandardError => e
  @logger.debug { "Telemetry error (non-fatal): #{e.class}: #{e.message}" }
end
510
+
511
# Publish a ThreadUncaughtException event for the given error. Any error
# raised while doing so is logged at debug level and otherwise ignored.
# @param error [StandardError] The error to report
def publish_uncaught_exception(error)
  uncaught = Events::ThreadUncaughtException.new(
    cause: error,
    task_type: @worker&.task_definition_name
  )
  @event_dispatcher.publish(uncaught)
rescue StandardError => e
  @logger.debug { "Telemetry error (non-fatal): #{e.class}: #{e.message}" }
end
519
+
520
# Placeholder for task definition registration: currently it only logs that
# registration is not implemented
def register_task_definition
  # TODO: Implement task definition registration
  notice = 'Task definition registration not yet implemented'
  @logger.info(notice)
end
525
+
526
# Release resources when the polling loop ends: shut the executor down, wait
# up to five seconds for termination, kill it if it still has not shut down,
# then clear the event dispatcher. Errors are logged as warnings rather than
# raised.
# NOTE(review): the dispatcher may be shared with other runners - confirm
# that clearing it here does not affect them.
def cleanup
  pool = @executor
  pool.shutdown
  pool.wait_for_termination(5)
  pool.shutdown? || pool.kill

  @event_dispatcher.clear
rescue StandardError => e
  @logger.warn("Error during cleanup: #{e.message}")
end
536
+ end
537
+ end
538
+ end