kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479) hide show
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,1297 @@
1
+ """
2
+ Temporal worker for Agent Control Plane - Decoupled Architecture.
3
+
4
+ This worker:
5
+ 1. Registers with Control Plane API on startup using KUBIYA_API_KEY
6
+ 2. Gets dynamic configuration (Temporal credentials, task queue name, etc.)
7
+ 3. Connects to Temporal Cloud with provided credentials
8
+ 4. Sends periodic heartbeats to Control Plane
9
+ 5. Has NO direct database access - all state managed via Control Plane API
10
+
11
+ Environment variables REQUIRED:
12
+ - KUBIYA_API_KEY: Kubiya API key for authentication (required)
13
+ - CONTROL_PLANE_URL: Control Plane API URL (e.g., https://control-plane.kubiya.ai)
+ - QUEUE_ID: Worker queue ID (UUID) to start this worker for (required)
14
+ - ENVIRONMENT_NAME: Environment/task queue name to join (default: "default")
15
+
16
+ Environment variables OPTIONAL:
17
+ - WORKER_HOSTNAME: Custom hostname for worker (default: auto-detected)
18
+ - HEARTBEAT_INTERVAL: Seconds between heartbeats (default: 60, lightweight mode)
19
+ """
20
+
21
+ import asyncio
22
+ import os
23
+ import sys
24
+ import structlog
25
+ import httpx
26
+ import socket
27
+ import platform
28
+ import psutil
29
+ import time
30
+ from dataclasses import dataclass
31
+ from typing import Optional, List
32
+ from temporalio.worker import Worker
33
+ from temporalio.worker.workflow_sandbox import SandboxedWorkflowRunner, SandboxRestrictions
34
+ from temporalio.client import Client, TLSConfig
35
+ from collections import deque
36
+
37
+ from control_plane_api.app.utils.helpers import is_local_temporal
38
+ # Import workflows and activities from local package
39
+ from control_plane_api.worker.workflows.agent_execution import AgentExecutionWorkflow
40
+ from control_plane_api.worker.workflows.team_execution import TeamExecutionWorkflow
41
+ from control_plane_api.worker.workflows.scheduled_job_wrapper import ScheduledJobWrapperWorkflow
42
+ from control_plane_api.worker.activities.agent_activities import (
43
+ execute_agent_llm,
44
+ update_execution_status,
45
+ update_agent_status,
46
+ get_execution_details,
47
+ persist_conversation_history,
48
+ submit_runtime_analytics_activity,
49
+ )
50
+ from control_plane_api.worker.activities.team_activities import (
51
+ get_team_agents,
52
+ execute_team_coordination,
53
+ )
54
+ from control_plane_api.worker.activities.runtime_activities import (
55
+ execute_with_runtime,
56
+ publish_user_message,
57
+ )
58
+ from control_plane_api.worker.activities.job_activities import (
59
+ create_job_execution_record,
60
+ update_job_execution_status,
61
+ )
62
+
63
+ # Configure structured logging
64
+ import logging
65
+ from control_plane_api.worker.utils.logging_config import configure_logging
66
+
67
+ # Configure logging with dynamic settings from environment variables
68
+ configure_logging()
69
+
70
# Structured logger shared by everything in this module.
logger = structlog.get_logger()

# Bounded buffer of log lines collected since the last heartbeat that
# drained it; only the most recent 500 entries are retained.
log_buffer = deque(maxlen=500)

# Wall-clock timestamp taken at import time; used to compute worker uptime.
worker_start_time = time.time()

# Bookkeeping for differential heartbeats (optimization).
_last_full_heartbeat_time: float = 0  # 0 means "no full heartbeat sent yet"
_cached_system_info: Optional[dict] = None
_last_log_index_sent: int = 0
# Seconds between full heartbeats (5 minutes, vs. lightweight every 60s).
_full_heartbeat_interval: int = 300
81
+
82
+
83
class ProgressUI:
    """Minimal startup progress reporter (minikube style), backed by structured log events."""

    @staticmethod
    def step(emoji: str, message: str, status: str = ""):
        """Emit a ``worker_progress`` event; ``status`` is attached only when non-empty."""
        fields = {"emoji": emoji, "message": message}
        if status:
            fields["status"] = status
        logger.info("worker_progress", **fields)

    @staticmethod
    def success(emoji: str, message: str):
        """Emit a ``worker_success`` event at info level."""
        logger.info("worker_success", emoji=emoji, message=message)

    @staticmethod
    def error(emoji: str, message: str):
        """Emit a ``worker_error`` event at error level."""
        logger.error("worker_error", emoji=emoji, message=message)

    @staticmethod
    def warning(emoji: str, message: str):
        """Emit a ``worker_warning`` event at warning level."""
        logger.warning("worker_warning", emoji=emoji, message=message)

    @staticmethod
    def header(text: str):
        """Emit a ``worker_header`` event carrying a section title."""
        logger.info("worker_header", text=text)

    @staticmethod
    def banner():
        """Emit the ``worker_banner`` startup event."""
        logger.info("worker_banner", title="Kubiya Agent Worker")
118
+
119
+
120
def _probe_docker():
    """
    Best-effort lookup of the Docker CLI and its version.

    Returns:
        Tuple ``(docker_available, docker_version)``. Any failure is logged
        and never raised; whatever was determined so far is returned.
    """
    available = False
    version = None
    try:
        import subprocess
        import shutil

        # Prefer whatever is on PATH.
        cli_path = shutil.which('docker')
        logger.debug("docker_which_result", path=cli_path)

        # Otherwise try a few well-known install locations.
        if not cli_path:
            for candidate in (
                '/usr/local/bin/docker',
                '/usr/bin/docker',
                '/opt/homebrew/bin/docker',
            ):
                present = os.path.exists(candidate)
                logger.debug("docker_checking_path", path=candidate, exists=present)
                if present:
                    cli_path = candidate
                    break

        if not cli_path:
            logger.warning("docker_not_found_in_path_or_common_locations")
        else:
            logger.debug("docker_running_version_check", path=cli_path)
            proc = subprocess.run(
                [cli_path, '--version'],
                capture_output=True,
                text=True,
                timeout=3,
                shell=False
            )
            logger.debug(
                "docker_version_output",
                returncode=proc.returncode,
                stdout=proc.stdout[:200],
                stderr=proc.stderr[:200] if proc.stderr else None
            )
            if proc.returncode != 0:
                logger.warning("docker_version_check_failed", returncode=proc.returncode)
            else:
                available = True
                # "Docker version 28.1.1, build 4eba377" -> "28.1.1"
                leading = proc.stdout.strip().partition(',')[0]
                version = leading.replace('Docker version', '').strip()
                logger.debug("docker_detected", version=version, path=cli_path)
    except Exception as exc:
        # Log for debugging but don't fail
        logger.warning("docker_detection_failed", error=str(exc), error_type=type(exc).__name__)
        import traceback
        logger.debug("docker_detection_traceback", traceback=traceback.format_exc())
    return available, version


def collect_system_info() -> dict:
    """
    Gather a snapshot of host, process and resource metrics.

    Returns:
        Dict with host/OS identity, SDK version, pid/cwd, supported runtimes,
        Docker availability, CPU/memory/disk usage and worker uptime. If
        anything raises while collecting, a reduced dict containing only
        ``hostname`` and ``platform`` is returned instead.
    """
    try:
        cpu_percent = psutil.cpu_percent(interval=0.1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')

        # SDK version is resolved lazily at call time.
        from control_plane_api.version import get_sdk_version
        sdk_version = get_sdk_version()

        pid = os.getpid()
        cwd = os.getcwd()

        docker_available, docker_version = _probe_docker()

        return {
            "hostname": socket.gethostname(),
            "platform": platform.platform(),
            "os_name": platform.system(),  # Darwin, Linux, Windows
            "os_version": platform.release(),
            "python_version": platform.python_version(),
            # CLI version reporting is skipped for now.
            "cli_version": None,
            "sdk_version": sdk_version,
            "pid": pid,
            "cwd": cwd,
            # Both runtimes are always reported as available.
            "supported_runtimes": ["agno", "claude_code"],
            "docker_available": docker_available,
            "docker_version": docker_version,
            "cpu_count": psutil.cpu_count(),
            "cpu_percent": cpu_percent,
            "memory_total": memory.total,
            "memory_used": memory.used,
            "memory_percent": memory.percent,
            "disk_total": disk.total,
            "disk_used": disk.used,
            "disk_percent": disk.percent,
            "uptime_seconds": time.time() - worker_start_time,
        }
    except Exception as exc:
        logger.warning("failed_to_collect_system_info", error=str(exc))
        return {
            "hostname": socket.gethostname(),
            "platform": platform.platform(),
        }
236
+
237
+
238
def get_recent_logs() -> List[str]:
    """
    Drain the shared log buffer.

    Returns:
        The buffered log lines in insertion order; the buffer is empty afterwards.
    """
    drained = [*log_buffer]
    log_buffer.clear()
    return drained
245
+
246
+
247
def log_to_buffer(message: str):
    """
    Queue a log line for delivery with a later heartbeat.

    Args:
        message: Log line to append to the shared buffer
    """
    log_buffer.append(message)
252
+
253
+
254
@dataclass
class WorkerConfig:
    """Configuration received from Control Plane registration"""
    # Identity of this worker as assigned by the Control Plane.
    worker_id: str
    # Temporal task queue name; registration fills this with the queue UUID.
    environment_name: str
    # Temporal connection settings.
    temporal_namespace: str
    temporal_host: str
    temporal_api_key: str
    organization_id: str
    control_plane_url: str
    # LiteLLM proxy endpoint and credentials.
    litellm_api_url: str = "https://llm-proxy.kubiya.ai"
    litellm_api_key: str = ""
    # Redis configuration for direct event streaming
    redis_url: str = ""
    redis_password: str = ""
    redis_enabled: bool = False
    # WebSocket configuration
    websocket_enabled: bool = True
    websocket_url: str = ""
    # Defaults to None (not an empty list), so the annotation is Optional.
    websocket_features: Optional[list] = None
    # Queue configuration for cleanup
    queue_id: str = ""
    queue_ephemeral: bool = False
    queue_single_execution: bool = False
278
+
279
+
280
async def start_worker_for_queue(
    control_plane_url: str,
    kubiya_api_key: str,
    queue_id: str,
) -> WorkerConfig:
    """
    Start a worker for a specific queue ID.

    Posts the worker SDK version and a system-info snapshot to
    ``/api/v1/worker-queues/{queue_id}/start`` and builds a WorkerConfig
    from the JSON response.

    Args:
        control_plane_url: Control Plane API URL
        kubiya_api_key: Kubiya API key for authentication
        queue_id: Worker queue ID (UUID)

    Returns:
        WorkerConfig with all necessary configuration

    Raises:
        SystemExit: With code 1 when the Control Plane answers with a
            non-200 status or the request fails with httpx.RequestError
        KeyError: When a 200 response lacks a required field
    """
    # Get worker SDK version for compatibility check
    from control_plane_api.version import get_sdk_version
    worker_sdk_version = get_sdk_version()

    # Collect system info to send during registration
    system_info = collect_system_info()

    logger.info(
        "starting_worker_for_queue",
        queue_id=queue_id,
        control_plane_url=control_plane_url,
        sdk_version=worker_sdk_version,
        pid=system_info.get("pid"),
        cwd=system_info.get("cwd"),
    )

    # Normalize URL to prevent double-slash issues (the heartbeat does the same).
    base_url = control_plane_url.rstrip("/")
    default_litellm_url = "https://llm-proxy.kubiya.ai"

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{base_url}/api/v1/worker-queues/{queue_id}/start",
                headers={"Authorization": f"Bearer {kubiya_api_key}"},
                json={
                    "worker_sdk_version": worker_sdk_version,
                    "system_info": system_info,
                    "control_plane_url": control_plane_url
                }
            )

            # Success case
            if response.status_code == 200:
                data = response.json()

                ProgressUI.success("✓", "Registered with control plane")
                logger.info(
                    "worker_registered",
                    # Tolerate a missing id here; the required-field lookup
                    # below raises a clearer KeyError.
                    worker_id=str(data.get("worker_id") or "")[:8],
                    queue_name=data.get("queue_name"),
                )

                # Check SDK version compatibility
                control_plane_sdk_version = data.get("control_plane_sdk_version")
                if control_plane_sdk_version and control_plane_sdk_version != worker_sdk_version:
                    ProgressUI.warning("⚠", "SDK version mismatch detected")
                    print(f"\n Worker SDK version: {worker_sdk_version}")
                    print(f" Control Plane SDK version: {control_plane_sdk_version}")
                    print("\n Consider updating your worker to match the control plane version.\n")

                    logger.warning(
                        "sdk_version_mismatch",
                        worker_version=worker_sdk_version,
                        control_plane_version=control_plane_sdk_version,
                    )
                elif control_plane_sdk_version:
                    logger.info(
                        "sdk_version_match",
                        version=worker_sdk_version,
                    )

                # Priority for LiteLLM API URL:
                # 1. LITELLM_API_BASE environment variable (from local proxy via CLI)
                # 2. Control plane litellm_api_url
                # 3. Default (https://llm-proxy.kubiya.ai)
                # `or` chaining also covers a null/empty value sent by the
                # server, which would otherwise propagate as None.
                env_litellm_url = os.getenv("LITELLM_API_BASE")
                server_litellm_url = data.get("litellm_api_url")
                litellm_api_url = env_litellm_url or server_litellm_url or default_litellm_url
                litellm_api_key = os.getenv("LITELLM_API_KEY") or data.get("litellm_api_key") or ""

                # Log which LiteLLM endpoint is being used
                if env_litellm_url:
                    logger.info(
                        "using_local_litellm_proxy",
                        litellm_api_url=litellm_api_url,
                        source="environment_variable"
                    )
                elif server_litellm_url:
                    logger.info(
                        "using_control_plane_litellm_proxy",
                        litellm_api_url=litellm_api_url,
                        source="control_plane"
                    )

                return WorkerConfig(
                    worker_id=data["worker_id"],
                    environment_name=data["task_queue_name"],  # This is now the queue UUID
                    temporal_namespace=data["temporal_namespace"],
                    temporal_host=data["temporal_host"],
                    temporal_api_key=data["temporal_api_key"],
                    organization_id=data["organization_id"],
                    control_plane_url=data["control_plane_url"],
                    litellm_api_url=litellm_api_url,
                    litellm_api_key=litellm_api_key,
                    # Redis configuration from control plane (for direct event streaming)
                    redis_url=data.get("redis_url", ""),
                    redis_password=data.get("redis_password", ""),
                    redis_enabled=data.get("redis_enabled", False),
                    # WebSocket configuration from control plane
                    websocket_enabled=data.get("websocket_enabled", True),
                    websocket_url=data.get("websocket_url", ""),
                    websocket_features=data.get("websocket_features", []),
                )

            # Handle errors: prefer the JSON "detail" field, else the raw body.
            error_message = response.text
            try:
                error_data = response.json()
            except ValueError:
                # Body is not valid JSON; keep the raw text.
                error_data = None
            if isinstance(error_data, dict):
                error_message = error_data.get("detail", response.text)

            ProgressUI.error("✗", "Worker registration failed")
            print(f" {error_message}\n")

            logger.error(
                "worker_start_failed",
                status_code=response.status_code,
                queue_id=queue_id,
            )
            sys.exit(1)

    except httpx.RequestError as e:
        ProgressUI.error("✗", f"Connection failed: {control_plane_url}")
        print(f" {str(e)}\n")
        logger.error("control_plane_connection_failed", error=str(e))
        sys.exit(1)
424
+
425
+
426
def _requeue_heartbeat_logs(undelivered: List[str]) -> None:
    """Put drained-but-undelivered log lines back in front of newer buffer entries."""
    if not undelivered:
        return
    newer = get_recent_logs()
    # Appending oldest-first means the bounded buffer evicts the oldest
    # lines if the combined size exceeds its capacity.
    log_buffer.extend(undelivered)
    log_buffer.extend(newer)


async def send_heartbeat(
    config: WorkerConfig,
    kubiya_api_key: str,
    status: str = "active",
    tasks_processed: int = 0,
    current_task_id: Optional[str] = None,
    force_full: bool = False
) -> bool:
    """
    Send heartbeat to Control Plane with differential data.

    Optimization: Uses lightweight heartbeats (status only) by default,
    and sends full heartbeats (with system info + logs) when forced, on the
    first call, or once the full-heartbeat interval has elapsed.

    A full heartbeat only counts as sent once the Control Plane accepts it:
    on failure the drained log lines are re-queued and the next heartbeat
    is attempted as a full one again.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        status: Worker status (active, idle, busy)
        tasks_processed: Number of tasks processed
        current_task_id: Currently executing task ID
        force_full: Force a full heartbeat (ignores timing logic)

    Returns:
        True if successful, False otherwise
    """
    global _last_full_heartbeat_time, _cached_system_info

    current_time = time.time()
    time_since_last_full = current_time - _last_full_heartbeat_time

    # Full heartbeat: if forced, on first run, or when the interval elapsed
    is_full_heartbeat = (
        force_full or
        _last_full_heartbeat_time == 0 or
        time_since_last_full >= _full_heartbeat_interval
    )

    # Build base heartbeat data (always included)
    heartbeat_data = {
        "status": status,
        "tasks_processed": tasks_processed,
        "current_task_id": current_task_id,
        "worker_metadata": {},
    }

    # Log lines drained for this attempt; restored if delivery fails.
    logs: List[str] = []

    if is_full_heartbeat:
        # Collect fresh system info (expensive operation)
        system_info = collect_system_info()
        _cached_system_info = system_info
        heartbeat_data["system_info"] = system_info

        # Get logs since the buffer was last drained (only new logs)
        logs = get_recent_logs()
        if logs:
            heartbeat_data["logs"] = logs

        heartbeat_type = "full"
    else:
        # Lightweight heartbeat - no system info or logs
        heartbeat_type = "lightweight"

    try:
        # Normalize URL to prevent double-slash issues
        control_plane_url = config.control_plane_url.rstrip("/")
        url = f"{control_plane_url}/api/v1/workers/{config.worker_id}/heartbeat"

        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.post(
                url,
                json=heartbeat_data,
                headers={"Authorization": f"Bearer {kubiya_api_key}"}
            )

        if response.status_code in [200, 204]:
            if is_full_heartbeat:
                # Only a delivered full heartbeat resets the interval timer.
                _last_full_heartbeat_time = current_time
            logger.debug(
                "heartbeat_sent",
                worker_id=config.worker_id,
                type=heartbeat_type,
                payload_size=len(str(heartbeat_data))
            )
            log_to_buffer(
                f"[{time.strftime('%H:%M:%S')}] Heartbeat sent ({heartbeat_type})"
            )
            return True

        _requeue_heartbeat_logs(logs)
        logger.warning(
            "heartbeat_failed",
            status_code=response.status_code,
            response=response.text[:200],
            type=heartbeat_type
        )
        log_to_buffer(
            f"[{time.strftime('%H:%M:%S')}] Heartbeat failed: HTTP {response.status_code}"
        )
        return False

    except Exception as e:
        _requeue_heartbeat_logs(logs)
        error_msg = f"{type(e).__name__}: {str(e)}" if str(e) else f"{type(e).__name__} (no message)"
        logger.warning(
            "heartbeat_error",
            error=error_msg,
            error_type=type(e).__name__,
            worker_id=config.worker_id[:8] if config.worker_id else "unknown",
            type=heartbeat_type
        )
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Heartbeat error: {error_msg[:150]}")
        return False
539
+
540
+
541
async def create_temporal_client(config: WorkerConfig) -> Client:
    """
    Open a Temporal client connection from the registered worker configuration.

    A local Temporal target is dialed without TLS or credentials; any other
    target is dialed with TLS and a bearer token in the RPC metadata.

    Args:
        config: Worker configuration from Control Plane registration

    Returns:
        Connected Temporal client instance

    Raises:
        Exception: Any connection error, re-raised after being logged
    """
    try:
        connect_kwargs = {"namespace": config.temporal_namespace}

        if is_local_temporal():
            logger.info("connecting_to_local_temporal", host=config.temporal_host)
        else:
            logger.info("connecting_to_temporal_cloud", host=config.temporal_host)
            connect_kwargs["tls"] = TLSConfig()  # TLS enabled
            connect_kwargs["rpc_metadata"] = {
                "authorization": f"Bearer {config.temporal_api_key}"
            }

        return await Client.connect(config.temporal_host, **connect_kwargs)

    except Exception as exc:
        logger.error("connection_failed", error=str(exc))
        ProgressUI.error("✗", f"Temporal connection failed: {str(exc)}")
        raise
575
+
576
+
577
async def send_disconnect(
    config: WorkerConfig,
    kubiya_api_key: str,
    reason: str = "shutdown",
    exit_code: Optional[int] = None,
    error_message: Optional[str] = None
) -> bool:
    """
    Notify Control Plane that worker is disconnecting/exiting.

    Best-effort: every failure is logged and reported through the return
    value rather than raised.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        reason: Disconnect reason (shutdown, error, crash, etc.)
        exit_code: Exit code if applicable
        error_message: Error message if applicable

    Returns:
        True if successful, False otherwise
    """
    disconnect_data = {
        "reason": reason,
        "exit_code": exit_code,
        "error_message": error_message
    }

    try:
        # Normalize URL to prevent double-slash issues (matches the heartbeat endpoint).
        control_plane_url = config.control_plane_url.rstrip("/")
        url = f"{control_plane_url}/api/v1/workers/{config.worker_id}/disconnect"

        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.post(
                url,
                json=disconnect_data,
                headers={"Authorization": f"Bearer {kubiya_api_key}"}
            )

        if response.status_code in [200, 204]:
            logger.info(
                "worker_disconnected",
                worker_id=config.worker_id,
                reason=reason,
                exit_code=exit_code
            )
            return True

        logger.warning(
            "disconnect_notification_failed",
            status_code=response.status_code,
            response=response.text[:200]
        )
        return False

    except Exception as e:
        logger.warning("disconnect_notification_error", error=str(e))
        return False
630
+
631
+
632
async def delete_ephemeral_queue(
    config: WorkerConfig,
    kubiya_api_key: str,
    queue_id: str,
    timeout: int = 5
) -> bool:
    """
    Delete ephemeral queue during worker shutdown.

    This allows the worker to clean up its ephemeral queue immediately,
    without requiring the CLI to wait for worker unregistration.
    Best-effort: failures are logged and reported through the return value.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        queue_id: Queue UUID to delete
        timeout: Request timeout in seconds (short timeout - if it fails, TTL handles it)

    Returns:
        True if successful, False otherwise
    """
    try:
        # Normalize URL to prevent double-slash issues (matches the heartbeat endpoint).
        control_plane_url = config.control_plane_url.rstrip("/")
        url = f"{control_plane_url}/api/v1/worker-queues/{queue_id}"

        async with httpx.AsyncClient(timeout=float(timeout)) as client:
            response = await client.delete(
                url,
                headers={"Authorization": f"Bearer {kubiya_api_key}"}
            )

        if response.status_code in [200, 204]:
            logger.info(
                "ephemeral_queue_deleted",
                queue_id=queue_id,
                worker_id=config.worker_id
            )
            return True

        logger.warning(
            "queue_delete_failed",
            queue_id=queue_id,
            status_code=response.status_code,
            response=response.text[:200]
        )
        return False

    except Exception as e:
        logger.warning(
            "queue_delete_error",
            queue_id=queue_id,
            error=str(e)
        )
        return False
683
+
684
+
685
async def heartbeat_loop(config: WorkerConfig, kubiya_api_key: str, interval: int = 60):
    """
    Send heartbeats to the Control Plane forever, one every ``interval`` seconds.

    The loop sleeps first, then sends. It ends only on cancellation; any
    other exception is logged and the loop carries on.

    Args:
        config: Worker configuration
        kubiya_api_key: Kubiya API key for authentication
        interval: Seconds between heartbeats
    """
    # Never incremented in this loop, so every heartbeat reports 0.
    processed_count = 0

    while True:
        try:
            await asyncio.sleep(interval)
            await send_heartbeat(
                config=config,
                kubiya_api_key=kubiya_api_key,
                status="active",
                tasks_processed=processed_count
            )
        except asyncio.CancelledError:
            logger.info("heartbeat_loop_cancelled")
            return
        except Exception as exc:
            logger.warning("heartbeat_loop_error", error=str(exc))
710
+
711
+
712
+ async def run_worker():
713
+ """
714
+ Run the Temporal worker with decoupled architecture.
715
+
716
+ The worker:
717
+ 1. Registers with Control Plane API
718
+ 2. Gets dynamic configuration (Temporal credentials, task queue, etc.)
719
+ 3. Connects to Temporal Cloud
720
+ 4. Starts heartbeat loop
721
+ 5. Registers workflows and activities
722
+ 6. Polls for tasks and executes them
723
+ """
724
+ # Get configuration from environment
725
+ kubiya_api_key = os.environ.get("KUBIYA_API_KEY")
726
+ control_plane_url = os.environ.get("CONTROL_PLANE_URL")
727
+ queue_id = os.environ.get("QUEUE_ID")
728
+ heartbeat_interval = int(os.environ.get("HEARTBEAT_INTERVAL", "60"))
729
+ single_execution_mode = os.environ.get("SINGLE_EXECUTION", "").lower() in ("true", "1", "yes")
730
+
731
+ # Validate required configuration
732
+ if not kubiya_api_key:
733
+ logger.error(
734
+ "configuration_error",
735
+ message="KUBIYA_API_KEY environment variable is required"
736
+ )
737
+ sys.exit(1)
738
+
739
+ if not control_plane_url:
740
+ logger.error(
741
+ "configuration_error",
742
+ message="CONTROL_PLANE_URL environment variable is required"
743
+ )
744
+ sys.exit(1)
745
+
746
+ if not queue_id:
747
+ logger.error(
748
+ "configuration_error",
749
+ message="QUEUE_ID environment variable is required"
750
+ )
751
+ sys.exit(1)
752
+
753
+ log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Worker starting for queue {queue_id}")
754
+
755
+ if single_execution_mode:
756
+ log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Single execution mode: enabled (will exit after one task)")
757
+ logger.info("single_execution_mode_enabled", queue_id=queue_id)
758
+
759
+ # Check if agent-runtime mode is enabled
760
+ use_agent_runtime = os.environ.get("USE_AGENT_RUNTIME", "").lower() in ("true", "1", "yes")
761
+ agent_runtime_server = None
762
+ health_monitor = None
763
+
764
+ try:
765
+ # Print banner
766
+ ProgressUI.banner()
767
+
768
+ # Step 0: Setup agent-runtime if enabled
769
+ if use_agent_runtime:
770
+ from pathlib import Path
771
+ from control_plane_api.worker.binary_manager import BinaryManager
772
+ from control_plane_api.worker.agent_runtime_server import AgentRuntimeServer, ServerConfig
773
+
774
+ ProgressUI.step("⏳", "Setting up agent-runtime...")
775
+ log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Downloading agent-runtime binary...")
776
+
777
+ config_dir = Path(os.environ.get("AGENT_RUNTIME_CONFIG_DIR", Path.home() / ".kubiya"))
778
+ binary_manager = BinaryManager(config_dir)
779
+ binary_path = await binary_manager.ensure_binary("latest")
780
+
781
+ log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Starting agent-runtime server...")
782
+ server_config = ServerConfig(
783
+ grpc_port=int(os.environ.get("AGENT_RUNTIME_GRPC_PORT", "50052")),
784
+ http_port=int(os.environ.get("AGENT_RUNTIME_HTTP_PORT", "8082")),
785
+ health_port=int(os.environ.get("AGENT_RUNTIME_HEALTH_PORT", "8083")),
786
+ config_dir=config_dir,
787
+ log_level=os.environ.get("AGENT_RUNTIME_LOG_LEVEL", "info"),
788
+ )
789
+
790
+ agent_runtime_server = AgentRuntimeServer(binary_path, server_config)
791
+ await agent_runtime_server.start(wait_for_health=True, timeout=30)
792
+
793
+ # Set environment variable for runtime to use
794
+ os.environ["AGENT_RUNTIME_ADDRESS"] = agent_runtime_server.grpc_address
795
+ ProgressUI.success("✓", f"Agent runtime ready at {agent_runtime_server.grpc_address}")
796
+ log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Agent runtime server started on {agent_runtime_server.grpc_address}")
797
+
798
+ # Step 1: Register with control plane
799
+ ProgressUI.step("⏳", "Registering with control plane...")
800
+ log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Registering with control plane...")
801
+ config = await start_worker_for_queue(
802
+ control_plane_url=control_plane_url,
803
+ kubiya_api_key=kubiya_api_key,
804
+ queue_id=queue_id,
805
+ )
806
+ log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Worker registered: {config.worker_id}")
807
+
808
+ # Set environment variables for activities to use
809
+ os.environ["CONTROL_PLANE_URL"] = config.control_plane_url
810
+
811
+ # Set single execution flag so event publisher can disable WebSocket
812
+ if single_execution_mode:
813
+ os.environ["KUBIYA_SINGLE_EXECUTION_MODE"] = "true"
814
+ os.environ["KUBIYA_API_KEY"] = kubiya_api_key
815
+ os.environ["WORKER_ID"] = config.worker_id
816
+ os.environ["LITELLM_API_BASE"] = config.litellm_api_url
817
+ os.environ["LITELLM_API_KEY"] = config.litellm_api_key
818
+
819
+ # Set WebSocket environment variables if enabled
820
+ from control_plane_api.worker.utils.environment import should_use_websocket
821
+
822
+ if config.websocket_enabled and config.websocket_url and should_use_websocket():
823
+ os.environ["WEBSOCKET_ENABLED"] = "true"
824
+ os.environ["WEBSOCKET_URL"] = config.websocket_url
825
+ logger.info(
826
+ "websocket_configured",
827
+ worker_id=config.worker_id[:8],
828
+ websocket_url=config.websocket_url
829
+ )
830
+ else:
831
+ os.environ["WEBSOCKET_ENABLED"] = "false"
832
+ if not should_use_websocket():
833
+ logger.info("websocket_disabled_serverless_environment")
834
+ else:
835
+ logger.info("websocket_disabled_using_http")
836
+
837
+ # Set Redis environment variables if provided (for Redis-first event streaming)
838
+ if config.redis_enabled and config.redis_url:
839
+ os.environ["REDIS_URL"] = config.redis_url
840
+ os.environ["REDIS_ENABLED"] = "true"
841
+ if config.redis_password:
842
+ os.environ["REDIS_PASSWORD"] = config.redis_password
843
+ logger.info(
844
+ "redis_configured_for_direct_streaming",
845
+ worker_id=config.worker_id[:8],
846
+ redis_url=config.redis_url.split("@")[-1] if "@" in config.redis_url else config.redis_url # Log without password
847
+ )
848
+ else:
849
+ os.environ["REDIS_ENABLED"] = "false"
850
+ logger.debug("redis_not_configured_will_use_http_endpoint")
851
+
852
+ # Step 2: Connect to Temporal
853
+ ProgressUI.step("⏳", "Connecting to Temporal...")
854
+ client = await create_temporal_client(config)
855
+ ProgressUI.success("✓", "Connected to Temporal")
856
+
857
+ # Step 3: Send initial heartbeat
858
+ ProgressUI.step("⏳", "Sending heartbeat...")
859
+ await send_heartbeat(
860
+ config=config,
861
+ kubiya_api_key=kubiya_api_key,
862
+ status="active",
863
+ tasks_processed=0
864
+ )
865
+ ProgressUI.success("✓", "Worker visible in UI")
866
+
867
+ # Start heartbeat loop in background
868
+ heartbeat_task = asyncio.create_task(
869
+ heartbeat_loop(config, kubiya_api_key, heartbeat_interval)
870
+ )
871
+
872
+ # Start health monitoring for agent-runtime if enabled
873
+ health_monitor_task = None
874
+ if agent_runtime_server is not None:
875
+ from control_plane_api.worker.health_monitor import HealthMonitor
876
+ # Note: os is already imported at module level (line 22)
877
+
878
+ check_interval = int(os.environ.get("AGENT_RUNTIME_HEALTH_CHECK_INTERVAL", "30"))
879
+ max_failures = int(os.environ.get("AGENT_RUNTIME_MAX_RESTART_ATTEMPTS", "3"))
880
+ restart_enabled = os.environ.get("AGENT_RUNTIME_AUTO_RESTART", "true").lower() in ("true", "1", "yes")
881
+
882
+ health_monitor = HealthMonitor(
883
+ agent_runtime_server=agent_runtime_server,
884
+ check_interval=check_interval,
885
+ max_failures=max_failures,
886
+ restart_enabled=restart_enabled,
887
+ )
888
+ await health_monitor.start()
889
+ ProgressUI.success("✓", "Health monitoring enabled")
890
+ log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Health monitoring started (interval={check_interval}s)")
891
+
892
+ # Step 4: Create worker
893
+ ProgressUI.step("⏳", "Starting worker...")
894
+
895
+ # Configure workflow sandbox with passthrough modules
896
+ # These modules use non-deterministic operations at import time but are safe
897
+ # because they're only used in activities, not workflow logic
898
+ sandbox_restrictions = SandboxRestrictions.default.with_passthrough_modules(
899
+ "structlog",
900
+ "structlog.dev",
901
+ "structlog.processors",
902
+ "structlog.tracebacks",
903
+ "rich",
904
+ "rich.traceback",
905
+ "control_plane_api.version", # Version checking uses filesystem operations
906
+ )
907
+
908
+ worker = Worker(
909
+ client,
910
+ task_queue=config.environment_name,
911
+ workflows=[
912
+ AgentExecutionWorkflow,
913
+ TeamExecutionWorkflow,
914
+ ScheduledJobWrapperWorkflow, # Wrapper for scheduled jobs
915
+ ],
916
+ activities=[
917
+ execute_agent_llm,
918
+ update_execution_status,
919
+ update_agent_status,
920
+ get_execution_details, # Get execution details from Control Plane
921
+ persist_conversation_history, # Conversation persistence
922
+ submit_runtime_analytics_activity, # Analytics submission
923
+ get_team_agents,
924
+ execute_team_coordination,
925
+ execute_with_runtime, # RuntimeFactory-based execution
926
+ publish_user_message, # Publish user message to stream
927
+ create_job_execution_record, # Job execution record creation
928
+ update_job_execution_status, # Job execution status updates
929
+ ],
930
+ max_concurrent_activities=10,
931
+ max_concurrent_workflow_tasks=10,
932
+ workflow_runner=SandboxedWorkflowRunner(restrictions=sandbox_restrictions),
933
+ )
934
+
935
+ ProgressUI.success("✓", "Worker ready")
936
+
937
+ # Start WebSocket client if enabled
938
+ from control_plane_api.worker.control_plane_client import get_control_plane_client
939
+
940
+ control_plane_client = get_control_plane_client()
941
+ if config.websocket_enabled and should_use_websocket():
942
+ await control_plane_client.start_websocket()
943
+ ProgressUI.step("✓", "WebSocket connected")
944
+ logger.info("websocket_started", worker_id=config.worker_id[:8])
945
+
946
+ if single_execution_mode:
947
+ ProgressUI.header("📡 Listening for one task... (will exit after completion)")
948
+ else:
949
+ ProgressUI.header("📡 Listening for tasks... (Ctrl+C to stop)")
950
+
951
+ logger.info(
952
+ "worker_ready",
953
+ worker_id=config.worker_id[:8],
954
+ single_execution_mode=single_execution_mode,
955
+ )
956
+
957
+ # Run worker (blocks until interrupted)
958
+ try:
959
+ if single_execution_mode:
960
+ # Single execution mode: run worker and monitor for workflow completion
961
+ logger.info("starting_worker_in_single_execution_mode")
962
+
963
+ # Create a task to run the worker
964
+ worker_run_task = asyncio.create_task(worker.run())
965
+
966
+ # Monitor for execution completion via Control Plane API
967
async def monitor_and_shutdown():
    """
    Poll the Control Plane for this queue's execution and shut the worker down
    once that execution has finished.

    Behaviour:
    - Waits 1s, then polls the queue's executions endpoint every 2 seconds for
      at most 30 minutes of accumulated polling time.
    - Tracks the first execution seen in an active or terminal state.
    - Requires 2 consecutive polls reporting a terminal status ("completed",
      "failed", "cancelled") before shutting down; any non-terminal status,
      non-200 response, or request error resets that counter.
    - "waiting_for_input" is NOT treated as terminal, because the LLM may still
      be processing (e.g., tool calls).
    - After confirmation, sleeps 2s so SSE clients can receive final events.

    The loop can end for three reasons, reported accurately in the shutdown
    log and UI message: "completed", "worker_exited" (the worker task finished
    on its own), or "timeout". In every case ``worker.shutdown()`` is awaited.

    Reads ``worker``, ``worker_run_task``, ``queue_id``, ``control_plane_url``
    and ``kubiya_api_key`` from the enclosing scope.
    """

    def short_id(value):
        """Return the first 8 characters of an ID for logging, or None if absent."""
        return value[:8] if value else None

    # Brief wait for worker to start and pick up the execution
    await asyncio.sleep(1)

    max_runtime = 1800  # 30 minutes of accumulated polling time
    check_interval = 2  # seconds between polls - balanced between speed and API load
    grace_seconds = 2  # time for SSE clients to receive final events
    required_consecutive_checks = 2

    terminal_statuses = ("completed", "failed", "cancelled")
    # Every terminal status must also be detectable; otherwise an execution that
    # is first observed as "cancelled" is never tracked and the monitor idles
    # until max_runtime.
    detectable_statuses = ("running", "waiting_for_input") + terminal_statuses

    elapsed = 0
    execution_seen = False
    execution_id = None
    consecutive_completion_checks = 0
    exit_reason = None  # set to "completed" or "worker_exited"; None means timeout

    logger.info("single_execution_monitor_started", queue_id=queue_id)

    # One client for the whole monitoring session so connections are reused
    # instead of being re-established on every poll.
    async with httpx.AsyncClient(timeout=10.0) as http_client:
        while elapsed < max_runtime and exit_reason is None:
            await asyncio.sleep(check_interval)
            elapsed += check_interval

            # Check if worker task completed unexpectedly
            if worker_run_task.done():
                logger.info("single_execution_worker_task_completed", elapsed=elapsed)
                exit_reason = "worker_exited"
                break

            try:
                # List recent executions for this queue
                response = await http_client.get(
                    f"{control_plane_url}/api/v1/worker-queues/{queue_id}/executions",
                    headers={"Authorization": f"Bearer {kubiya_api_key}"},
                    params={"limit": 5, "status": "all"},
                )

                if response.status_code != 200:
                    logger.debug("single_execution_status_check_failed", status_code=response.status_code)
                    # Reset consecutive checks on failed API call to be safe
                    if consecutive_completion_checks > 0:
                        logger.debug("single_execution_resetting_counter_after_failed_check")
                        consecutive_completion_checks = 0
                    continue

                for execution in response.json():
                    # Tolerate an explicit null status as well as a missing key
                    exec_status = (execution.get("status") or "").lower()
                    exec_id = execution.get("id")

                    if not execution_seen and exec_status in detectable_statuses:
                        execution_seen = True
                        execution_id = exec_id
                        logger.info(
                            "single_execution_detected",
                            execution_id=short_id(exec_id),
                            status=exec_status,
                        )

                    # Only the tracked execution influences the shutdown decision
                    if not execution_seen or exec_id != execution_id:
                        continue

                    if exec_status not in terminal_statuses:
                        # Execution is (back) in an active state - reset counter
                        if consecutive_completion_checks > 0:
                            logger.info(
                                "single_execution_still_active",
                                execution_id=short_id(exec_id),
                                status=exec_status,
                                resetting_counter=True,
                            )
                            consecutive_completion_checks = 0
                        continue

                    consecutive_completion_checks += 1
                    logger.info(
                        "single_execution_completion_check",
                        execution_id=short_id(exec_id),
                        status=exec_status,
                        consecutive_checks=consecutive_completion_checks,
                        required_checks=required_consecutive_checks,
                        elapsed=elapsed,
                    )

                    # Only shutdown after consecutive checks confirm completion
                    if consecutive_completion_checks >= required_consecutive_checks:
                        logger.info(
                            "single_execution_completed",
                            execution_id=short_id(exec_id),
                            status=exec_status,
                            elapsed=elapsed,
                        )
                        logger.info(
                            "single_execution_grace_period_starting",
                            execution_id=short_id(exec_id),
                            grace_seconds=grace_seconds,
                        )
                        await asyncio.sleep(grace_seconds)
                        exit_reason = "completed"
                        break

            except Exception as e:
                # Best-effort polling: keep monitoring even if a status check fails
                logger.debug("single_execution_status_check_error", error=str(e))
                # Reset consecutive checks on error to be safe
                if consecutive_completion_checks > 0:
                    logger.debug("single_execution_resetting_counter_after_error")
                    consecutive_completion_checks = 0

    if exit_reason is None:
        # Loop ran out of time without a confirmed completion
        exit_reason = "timeout"
        logger.warning("single_execution_timeout_reached", elapsed=elapsed)

    # Shutdown the worker gracefully, reporting the real reason
    logger.info("single_execution_triggering_shutdown", elapsed_seconds=elapsed, reason=exit_reason)
    if exit_reason == "completed":
        ProgressUI.step("✓", "Task completed - shutting down worker...")
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Task completed, shutting down...")
    elif exit_reason == "timeout":
        ProgressUI.step("⏳", "Timeout reached - shutting down worker...")
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Timeout reached, shutting down...")
    else:
        ProgressUI.step("⏳", "Worker exited - shutting down...")
        log_to_buffer(f"[{time.strftime('%H:%M:%S')}] Worker exited, shutting down...")
    await worker.shutdown()
1089
+
1090
+ # Start monitoring task
1091
+ monitor_task = asyncio.create_task(monitor_and_shutdown())
1092
+
1093
+ try:
1094
+ # Wait for worker to complete
1095
+ await worker_run_task
1096
+ logger.info("single_execution_worker_stopped")
1097
+ finally:
1098
+ # Cancel monitor task if still running
1099
+ if not monitor_task.done():
1100
+ monitor_task.cancel()
1101
+ try:
1102
+ await monitor_task
1103
+ except asyncio.CancelledError:
1104
+ pass
1105
+ else:
1106
+ # Normal mode - run indefinitely
1107
+ await worker.run()
1108
+ finally:
1109
+ # Stop WebSocket client
1110
+ await control_plane_client.stop_websocket()
1111
+
1112
+ # Cancel heartbeat task when worker stops
1113
+ heartbeat_task.cancel()
1114
+ try:
1115
+ await heartbeat_task
1116
+ except asyncio.CancelledError:
1117
+ pass
1118
+
1119
+ # Notify control plane of graceful shutdown
1120
+ print()
1121
+ ProgressUI.step("⏳", "Shutting down gracefully...")
1122
+
1123
+ # Delete ephemeral queue if we're the owner (single execution mode)
1124
+ if config.queue_ephemeral and config.queue_single_execution and config.queue_id:
1125
+ try:
1126
+ await delete_ephemeral_queue(
1127
+ config=config,
1128
+ kubiya_api_key=kubiya_api_key,
1129
+ queue_id=config.queue_id
1130
+ )
1131
+ logger.info("ephemeral_queue_cleaned_up", queue_id=config.queue_id)
1132
+ except Exception as e:
1133
+ logger.warning(
1134
+ "ephemeral_queue_cleanup_failed",
1135
+ queue_id=config.queue_id,
1136
+ error=str(e)
1137
+ )
1138
+ # Continue shutdown even if delete fails (TTL will handle it)
1139
+
1140
+ await send_disconnect(
1141
+ config=config,
1142
+ kubiya_api_key=kubiya_api_key,
1143
+ reason="shutdown",
1144
+ exit_code=0
1145
+ )
1146
+ ProgressUI.success("✓", "Worker stopped")
1147
+ print()
1148
+
1149
+ except KeyboardInterrupt:
1150
+ print()
1151
+ ProgressUI.step("⏳", "Shutting down...")
1152
+
1153
+ # Stop health monitor if running
1154
+ if health_monitor is not None:
1155
+ try:
1156
+ await health_monitor.stop()
1157
+ except Exception as e:
1158
+ logger.warning("health_monitor_stop_failed", error=str(e))
1159
+
1160
+ # Stop agent-runtime server if running
1161
+ if agent_runtime_server is not None:
1162
+ try:
1163
+ ProgressUI.step("⏳", "Stopping agent-runtime server...")
1164
+ agent_runtime_server.stop(timeout=10)
1165
+ ProgressUI.success("✓", "Agent runtime stopped")
1166
+ except Exception as e:
1167
+ logger.warning("agent_runtime_stop_failed_on_interrupt", error=str(e))
1168
+
1169
+ # Stop WebSocket client
1170
+ from control_plane_api.worker.control_plane_client import get_control_plane_client
1171
+ try:
1172
+ control_plane_client = get_control_plane_client()
1173
+ await control_plane_client.stop_websocket()
1174
+ except:
1175
+ pass
1176
+
1177
+ # Notify control plane of keyboard interrupt (only if config was successfully obtained)
1178
+ try:
1179
+ if 'config' in locals():
1180
+ # Delete ephemeral queue if we're the owner
1181
+ if config.queue_ephemeral and config.queue_single_execution and config.queue_id:
1182
+ try:
1183
+ await delete_ephemeral_queue(
1184
+ config=config,
1185
+ kubiya_api_key=kubiya_api_key,
1186
+ queue_id=config.queue_id
1187
+ )
1188
+ except Exception as e:
1189
+ logger.warning(
1190
+ "ephemeral_queue_cleanup_on_interrupt_failed",
1191
+ error=str(e)
1192
+ )
1193
+
1194
+ await send_disconnect(
1195
+ config=config,
1196
+ kubiya_api_key=kubiya_api_key,
1197
+ reason="shutdown",
1198
+ exit_code=0
1199
+ )
1200
+ ProgressUI.success("✓", "Worker stopped")
1201
+ else:
1202
+ logger.info("shutdown_before_registration_completed")
1203
+ except Exception as e:
1204
+ logger.warning("disconnect_on_interrupt_failed", error=str(e))
1205
+ except Exception as e:
1206
+ import traceback
1207
+ logger.error("temporal_worker_error", error=str(e), traceback=traceback.format_exc())
1208
+
1209
+ # Stop health monitor if running
1210
+ if health_monitor is not None:
1211
+ try:
1212
+ await health_monitor.stop()
1213
+ except Exception as stop_error:
1214
+ logger.warning("health_monitor_stop_failed_on_error", error=str(stop_error))
1215
+
1216
+ # Stop agent-runtime server if running
1217
+ if agent_runtime_server is not None:
1218
+ try:
1219
+ logger.info("stopping_agent_runtime_on_error")
1220
+ agent_runtime_server.stop(timeout=10)
1221
+ logger.info("agent_runtime_stopped_on_error")
1222
+ except Exception as stop_error:
1223
+ logger.warning("agent_runtime_stop_failed_on_error", error=str(stop_error))
1224
+
1225
+ # Notify control plane of error (only if config was successfully obtained)
1226
+ try:
1227
+ if 'config' in locals():
1228
+ await send_disconnect(
1229
+ config=config,
1230
+ kubiya_api_key=kubiya_api_key,
1231
+ reason="error",
1232
+ exit_code=1,
1233
+ error_message=str(e)[:2000] + (" [truncated]" if len(str(e)) > 2000 else "")
1234
+ )
1235
+ else:
1236
+ logger.warning("disconnect_skipped_no_config", error="Worker failed before registration completed")
1237
+ except Exception as disconnect_error:
1238
+ logger.warning("disconnect_on_error_failed", error=str(disconnect_error))
1239
+ raise
1240
+
1241
+
1242
def main():
    """
    Command-line entry point for the worker.

    Parses the optional CLI flags, copies each supplied flag into its
    environment variable when that variable is not already set (an existing
    environment value always wins over the flag), and then runs
    ``run_worker()`` to completion. A keyboard interrupt is logged as a normal
    stop; any other exception is logged and the process exits with status 1.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Kubiya Agent Worker - Temporal worker for agent execution"
    )
    cli.add_argument(
        "--queue-id",
        type=str,
        help="Worker queue ID (can also use QUEUE_ID env var)",
    )
    cli.add_argument(
        "--api-key",
        type=str,
        help="Kubiya API key (can also use KUBIYA_API_KEY env var)",
    )
    cli.add_argument(
        "--control-plane-url",
        type=str,
        help="Control plane URL (can also use CONTROL_PLANE_URL env var)",
    )
    cli.add_argument(
        "--heartbeat-interval",
        type=int,
        default=60,
        help="Heartbeat interval in seconds (default: 60, lightweight mode)",
    )
    parsed = cli.parse_args()

    # (environment variable, CLI value) pairs, applied in this order.
    # A flag only fills in a variable that is unset or empty.
    env_from_cli = (
        ("QUEUE_ID", parsed.queue_id),
        ("KUBIYA_API_KEY", parsed.api_key),
        ("CONTROL_PLANE_URL", parsed.control_plane_url),
        ("HEARTBEAT_INTERVAL", parsed.heartbeat_interval),
    )
    for env_name, cli_value in env_from_cli:
        if not cli_value:
            continue
        if os.environ.get(env_name):
            continue
        os.environ[env_name] = str(cli_value)

    logger.info("worker_starting")

    try:
        asyncio.run(run_worker())
    except KeyboardInterrupt:
        logger.info("worker_stopped")
    except Exception as exc:
        logger.error("worker_failed", error=str(exc))
        sys.exit(1)
1294
+
1295
+
1296
# Invoke the CLI entry point only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()