kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (479) hide show
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,1491 @@
1
+ """
2
+ Control Plane Client - Clean API for worker to communicate with Control Plane.
3
+
4
+ This centralizes all HTTP and WebSocket communication between worker and Control Plane,
5
+ providing a clean interface for:
6
+ - Event streaming (real-time UI updates via WebSocket or HTTP fallback)
7
+ - Session persistence (history storage)
8
+ - Metadata caching (execution types)
9
+ - Skill resolution
10
+ - Bi-directional control messages
11
+
12
+ Usage:
13
+ from control_plane_client import get_control_plane_client
14
+
15
+ client = get_control_plane_client()
16
+ await client.start_websocket() # If WebSocket enabled
17
+ await client.publish_event_async(execution_id, "message_chunk", {...})
18
+ client.persist_session(execution_id, session_id, user_id, messages)
19
+ """
20
+
21
+ import os
22
+ import httpx
23
+ import asyncio
24
+ import threading
25
+ from datetime import datetime, timezone
26
+ from typing import Optional, Dict, List, Any
27
+ import structlog
28
+
29
+ logger = structlog.get_logger()
30
+
31
+
32
+ class ControlPlaneClient:
33
+ """Client for communicating with the Control Plane API from workers."""
34
+
35
def __init__(
    self,
    base_url: str,
    api_key: str,
    websocket_enabled: bool = False,
    websocket_url: Optional[str] = None,
    worker_id: Optional[str] = None,
    event_bus_config: Optional[Dict[str, Any]] = None
):
    """
    Set up the HTTP, event-bus and WebSocket transports for a worker.

    Args:
        base_url: Control Plane URL (e.g., http://localhost:8000); a
            trailing slash is stripped
        api_key: Kubiya API key, sent as a ``UserKey`` Authorization header
        websocket_enabled: Whether a WebSocket client should be created
        websocket_url: WebSocket URL (needed for the WebSocket client)
        worker_id: Worker ID (needed for the WebSocket client)
        event_bus_config: Optional dict with any of the keys ``http``,
            ``redis``, ``websocket`` and ``nats``; every truthy entry is
            expanded as keyword arguments into that provider's config class
    """
    self.base_url = base_url.rstrip("/")
    self.api_key = api_key
    self.headers = {"Authorization": f"UserKey {api_key}"}
    self.worker_id = worker_id

    # The event bus manager stays None unless a config was supplied AND
    # every step of the setup below succeeds.
    self.event_bus_manager = None
    if event_bus_config:
        try:
            from control_plane_api.app.lib.event_bus.manager import (
                EventBusManager,
                EventBusManagerConfig,
            )
            from control_plane_api.app.lib.event_bus.providers.http_provider import HTTPConfig
            from control_plane_api.app.lib.event_bus.providers.redis_provider import RedisConfig
            from control_plane_api.app.lib.event_bus.providers.websocket_provider import WebSocketConfig

            # Turn the raw per-provider dicts into typed config objects.
            provider_configs = {}
            for provider_key, config_cls in (
                ("http", HTTPConfig),
                ("redis", RedisConfig),
                ("websocket", WebSocketConfig),
            ):
                raw_settings = event_bus_config.get(provider_key)
                if raw_settings:
                    provider_configs[provider_key] = config_cls(**raw_settings)

            # NATS is imported lazily so a missing provider only costs a warning.
            nats_settings = event_bus_config.get("nats")
            if nats_settings:
                try:
                    from control_plane_api.app.lib.event_bus.providers.nats_provider import NATSConfig
                    provider_configs["nats"] = NATSConfig(**nats_settings)
                except ImportError:
                    logger.warning("nats_provider_not_installed", message="Install with: pip install kubiya-control-plane-api[nats]")

            self.event_bus_manager = EventBusManager(
                EventBusManagerConfig(**provider_configs)
            )
            logger.info(
                "worker_event_bus_initialized",
                worker_id=worker_id[:8] if worker_id else "unknown",
                providers=list(provider_configs.keys())
            )
        except ImportError as e:
            logger.warning(
                "event_bus_dependencies_missing",
                error=str(e),
                message="Install event bus dependencies with: pip install kubiya-control-plane-api[event-bus]"
            )
        except Exception as e:
            logger.error(
                "worker_event_bus_init_failed",
                error=str(e),
                worker_id=worker_id[:8] if worker_id else "unknown"
            )

    # Per-thread slot holding the event loop that the synchronous
    # publish_event() path reuses instead of creating one per call.
    self._thread_local = threading.local()

    # Blocking client, used by the synchronous methods.
    self._client = httpx.Client(
        timeout=httpx.Timeout(30.0, connect=5.0, read=30.0, write=10.0),
        limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
    )

    # Async client for streaming/real-time calls; longer read timeout.
    self._async_client = httpx.AsyncClient(
        timeout=httpx.Timeout(60.0, connect=5.0, read=60.0, write=10.0),
        limits=httpx.Limits(max_connections=20, max_keepalive_connections=10),
    )

    # Persistent WebSocket connection; only created when it is enabled,
    # fully configured, and the environment check allows it.
    self.websocket_client: Optional[Any] = None
    if websocket_enabled and websocket_url and worker_id:
        from control_plane_api.worker.utils.environment import should_use_websocket

        if not should_use_websocket():
            logger.info("websocket_skipped_serverless_environment")
        else:
            from control_plane_api.worker.websocket_client import WorkerWebSocketClient

            self.websocket_client = WorkerWebSocketClient(
                worker_id=worker_id,
                websocket_url=websocket_url,
                api_key=api_key,
                on_control_message=self._handle_control_message
            )
            logger.info("websocket_client_initialized", worker_id=worker_id[:8])

    # execution_id -> Event set once SSE streaming for that execution is
    # done, so a single-execution worker can wait before shutting down.
    self._sse_stream_completed: Dict[str, asyncio.Event] = {}
    self._sse_completion_lock = asyncio.Lock()
153
+
154
def __del__(self):
    """
    Best-effort close of the synchronous HTTP client on garbage collection.

    Errors are suppressed because ``_client`` may not exist if ``__init__``
    raised before assigning it. Only ``Exception`` is caught, so
    ``KeyboardInterrupt``/``SystemExit`` are not silently swallowed.
    """
    try:
        self._client.close()
    except Exception:
        pass
    # Async client cleanup happens via context manager or explicit close
161
+
162
def _get_thread_event_loop(self) -> asyncio.AbstractEventLoop:
    """
    Return this thread's cached event loop, creating it when needed.

    A new loop is created (and installed as the thread's current loop)
    only when no loop is cached for the calling thread, or the cached one
    has been closed. Reusing one loop per thread avoids creating a fresh
    loop for every publish_event() call.

    Returns:
        The thread-local event loop
    """
    cached_loop = getattr(self._thread_local, 'loop', None)
    if cached_loop is not None and not cached_loop.is_closed():
        return cached_loop

    fresh_loop = asyncio.new_event_loop()
    self._thread_local.loop = fresh_loop
    asyncio.set_event_loop(fresh_loop)

    current_thread = threading.current_thread()
    logger.debug(
        "created_thread_local_event_loop",
        thread_id=current_thread.ident,
        thread_name=current_thread.name,
    )
    return fresh_loop
182
+
183
def close_thread_event_loop(self):
    """
    Tear down the calling thread's cached event loop, if there is one.

    Any tasks still attached to the loop are cancelled and drained before
    the loop is closed. Failures during teardown are logged as warnings
    and never raised. The cached slot is reset to None afterwards so the
    next _get_thread_event_loop() call builds a fresh loop.
    """
    loop = getattr(self._thread_local, 'loop', None)
    if loop is None:
        return

    if not loop.is_closed():
        try:
            outstanding = asyncio.all_tasks(loop)
            for task in outstanding:
                task.cancel()

            # Give the cancelled tasks a chance to unwind before closing.
            if outstanding:
                loop.run_until_complete(
                    asyncio.gather(*outstanding, return_exceptions=True)
                )

            loop.close()
            logger.debug(
                "closed_thread_local_event_loop",
                thread_id=threading.current_thread().ident,
                pending_tasks_cancelled=len(outstanding),
            )
        except Exception as e:
            logger.warning(
                "thread_event_loop_close_error",
                error=str(e),
                thread_id=threading.current_thread().ident,
            )

    self._thread_local.loop = None
218
+
219
+ # =========================================================================
220
+ # SSE Stream Completion Tracking (for single execution mode)
221
+ # =========================================================================
222
+
223
def register_execution_for_sse_tracking(self, execution_id: str):
    """
    Start tracking SSE completion for an execution.

    Creates an unset completion event for the execution unless one is
    already tracked, so that wait_for_sse_stream_completion() can later
    block on it.

    Args:
        execution_id: The execution ID to track
    """
    if execution_id in self._sse_stream_completed:
        return

    self._sse_stream_completed[execution_id] = asyncio.Event()
    logger.debug(
        "sse_tracking_registered",
        execution_id=execution_id[:8] if execution_id else None
    )
239
+
240
def mark_sse_stream_completed(self, execution_id: str):
    """
    Flag the SSE stream of an execution as finished.

    Sets the execution's completion event. If the execution was never
    registered, an already-set event is stored for it so that a later
    wait returns immediately.

    Args:
        execution_id: The execution ID whose SSE stream completed
    """
    tracked_event = self._sse_stream_completed.get(execution_id)
    if tracked_event is not None:
        tracked_event.set()
        logger.info(
            "sse_stream_marked_completed",
            execution_id=execution_id[:8] if execution_id else None
        )
        return

    # Not pre-registered: register and complete in one step.
    late_event = asyncio.Event()
    self._sse_stream_completed[execution_id] = late_event
    late_event.set()
    logger.debug(
        "sse_stream_marked_completed_auto_registered",
        execution_id=execution_id[:8] if execution_id else None
    )
263
+
264
async def wait_for_sse_stream_completion(
    self,
    execution_id: str,
    timeout: float = 30.0
) -> bool:
    """
    Block until the execution's SSE stream is marked complete, or time out.

    An execution that is not tracked yet is registered on the fly, so the
    call waits for a later mark_sse_stream_completed().

    Args:
        execution_id: The execution ID to wait for
        timeout: Maximum seconds to wait (default: 30s)

    Returns:
        True if SSE stream completed, False if timeout reached
    """
    if execution_id not in self._sse_stream_completed:
        self._sse_stream_completed[execution_id] = asyncio.Event()
    completion = self._sse_stream_completed[execution_id]

    try:
        await asyncio.wait_for(completion.wait(), timeout=timeout)
    except asyncio.TimeoutError:
        logger.warning(
            "sse_stream_wait_timeout",
            execution_id=execution_id[:8] if execution_id else None,
            timeout_seconds=timeout
        )
        return False

    logger.info(
        "sse_stream_wait_completed",
        execution_id=execution_id[:8] if execution_id else None
    )
    return True
303
+
304
def cleanup_sse_tracking(self, execution_id: str):
    """
    Forget the SSE completion event of an execution.

    Does nothing when the execution is not tracked.

    Args:
        execution_id: The execution ID to clean up
    """
    if execution_id not in self._sse_stream_completed:
        return

    del self._sse_stream_completed[execution_id]
    logger.debug(
        "sse_tracking_cleaned_up",
        execution_id=execution_id[:8] if execution_id else None
    )
320
+
321
async def initialize_event_bus(self):
    """
    Initialize the event bus manager and probe each provider's health.

    Does nothing when no manager is configured or it is already
    initialized. Each provider's ``health_check()`` is awaited; a failing
    or raising check marks that provider unhealthy. The outcome is only
    logged. If initialization itself raises, the manager is dropped
    (set to None) so publishing falls back to the other transports.
    """
    manager = self.event_bus_manager
    if not manager or manager.is_initialized():
        return

    try:
        await manager.initialize()

        # provider name -> bool health flag
        provider_health = {}
        for provider_name, provider in manager.providers.items():
            try:
                report = await provider.health_check()
                provider_health[provider_name] = report.get("healthy", False)
            except Exception as e:
                logger.warning(
                    "provider_health_check_failed",
                    provider=provider_name,
                    error=str(e),
                    worker_id=self.worker_id[:8] if self.worker_id else "unknown"
                )
                provider_health[provider_name] = False

        healthy_providers = [name for name, ok in provider_health.items() if ok]
        unhealthy_providers = [name for name, ok in provider_health.items() if not ok]

        if not healthy_providers:
            logger.warning(
                "all_event_bus_providers_unhealthy",
                worker_id=self.worker_id[:8] if self.worker_id else "unknown",
                providers=list(provider_health.keys()),
                message="Will fallback to HTTP endpoint for event streaming"
            )
        else:
            logger.info(
                "worker_event_bus_ready",
                worker_id=self.worker_id[:8] if self.worker_id else "unknown",
                providers=manager.get_provider_names(),
                healthy_providers=healthy_providers,
                unhealthy_providers=unhealthy_providers or None
            )

            # Redis was configured but is down: warn about the HTTP fallback.
            if "redis" in unhealthy_providers:
                logger.warning(
                    "redis_connection_failed_will_fallback",
                    worker_id=self.worker_id[:8] if self.worker_id else "unknown",
                    message="Redis unavailable, will fallback to HTTP endpoint for event streaming"
                )

    except Exception as e:
        logger.error(
            "worker_event_bus_init_failed",
            error=str(e),
            worker_id=self.worker_id[:8] if self.worker_id else "unknown"
        )
        # Don't fail initialization - just won't use event bus
        self.event_bus_manager = None
378
+
379
async def aclose(self):
    """
    Shut down the event bus (if any) and close the async HTTP client.

    Cleanup is best-effort: an ordinary error in either step is logged as
    a warning and does not prevent the other step from running, so a
    failing event-bus shutdown can no longer leave the HTTP client open.
    Only ``Exception`` is caught, which means task cancellation
    (``asyncio.CancelledError``) propagates to the caller instead of being
    silently swallowed.
    """
    # Shutdown event bus first
    if self.event_bus_manager:
        try:
            await self.event_bus_manager.shutdown()
            logger.info("worker_event_bus_shutdown", worker_id=self.worker_id[:8] if self.worker_id else "unknown")
        except Exception as e:
            logger.warning(
                "worker_event_bus_shutdown_error",
                error=str(e),
                worker_id=self.worker_id[:8] if self.worker_id else "unknown"
            )

    # Then close async client, regardless of how the shutdown above went
    try:
        await self._async_client.aclose()
    except Exception as e:
        logger.warning("async_client_close_error", error=str(e))
391
+
392
async def start_websocket(self):
    """Start the WebSocket client; a no-op when none was created."""
    ws_client = self.websocket_client
    if not ws_client:
        return

    await ws_client.start()
    logger.info("websocket_started")
397
+
398
async def stop_websocket(self):
    """Stop the WebSocket client; a no-op when none was created."""
    ws_client = self.websocket_client
    if not ws_client:
        return

    await ws_client.stop()
    logger.info("websocket_stopped")
403
+
404
def _get_running_loop_safe(self) -> Optional[asyncio.AbstractEventLoop]:
    """
    Look up the event loop running in the current thread without raising.

    Returns:
        The running event loop, or None if not in an async context
    """
    try:
        active_loop = asyncio.get_running_loop()
    except RuntimeError:
        # asyncio signals "no running loop in this thread" with RuntimeError
        return None
    return active_loop
416
+
417
def publish_event(
    self,
    execution_id: str,
    event_type: str,
    data: Dict[str, Any],
) -> bool:
    """
    Publish a streaming event for real-time UI updates (SYNC version).

    NOTE: This is the BLOCKING version. For real-time streaming,
    use publish_event_async() instead to avoid blocking the event loop.

    The method detects whether it is called from inside a running event
    loop. If so, event-bus and WebSocket sends are scheduled as tasks on
    that loop and True is returned as soon as the task is scheduled
    (delivery failures are only logged by a done-callback). Otherwise the
    send is driven to completion on the thread-local event loop.

    Strategy (in order):
    1. Try Event Bus (multi-provider) if configured and initialized
    2. Try WebSocket if connected
    3. Fallback to HTTP endpoint (always a blocking request)

    Args:
        execution_id: Execution ID
        event_type: Event type (message_chunk, tool_started, etc.)
        data: Event payload

    Returns:
        True if the event was published (or scheduled), False otherwise
    """
    # NOTE: ``asyncio`` must NOT be imported inside this function. A local
    # import would turn the name into a function-local variable, and the
    # nested done-callback below would then hit an unbound closure
    # variable whenever the function returns before reaching the import.
    import json
    import re

    # Check if we're in an async context
    running_loop = self._get_running_loop_safe()
    in_async_context = running_loop is not None

    def make_failure_logger(log_event):
        """Return a done-callback that logs a failed fire-and-forget task."""
        def handle_task_error(t):
            try:
                exc = t.exception()
                if exc:
                    logger.warning(
                        log_event,
                        error=str(exc),
                        execution_id=execution_id[:8],
                        event_type=event_type,
                    )
            except asyncio.CancelledError:
                # t.exception() raises this for a cancelled task
                pass
            except Exception:
                pass
        return handle_task_error

    # Strategy 1: Try Event Bus first
    if self.event_bus_manager and self.event_bus_manager.is_initialized():
        metadata = {}
        if self.worker_id:
            metadata["worker_id"] = self.worker_id

        if in_async_context:
            # Already inside a running loop: schedule and return.
            try:
                coro = self.event_bus_manager.publish_event(
                    execution_id=execution_id,
                    event_type=event_type,
                    data=data,
                    metadata=metadata
                )
                task = running_loop.create_task(coro)
                task.add_done_callback(
                    make_failure_logger("background_event_bus_task_error")
                )
                logger.debug(
                    "worker_event_scheduled_via_event_bus_async",
                    execution_id=execution_id[:8],
                    event_type=event_type,
                    note="Task scheduled in running event loop"
                )
                return True
            except Exception as e:
                logger.warning(
                    "failed_to_schedule_event_bus_task",
                    error=str(e),
                    execution_id=execution_id[:8],
                    event_type=event_type,
                )
                # Fall through to fallback strategies
        else:
            # Not in async context - use blocking execution
            try:
                loop = self._get_thread_event_loop()
                coro = self.event_bus_manager.publish_event(
                    execution_id=execution_id,
                    event_type=event_type,
                    data=data,
                    metadata=metadata
                )

                try:
                    results = loop.run_until_complete(coro)
                except RuntimeError as loop_err:
                    # Close the never-awaited coroutine in either case
                    coro.close()
                    if "Cannot run the event loop while another loop is running" not in str(loop_err):
                        raise
                    # Nested event loop: skip the event bus and fall back
                    logger.warning(
                        "nested_event_loop_detected",
                        execution_id=execution_id[:8],
                        event_type=event_type,
                        note="Skipping event bus publish due to nested loop"
                    )
                    results = None

                if results is not None:
                    # Success if any provider succeeded
                    success_count = sum(1 for success in results.values() if success)
                    if success_count > 0:
                        logger.debug(
                            "worker_event_published_via_event_bus_sync",
                            execution_id=execution_id[:8],
                            event_type=event_type,
                            success_count=success_count,
                            total_providers=len(results)
                        )
                        return True
                    else:
                        logger.warning(
                            "worker_event_bus_all_providers_failed_fallback_sync",
                            execution_id=execution_id[:8],
                            event_type=event_type,
                            results=results
                        )
                        # Fall through to WebSocket/HTTP fallback
            except Exception as e:
                logger.error(
                    "worker_event_bus_publish_error_sync",
                    error=str(e),
                    execution_id=execution_id[:8],
                    event_type=event_type
                )
                # Fall through to WebSocket/HTTP fallback

    # Strategy 2: Try WebSocket if available and connected
    if self.websocket_client and self.websocket_client.is_connected():
        if in_async_context:
            # Schedule WebSocket send as a task
            try:
                coro = self.websocket_client.send_event(execution_id, event_type, data)
                task = running_loop.create_task(coro)
                # Retrieve the task's exception so failures are logged
                # instead of surfacing as "exception was never retrieved".
                task.add_done_callback(
                    make_failure_logger("background_websocket_task_error")
                )
                logger.debug(
                    "worker_event_scheduled_via_websocket_async",
                    execution_id=execution_id[:8],
                    event_type=event_type
                )
                return True
            except Exception as e:
                logger.warning(
                    "failed_to_schedule_websocket_task",
                    error=str(e),
                    execution_id=execution_id[:8],
                )
                # Fall through to HTTP fallback
        else:
            # WebSocket send_event is async, need to run it in event loop
            try:
                loop = self._get_thread_event_loop()
                coro = self.websocket_client.send_event(execution_id, event_type, data)

                try:
                    success = loop.run_until_complete(coro)
                except RuntimeError as loop_err:
                    coro.close()
                    if "Cannot run the event loop while another loop is running" not in str(loop_err):
                        raise
                    # None marks "could not even try", as opposed to False
                    success = None

                if success:
                    logger.debug(
                        "worker_event_published_via_websocket_sync",
                        execution_id=execution_id[:8],
                        event_type=event_type
                    )
                    return True

                # Queue full - fallback to HTTP immediately
                if success is not None:
                    logger.warning("websocket_queue_full_fallback_http_sync", execution_id=execution_id[:8])
            except Exception as e:
                logger.error(
                    "websocket_publish_error_sync",
                    error=str(e),
                    execution_id=execution_id[:8]
                )
                # Fall through to HTTP fallback

    # Strategy 3: Fallback to HTTP
    logger.debug(
        "worker_event_publishing_via_http_fallback_sync",
        execution_id=execution_id[:8],
        event_type=event_type
    )

    try:
        def sanitize_value(val):
            """Replace non-JSON-serializable objects with placeholder strings."""
            try:
                # Fast path for JSON primitives
                if val is None or isinstance(val, (bool, int, float, str)):
                    return val

                # Check type name to avoid event loop issues
                type_name = type(val).__name__
                type_module = str(type(val).__module__)

                # Remove asyncio objects by checking module and type name
                if 'asyncio' in type_module or any(x in type_name for x in ['Event', 'Lock', 'Queue', 'Semaphore', 'Condition']):
                    return f"<{type_name}>"
                elif isinstance(val, dict):
                    return {k: sanitize_value(v) for k, v in val.items()}
                elif isinstance(val, (list, tuple)):
                    return [sanitize_value(v) for v in val]
                else:
                    try:
                        json.dumps(val)
                        return val
                    except (TypeError, ValueError, RuntimeError):
                        # RuntimeError catches "bound to different event loop" errors
                        return f"<non-serializable: {type_name}>"
            except Exception:
                # Catch-all for ANY errors during sanitization itself
                # Do NOT attempt to inspect the value here - it may cause event loop errors
                return "<sanitization-error>"

        sanitized_data = sanitize_value(data)

        url = f"{self.base_url}/api/v1/executions/{execution_id}/events"
        payload = {
            "event_type": event_type,
            "data": sanitized_data,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        # Double-check: Try to serialize the payload before sending
        try:
            json.dumps(payload)
        except Exception as serialize_err:
            logger.error(
                "payload_serialization_test_failed",
                execution_id=execution_id[:8],
                event_type=event_type,
                error=str(serialize_err)[:200],
            )
            # If we can't serialize it, don't even try to send
            return False

        response = self._client.post(url, json=payload, headers=self.headers)

        if response.status_code not in (200, 202):
            logger.warning(
                "event_publish_failed",
                status=response.status_code,
                execution_id=execution_id[:8],
                event_type=event_type,
            )
            return False

        return True

    except Exception as e:
        # Sanitize error message to avoid serialization issues
        error_str = str(e) or "(empty)"
        error_type = type(e).__name__
        # Remove asyncio object references that cause serialization errors
        error_str = re.sub(r'<asyncio\.\w+\.\w+ object at 0x[0-9a-f]+ \[[\w\s]+\]>', '[asyncio-object]', error_str)

        logger.warning(
            "event_publish_error",
            error=error_str[:500],  # Truncate to prevent huge error messages
            error_type=error_type,
            execution_id=execution_id[:8],
            event_type=event_type,
        )
        return False
707
+
708
async def publish_event_async(
    self,
    execution_id: str,
    event_type: str,
    data: Dict[str, Any],
) -> bool:
    """
    Publish a streaming event for real-time UI updates (ASYNC version).

    Strategy (in order):
    1. Try Event Bus (multi-provider) if configured and initialized
    2. Try WebSocket if connected
    3. Fallback to HTTP endpoint

    A failure in one strategy (a raised exception, or no provider
    reporting success) is logged and the next strategy is tried. This
    includes exceptions raised by the WebSocket send, matching the
    behaviour of the synchronous publish_event().

    Args:
        execution_id: Execution ID
        event_type: Event type (message_chunk, tool_started, etc.)
        data: Event payload

    Returns:
        True if at least one provider succeeded, False otherwise
    """
    # Strategy 1: Try Event Bus first
    if self.event_bus_manager and self.event_bus_manager.is_initialized():
        try:
            metadata = {}
            if self.worker_id:
                metadata["worker_id"] = self.worker_id

            results = await self.event_bus_manager.publish_event(
                execution_id=execution_id,
                event_type=event_type,
                data=data,
                metadata=metadata
            )

            # Success if any provider succeeded
            success_count = sum(1 for success in results.values() if success)
            if success_count > 0:
                logger.debug(
                    "worker_event_published_via_event_bus",
                    execution_id=execution_id[:8],
                    event_type=event_type,
                    success_count=success_count,
                    total_providers=len(results)
                )
                return True
            else:
                logger.warning(
                    "worker_event_bus_all_providers_failed_fallback",
                    execution_id=execution_id[:8],
                    event_type=event_type,
                    results=results
                )
                # Fall through to WebSocket/HTTP fallback
        except Exception as e:
            logger.error(
                "worker_event_bus_publish_error",
                error=str(e),
                execution_id=execution_id[:8],
                event_type=event_type
            )
            # Fall through to WebSocket/HTTP fallback

    # Strategy 2: Try WebSocket if available and connected
    if self.websocket_client and self.websocket_client.is_connected():
        try:
            success = await self.websocket_client.send_event(execution_id, event_type, data)
        except Exception as e:
            # A broken WebSocket must not prevent the HTTP fallback
            logger.error(
                "websocket_publish_error",
                error=str(e),
                execution_id=execution_id[:8]
            )
        else:
            if success:
                logger.debug(
                    "worker_event_published_via_websocket",
                    execution_id=execution_id[:8],
                    event_type=event_type
                )
                return True

            # Queue full - fallback to HTTP immediately
            logger.warning("websocket_queue_full_fallback_http", execution_id=execution_id[:8])

    # Strategy 3: Fallback to HTTP
    logger.debug(
        "worker_event_publishing_via_http_fallback",
        execution_id=execution_id[:8],
        event_type=event_type
    )
    return await self._publish_event_http(execution_id, event_type, data)
797
+
798
async def _publish_event_http(
    self,
    execution_id: str,
    event_type: str,
    data: Dict[str, Any],
) -> bool:
    """
    POST one event to the Control Plane events endpoint (HTTP fallback).

    The payload data is first sanitized so that values which cannot be
    JSON-encoded are replaced by placeholder strings. Any error, including
    a payload that still fails to serialize or a status code other than
    200/202, is logged and reported as False; nothing is raised.

    Args:
        execution_id: Execution ID
        event_type: Event type
        data: Event payload

    Returns:
        True if successful, False otherwise
    """
    try:
        import json

        json_primitives = (bool, int, float, str)
        # Type-name fragments treated as synchronization primitives
        primitive_markers = ('Event', 'Lock', 'Queue', 'Semaphore', 'Condition')

        def sanitize_value(val):
            """Return a JSON-safe copy of val, using placeholders where needed."""
            try:
                if val is None or isinstance(val, json_primitives):
                    return val

                # Decide by type name/module only; the value itself is
                # not inspected for these objects.
                value_type = type(val)
                type_name = value_type.__name__
                type_module = str(value_type.__module__)

                if 'asyncio' in type_module:
                    return f"<{type_name}>"
                if any(marker in type_name for marker in primitive_markers):
                    return f"<{type_name}>"
                if isinstance(val, dict):
                    return {k: sanitize_value(v) for k, v in val.items()}
                if isinstance(val, (list, tuple)):
                    return [sanitize_value(v) for v in val]

                try:
                    json.dumps(val)
                except (TypeError, ValueError, RuntimeError):
                    # RuntimeError catches "bound to different event loop" errors
                    return f"<non-serializable: {type_name}>"
                return val
            except Exception:
                # Anything unexpected during sanitization: give up on this
                # value without touching it again.
                return "<sanitization-error>"

        url = f"{self.base_url}/api/v1/executions/{execution_id}/events"
        payload = {
            "event_type": event_type,
            "data": sanitize_value(data),
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        # Dry-run the serialization; an unserializable payload is not sent.
        try:
            json.dumps(payload)
        except Exception as serialize_err:
            logger.error(
                "payload_serialization_test_failed",
                execution_id=execution_id[:8],
                event_type=event_type,
                error=str(serialize_err)[:200],
            )
            return False

        response = await self._async_client.post(url, json=payload, headers=self.headers)

        if response.status_code in (200, 202):
            return True

        logger.warning(
            "event_publish_failed",
            status=response.status_code,
            execution_id=execution_id[:8],
            event_type=event_type,
        )
        return False

    except Exception as e:
        import re

        error_type = type(e).__name__
        # Strip asyncio object reprs from the message before logging it
        error_str = re.sub(
            r'<asyncio\.\w+\.\w+ object at 0x[0-9a-f]+ \[[\w\s]+\]>',
            '[asyncio-object]',
            str(e) or "(empty)",
        )

        logger.warning(
            "event_publish_error",
            error=error_str[:500],  # Truncate to prevent huge error messages
            error_type=error_type,
            execution_id=execution_id[:8],
            event_type=event_type,
        )
        return False
901
+
902
def cache_metadata(
    self,
    execution_id: str,
    execution_type: str,
) -> bool:
    """
    Publish the execution type as a ``metadata`` event.

    This is a thin wrapper around publish_event(); the event carries
    ``{"execution_type": execution_type}`` as its data.

    Args:
        execution_id: Execution ID
        execution_type: "AGENT" or "TEAM"

    Returns:
        True if successful, False otherwise
    """
    metadata_payload = {"execution_type": execution_type}
    return self.publish_event(
        execution_id=execution_id,
        event_type="metadata",
        data=metadata_payload,
    )
924
+
925
def get_session(
    self,
    execution_id: str,
    session_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """
    Retrieve session history from the Control Plane.

    Issues a blocking GET to ``/api/v1/executions/{execution_id}/session``.
    Errors are logged and never raised.

    Args:
        execution_id: Execution ID
        session_id: Accepted for interface compatibility but not sent with
            the request; the lookup is keyed by execution_id only

    Returns:
        The decoded JSON session data on HTTP 200; None on 404, on any
        other status code, or when the request fails
    """
    # NOTE(review): session_id is never sent with the request. The
    # previous "session_id = session_id or execution_id" was a dead store
    # and has been removed - confirm whether the endpoint should get it.
    try:
        url = f"{self.base_url}/api/v1/executions/{execution_id}/session"

        response = self._client.get(url, headers=self.headers)

        if response.status_code == 200:
            session_data = response.json()
            logger.info(
                "session_loaded",
                execution_id=execution_id[:8],
                message_count=len(session_data.get("messages", [])),
            )
            return session_data

        if response.status_code == 404:
            logger.info(
                "session_not_found",
                execution_id=execution_id[:8],
            )
            return None

        logger.warning(
            "session_load_failed",
            status=response.status_code,
            execution_id=execution_id[:8],
        )
        return None

    except Exception as e:
        logger.warning(
            "session_load_error",
            error=str(e),
            execution_id=execution_id[:8],
        )
        return None
978
+
979
def persist_session(
    self,
    execution_id: str,
    session_id: str,
    user_id: Optional[str],
    messages: List[Dict[str, Any]],
    metadata: Optional[Dict[str, Any]] = None,
) -> bool:
    """
    Store session history in the Control Plane.

    POSTs the session to ``/api/v1/executions/{execution_id}/session`` so
    the history remains available even when the worker is offline. Errors
    are logged and reported through the return value, never raised.

    Args:
        execution_id: Execution ID
        session_id: Session ID
        user_id: User ID (may be None)
        messages: List of session messages
        metadata: Optional metadata; an empty dict is sent when omitted

    Returns:
        True when the API answers 200 or 201, False otherwise
    """
    try:
        reply = self._client.post(
            f"{self.base_url}/api/v1/executions/{execution_id}/session",
            json={
                "session_id": session_id,
                "user_id": user_id,
                "messages": messages,
                "metadata": metadata or {},
            },
            headers=self.headers,
        )

        # Guard clause: anything other than 200/201 counts as a failure.
        if reply.status_code not in (200, 201):
            logger.warning(
                "session_persistence_failed",
                status=reply.status_code,
                execution_id=execution_id[:8],
            )
            return False

        logger.info(
            "session_persisted",
            execution_id=execution_id[:8],
            message_count=len(messages),
        )
        return True

    except Exception as exc:
        logger.warning(
            "session_persistence_error",
            error=str(exc),
            execution_id=execution_id[:8],
        )
        return False
1035
+
1036
def get_skills(
    self,
    agent_id: str,
) -> List[Dict[str, Any]]:
    """
    Fetch the resolved skills of an agent from the Control Plane.

    Per the endpoint's description, the returned skills are merged from
    all layers:
    - All agent environments (many-to-many)
    - Team skills (if agent has team)
    - All team environments (many-to-many)
    - Agent's own skills

    Errors are logged and mapped to an empty list; nothing is raised.

    Args:
        agent_id: Agent ID

    Returns:
        The decoded JSON body on HTTP 200 (a list of skill configurations
        with source and inheritance info), otherwise an empty list
    """
    try:
        reply = self._client.get(
            f"{self.base_url}/api/v1/skills/associations/agents/{agent_id}/skills/resolved",
            headers=self.headers,
        )

        if reply.status_code != 200:
            logger.warning(
                "skills_fetch_failed",
                status=reply.status_code,
                agent_id=agent_id[:8],
            )
            return []

        resolved = reply.json()
        logger.info(
            "skills_fetched",
            agent_id=agent_id[:8],
            skill_count=len(resolved),
        )
        return resolved

    except Exception as exc:
        logger.warning(
            "skills_fetch_error",
            error=str(exc),
            agent_id=agent_id[:8],
        )
        return []
1082
+
1083
def get_team_skills(
    self,
    team_id: str,
) -> List[Dict[str, Any]]:
    """
    Fetch the resolved skills of a team from the Control Plane.

    Per the endpoint's description, the returned skills are merged from
    all layers:
    - All team environments (many-to-many)
    - Team's own skills

    Errors are logged and mapped to an empty list; nothing is raised.

    Args:
        team_id: Team ID

    Returns:
        The decoded JSON body on HTTP 200 (a list of skill configurations
        with source and inheritance info), otherwise an empty list
    """
    try:
        reply = self._client.get(
            f"{self.base_url}/api/v1/skills/associations/teams/{team_id}/skills/resolved",
            headers=self.headers,
        )

        if reply.status_code != 200:
            logger.warning(
                "team_skills_fetch_failed",
                status=reply.status_code,
                team_id=team_id[:8],
            )
            return []

        resolved = reply.json()
        logger.info(
            "team_skills_fetched",
            team_id=team_id[:8],
            skill_count=len(resolved),
        )
        return resolved

    except Exception as exc:
        logger.warning(
            "team_skills_fetch_error",
            error=str(exc),
            team_id=team_id[:8],
        )
        return []
1127
+
1128
def get_agent_execution_environment(
    self,
    agent_id: str,
) -> Dict[str, str]:
    """
    Fetch the resolved execution environment of an agent.

    Per the endpoint's description, the response is a fully resolved
    environment variable dict containing:
    - Custom env vars from agent configuration
    - Secret values (resolved from Kubiya vault)
    - Integration tokens (resolved and mapped to env var names like GH_TOKEN, JIRA_TOKEN)

    Only the variable NAMES are logged, never their values. Errors are
    logged and mapped to an empty dict; nothing is raised.

    Args:
        agent_id: Agent ID

    Returns:
        Dict of environment variables ready to inject into agent
        execution, or an empty dict on any failure
    """
    try:
        reply = self._client.get(
            f"{self.base_url}/api/v1/execution-environment/agents/{agent_id}/resolved",
            headers=self.headers,
        )

        if reply.status_code != 200:
            logger.warning(
                "agent_execution_environment_fetch_failed",
                status=reply.status_code,
                agent_id=agent_id[:8],
            )
            return {}

        resolved_env = reply.json()
        logger.info(
            "agent_execution_environment_fetched",
            agent_id=agent_id[:8],
            env_var_count=len(resolved_env),
            env_var_keys=list(resolved_env.keys()),
        )
        return resolved_env

    except Exception as exc:
        logger.warning(
            "agent_execution_environment_fetch_error",
            error=str(exc),
            agent_id=agent_id[:8],
        )
        return {}
1174
+
1175
def get_team_execution_environment(
    self,
    team_id: str,
) -> Dict[str, str]:
    """
    Fetch the resolved execution environment of a team.

    Per the endpoint's description, the response is a fully resolved
    environment variable dict containing:
    - Custom env vars from team configuration
    - Secret values (resolved from Kubiya vault)
    - Integration tokens (resolved and mapped to env var names like GH_TOKEN, JIRA_TOKEN)

    Only the variable NAMES are logged, never their values. Errors are
    logged and mapped to an empty dict; nothing is raised.

    Args:
        team_id: Team ID

    Returns:
        Dict of environment variables ready to inject into team
        execution, or an empty dict on any failure
    """
    try:
        reply = self._client.get(
            f"{self.base_url}/api/v1/execution-environment/teams/{team_id}/resolved",
            headers=self.headers,
        )

        if reply.status_code != 200:
            logger.warning(
                "team_execution_environment_fetch_failed",
                status=reply.status_code,
                team_id=team_id[:8],
            )
            return {}

        resolved_env = reply.json()
        logger.info(
            "team_execution_environment_fetched",
            team_id=team_id[:8],
            env_var_count=len(resolved_env),
            env_var_keys=list(resolved_env.keys()),
        )
        return resolved_env

    except Exception as exc:
        logger.warning(
            "team_execution_environment_fetch_error",
            error=str(exc),
            team_id=team_id[:8],
        )
        return {}
1221
+
1222
async def create_job_execution_record(
    self,
    execution_id: str,
    job_id: Optional[str],
    organization_id: str,
    entity_type: str,
    entity_id: Optional[str],
    prompt: str,
    trigger_type: str,
    trigger_metadata: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Create execution and job_executions records for a scheduled job.

    The records are created through the Control Plane API
    (POST ``/api/v1/executions/create``) instead of by accessing
    Supabase directly.

    Args:
        execution_id: Execution ID
        job_id: Job ID (optional)
        organization_id: Organization ID
        entity_type: "agent" or "team"
        entity_id: Agent or team ID
        prompt: Prompt text
        trigger_type: "cron", "webhook", or "manual"
        trigger_metadata: Additional trigger metadata

    Returns:
        The decoded JSON body of the 201 response (expected to carry
        execution_id, status, and created_at)

    Raises:
        Exception: if the API answers with any status other than 201, or
            if the request itself fails. Every failure is logged under
            "job_execution_record_creation_error" before being re-raised.
    """
    try:
        reply = await self._async_client.post(
            f"{self.base_url}/api/v1/executions/create",
            json={
                "execution_id": execution_id,
                "job_id": job_id,
                "organization_id": organization_id,
                "entity_type": entity_type,
                "entity_id": entity_id,
                "prompt": prompt,
                "trigger_type": trigger_type,
                "trigger_metadata": trigger_metadata,
            },
            headers=self.headers,
        )

        # Only 201 Created counts as success; the raise below is caught by
        # the handler at the bottom, which logs once more and re-raises.
        if reply.status_code != 201:
            logger.error(
                "job_execution_record_creation_failed",
                status=reply.status_code,
                execution_id=execution_id[:8],
                response=reply.text,
            )
            raise Exception(f"Failed to create execution record: HTTP {reply.status_code}")

        created = reply.json()
        logger.info(
            "job_execution_record_created",
            execution_id=execution_id[:8],
            job_id=job_id[:8] if job_id else None,
        )
        return created

    except Exception as exc:
        logger.error(
            "job_execution_record_creation_error",
            error=str(exc),
            execution_id=execution_id[:8],
        )
        raise
1291
+
1292
async def update_job_execution_status(
    self,
    execution_id: str,
    job_id: str,
    status: str,
    duration_ms: Optional[int] = None,
    error_message: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Update a job_executions record with the execution results.

    The update goes through the Control Plane API
    (POST ``/api/v1/executions/{execution_id}/job/{job_id}/status``)
    instead of accessing Supabase directly.

    Args:
        execution_id: Execution ID
        job_id: Job ID
        status: Final status ("completed" or "failed")
        duration_ms: Execution duration in milliseconds
        error_message: Error message if failed

    Returns:
        The decoded JSON body of the 200 response (expected to carry
        job_id, execution_id, and status)

    Raises:
        Exception: if the API answers with any status other than 200, or
            if the request itself fails. Every failure is logged under
            "job_execution_status_update_error" before being re-raised.
    """
    try:
        reply = await self._async_client.post(
            f"{self.base_url}/api/v1/executions/{execution_id}/job/{job_id}/status",
            json={
                "status": status,
                "duration_ms": duration_ms,
                "error_message": error_message,
            },
            headers=self.headers,
        )

        # Only 200 counts as success; the raise below is caught by the
        # handler at the bottom, which logs once more and re-raises.
        if reply.status_code != 200:
            logger.error(
                "job_execution_status_update_failed",
                status_code=reply.status_code,
                execution_id=execution_id[:8],
                job_id=job_id[:8],
                response=reply.text,
            )
            raise Exception(f"Failed to update job execution status: HTTP {reply.status_code}")

        updated = reply.json()
        logger.info(
            "job_execution_status_updated",
            execution_id=execution_id[:8],
            job_id=job_id[:8],
            status=status,
        )
        return updated

    except Exception as exc:
        logger.error(
            "job_execution_status_update_error",
            error=str(exc),
            execution_id=execution_id[:8],
            job_id=job_id[:8],
        )
        raise
1353
+
1354
async def _handle_control_message(self, message: Dict[str, Any]):
    """
    Handle a control message received from the control plane via WebSocket.

    Supported commands:
    - "pause":  sends the "pause_execution" signal to the workflow
    - "resume": sends the "resume_execution" signal to the workflow
    - "cancel": cancels the workflow
    - "reload_config": recognised but not implemented (logged only)

    Any other command is logged as "unknown_control_command". This method
    never raises: failures are logged as "control_command_error".

    Args:
        message: Control message; the "command" and "execution_id" keys
            are read from it. The execution ID is used as the workflow ID
            when looking up the workflow handle.
    """
    import inspect

    command = message.get("command")
    execution_id = message.get("execution_id")
    # Log-safe short form. A malformed message may omit execution_id, and
    # slicing None would turn every log call below into a TypeError.
    short_id = str(execution_id)[:8] if execution_id else None

    # Commands that map onto a workflow signal.
    signal_names = {"pause": "pause_execution", "resume": "resume_execution"}

    try:
        # Commands that do not touch a workflow are handled before any
        # Temporal client is created.
        if command == "reload_config":
            # Future: Reload config without restart
            logger.info("control_command_not_implemented", command="reload_config", execution_id=short_id)
            return

        if command != "cancel" and command not in signal_names:
            logger.warning("unknown_control_command", command=command, execution_id=short_id)
            return

        if not execution_id:
            logger.error(
                "control_command_error",
                error="missing execution_id in control message",
                command=command,
                execution_id=None,
            )
            return

        # Import Temporal client here to avoid circular import
        from control_plane_api.app.lib.temporal_client import get_temporal_client

        temporal_client = get_temporal_client()
        # NOTE(review): get_temporal_client is defined outside this block.
        # If it is a coroutine function its result must be awaited before
        # use; awaiting only when needed keeps a synchronous factory
        # working too. Confirm against temporal_client.py.
        if inspect.isawaitable(temporal_client):
            temporal_client = await temporal_client

        workflow_handle = temporal_client.get_workflow_handle(execution_id)

        if command == "cancel":
            await workflow_handle.cancel()
        else:
            await workflow_handle.signal(signal_names[command])

        logger.info("control_command_executed", command=command, execution_id=short_id)

    except Exception as e:
        # Control messages are best-effort: log and keep the WebSocket
        # listener alive rather than propagating the failure.
        logger.error(
            "control_command_error",
            error=str(e),
            command=command,
            execution_id=short_id,
        )
1400
+
1401
+
1402
# Singleton instance, created lazily by get_control_plane_client()
_control_plane_client: Optional[ControlPlaneClient] = None


def get_control_plane_client() -> ControlPlaneClient:
    """
    Get or create the Control Plane client singleton.

    The first call builds the client from environment variables; later
    calls return the same instance without re-reading the environment.

    Environment variables read:
    - CONTROL_PLANE_URL: Control Plane URL (required)
    - KUBIYA_API_KEY: API key for authentication (required)
    - REDIS_URL: Redis URL for direct event streaming (from registration)
    - REDIS_ENABLED: Whether Redis is enabled (from registration)
    - WEBSOCKET_ENABLED: Whether WebSocket is enabled (from registration)
    - WEBSOCKET_URL: WebSocket URL (from registration)
    - WORKER_ID: Worker ID (from registration)
    - EVENT_BUS_CONFIG: JSON object with event bus configuration (from registration, optional)

    REDIS_PASSWORD is not consumed by this function: only REDIS_URL is
    placed into the auto-generated event bus configuration.

    Returns:
        ControlPlaneClient instance

    Raises:
        ValueError: If required environment variables are not set
    """
    global _control_plane_client

    if _control_plane_client is not None:
        return _control_plane_client

    base_url = os.environ.get("CONTROL_PLANE_URL")
    api_key = os.environ.get("KUBIYA_API_KEY")

    # Validate required settings first so a misconfigured worker fails
    # before any optional configuration is parsed or logged.
    if not base_url:
        raise ValueError("CONTROL_PLANE_URL environment variable not set")
    if not api_key:
        raise ValueError("KUBIYA_API_KEY environment variable not set")

    # WebSocket config from environment (set by worker.py after registration)
    websocket_enabled = os.environ.get("WEBSOCKET_ENABLED", "false").lower() == "true"
    websocket_url = os.environ.get("WEBSOCKET_URL")
    worker_id = os.environ.get("WORKER_ID")

    # Redis config from environment (set by worker.py after registration)
    # This is the DEFAULT fast path for event streaming
    redis_url = os.environ.get("REDIS_URL")
    redis_enabled = os.environ.get("REDIS_ENABLED", "false").lower() == "true"

    # Event bus config from environment (set by worker.py after registration)
    event_bus_config = None
    event_bus_config_str = os.environ.get("EVENT_BUS_CONFIG")
    if event_bus_config_str:
        try:
            import json

            parsed_config = json.loads(event_bus_config_str)
            # Validate BEFORE assigning: previously a non-object value
            # (e.g. a JSON list) was assigned first, failed on .keys(),
            # logged a parse failure, and was still passed to the client.
            if not isinstance(parsed_config, dict):
                raise ValueError(
                    f"EVENT_BUS_CONFIG must be a JSON object, got {type(parsed_config).__name__}"
                )
            event_bus_config = parsed_config
            logger.info("event_bus_config_loaded_from_env", providers=list(event_bus_config.keys()))
        except Exception as e:
            # Optional setting: fall through with event_bus_config = None.
            logger.warning("event_bus_config_parse_failed", error=str(e))

    # AUTO-CONFIGURE: If Redis credentials provided, auto-enable Redis provider
    # This makes Redis the default fast path without explicit event_bus_config
    if redis_enabled and redis_url and not event_bus_config:
        event_bus_config = {
            "redis": {
                "enabled": True,
                "redis_url": redis_url,
            }
        }
        logger.info(
            "redis_auto_configured_as_default",
            worker_id=worker_id[:8] if worker_id else "unknown",
            # Log only the part after "@" so URL-embedded credentials are not written to logs
            redis_url=redis_url.split("@")[-1] if "@" in redis_url else redis_url,
        )

    _control_plane_client = ControlPlaneClient(
        base_url=base_url,
        api_key=api_key,
        websocket_enabled=websocket_enabled,
        websocket_url=websocket_url,
        worker_id=worker_id,
        event_bus_config=event_bus_config,
    )

    logger.info(
        "control_plane_client_initialized",
        base_url=base_url,
        websocket_enabled=websocket_enabled,
        event_bus_configured=event_bus_config is not None,
    )

    return _control_plane_client