kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479) hide show
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,1481 @@
1
+ """
2
+ Plan Execution Router - Execute and manage multi-task plans using Temporal orchestration
3
+
4
+ Supports both SSE and WebSocket streaming for plan execution updates.
5
+ """
6
+
7
+ from fastapi import APIRouter, Depends, HTTPException, status, Request, WebSocket, WebSocketDisconnect
8
+ from fastapi.responses import StreamingResponse
9
+ from sqlalchemy.orm import Session
10
+ from typing import Optional, Dict, Any
11
+ from datetime import datetime, timezone
12
+ import structlog
13
+ import uuid
14
+ import json
15
+ import os
16
+ import jwt as jwt_lib
17
+ import asyncio
18
+
19
+ from control_plane_api.app.database import get_db
20
+ from control_plane_api.app.models.plan_execution import PlanExecution, PlanExecutionStatus
21
+ from control_plane_api.app.lib.redis_client import get_redis_client
22
+ from control_plane_api.app.middleware.auth import get_current_organization, decode_jwt_token
23
+ from pydantic import BaseModel, Field
24
+
25
+ # Temporal client
26
+ from temporalio.client import Client as TemporalClient
27
+
28
+ # Import our workflow - conditionally to allow API to start without worker_internal
29
+ try:
30
+ from worker_internal.planner.workflows import PlanOrchestratorWorkflow
31
+ from worker_internal.planner.models import PlanOrchestratorInput, Plan
32
+ PLANNER_AVAILABLE = True
33
+ except ImportError as e:
34
+ PLANNER_AVAILABLE = False
35
+ PlanOrchestratorWorkflow = None
36
+ PlanOrchestratorInput = None
37
+ Plan = None
38
+ # Will log when first used
39
+
40
+ router = APIRouter()
41
+ logger = structlog.get_logger()
42
+
43
+
44
def _decode_unverified_claims(api_token: str, failure_event: str) -> Optional[Dict[str, Any]]:
    """
    Decode a JWT payload WITHOUT verifying its signature.

    SECURITY NOTE(review): ``verify_signature=False`` means the returned claims
    are whatever the caller put in the token. They must not be treated as
    proof of identity unless the token has been authenticated elsewhere --
    confirm that an upstream layer does so for every route using these helpers.

    Args:
        api_token: Non-empty JWT string.
        failure_event: Log event name emitted when decoding fails.

    Returns:
        The decoded claims, or None if the token could not be decoded.
    """
    try:
        return jwt_lib.decode(api_token, options={"verify_signature": False})
    except Exception as e:
        # Best-effort by design: a bad token yields None, never an exception.
        logger.warning(failure_event, error=str(e))
        return None


def extract_organization_id_from_token(api_token: Optional[str]) -> Optional[str]:
    """
    Extract organization ID from JWT token (signature is NOT verified).

    The first truthy claim among ``organization``, ``org`` and ``org_id`` wins.

    Args:
        api_token: JWT token string

    Returns:
        Organization ID if found, None otherwise
    """
    if not api_token:
        return None

    decoded = _decode_unverified_claims(api_token, "failed_to_decode_token")
    if decoded is None:
        return None

    org_id = decoded.get("organization") or decoded.get("org") or decoded.get("org_id")
    if org_id:
        logger.debug("extracted_org_from_token", organization_id=org_id)
    return org_id


def extract_user_id_from_token(api_token: Optional[str]) -> Optional[str]:
    """
    Extract user ID from JWT token (signature is NOT verified).

    The first truthy claim among ``user_id``, ``sub`` and ``email`` wins.

    Args:
        api_token: JWT token string

    Returns:
        User ID if found, None otherwise
    """
    if not api_token:
        return None

    decoded = _decode_unverified_claims(api_token, "failed_to_extract_user_id")
    if decoded is None:
        return None

    user_id = decoded.get("user_id") or decoded.get("sub") or decoded.get("email")
    if user_id:
        logger.debug("extracted_user_from_token", user_id=user_id)
    return user_id
95
+
96
+
97
class PlanExecutionRequest(BaseModel):
    """
    Request body for executing a plan.

    ``plan`` may be the plan object itself or a wrapper of the form
    ``{"plan": {...}}``; the execute endpoint accepts both shapes.
    """
    # Raw plan JSON (either the plan itself or wrapped under a "plan" key).
    plan: dict = Field(..., description="The plan JSON to execute")
    # Optional: when omitted, the execute endpoint looks for an agent_id
    # inside the plan's first team_breakdown entry.
    agent_id: Optional[str] = Field(None, description="Agent ID to use for task execution (optional, extracted from plan if not provided)")
    # Required: also applied to every task that has no worker_queue_id of its own.
    worker_queue_id: str = Field(..., description="Worker queue ID to route task executions to")
    # Optional back-reference stored on the execution record.
    plan_generation_id: Optional[str] = Field(None, description="ID of the plan generation execution that created this plan (for linking)")
103
+
104
+
105
class PlanExecutionResponse(BaseModel):
    """Response returned once a plan execution has been created and its workflow started."""
    # Generated execution identifier; the Temporal workflow ID is "plan-<execution_id>".
    execution_id: str
    # Status string reported to the caller ("running" on successful start).
    status: str
    # Title taken from the plan ("Untitled Plan" when the plan has none).
    plan_title: str
    # Number of tasks counted by the execute endpoint.
    total_tasks: int
111
+
112
+
113
class PlanStatusResponse(BaseModel):
    """Snapshot of a plan execution's status and progress, as returned by the status endpoint."""
    execution_id: str
    plan_generation_id: Optional[str] = None  # Link to the plan generation that produced this plan
    status: str
    plan_title: str
    total_tasks: int
    completed_tasks: int
    failed_tasks: int
    # completed_tasks / total_tasks * 100, or 0.0 when there are no tasks.
    progress_percentage: float
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    waiting_tasks: Optional[list] = None  # Tasks waiting for user input
    plan_json: Optional[dict] = None  # The full plan for UI rendering
127
+
128
+
129
async def get_temporal_client() -> TemporalClient:
    """
    Return the Temporal client provided by the shared library helper.

    Thin async wrapper around ``control_plane_api.app.lib.temporal_client``;
    the import is done lazily, at call time.
    """
    from control_plane_api.app.lib.temporal_client import get_temporal_client as _shared_factory

    client = await _shared_factory()
    return client
138
+
139
+
140
# Keys whose explicit ``None`` value is replaced before Pydantic validation.
_PLAN_NONE_AS_DICT_KEYS = frozenset({
    'model_info', 'execution_environment', 'recommended_execution',
    'cost_estimate', 'realized_savings',
})
_PLAN_NONE_AS_LIST_KEYS = frozenset({
    'knowledge_references', 'subtasks', 'risks', 'prerequisites', 'success_criteria',
})
_PLAN_NONE_AS_STR_KEYS = frozenset({'agent_id', 'agent_name', 'team_id', 'team_name'})


def _normalize_plan_data(data):
    """Recursively replace selected ``None`` values with empty dict/list/str so the plan validates."""
    if isinstance(data, list):
        return [_normalize_plan_data(item) for item in data]
    if not isinstance(data, dict):
        return data

    normalized = {}
    for key, value in data.items():
        if isinstance(value, (dict, list)):
            normalized[key] = _normalize_plan_data(value)
        elif value is not None:
            normalized[key] = value
        elif key in _PLAN_NONE_AS_DICT_KEYS:
            normalized[key] = {}
        elif key.endswith('_to_use') or key in _PLAN_NONE_AS_LIST_KEYS:
            normalized[key] = []
        elif key in _PLAN_NONE_AS_STR_KEYS:
            # For team-based plans, these can be empty strings
            normalized[key] = ''
        else:
            normalized[key] = value
    return normalized


@router.post("/execute", response_model=PlanExecutionResponse, status_code=status.HTTP_201_CREATED)
async def execute_plan(
    request: PlanExecutionRequest,
    http_request: Request,
    db: Session = Depends(get_db),
):
    """
    Execute a multi-task plan using Temporal orchestration.

    This endpoint:
    1. Validates the plan structure
    2. Creates a plan execution record
    3. Starts a Temporal workflow to orchestrate task execution
    4. Returns execution ID for status tracking

    Raises:
        HTTPException 503: planner module is not installed.
        HTTPException 401: no token, or no organization claim in the token.
        HTTPException 400: neither an agent_id nor a team_id could be determined.
        HTTPException 500: workflow start failed, or any other unexpected error
            (including a plan that fails model validation).

    NOTE(review): organization and user are read from the Authorization token by
    helpers that decode it without verifying the signature. Confirm the request
    is authenticated before it reaches this handler.
    """
    # Check if planner module is available
    if not PLANNER_AVAILABLE:
        logger.error("planner_module_not_available", message="worker_internal.planner not installed")
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Plan execution is not available. The planner module is not installed."
        )

    try:
        # Extract auth token
        auth_header = http_request.headers.get("authorization", "")
        jwt_token = auth_header.replace("Bearer ", "").replace("UserKey ", "") if auth_header else None

        # Check for the token first; otherwise a missing token is always
        # reported as a missing organization and this message is never used.
        if not jwt_token:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Authorization token required"
            )

        # Extract organization ID and user ID from JWT token
        organization_id = extract_organization_id_from_token(jwt_token)
        user_id = extract_user_id_from_token(jwt_token)

        if not organization_id:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Could not extract organization_id from JWT token"
            )

        # Parse plan (accepts both {"plan": {...}} and the bare plan object)
        plan_data = request.plan.get("plan", request.plan)
        plan_title = plan_data.get("title", "Untitled Plan")
        total_tasks = 0

        # Extract agent_id or team_id from plan if not provided in request.
        # This reads the RAW plan on purpose: normalization turns a null
        # agent_id into '', which must not leak into the stored record.
        agent_id = request.agent_id
        team_id = None

        team_breakdown = plan_data.get("team_breakdown")
        if team_breakdown:
            team = team_breakdown[0]
            tasks = team.get("tasks") or []

            # Check if this is a team-based plan
            team_id = team.get("team_id")

            # Extract agent_id (could be null for team plans)
            if not agent_id:
                agent_id = team.get("agent_id")

            # If still no agent_id, try to get from first task (fallback for team plans)
            if not agent_id and tasks:
                agent_id = tasks[0].get("agent_id")
                logger.info("agent_id_extracted_from_first_task", agent_id=agent_id)
            elif agent_id:
                logger.info("agent_id_extracted_from_plan", agent_id=agent_id)

            # NOTE(review): only the first team's tasks are counted, while the
            # worker_queue_id loop below walks every team -- confirm intent.
            total_tasks = len(tasks)

        # For team-based plans, agent_id can be null (tasks have individual agent_ids)
        # Only raise error if it's not a team plan
        if not agent_id and not team_id:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Either agent_id or team_id must be provided in plan"
            )

        logger.info(
            "plan_execution_requested",
            plan_title=plan_title,
            total_tasks=total_tasks,
            agent_id=agent_id,
        )

        # Normalize and validate BEFORE persisting anything, so an invalid plan
        # does not leave behind a record stuck in RUNNING.
        normalized_plan = _normalize_plan_data(plan_data)
        logger.info("plan_data_normalized", plan_title=normalized_plan.get('title'))

        # Parse plan into Pydantic model
        plan_obj = Plan(**normalized_plan)

        # The workflow requires each task to have worker_queue_id
        for team in plan_obj.team_breakdown:
            for task in team.tasks:
                if not task.worker_queue_id:
                    task.worker_queue_id = request.worker_queue_id
                    logger.debug(
                        "set_worker_queue_id_on_task",
                        task_id=task.id,
                        worker_queue_id=request.worker_queue_id
                    )

        # Generate execution ID
        execution_id = str(uuid.uuid4())

        # Create plan execution record
        plan_execution = PlanExecution(
            id=uuid.uuid4(),
            execution_id=execution_id,
            organization_id=organization_id,
            agent_id=agent_id,
            plan_generation_id=request.plan_generation_id,  # Link to plan generation
            title=plan_title,
            summary=plan_data.get("summary"),
            total_tasks=total_tasks,
            completed_tasks=0,
            failed_tasks=0,
            status=PlanExecutionStatus.RUNNING,
            plan_json=request.plan,
            # `or {}` tolerates an explicit `"cost_estimate": null` in the plan.
            estimated_cost_usd=(plan_data.get("cost_estimate") or {}).get("estimated_cost_usd"),
            # Naive UTC timestamp, same value as the deprecated datetime.utcnow().
            started_at=datetime.now(timezone.utc).replace(tzinfo=None),
        )
        db.add(plan_execution)
        db.commit()

        # Start Temporal workflow
        try:
            temporal_client = await get_temporal_client()

            workflow_input = PlanOrchestratorInput(
                plan=plan_obj,
                organization_id=organization_id,
                agent_id=agent_id,  # Use extracted agent_id (from request or plan)
                worker_queue_id=request.worker_queue_id,
                user_id=user_id,
                execution_id=execution_id,
                jwt_token=jwt_token,
            )

            # Use shared task queue for all organizations
            task_queue = os.getenv("TASK_QUEUE", "agent-control-plane.internal")
            workflow_id = f"plan-{execution_id}"

            # Start workflow
            await temporal_client.start_workflow(
                PlanOrchestratorWorkflow.run,
                workflow_input,
                id=workflow_id,
                task_queue=task_queue,
            )

            logger.info(
                "plan_workflow_started",
                execution_id=execution_id,
                workflow_id=workflow_id,
                task_queue=task_queue,
                organization_id=organization_id,
            )

        except Exception as e:
            logger.error(
                "failed_to_start_workflow",
                error=str(e),
                execution_id=execution_id,
            )
            # Update status to failed
            plan_execution.status = PlanExecutionStatus.FAILED
            db.commit()
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Failed to start plan execution workflow: {str(e)}"
            ) from e

        return PlanExecutionResponse(
            execution_id=execution_id,
            status="running",
            plan_title=plan_title,
            total_tasks=total_tasks,
        )

    except HTTPException:
        # Deliberate 4xx/5xx responses raised above must reach the client
        # unchanged instead of being re-wrapped as a generic 500.
        raise
    except Exception as e:
        logger.error("plan_execution_error", error=str(e))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Plan execution failed: {str(e)}"
        ) from e
359
+
360
+
361
@router.get("/plan-executions")
def list_plan_executions(
    status: Optional[str] = None,
    limit: int = 50,
    offset: int = 0,
    organization: dict = Depends(get_current_organization),
    db: Session = Depends(get_db),
):
    """
    List plan executions with optional filtering.
    Automatically filters by the authenticated user's organization.

    Args:
        status: Optional status filter. (This parameter shadows the imported
            ``status`` module inside this function; it cannot be renamed
            because it is the public query-parameter name.)
        limit: Maximum number of executions to return.
        offset: Number of executions to skip.
        organization: Authenticated organization; its ``id`` scopes the query.
        db: Database session.

    Returns:
        Dict with ``executions`` (the requested page, newest first), ``total``
        (number of executions matching the filters, independent of paging),
        ``limit`` and ``offset``.
    """
    # Always filter by organization from auth
    filtered = db.query(PlanExecution).filter(
        PlanExecution.organization_id == organization["id"]
    )

    if status:
        filtered = filtered.filter(PlanExecution.status == status)

    # Count BEFORE paging: counting the offset/limit query would only ever
    # report the size of the current page, which breaks client pagination.
    total = filtered.count()

    # Order by created_at so newest executions appear first, regardless of whether they've started
    executions = (
        filtered.order_by(PlanExecution.created_at.desc())
        .offset(offset)
        .limit(limit)
        .all()
    )

    return {
        "executions": [
            {
                "execution_id": execution.execution_id,
                "agent_id": str(execution.agent_id) if execution.agent_id else None,
                "plan_generation_id": execution.plan_generation_id,  # Link to plan generation
                "title": execution.title,
                "summary": execution.summary,
                "status": execution.status.value if hasattr(execution.status, 'value') else execution.status,
                "total_tasks": execution.total_tasks,
                "completed_tasks": execution.completed_tasks,
                "failed_tasks": execution.failed_tasks or 0,
                "created_at": execution.created_at.isoformat() if execution.created_at else None,
                "started_at": execution.started_at.isoformat() if execution.started_at else None,
                "completed_at": execution.completed_at.isoformat() if execution.completed_at else None,
            }
            for execution in executions
        ],
        "total": total,
        "limit": limit,
        "offset": offset,
    }
409
+
410
+
411
@router.get("/{execution_id}", response_model=PlanStatusResponse)
def get_plan_execution(
    execution_id: str,
    db: Session = Depends(get_db),
):
    """
    Get plan execution status and progress.

    Returns current status, completed tasks, and progress information.

    Raises:
        HTTPException 404: no plan execution with this ``execution_id`` exists.

    NOTE(review): unlike the list endpoint, this lookup is not restricted to the
    caller's organization -- confirm whether that is intended.
    """
    plan_exec = db.query(PlanExecution).filter(
        PlanExecution.execution_id == execution_id
    ).first()

    if not plan_exec:
        raise HTTPException(status_code=404, detail="Plan execution not found")

    # Treat NULL counters as 0 (as is already done for failed_tasks) so that
    # neither the progress arithmetic nor response validation can fail on them.
    total_tasks = plan_exec.total_tasks or 0
    completed_tasks = plan_exec.completed_tasks or 0

    progress = (completed_tasks / total_tasks) * 100 if total_tasks > 0 else 0.0

    return PlanStatusResponse(
        execution_id=plan_exec.execution_id,
        plan_generation_id=plan_exec.plan_generation_id,  # Include link to plan generation
        status=plan_exec.status.value if hasattr(plan_exec.status, 'value') else plan_exec.status,
        plan_title=plan_exec.title,
        total_tasks=total_tasks,
        completed_tasks=completed_tasks,
        failed_tasks=plan_exec.failed_tasks or 0,
        progress_percentage=progress,
        started_at=plan_exec.started_at,
        completed_at=plan_exec.completed_at,
        waiting_tasks=plan_exec.waiting_tasks or [],  # Include waiting tasks for continuation
        plan_json=plan_exec.plan_json,  # Include full plan for UI rendering
    )
446
+
447
+
448
@router.get("/{execution_id}/stream")
async def stream_plan_execution(
    execution_id: str,
    request: Request,
    last_event_id: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """
    Stream plan execution updates using Server-Sent Events (SSE).

    Events are read from the Redis list ``plan-execution:<execution_id>:events``
    by polling every 200ms (every 2s while the plan waits for user input).

    Event IDs have the form ``<execution_id>_<n>_<microsecond timestamp>`` where
    ``n`` is the number of Redis events delivered so far. A client reconnecting
    with that ID (``last_event_id`` query parameter or ``Last-Event-ID`` header)
    resumes at event ``n`` instead of replaying the whole history. Synthetic
    events (``done``, ``plan_waiting_for_input``, ``error``) carry the current
    ``n`` without advancing it.

    Raises:
        HTTPException 404: plan execution not found.
        HTTPException 503: Redis is not configured.

    NOTE(review): the ``db`` session is used for the whole lifetime of the
    stream; confirm ``get_db`` keeps it open until the response has finished.
    NOTE(review): the lookup is not restricted to the caller's organization.
    """
    # Validate BEFORE creating the StreamingResponse: once streaming starts the
    # 200 status has been sent and an HTTPException can no longer become a
    # proper 404/503 response.
    plan_exec = db.query(PlanExecution).filter(
        PlanExecution.execution_id == execution_id
    ).first()

    if not plan_exec:
        raise HTTPException(status_code=404, detail="Plan execution not found")

    redis_client = get_redis_client()
    if not redis_client:
        logger.error("redis_not_available", execution_id=execution_id[:8])
        raise HTTPException(
            status_code=503,
            detail="Event streaming unavailable (Redis not configured)"
        )

    # Parse Last-Event-ID for reconnection
    resume_from = 0
    last_known_id = last_event_id or request.headers.get("Last-Event-ID")
    if last_known_id:
        try:
            parts = last_known_id.split("_")
            if len(parts) >= 2 and parts[0] == execution_id:
                resume_from = max(int(parts[1]), 0)
                logger.info(
                    "plan_stream_reconnection",
                    execution_id=execution_id[:8],
                    last_counter=resume_from
                )
        except (ValueError, IndexError):
            logger.warning("invalid_last_event_id", execution_id=execution_id[:8])
            resume_from = 0

    terminal_statuses = (
        PlanExecutionStatus.COMPLETED,
        PlanExecutionStatus.FAILED,
        PlanExecutionStatus.CANCELLED,
    )

    async def generate_sse():
        """Generate Server-Sent Events from Redis"""
        import time

        # Index (in chronological order) of the next Redis event to deliver;
        # equivalently, the number of Redis events already delivered.
        next_index = resume_from
        resume_validated = False

        def generate_event_id() -> str:
            return f"{execution_id}_{next_index}_{int(time.time() * 1000000)}"

        try:
            # Redis key for plan events
            redis_key = f"plan-execution:{execution_id}:events"

            logger.info(
                "plan_stream_started",
                execution_id=execution_id[:8],
                redis_key=redis_key
            )

            # Polling loop (200ms interval, same as agent worker)
            while True:
                # Check if client disconnected
                if await request.is_disconnected():
                    logger.info("plan_stream_client_disconnected", execution_id=execution_id[:8])
                    break

                # Get current events from Redis
                redis_events = await redis_client.lrange(redis_key, 0, -1)

                if not resume_validated:
                    resume_validated = True
                    if next_index > len(redis_events):
                        # The client claims more events than exist; fall back
                        # to a full replay rather than silently sending nothing.
                        logger.warning(
                            "plan_stream_resume_out_of_range",
                            execution_id=execution_id[:8],
                            last_counter=next_index
                        )
                        next_index = 0

                if len(redis_events) > next_index:
                    # New events available
                    chronological_events = list(reversed(redis_events))  # LPUSH stores in reverse

                    # Send only NEW events
                    for i in range(next_index, len(chronological_events)):
                        # Advance first: the ID emitted below then counts this
                        # event as delivered, and a malformed event is skipped
                        # once instead of being retried on every poll.
                        next_index = i + 1

                        try:
                            event_data = json.loads(chronological_events[i])
                        except json.JSONDecodeError:
                            logger.warning(
                                "malformed_event_skipped",
                                execution_id=execution_id[:8],
                                index=i
                            )
                            continue

                        event_type = event_data.get("event_type", "unknown")
                        payload = event_data.get("data", {})

                        # Generate SSE event
                        yield f"id: {generate_event_id()}\n"
                        yield f"event: {event_type}\n"
                        yield f"data: {json.dumps(payload)}\n\n"

                # Check if plan is complete
                if plan_exec.status in terminal_statuses:
                    # Serialize the enum's value so the payload is valid JSON
                    # whatever the enum's base class is.
                    final_status = (
                        plan_exec.status.value
                        if hasattr(plan_exec.status, 'value')
                        else plan_exec.status
                    )
                    # Send final done event
                    yield f"id: {generate_event_id()}\n"
                    yield "event: done\n"
                    yield f"data: {json.dumps({'status': final_status})}\n\n"
                    logger.info("plan_stream_complete", execution_id=execution_id[:8])
                    break

                # Check if plan is waiting for user input (paused)
                if plan_exec.status == PlanExecutionStatus.PENDING_USER_INPUT:
                    # Send waiting_for_user_input event with details
                    waiting_data = {
                        "status": "pending_user_input",
                        "waiting_tasks": plan_exec.waiting_tasks or [],
                        "message": "Plan paused - waiting for user input",
                    }
                    yield f"id: {generate_event_id()}\n"
                    yield "event: plan_waiting_for_input\n"
                    yield f"data: {json.dumps(waiting_data)}\n\n"
                    logger.info("plan_paused_for_user_input", execution_id=execution_id[:8])
                    # Keep streaming (don't break) - stream will continue when user provides input
                    # But add a longer sleep to reduce polling when paused
                    await asyncio.sleep(2.0)  # 2 seconds when paused
                    db.refresh(plan_exec)
                    continue

                # Refresh plan exec status from DB
                db.refresh(plan_exec)

                # Wait 200ms before next poll (same as agent worker)
                await asyncio.sleep(0.2)

        except Exception as e:
            logger.error(
                "plan_stream_error",
                execution_id=execution_id[:8],
                error=str(e)
            )
            # Send error event
            yield f"id: {generate_event_id()}\n"
            yield "event: error\n"
            yield f"data: {json.dumps({'error': str(e)})}\n\n"

    return StreamingResponse(
        generate_sse(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # Disable nginx buffering
        }
    )
627
+
628
+
629
+ # =============================================================================
630
+ # WebSocket Streaming for Plan Execution
631
+ # =============================================================================
632
+
633
class PlanConnectionManager:
    """
    Registry of live plan-execution WebSocket connections.

    Keeps one WebSocket per execution ID, an index of execution IDs per
    organization, and a few running counters.
    """

    def __init__(self):
        # Running counters
        self._stats = {
            "total_connections": 0,
            "active_connections": 0,
            "messages_sent": 0,
            "errors": 0,
        }
        # organization_id -> Set[execution_id]
        self._org_connections: Dict[str, set] = {}
        # execution_id -> WebSocket (one socket per execution; a later
        # registration for the same execution replaces the earlier one)
        self._connections: Dict[str, WebSocket] = {}

    async def connect(
        self,
        execution_id: str,
        organization_id: str,
        websocket: WebSocket,
    ) -> None:
        """Accept the WebSocket and record it under its execution and organization."""
        await websocket.accept()

        self._connections[execution_id] = websocket
        self._org_connections.setdefault(organization_id, set()).add(execution_id)

        self._stats["total_connections"] += 1
        active = len(self._connections)
        self._stats["active_connections"] = active

        logger.info(
            "plan_websocket_connected",
            execution_id=execution_id[:8],
            organization_id=organization_id[:8],
            active_connections=active,
        )

    async def disconnect(self, execution_id: str, organization_id: str) -> None:
        """Forget a connection; unknown IDs are ignored."""
        self._connections.pop(execution_id, None)

        org_executions = self._org_connections.get(organization_id)
        if org_executions is not None:
            org_executions.discard(execution_id)
            # Drop the organization entry once its last execution is gone.
            if not org_executions:
                del self._org_connections[organization_id]

        active = len(self._connections)
        self._stats["active_connections"] = active

        logger.info(
            "plan_websocket_disconnected",
            execution_id=execution_id[:8],
            active_connections=active,
        )

    def get_stats(self) -> Dict[str, int]:
        """Return a shallow copy of the counters, so callers cannot mutate them."""
        return dict(self._stats)
699
+
700
+
701
+ # Global connection manager for plan WebSocket connections
702
+ plan_connection_manager = PlanConnectionManager()
703
+
704
+
705
async def send_plan_json(websocket: WebSocket, event_type: str, data: Any) -> None:
    """
    Serialize a plan event and send it as a text frame over the WebSocket.

    The message is ``{"type": event_type, "timestamp": <UTC ISO-8601>}`` merged
    with ``data``; because ``data`` is merged last, keys named ``type`` or
    ``timestamp`` in it override the generated ones.

    Args:
        websocket: WebSocket connection
        event_type: Event type (e.g., 'plan_started', 'task_completed')
        data: Event data payload (a mapping)

    Raises:
        Exception: any build/serialize/send failure is logged, counted in the
            connection manager's error counter, and re-raised.
    """
    try:
        envelope = {
            "type": event_type,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
        serialized = json.dumps({**envelope, **data})
        await websocket.send_text(serialized)
    except Exception as e:
        logger.error("failed_to_send_plan_websocket_message", error=str(e), event_type=event_type)
        plan_connection_manager._stats["errors"] += 1
        raise
    else:
        plan_connection_manager._stats["messages_sent"] += 1
        logger.debug("plan_websocket_message_sent", event_type=event_type)
727
+
728
+
729
async def handle_plan_auth(websocket: WebSocket, token: str) -> Optional[str]:
    """
    Authenticate a plan WebSocket from the token in its auth message.

    Decodes the token with ``decode_jwt_token``, looks up the organization ID
    in the claims and reports the outcome to the client as an ``auth_success``
    or ``auth_error`` message.

    Args:
        websocket: WebSocket connection
        token: JWT authentication token

    Returns:
        organization_id if authentication successful, None otherwise
    """
    # Claims checked for the organization ID, in priority order.
    org_claim_names = (
        'https://kubiya.ai/org_id',
        'org_id',
        'organization_id',
        'organization',
        'org',
    )

    try:
        claims = decode_jwt_token(token)

        if not claims:
            logger.error("plan_jwt_decode_failed", reason="Invalid token format")
            await send_plan_json(websocket, "auth_error", {
                "error": "Invalid authentication token",
                "code": "INVALID_TOKEN",
            })
            return None

        # First truthy claim wins.
        organization_id = None
        for claim_name in org_claim_names:
            organization_id = claims.get(claim_name)
            if organization_id:
                break

        if not organization_id:
            logger.error("plan_org_id_missing", decoded_claims=list(claims.keys()))
            await send_plan_json(websocket, "auth_error", {
                "error": "Organization ID not found in token",
                "code": "ORG_ID_MISSING",
            })
            return None

        user_id = claims.get('sub')

        # Only a short prefix of each identifier goes into the log.
        if len(organization_id) > 8:
            logged_org = organization_id[:8]
        else:
            logged_org = organization_id

        if user_id and len(user_id) > 8:
            logged_user = user_id[:8]
        else:
            logged_user = user_id

        logger.info(
            "plan_websocket_authenticated",
            organization_id=logged_org,
            user_id=logged_user,
        )

        success_payload = {
            "organization_id": organization_id,
            "user_id": user_id,
            "authenticated_at": datetime.now(timezone.utc).isoformat(),
        }
        await send_plan_json(websocket, "auth_success", success_payload)

        return organization_id

    except Exception as e:
        logger.error("plan_authentication_failed", error=str(e), error_type=type(e).__name__)
        await send_plan_json(websocket, "auth_error", {
            "error": "Authentication failed",
            "code": "AUTH_FAILED",
        })
        return None
791
+
792
+
793
+ @router.websocket("/ws/{execution_id}")
794
+ async def websocket_plan_execution_stream(
795
+ websocket: WebSocket,
796
+ execution_id: str,
797
+ last_event_id: Optional[str] = None,
798
+ db: Session = Depends(get_db),
799
+ ):
800
+ """
801
+ WebSocket endpoint for plan execution streaming.
802
+
803
+ Streams plan execution events to frontend clients with automatic reconnection support.
804
+ This is the WebSocket equivalent of the SSE endpoint at /{execution_id}/stream.
805
+
806
+ Args:
807
+ websocket: WebSocket connection
808
+ execution_id: Plan execution ID to stream
809
+ last_event_id: Last received event ID (for resumption)
810
+ db: Database session
811
+
812
+ Flow:
813
+ 1. Accept WebSocket connection
814
+ 2. Wait for auth message with JWT token
815
+ 3. Validate token and extract organization_id
816
+ 4. Send 'connected' event
817
+ 5. Stream events from Redis (plan_started, task_completed, etc.)
818
+ 6. Handle ping/pong for keepalive
819
+ 7. Support resumption via last_event_id
820
+
821
+ Event Types:
822
+ - connected: Connection established
823
+ - auth_success: Authentication successful
824
+ - auth_error: Authentication failed
825
+ - plan_started: Plan execution started
826
+ - todo_list_initialized: Task list prepared
827
+ - todo_item_updated: Task status changed
828
+ - task_started: Individual task started
829
+ - task_completed: Individual task completed
830
+ - task_waiting_for_input: Task needs user input
831
+ - done: Plan execution completed
832
+ - error: Error occurred
833
+ """
834
+ organization_id: Optional[str] = None
835
+ authenticated = False
836
+
837
+ try:
838
+ # Accept connection first
839
+ await websocket.accept()
840
+
841
+ # Track connection in pending state
842
+ plan_connection_manager._connections[execution_id] = websocket
843
+ plan_connection_manager._org_connections.setdefault("pending", set()).add(execution_id)
844
+ plan_connection_manager._stats["total_connections"] += 1
845
+ plan_connection_manager._stats["active_connections"] = len(plan_connection_manager._connections)
846
+
847
+ logger.info(
848
+ "plan_websocket_connection_started",
849
+ execution_id=execution_id[:8],
850
+ last_event_id=last_event_id,
851
+ )
852
+
853
+ # Wait for authentication message (timeout after 5 seconds)
854
+ try:
855
+ auth_message = await asyncio.wait_for(websocket.receive_text(), timeout=5.0)
856
+ auth_data = json.loads(auth_message)
857
+
858
+ if auth_data.get("type") == "auth" and "token" in auth_data:
859
+ organization_id = await handle_plan_auth(websocket, auth_data["token"])
860
+ if not organization_id:
861
+ await websocket.close(code=4001, reason="Authentication failed")
862
+ return
863
+
864
+ authenticated = True
865
+
866
+ # Update connection from pending to actual org
867
+ if "pending" in plan_connection_manager._org_connections:
868
+ plan_connection_manager._org_connections["pending"].discard(execution_id)
869
+ if not plan_connection_manager._org_connections["pending"]:
870
+ del plan_connection_manager._org_connections["pending"]
871
+
872
+ plan_connection_manager._org_connections.setdefault(organization_id, set()).add(execution_id)
873
+
874
+ except asyncio.TimeoutError:
875
+ logger.error("plan_auth_timeout", execution_id=execution_id[:8])
876
+ await websocket.close(code=4002, reason="Authentication timeout")
877
+ return
878
+ except json.JSONDecodeError:
879
+ logger.error("plan_invalid_auth_message", execution_id=execution_id[:8])
880
+ await websocket.close(code=4003, reason="Invalid authentication message")
881
+ return
882
+
883
+ # Verify plan execution exists
884
+ plan_exec = db.query(PlanExecution).filter(
885
+ PlanExecution.execution_id == execution_id
886
+ ).first()
887
+
888
+ if not plan_exec:
889
+ await send_plan_json(websocket, "error", {"error": "Plan execution not found"})
890
+ await websocket.close(code=4004, reason="Plan execution not found")
891
+ return
892
+
893
+ # Send connected event
894
+ await send_plan_json(websocket, "connected", {
895
+ "execution_id": execution_id,
896
+ "organization_id": organization_id,
897
+ "plan_title": plan_exec.title,
898
+ "total_tasks": plan_exec.total_tasks,
899
+ "status": plan_exec.status.value if hasattr(plan_exec.status, 'value') else plan_exec.status,
900
+ "connected_at": datetime.now(timezone.utc).isoformat(),
901
+ })
902
+
903
+ # Get Redis client
904
+ redis_client = get_redis_client()
905
+ if not redis_client:
906
+ await send_plan_json(websocket, "error", {"error": "Redis not available"})
907
+ await websocket.close(code=503, reason="Event streaming unavailable")
908
+ return
909
+
910
+ # Redis key for plan events
911
+ redis_key = f"plan-execution:{execution_id}:events"
912
+
913
+ # Parse last_event_id for reconnection
914
+ last_redis_event_index = -1
915
+ if last_event_id:
916
+ try:
917
+ parts = last_event_id.split("_")
918
+ if len(parts) >= 2 and parts[0] == execution_id:
919
+ last_redis_event_index = int(parts[1]) - 1 # Convert to 0-indexed
920
+ logger.info(
921
+ "plan_websocket_reconnection",
922
+ execution_id=execution_id[:8],
923
+ last_index=last_redis_event_index
924
+ )
925
+ except (ValueError, IndexError):
926
+ logger.warning("plan_invalid_last_event_id", execution_id=execution_id[:8])
927
+
928
+ event_id_counter = last_redis_event_index + 1
929
+
930
+ def generate_event_id() -> str:
931
+ nonlocal event_id_counter
932
+ event_id_counter += 1
933
+ return f"{execution_id}_{event_id_counter}_{int(datetime.now(timezone.utc).timestamp() * 1000000)}"
934
+
935
+ logger.info("plan_websocket_streaming_started", execution_id=execution_id[:8])
936
+
937
+ # Listen for both Redis events and client messages
938
+ async def listen_redis():
939
+ """Listen for plan events from Redis."""
940
+ nonlocal last_redis_event_index
941
+
942
+ try:
943
+ while True:
944
+ # Get current events from Redis
945
+ redis_events = await redis_client.lrange(redis_key, 0, -1)
946
+ total_events = len(redis_events)
947
+
948
+ if total_events > (last_redis_event_index + 1):
949
+ # New events available - Redis LPUSH stores in reverse order
950
+ chronological_events = list(reversed(redis_events))
951
+
952
+ # Send only NEW events
953
+ for i in range(last_redis_event_index + 1, len(chronological_events)):
954
+ event_json = chronological_events[i]
955
+
956
+ try:
957
+ event_data = json.loads(event_json)
958
+ event_type = event_data.get("event_type", "unknown")
959
+ payload = event_data.get("data", {})
960
+
961
+ # Add event ID for client tracking
962
+ payload["event_id"] = generate_event_id()
963
+
964
+ await send_plan_json(websocket, event_type, payload)
965
+ last_redis_event_index = i
966
+
967
+ except json.JSONDecodeError:
968
+ logger.warning(
969
+ "plan_malformed_event_skipped",
970
+ execution_id=execution_id[:8],
971
+ index=i
972
+ )
973
+ continue
974
+
975
+ # Refresh plan execution status from DB
976
+ db.refresh(plan_exec)
977
+
978
+ # Check if plan is complete
979
+ if plan_exec.status in [
980
+ PlanExecutionStatus.COMPLETED,
981
+ PlanExecutionStatus.FAILED,
982
+ PlanExecutionStatus.CANCELLED,
983
+ ]:
984
+ # Send final done event
985
+ await send_plan_json(websocket, "done", {
986
+ "event_id": generate_event_id(),
987
+ "status": plan_exec.status.value if hasattr(plan_exec.status, 'value') else plan_exec.status,
988
+ "completed_tasks": plan_exec.completed_tasks,
989
+ "failed_tasks": plan_exec.failed_tasks or 0,
990
+ "total_tasks": plan_exec.total_tasks,
991
+ })
992
+ logger.info("plan_websocket_complete", execution_id=execution_id[:8])
993
+ return
994
+
995
+ # Check if waiting for user input
996
+ if plan_exec.status == PlanExecutionStatus.PENDING_USER_INPUT:
997
+ await send_plan_json(websocket, "plan_waiting_for_input", {
998
+ "event_id": generate_event_id(),
999
+ "status": "pending_user_input",
1000
+ "waiting_tasks": plan_exec.waiting_tasks or [],
1001
+ "message": "Plan paused - waiting for user input",
1002
+ })
1003
+ # Longer sleep when paused
1004
+ await asyncio.sleep(2.0)
1005
+ continue
1006
+
1007
+ # Normal polling interval (200ms)
1008
+ await asyncio.sleep(0.2)
1009
+
1010
+ except Exception as e:
1011
+ logger.error("plan_redis_stream_error", error=str(e), error_type=type(e).__name__)
1012
+ raise
1013
+
1014
+ async def listen_client():
1015
+ """Listen for client messages (ping, etc.)."""
1016
+ try:
1017
+ while True:
1018
+ message = await websocket.receive_text()
1019
+ data = json.loads(message)
1020
+
1021
+ if data.get("type") == "ping":
1022
+ await send_plan_json(websocket, "pong", {
1023
+ "timestamp": int(datetime.now(timezone.utc).timestamp() * 1000)
1024
+ })
1025
+
1026
+ elif data.get("type") == "resume":
1027
+ logger.info(
1028
+ "plan_resume_requested",
1029
+ execution_id=execution_id[:8],
1030
+ last_event_id=data.get("last_event_id")
1031
+ )
1032
+
1033
+ except WebSocketDisconnect:
1034
+ pass
1035
+ except Exception as e:
1036
+ logger.error("plan_client_message_error", error=str(e))
1037
+
1038
+ # Run both listeners concurrently
1039
+ redis_task = asyncio.create_task(listen_redis())
1040
+ client_task = asyncio.create_task(listen_client())
1041
+
1042
+ # Wait for either task to complete
1043
+ done, pending = await asyncio.wait(
1044
+ {redis_task, client_task},
1045
+ return_when=asyncio.FIRST_COMPLETED
1046
+ )
1047
+
1048
+ # Cancel pending tasks
1049
+ for task in pending:
1050
+ task.cancel()
1051
+ try:
1052
+ await task
1053
+ except asyncio.CancelledError:
1054
+ pass
1055
+
1056
+ except WebSocketDisconnect:
1057
+ logger.info("plan_client_disconnected", execution_id=execution_id[:8])
1058
+
1059
+ except Exception as e:
1060
+ logger.error(
1061
+ "plan_websocket_error",
1062
+ execution_id=execution_id[:8],
1063
+ error=str(e),
1064
+ error_type=type(e).__name__,
1065
+ )
1066
+ try:
1067
+ await send_plan_json(websocket, "error", {"error": str(e)})
1068
+ except:
1069
+ pass
1070
+
1071
+ finally:
1072
+ # Cleanup
1073
+ if organization_id:
1074
+ await plan_connection_manager.disconnect(execution_id, organization_id)
1075
+
1076
+ logger.info("plan_websocket_closed", execution_id=execution_id[:8])
1077
+
1078
+
1079
@router.get("/ws/stats")
async def plan_websocket_stats():
    """Return the statistics reported by the plan WebSocket connection manager."""
    # Pure delegation: whatever get_stats() produces is returned unchanged.
    connection_stats = plan_connection_manager.get_stats()
    return connection_stats
1083
+
1084
+
1085
@router.post("/events/{execution_id}")
async def publish_plan_event(
    execution_id: str,
    event: dict,
):
    """
    Persist a plan event in Redis and broadcast it (called by Temporal activities).

    The event is JSON-encoded once and then written to two places:
    a per-execution list (so the streaming endpoint can replay it) and a
    per-execution pub/sub channel (for real-time delivery).

    Args:
        execution_id: Plan execution the event belongs to.
        event: Event payload; expected to carry event_type, data and timestamp.

    Returns:
        {"success": True} on success, otherwise {"success": False, "error": ...}.
        Failures are reported in the body rather than raised.
    """
    short_id = execution_id[:8]  # truncated id used for log correlation only

    try:
        client = get_redis_client()
        if not client:
            logger.warning("redis_not_available", execution_id=short_id)
            return {"success": False, "error": "Redis not available"}

        serialized = json.dumps(event)

        events_key = f"plan-execution:{execution_id}:events"
        stream_channel = f"plan-execution:{execution_id}:stream"

        # Replay buffer: push to the head, keep at most 1000 entries,
        # and let the whole list expire after 3600 seconds.
        await client.lpush(events_key, serialized)
        await client.ltrim(events_key, 0, 999)
        await client.expire(events_key, 3600)

        # Real-time fan-out to subscribers of the execution's channel.
        await client.publish(stream_channel, serialized)

        logger.debug(
            "plan_event_published_to_redis",
            execution_id=short_id,
            event_type=event.get("event_type"),
        )
    except Exception as exc:
        logger.error(
            "plan_event_publish_error",
            execution_id=short_id,
            error=str(exc),
        )
        return {"success": False, "error": str(exc)}
    else:
        return {"success": True}
1132
+
1133
+
1134
@router.patch("/{execution_id}")
def update_plan_execution(
    execution_id: str,
    updates: dict,
    db: Session = Depends(get_db),
):
    """
    Update plan execution state (called by Temporal activities).

    Internal endpoint used by workflows to update plan state. Only mapped
    column attributes of PlanExecution can be written; primary-key columns
    and ``execution_id`` (the lookup key for this route) are never modified.
    Keys that are not writable are ignored and reported in the log.

    Args:
        execution_id: Identifier of the plan execution to update.
        updates: Mapping of attribute name to new value.
        db: Database session injected by FastAPI.

    Returns:
        {"success": True} once the changes are committed.

    Raises:
        HTTPException: 404 when no plan execution matches ``execution_id``.
    """
    from sqlalchemy import inspect as sa_inspect

    plan_exec = db.query(PlanExecution).filter(
        PlanExecution.execution_id == execution_id
    ).first()

    if not plan_exec:
        raise HTTPException(status_code=404, detail="Plan execution not found")

    # Build the allow-list from the ORM mapping instead of trusting hasattr():
    # hasattr() is also true for methods, relationships and internal
    # attributes, which a request body must never be able to overwrite.
    mapper = sa_inspect(PlanExecution)
    protected = {mapper.get_property_by_column(col).key for col in mapper.primary_key}
    protected.add("execution_id")
    writable = {attr.key for attr in mapper.column_attrs} - protected

    applied = []
    ignored = []
    for key, value in updates.items():
        if key in writable:
            setattr(plan_exec, key, value)
            applied.append(key)
        else:
            ignored.append(key)

    # Kept as a naive UTC timestamp, exactly as before, so stored values do not shift.
    plan_exec.updated_at = datetime.utcnow()
    db.commit()

    logger.info(
        "plan_execution_updated",
        execution_id=execution_id,
        updates=applied,
        ignored=ignored,
    )

    return {"success": True}
1167
+
1168
+
1169
@router.post("/{execution_id}/continue")
async def continue_plan_execution(
    execution_id: str,
    request: dict,
    http_request: Request,
    db: Session = Depends(get_db),
):
    """
    Continue a plan execution that's waiting for user input.

    This endpoint:
    1. Accepts batch responses for multiple waiting tasks
    2. Sends each message to the matching agent execution over HTTP
    3. Updates the plan's status and waiting-task list in the database
    4. Sends a "continue_task_signal" to the plan workflow for each answered task

    Body:
    {
        "responses": [
            {
                "execution_id": "agent-exec-abc-123",
                "message": "5 + 3"
            },
            {
                "execution_id": "agent-exec-xyz-456",
                "message": "Calculate for today"
            }
        ]
    }

    Returns:
        A summary dict with the executions messaged, the task ids signaled,
        the number of tasks still waiting and the new plan status.

    Raises:
        HTTPException: 401 without an authorization token; 400 for a missing
            or malformed ``responses`` array, a plan that is not waiting for
            input, or execution ids that are not waiting; 404 for an unknown
            plan; 500 when a message or workflow signal cannot be delivered.
    """
    try:
        # Extract auth token
        auth_header = http_request.headers.get("authorization", "")
        jwt_token = auth_header.replace("Bearer ", "").replace("UserKey ", "") if auth_header else None

        if not jwt_token:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Authorization token required"
            )

        # Get responses from request
        responses = request.get("responses", [])
        if not responses:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="'responses' array is required"
            )

        # Reject malformed entries up front so they surface as a client error
        # instead of a KeyError/TypeError turned into a 500 further down.
        well_formed = isinstance(responses, list) and all(
            isinstance(resp, dict)
            and isinstance(resp.get("execution_id"), str)
            and "message" in resp
            for resp in responses
        )
        if not well_formed:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Each response must be an object with 'execution_id' and 'message'"
            )

        # Verify plan execution exists
        plan_exec = db.query(PlanExecution).filter(
            PlanExecution.execution_id == execution_id
        ).first()

        if not plan_exec:
            raise HTTPException(status_code=404, detail="Plan execution not found")

        # The status may be stored as an enum member or a plain string;
        # compare on the underlying value so both representations work.
        current_status = plan_exec.status.value if hasattr(plan_exec.status, 'value') else plan_exec.status
        if current_status != "pending_user_input":
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Plan is not waiting for user input (status: {current_status})"
            )

        # Get waiting tasks from plan
        waiting_tasks = plan_exec.waiting_tasks or []
        if not waiting_tasks:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="No tasks are waiting for user input"
            )

        # Validate that provided execution_ids exist in waiting tasks
        waiting_execution_ids = {task["execution_id"] for task in waiting_tasks}
        provided_execution_ids = {resp["execution_id"] for resp in responses}

        invalid_ids = provided_execution_ids - waiting_execution_ids
        if invalid_ids:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid execution_ids: {invalid_ids}. Not in waiting tasks."
            )

        # Send messages to each agent execution FIRST (before signaling workflow)
        # This allows agents to process and complete before workflow resumes
        import httpx
        control_plane_url = os.getenv("CONTROL_PLANE_URL", "https://control-plane.kubiya.ai")

        sent_messages = []
        async with httpx.AsyncClient(timeout=30.0) as client:
            for response in responses:
                exec_id = response["execution_id"]
                message = response["message"]

                # Send message to agent execution
                try:
                    msg_response = await client.post(
                        f"{control_plane_url}/api/v1/executions/{exec_id}/message",
                        json={"message": message},
                        headers={"Authorization": f"Bearer {jwt_token}"}
                    )

                    if msg_response.status_code not in (200, 201, 202):
                        logger.error(
                            "failed_to_send_message_to_execution",
                            execution_id=exec_id,
                            status=msg_response.status_code,
                            error=msg_response.text
                        )
                        raise Exception(f"Failed to send message to {exec_id}: {msg_response.text}")

                    sent_messages.append(exec_id)
                    logger.info("message_sent_to_execution", execution_id=exec_id)

                except Exception as e:
                    # Any delivery problem aborts the whole batch with a 500.
                    logger.error("send_message_failed", execution_id=exec_id, error=str(e))
                    raise HTTPException(
                        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                        detail=f"Failed to send message to {exec_id}: {str(e)}"
                    )

        # Wait a moment for agents to process messages
        import asyncio
        await asyncio.sleep(2)

        # Update plan status back to RUNNING and clear waiting tasks for the ones we responded to
        remaining_waiting = [
            task for task in waiting_tasks
            if task["execution_id"] not in provided_execution_ids
        ]

        new_status = "pending_user_input" if remaining_waiting else "running"

        plan_exec.status = new_status
        plan_exec.waiting_tasks = remaining_waiting
        plan_exec.updated_at = datetime.utcnow()
        db.commit()

        logger.info(
            "plan_continuation_initiated",
            execution_id=execution_id,
            responses_count=len(responses),
            remaining_waiting=len(remaining_waiting),
            new_status=new_status,
        )

        # Signal the paused workflow to continue (send signal for each task)
        try:
            temporal_client = await get_temporal_client()
            workflow_id = f"plan-{execution_id}"
            workflow_handle = temporal_client.get_workflow_handle(workflow_id)

            # Map execution_id to task_id
            task_id_map = {task["execution_id"]: task["task_id"] for task in waiting_tasks}

            signals_sent = []
            for response in responses:
                exec_id = response["execution_id"]
                message = response["message"]
                task_id = task_id_map.get(exec_id)

                if task_id is not None:
                    # Send signal to workflow to continue this task
                    await workflow_handle.signal(
                        "continue_task_signal",
                        {"task_id": task_id, "user_message": message}
                    )
                    signals_sent.append(task_id)
                    logger.info(
                        "workflow_signal_sent",
                        execution_id=execution_id,
                        task_id=task_id,
                        workflow_id=workflow_id
                    )

            logger.info(
                "plan_workflow_signaled",
                execution_id=execution_id,
                signals_sent=len(signals_sent),
                task_ids=signals_sent
            )

        except Exception as e:
            logger.error("failed_to_signal_workflow", error=str(e), execution_id=execution_id)
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Failed to signal workflow: {str(e)}"
            )

        return {
            "success": True,
            "execution_id": execution_id,
            "messages_sent": len(sent_messages),
            "sent_to_executions": sent_messages,
            "signals_sent": len(signals_sent),
            "task_ids": signals_sent,
            "remaining_waiting_tasks": len(remaining_waiting),
            "plan_status": new_status,
            "workflow_signaled": True,
        }

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error("continue_plan_error", error=str(e))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to continue plan: {str(e)}"
        )
1376
+
1377
+
1378
@router.post("/{execution_id}/tasks/{task_id}/message")
async def send_task_message(
    execution_id: str,
    task_id: int,
    request: dict,
    http_request: Request,
    db: Session = Depends(get_db),
):
    """
    Send a message to a task that's waiting for user input.

    This endpoint:
    1. Verifies the plan execution exists and, when the stored plan carries a
       team_breakdown, that the task id appears in its first entry
    2. Signals the Temporal workflow ("continue_task_signal") with the task id
       and the user's message

    Body:
    {
        "message": "user's message to continue the conversation"
    }

    Returns:
        A dict echoing the execution id and task id with success flags.

    Raises:
        HTTPException: 401 without an authorization token, 400 without a
            message, 404 for an unknown plan or task, 500 when the workflow
            signal fails.
    """
    try:
        # Extract auth token (only its presence is checked here)
        auth_header = http_request.headers.get("authorization", "")
        jwt_token = auth_header.replace("Bearer ", "").replace("UserKey ", "") if auth_header else None

        if not jwt_token:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Authorization token required"
            )

        # Get message from request
        user_message = request.get("message")
        if not user_message:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="'message' field is required"
            )

        # Verify plan execution exists
        plan_exec = db.query(PlanExecution).filter(
            PlanExecution.execution_id == execution_id
        ).first()

        if not plan_exec:
            raise HTTPException(status_code=404, detail="Plan execution not found")

        # Verify task exists in plan. A missing plan_json (or a null "plan"
        # entry) is treated as an empty plan instead of raising AttributeError.
        plan_json = plan_exec.plan_json or {}
        plan_data = plan_json.get("plan", plan_json) or {}
        team_breakdown = plan_data.get("team_breakdown")
        if team_breakdown:
            tasks = team_breakdown[0].get("tasks", [])
            task = next((t for t in tasks if t.get("id") == task_id), None)

            # The lookup result is only consulted inside this branch, so it
            # can never be read before it has been assigned.
            if not task:
                raise HTTPException(status_code=404, detail=f"Task {task_id} not found in plan")

        # Signal the Temporal workflow
        try:
            temporal_client = await get_temporal_client()
            workflow_id = f"plan-{execution_id}"

            # Send signal to workflow
            workflow_handle = temporal_client.get_workflow_handle(workflow_id)
            await workflow_handle.signal(
                "continue_task_signal",
                {"task_id": task_id, "user_message": user_message}
            )

            logger.info(
                "task_continuation_signal_sent",
                execution_id=execution_id,
                task_id=task_id,
                workflow_id=workflow_id,
            )

            return {
                "success": True,
                "execution_id": execution_id,
                "task_id": task_id,
                "message_sent": True,
                "workflow_signaled": True,
            }

        except Exception as e:
            logger.error(
                "failed_to_signal_workflow",
                error=str(e),
                execution_id=execution_id,
                task_id=task_id,
            )
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Failed to signal workflow: {str(e)}"
            )

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error("send_task_message_error", error=str(e))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to send message: {str(e)}"
        )