kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479)
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,849 @@
1
+ """
2
+ ExecutionStreamer - Main orchestrator for resumable execution streaming.
3
+
4
+ This module provides the ExecutionStreamer class that orchestrates the complete
5
+ streaming lifecycle: immediate connection, historical message loading, and live
6
+ event streaming with gap recovery support.
7
+
8
+ Architecture:
9
+ This is the core component of the Resumable Execution Stream Architecture that
10
+ ties together all the specialized streaming components:
11
+
12
+ 1. Phase 1: Immediate Connection (<50ms)
13
+ - Send 'connected' event immediately to unblock EventSource
14
+ - Don't wait for any slow operations (DB, Temporal queries)
15
+
16
+ 2. Phase 2: Stream Historical Messages
17
+ - Use HistoryLoader to progressively stream database messages
18
+ - Yield one message at a time for instant UI rendering
19
+ - Track sent messages via MessageDeduplicator
20
+
21
+ 3. Phase 3: History Complete
22
+ - Send 'history_complete' event to signal transition
23
+ - Include message count and truncation flags
24
+
25
+ 4. Phase 4: Live Event Streaming
26
+ - Use LiveEventSource to stream real-time Redis events
27
+ - Poll at 200ms intervals for new events
28
+ - Continue until workflow completes or timeout
29
+
30
+ Gap Recovery:
31
+ - Supports Last-Event-ID pattern for client reconnection
32
+ - Uses EventBuffer to detect and handle gaps
33
+ - Replays missing events when possible
34
+ - Notifies client when gaps are unrecoverable
35
+
36
+ Test Strategy:
37
+ - Integration test full streaming flow (all 4 phases in order)
38
+ - Test phase transitions occur at correct times with correct data
39
+ - Test Last-Event-ID resumption skips already-sent events
40
+ - Test gap detection and replay from EventBuffer
41
+ - Test error handling in each phase (graceful degradation)
42
+ - Test statistics tracking across phases
43
+ - Test timeout handling (0 = no timeout, streams until task completes)
44
+ - Test workflow completion detection stops streaming
45
+ - Test deduplication across history + live phases
46
+ """
47
+
48
+ import asyncio
49
+ import logging
50
+ import time
51
+ from typing import Any, AsyncGenerator, Dict, List, Optional
52
+
53
+ from structlog import get_logger
54
+
55
+ from .deduplication import MessageDeduplicator
56
+ from .event_buffer import EventBuffer
57
+ from .event_formatter import EventFormatter
58
+ from .history_loader import HistoryLoader
59
+ from .live_source import LiveEventSource
60
+ from ..services.worker_health import WorkerHealthChecker, DegradationMode
61
+
62
+ logger = get_logger(__name__)
63
+
64
+
65
class ExecutionStreamer:
    """
    Main orchestrator for resumable execution streaming.

    This class coordinates all phases of execution streaming:
    1. Immediate connection acknowledgment
    2. Last-Event-ID resumption (replay or gap notification)
    3. Progressive historical message streaming
    4. History completion notification
    5. Live event streaming with completion detection
    6. Terminal 'done' event

    The streamer supports gap recovery via Last-Event-ID pattern, enabling
    clients to reconnect and resume from their last received event without
    missing any updates.

    Example usage:
        ```python
        streamer = ExecutionStreamer(
            execution_id="exec-123",
            organization_id="org-456",
            db_session=db,
            redis_client=redis,
            temporal_client=temporal_client,
            last_event_id="exec-123_42_1234567890",  # Optional, for resumption
            timeout_seconds=0,  # 0 = no timeout
        )

        async for sse_event in streamer.stream():
            # Send SSE event to client
            yield sse_event
        ```

    Architecture:
        - Delegates to specialized components for each concern
        - Maintains single deduplicator instance shared across phases
        - Uses EventBuffer for gap detection and replay
        - Uses EventFormatter for consistent SSE formatting
    """

    # Live event types whose formatter receives the raw event dict unchanged,
    # mapped to the EventFormatter method name. The name is resolved lazily
    # with getattr, so a formatter method is only looked up when an event of
    # that type actually arrives.
    _PASSTHROUGH_FORMATTERS = {
        "message": "format_message_event",
        "tool_started": "format_tool_started_event",
        "tool_completed": "format_tool_completed_event",
        "member_tool_started": "format_member_tool_started_event",
        "member_tool_completed": "format_member_tool_completed_event",
        "message_chunk": "format_message_chunk_event",
        "member_message_chunk": "format_member_message_chunk_event",
        "member_message_complete": "format_member_message_complete_event",
    }

    # Seconds between service-recovery probes while live streaming.
    _HEALTH_CHECK_INTERVAL = 30

    def __init__(
        self,
        execution_id: str,
        organization_id: str,
        db_session,  # SQLAlchemy session
        redis_client,  # Redis client (UpstashRedisClient or StandardRedisClient)
        temporal_client,  # temporalio.client.Client
        last_event_id: Optional[str] = None,
        timeout_seconds: int = 0,  # 0 = no timeout, stream until task completes
        execution_type: Optional[str] = None,
        health_checker: Optional[WorkerHealthChecker] = None,
    ):
        """
        Initialize ExecutionStreamer.

        Args:
            execution_id: Execution ID to stream
            organization_id: Organization ID for authorization
            db_session: SQLAlchemy database session for HistoryLoader
            redis_client: Redis client for LiveEventSource (can be None)
            temporal_client: Temporal client for workflow queries (can be None)
            last_event_id: Last event ID client received (for resumption)
            timeout_seconds: Maximum streaming duration (default: 0 = no timeout)
            execution_type: Execution type ("AGENT" or "TEAM") to determine
                workflow_id; anything other than "TEAM" is treated as an agent
            health_checker: WorkerHealthChecker instance for graceful
                degradation (optional; one is built from the clients if omitted)
        """
        self.execution_id = execution_id
        self.organization_id = organization_id
        self.db_session = db_session
        self.redis_client = redis_client
        self.temporal_client = temporal_client
        self.last_event_id = last_event_id
        self.timeout_seconds = timeout_seconds
        self.execution_type = execution_type or "AGENT"

        # Determine workflow ID based on execution type
        if self.execution_type == "TEAM":
            self.workflow_id = f"team-execution-{execution_id}"
        else:
            self.workflow_id = f"agent-execution-{execution_id}"

        # Core components (initialized once, reused across phases)
        self.deduplicator = MessageDeduplicator()
        self.formatter = EventFormatter(execution_id)
        self.buffer = EventBuffer(execution_id)

        # Health checker for graceful degradation
        self.health_checker = health_checker or WorkerHealthChecker(
            temporal_client=temporal_client,
            redis_client=redis_client,
            db_session=db_session,
        )

        # Temporal workflow handle (cached)
        self._workflow_handle = None
        self._workflow_handle_error = None

        # Degradation tracking (both stay None until stream() runs a health check)
        self._degradation_mode = None
        self._last_health_check = None

        # Message tracking for done event fallback
        self._streamed_messages = []

        # Statistics tracking
        self._stats = {
            "phase": "initializing",
            "start_time": None,
            "connection_time_ms": 0,
            "history_load_time_ms": 0,
            "live_streaming_time_ms": 0,
            "total_events_sent": 0,
            "history_messages_sent": 0,
            "live_events_sent": 0,
            "events_buffered": 0,
            "events_replayed": 0,
            "deduplication_stats": {},
            "errors": [],
            "degradation_mode": None,
        }

        logger.info(
            "execution_streamer_initialized",
            execution_id=execution_id[:8],
            organization_id=organization_id[:8],
            workflow_id=self.workflow_id,
            has_last_event_id=bool(last_event_id),
            timeout_seconds=timeout_seconds,
        )

    @staticmethod
    def _serialize_for_buffer(payload: Any) -> str:
        """
        Serialize an event payload to a JSON string for the EventBuffer.

        Values JSON cannot encode natively are stringified via ``default=str``.
        If encoding still fails (e.g. non-scalar dict keys, circular
        references) the Python ``str()`` form is returned so buffering never
        raises.

        Args:
            payload: Message or event (normally a dict) to buffer

        Returns:
            JSON text, or ``str(payload)`` when the payload cannot be encoded
        """
        import json

        try:
            return json.dumps(payload, default=str)
        except (TypeError, ValueError):
            return str(payload)

    @staticmethod
    def _decode_buffered_data(data_json: Any) -> Dict[str, Any]:
        """
        Turn a buffered payload back into a dict for replay.

        The result always carries ``"replay": True`` so clients can tell a
        replayed event from a fresh one. When the buffered value cannot be
        decoded into a JSON object, only that marker is returned.

        Args:
            data_json: Buffered payload (JSON text, or already a dict)

        Returns:
            Decoded payload dict including the ``replay`` marker
        """
        import json

        if isinstance(data_json, dict):
            return {**data_json, "replay": True}
        try:
            decoded = json.loads(data_json)
        except (TypeError, ValueError):
            return {"replay": True}
        if not isinstance(decoded, dict):
            return {"replay": True}
        return {**decoded, "replay": True}

    @staticmethod
    def _describe_degradation(degradation_mode):
        """
        Map a non-FULL degradation mode to client-facing text.

        Args:
            degradation_mode: DegradationMode reported by the health checker

        Returns:
            ``(reason, message)`` pair of strings for the 'degraded' event
        """
        if degradation_mode == DegradationMode.UNAVAILABLE:
            return (
                "All services unavailable",
                "Unable to stream execution data - all services are down",
            )
        if degradation_mode == DegradationMode.HISTORY_ONLY:
            return (
                "Live streaming unavailable",
                "Real-time updates unavailable. Showing historical data only.",
            )
        if degradation_mode == DegradationMode.LIVE_ONLY:
            return (
                "Historical data unavailable",
                "Database unavailable. Showing live updates only (no history).",
            )
        return (
            "Partial service availability",
            "Some services unavailable. Functionality may be limited.",
        )

    async def stream(self) -> AsyncGenerator[str, None]:
        """
        Main streaming generator that orchestrates all phases.

        This method executes the complete streaming lifecycle:
        1. Send immediate 'connected' event
        2. Run a health check and, if degraded, notify the client
        3. Handle Last-Event-ID resumption (replay or gap detection)
        4. Stream historical messages from database
        5. Send 'history_complete' event
        6. Stream live events until the live source finishes
        7. Send terminal 'done' event

        Any exception escaping a phase is logged, recorded in the stats and
        reported to the client as an 'error' event; final statistics are
        logged in all cases.

        Yields:
            SSE-formatted event strings ready to send to client

        Example:
            ```python
            async for sse_event in streamer.stream():
                # sse_event is already formatted: "id: ...\\nevent: ...\\ndata: ...\\n\\n"
                yield sse_event
            ```
        """
        self._stats["start_time"] = time.time()

        try:
            # ========== PHASE 1: IMMEDIATE CONNECTION ==========
            yield await self._phase_1_connect()

            # ========== HEALTH CHECK: Determine degradation mode ==========
            degradation_mode = await self.health_checker.get_degradation_mode()
            self._degradation_mode = degradation_mode
            self._last_health_check = time.time()
            self._stats["degradation_mode"] = degradation_mode.value

            logger.info(
                "health_check_complete",
                execution_id=self.execution_id[:8],
                degradation_mode=degradation_mode.value,
            )

            # Send degraded event if not in full mode
            if degradation_mode != DegradationMode.FULL:
                capabilities = self.health_checker.get_capabilities(degradation_mode)
                reason, message = self._describe_degradation(degradation_mode)

                yield self.formatter.format_degraded_event(
                    mode=degradation_mode.value,
                    reason=reason,
                    message=message,
                    capabilities=capabilities,
                )
                self._stats["total_events_sent"] += 1

                # If completely unavailable, stop here
                if degradation_mode == DegradationMode.UNAVAILABLE:
                    yield self.formatter.format_error_event(
                        error="All services unavailable - cannot stream execution data",
                        error_type="unavailable",
                    )
                    # Count the error event like every other emitted event
                    self._stats["total_events_sent"] += 1
                    return

            # ========== PHASE 2: RESUMPTION (if Last-Event-ID provided) ==========
            if self.last_event_id:
                async for event in self._phase_2_resumption():
                    yield event

            # History (phases 3 and 4) is skipped entirely in LIVE_ONLY mode
            history_enabled = degradation_mode != DegradationMode.LIVE_ONLY

            # ========== PHASE 3: STREAM HISTORICAL MESSAGES ==========
            if history_enabled:
                async for event in self._phase_3_history(degradation_mode):
                    yield event

            # ========== PHASE 4: HISTORY COMPLETE ==========
            # Only send if we attempted history loading
            if history_enabled:
                yield await self._phase_4_history_complete()

            # ========== PHASE 5: LIVE STREAMING ==========
            # Skip live if in HISTORY_ONLY mode
            if degradation_mode != DegradationMode.HISTORY_ONLY:
                async for event in self._phase_5_live_streaming(degradation_mode):
                    yield event

            # ========== PHASE 6: SEND DONE EVENT ==========
            # Send 'done' event to signal stream completion to CLI/clients
            # This is critical for clients that wait for a terminal event
            # IMPORTANT: Include messages array as fallback for completed executions
            # where frontend may not have received message events properly
            yield self.formatter.format_done_event(
                response=None,  # Response is in the messages already
                workflow_status="completed",
                messages=self._streamed_messages if self._streamed_messages else None,
            )
            self._stats["total_events_sent"] += 1

            logger.info(
                "phase_6_done_event_sent",
                execution_id=self.execution_id[:8],
                messages_included=len(self._streamed_messages),
            )

        except Exception as e:
            # Top-level boundary: log, record, and tell the client
            logger.error(
                "streaming_orchestration_error",
                execution_id=self.execution_id[:8],
                phase=self._stats["phase"],
                error=str(e),
                error_type=type(e).__name__,
            )
            self._stats["errors"].append({
                "phase": self._stats["phase"],
                "error": str(e),
                "error_type": type(e).__name__,
            })

            # Send error event to client
            yield self.formatter.format_error_event(
                error=str(e),
                error_type="streaming_error",
            )
            self._stats["total_events_sent"] += 1
        finally:
            # Log final statistics
            elapsed = time.time() - self._stats["start_time"]
            self._stats["total_duration_ms"] = int(elapsed * 1000)
            self._stats["deduplication_stats"] = self.deduplicator.get_stats()

            logger.info(
                "execution_streaming_complete",
                execution_id=self.execution_id[:8],
                stats=self._stats,
            )

    async def _phase_1_connect(self) -> str:
        """
        Phase 1: Send immediate 'connected' event.

        This event is built without any database or Temporal access so it can
        be sent first, unblocking the EventSource connection before slower
        operations run.

        Returns:
            SSE-formatted 'connected' event string
        """
        t0 = time.time()
        self._stats["phase"] = "connecting"

        logger.info(
            "phase_1_connecting",
            execution_id=self.execution_id[:8],
        )

        # Send connected event with minimal data (no DB/Temporal queries)
        event = self.formatter.format_connected_event(
            organization_id=self.organization_id,
            status="pending",  # Default status, will be updated later
        )

        self._stats["connection_time_ms"] = int((time.time() - t0) * 1000)
        self._stats["total_events_sent"] += 1

        logger.info(
            "phase_1_connected",
            execution_id=self.execution_id[:8],
            duration_ms=self._stats["connection_time_ms"],
        )

        return event

    async def _phase_2_resumption(self) -> AsyncGenerator[str, None]:
        """
        Phase 2: Handle Last-Event-ID resumption (gap detection and replay).

        If the client provided a Last-Event-ID:
        1. Ask the EventBuffer for events after that ID
        2. If any exist, replay them with their original IDs and payloads
        3. Otherwise check for a buffer miss and notify the client of the gap

        This phase does nothing if no Last-Event-ID was provided. Errors are
        logged and recorded but not raised, so the caller proceeds to history
        loading and the client receives full history instead of a replay.

        Yields:
            SSE-formatted events for replay or gap notification
        """
        if not self.last_event_id:
            return

        self._stats["phase"] = "resumption"

        logger.info(
            "phase_2_resumption_start",
            execution_id=self.execution_id[:8],
            last_event_id=self.last_event_id,
        )

        try:
            # Check for buffered events to replay
            replay_events = self.buffer.replay_from_id(self.last_event_id)

            if replay_events:
                logger.info(
                    "replaying_buffered_events",
                    execution_id=self.execution_id[:8],
                    replay_count=len(replay_events),
                )

                for event_id, event_type, data_json in replay_events:
                    # Reuse the buffered event ID (don't regenerate) and send
                    # the buffered payload rather than an empty placeholder
                    yield self.formatter.format_event(
                        event_type=event_type,
                        data=self._decode_buffered_data(data_json),
                        event_id=event_id,
                    )
                    self._stats["events_replayed"] += 1
                    self._stats["total_events_sent"] += 1

            else:
                # Check if last_event_id is too old (buffer miss)
                buffer_miss = self.buffer.check_buffer_miss(self.last_event_id)

                if buffer_miss:
                    # Gap detected - notify client
                    logger.warning(
                        "gap_detected_notifying_client",
                        execution_id=self.execution_id[:8],
                        buffer_miss=buffer_miss,
                    )

                    yield self.formatter.format_gap_detected_event(
                        reason=buffer_miss.get("reason", "Unknown gap"),
                        buffer_oldest=buffer_miss.get("buffer_oldest"),
                    )
                    self._stats["total_events_sent"] += 1

        except Exception as e:
            logger.error(
                "phase_2_resumption_error",
                execution_id=self.execution_id[:8],
                error=str(e),
            )
            self._stats["errors"].append({
                "phase": "resumption",
                "error": str(e),
                "error_type": type(e).__name__,
            })

            # Continue to history load despite error
            # Client will receive full history instead of incremental replay

    async def _phase_3_history(self, degradation_mode: DegradationMode) -> AsyncGenerator[str, None]:
        """
        Phase 3: Stream historical messages from database.

        Messages produced by HistoryLoader are yielded one at a time. Each one
        is also remembered for the 'done' event fallback and added to the
        EventBuffer for gap recovery. The loader shares this streamer's
        deduplicator.

        If loading fails, a 'degraded' event is yielded and the error is
        recorded; the exception is not re-raised so live streaming can still
        run.

        Args:
            degradation_mode: Current degradation mode (used for logging)

        Yields:
            SSE-formatted 'message' events for each historical message
        """
        t0 = time.time()
        self._stats["phase"] = "history_loading"

        logger.info(
            "phase_3_history_start",
            execution_id=self.execution_id[:8],
            degradation_mode=degradation_mode.value,
        )

        try:
            # Create history loader with shared deduplicator
            history_loader = HistoryLoader(
                execution_id=self.execution_id,
                organization_id=self.organization_id,
                db_session=self.db_session,
                temporal_client=self.temporal_client,
                deduplicator=self.deduplicator,
                workflow_id=self.workflow_id,
            )

            # Stream messages progressively
            message_count = 0
            async for message in history_loader.stream():
                # Track message for done event fallback
                self._streamed_messages.append(message)

                # Format as SSE event
                event = self.formatter.format_message_event(message)

                # Buffer event for gap recovery.
                # NOTE(review): this ID is generated separately from the SSE
                # event above; confirm it matches the ID the client receives.
                event_id = self.formatter.generate_event_id()
                self.buffer.add_event(
                    event_id=event_id,
                    event_type="message",
                    data=self._serialize_for_buffer(message),
                )
                self._stats["events_buffered"] += 1

                yield event
                message_count += 1
                self._stats["total_events_sent"] += 1
                self._stats["history_messages_sent"] += 1

            # Get history loader stats
            history_stats = history_loader.get_stats()
            self._stats["history_load_time_ms"] = int((time.time() - t0) * 1000)

            logger.info(
                "phase_3_history_complete",
                execution_id=self.execution_id[:8],
                message_count=message_count,
                duration_ms=self._stats["history_load_time_ms"],
                history_stats=history_stats,
            )

        except Exception as e:
            logger.error(
                "phase_3_history_error",
                execution_id=self.execution_id[:8],
                error=str(e),
                error_type=type(e).__name__,
            )
            self._stats["errors"].append({
                "phase": "history_loading",
                "error": str(e),
                "error_type": type(e).__name__,
            })

            # Send degraded event to notify client
            yield self.formatter.format_degraded_event(
                mode="history_unavailable",
                reason="Failed to load message history",
                message=f"Database query failed: {str(e)[:100]}",
                capabilities=["live_events"],  # Can still serve live if Redis available
            )
            self._stats["total_events_sent"] += 1

            # Continue to live streaming despite history failure

    async def _phase_4_history_complete(self) -> str:
        """
        Phase 4: Send 'history_complete' event.

        Signals to the client that historical messages are done and the stream
        is moving on to live events. The event carries the number of history
        messages sent; truncation flags are always reported as False here.

        Returns:
            SSE-formatted 'history_complete' event string
        """
        self._stats["phase"] = "history_complete"

        logger.info(
            "phase_4_history_complete",
            execution_id=self.execution_id[:8],
            message_count=self._stats["history_messages_sent"],
        )

        event = self.formatter.format_history_complete_event(
            message_count=self._stats["history_messages_sent"],
            is_truncated=False,  # HistoryLoader handles truncation internally
            has_more=False,
        )

        self._stats["total_events_sent"] += 1

        return event

    def _format_live_event(self, event_type: str, event: Dict[str, Any]) -> str:
        """
        Format one live event dict as an SSE string.

        Args:
            event_type: Value of the event's "event_type" key
            event: Event dict produced by LiveEventSource

        Returns:
            SSE-formatted event string from the matching EventFormatter method;
            unknown types fall back to the generic ``format_event``
        """
        formatter = self.formatter

        method_name = self._PASSTHROUGH_FORMATTERS.get(event_type)
        if method_name is not None:
            return getattr(formatter, method_name)(event)

        if event_type == "keepalive":
            return formatter.format_keepalive()

        if event_type == "status":
            return formatter.format_status_event(
                status=event.get("status", "unknown"),
                metadata=event.get("data", {}),
            )

        # Thinking/reasoning event types
        if event_type == "thinking_start":
            data = event.get("data", {})
            return formatter.format_thinking_start_event(
                message_id=data.get("message_id", ""),
                index=data.get("index", 0),
                budget_tokens=data.get("budget_tokens"),
            )
        if event_type == "thinking_delta":
            data = event.get("data", {})
            return formatter.format_thinking_delta_event(
                message_id=data.get("message_id", ""),
                thinking=data.get("thinking", ""),
                index=data.get("index", 0),
            )
        if event_type == "thinking_complete":
            data = event.get("data", {})
            return formatter.format_thinking_complete_event(
                message_id=data.get("message_id", ""),
                index=data.get("index", 0),
                signature=data.get("signature"),
                tokens_used=data.get("tokens_used"),
            )

        if event_type == "degraded":
            # Handle legacy degraded events from LiveEventSource
            data = event.get("data", {})
            return formatter.format_degraded_event(
                mode="degraded",
                reason=data.get("reason", "unknown"),
                message=data.get("message", "Degraded mode"),
            )

        # Generic event: use the nested payload if present, else the event itself
        return formatter.format_event(
            event_type=event_type,
            data=event.get("data", event),
        )

    async def _phase_5_live_streaming(self, degradation_mode: DegradationMode) -> AsyncGenerator[str, None]:
        """
        Phase 5: Stream live events until the live source finishes.

        Events from LiveEventSource (which shares this streamer's
        deduplicator) are formatted, buffered for gap recovery (keepalives
        excepted) and yielded as they arrive. Streaming ends when the
        LiveEventSource iterator ends.

        While streaming, the health checker is re-queried at most once every
        ``_HEALTH_CHECK_INTERVAL`` seconds (checked when an event arrives); a
        'recovered' event is sent when the mode returns to FULL.

        If streaming fails, a 'degraded' event is yielded and the error is
        recorded; the exception is not re-raised.

        Args:
            degradation_mode: Current degradation mode (used for logging)

        Yields:
            SSE-formatted events for live updates, keepalives, status changes, etc.
        """
        t0 = time.time()
        self._stats["phase"] = "live_streaming"

        logger.info(
            "phase_5_live_streaming_start",
            execution_id=self.execution_id[:8],
            timeout_seconds=self.timeout_seconds,
            degradation_mode=degradation_mode.value,
        )

        try:
            # Get or create workflow handle
            workflow_handle = await self._get_workflow_handle()

            # Create live event source with shared deduplicator
            live_source = LiveEventSource(
                execution_id=self.execution_id,
                organization_id=self.organization_id,
                redis_client=self.redis_client,
                workflow_handle=workflow_handle,
                deduplicator=self.deduplicator,
                timeout_seconds=self.timeout_seconds,
                keepalive_interval=15,
                db_session=self.db_session,  # Pass database session for status polling
            )

            # Stream live events
            event_count = 0
            async for event in live_source.stream():
                # Periodic service-recovery probe. A missing timestamp (phase
                # invoked without stream()'s initial check) triggers one now.
                last_check = self._last_health_check
                if last_check is None or time.time() - last_check > self._HEALTH_CHECK_INTERVAL:
                    new_mode = await self.health_checker.get_degradation_mode()
                    self._last_health_check = time.time()

                    # If services recovered to FULL from degraded mode
                    previous_mode = self._degradation_mode
                    if new_mode == DegradationMode.FULL and previous_mode != DegradationMode.FULL:
                        logger.info(
                            "services_recovered",
                            execution_id=self.execution_id[:8],
                            old_mode=previous_mode.value if previous_mode is not None else None,
                            new_mode=new_mode.value,
                        )

                        # Notify client of recovery
                        recovery_event = self.formatter.format_recovered_event(
                            message="Services recovered, resuming full functionality"
                        )
                        yield recovery_event
                        self._stats["total_events_sent"] += 1

                        # Update tracking
                        self._degradation_mode = new_mode
                        self._stats["degradation_mode"] = new_mode.value

                # Event is already a dict with event_type and data
                event_type = event.get("event_type", "message")
                sse_event = self._format_live_event(event_type, event)

                # Buffer event for gap recovery (except keepalives)
                if event_type != "keepalive":
                    event_id = self.formatter.generate_event_id()
                    self.buffer.add_event(
                        event_id=event_id,
                        event_type=event_type,
                        data=self._serialize_for_buffer(event),
                    )
                    self._stats["events_buffered"] += 1

                yield sse_event
                event_count += 1
                self._stats["total_events_sent"] += 1
                self._stats["live_events_sent"] += 1

            self._stats["live_streaming_time_ms"] = int((time.time() - t0) * 1000)

            logger.info(
                "phase_5_live_streaming_complete",
                execution_id=self.execution_id[:8],
                event_count=event_count,
                duration_ms=self._stats["live_streaming_time_ms"],
            )

        except Exception as e:
            logger.error(
                "phase_5_live_streaming_error",
                execution_id=self.execution_id[:8],
                error=str(e),
                error_type=type(e).__name__,
            )
            self._stats["errors"].append({
                "phase": "live_streaming",
                "error": str(e),
                "error_type": type(e).__name__,
            })

            # Send degraded event to notify client
            yield self.formatter.format_degraded_event(
                mode="live_events_unavailable",
                reason="Failed to stream live events",
                message=f"Redis streaming failed: {str(e)[:100]}",
                capabilities=["history"],  # At least we served history
            )
            self._stats["total_events_sent"] += 1

            # Don't crash - we already served history if it was available

    async def _get_workflow_handle(self):
        """
        Get Temporal workflow handle with caching and error handling.

        The handle is requested from the Temporal client once. No timeout is
        applied here.
        NOTE(review): temporalio's ``get_workflow_handle`` appears to build
        the handle locally without contacting the server - confirm.

        Returns:
            Temporal workflow handle or None if unavailable

        Note:
            A successfully obtained handle is cached. If there is no Temporal
            client or the lookup raises, the failure reason is remembered and
            later calls return None without retrying (graceful degradation).
        """
        if self._workflow_handle is not None:
            return self._workflow_handle

        if self._workflow_handle_error is not None:
            # Already failed to get handle, don't retry
            return None

        if not self.temporal_client:
            logger.warning(
                "no_temporal_client_available",
                execution_id=self.execution_id[:8],
            )
            self._workflow_handle_error = "No Temporal client"
            return None

        try:
            self._workflow_handle = self.temporal_client.get_workflow_handle(
                self.workflow_id
            )

            logger.info(
                "workflow_handle_obtained",
                execution_id=self.execution_id[:8],
                workflow_id=self.workflow_id,
            )

            return self._workflow_handle

        except Exception as e:
            logger.warning(
                "failed_to_get_workflow_handle",
                execution_id=self.execution_id[:8],
                workflow_id=self.workflow_id,
                error=str(e),
            )
            self._workflow_handle_error = str(e)
            return None

    def get_stats(self) -> Dict[str, Any]:
        """
        Get streaming statistics.

        The result is an independent deep copy: mutating it (including the
        nested ``errors`` list and ``deduplication_stats`` dict) does not
        affect the streamer's internal counters.

        Returns:
            Dictionary with statistics:
            - phase: Current phase
            - start_time: Stream start timestamp
            - connection_time_ms: Phase 1 duration
            - history_load_time_ms: Phase 3 duration
            - live_streaming_time_ms: Phase 5 duration
            - total_events_sent: Total events sent to client
            - history_messages_sent: Messages sent in phase 3
            - live_events_sent: Events sent in phase 5
            - events_buffered: Events added to EventBuffer
            - events_replayed: Events replayed in phase 2
            - deduplication_stats: Stats from MessageDeduplicator
            - errors: List of errors encountered
            - degradation_mode: Last recorded degradation mode value
            - total_duration_ms: Total duration (present once stream() ends)
        """
        import copy

        return copy.deepcopy(self._stats)