kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479) hide show
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,909 @@
1
+ """Agent execution workflow for Temporal"""
2
+
3
+ from dataclasses import dataclass, field
4
+ from datetime import timedelta
5
+ from typing import Optional, List, Dict, Any
6
+ from temporalio import workflow
7
+ from temporalio.common import RetryPolicy
8
+ import asyncio
9
+ import os
10
+
11
+ with workflow.unsafe.imports_passed_through():
12
+ from control_plane_api.worker.activities.agent_activities import (
13
+ execute_agent_llm,
14
+ update_execution_status,
15
+ get_execution_details,
16
+ update_agent_status,
17
+ persist_conversation_history,
18
+ submit_runtime_analytics_activity,
19
+ ActivityExecuteAgentInput,
20
+ ActivityUpdateExecutionInput,
21
+ ActivityGetExecutionInput,
22
+ ActivityUpdateAgentInput,
23
+ ActivityPersistConversationInput,
24
+ AnalyticsActivityInput,
25
+ )
26
+ from control_plane_api.worker.activities.runtime_activities import (
27
+ execute_with_runtime,
28
+ publish_user_message,
29
+ ActivityRuntimeExecuteInput,
30
+ PublishUserMessageInput,
31
+ )
32
+ from control_plane_api.worker.utils.logging_helper import execution_logger
33
+
34
+
35
# Heartbeat timeout, in seconds (default 1800 = 30 minutes).
# Passed as the heartbeat_timeout of the long-running runtime activity;
# heartbeats are how the activity shows it is still alive.
# Override with the ACTIVITY_HEARTBEAT_TIMEOUT_SECONDS environment variable.
HEARTBEAT_TIMEOUT_SECONDS = int(os.getenv("ACTIVITY_HEARTBEAT_TIMEOUT_SECONDS", "1800"))

# Activity execution timeout, in seconds (default 86400 = 24 hours).
# Upper bound on how long a single runtime activity may run. It is kept very
# long on purpose: a streaming activity can stay open for hours while the
# user interacts with the agent.
# Override with the ACTIVITY_EXECUTION_TIMEOUT_SECONDS environment variable.
ACTIVITY_EXECUTION_TIMEOUT_SECONDS = int(os.getenv("ACTIVITY_EXECUTION_TIMEOUT_SECONDS", "86400"))
43
+
44
+
45
@dataclass
class AgentExecutionInput:
    """Payload used to start an agent execution workflow.

    The dict-valued fields default to ``None`` and are normalised to empty
    dicts right after construction, so callers may omit them or pass ``None``
    explicitly and downstream code can always treat them as dicts.
    """

    # Required fields (no defaults)
    agent_id: str
    organization_id: str
    prompt: str
    # Optional fields (with defaults)
    execution_id: Optional[str] = None  # Optional for backward compatibility with old schedules
    system_prompt: Optional[str] = None
    model_id: Optional[str] = None
    model_config: dict = None
    agent_config: dict = None
    mcp_servers: dict = None  # MCP servers configuration
    user_metadata: dict = None
    runtime_type: str = "default"  # "default" (Agno) or "claude_code"
    initial_message_timestamp: Optional[str] = None  # Real-time timestamp for initial message

    def __post_init__(self):
        """Swap every dict field that is still ``None`` for a fresh empty dict."""
        for attr_name in ("model_config", "agent_config", "mcp_servers", "user_metadata"):
            if getattr(self, attr_name) is None:
                # A new dict per field and per instance: nothing is shared.
                setattr(self, attr_name, {})
72
+
73
+
74
@dataclass
class TeamExecutionInput:
    """Payload used to start a team execution.

    Teams run through the same workflow as single agents; use
    ``to_agent_input`` to obtain the equivalent ``AgentExecutionInput``.
    Dict-valued fields left as ``None`` are normalised to empty dicts after
    construction.
    """

    # Required fields (no defaults)
    team_id: str
    organization_id: str
    prompt: str
    # Optional fields (with defaults)
    execution_id: Optional[str] = None  # Optional for backward compatibility with old schedules
    system_prompt: Optional[str] = None
    model_id: Optional[str] = None
    model_config: dict = None
    team_config: dict = None
    mcp_servers: dict = None  # MCP servers configuration
    user_metadata: dict = None
    runtime_type: str = "default"  # "default" (Agno) or "claude_code"
    initial_message_timestamp: Optional[str] = None  # Real-time timestamp for initial message

    def __post_init__(self):
        """Swap every dict field that is still ``None`` for a fresh empty dict."""
        for attr_name in ("model_config", "team_config", "mcp_servers", "user_metadata"):
            if getattr(self, attr_name) is None:
                setattr(self, attr_name, {})

    def to_agent_input(self) -> AgentExecutionInput:
        """Build the AgentExecutionInput equivalent of this team input.

        ``team_id`` is passed as ``agent_id`` and ``team_config`` as
        ``agent_config``; every other field is carried over under its own name.
        """
        # Only these two fields are renamed between the team and agent shapes.
        agent_kwargs = {
            "agent_id": self.team_id,
            "agent_config": self.team_config,
        }
        same_name_fields = (
            "execution_id",
            "organization_id",
            "prompt",
            "system_prompt",
            "model_id",
            "model_config",
            "mcp_servers",
            "user_metadata",
            "runtime_type",
            "initial_message_timestamp",
        )
        for field_name in same_name_fields:
            agent_kwargs[field_name] = getattr(self, field_name)
        return AgentExecutionInput(**agent_kwargs)
118
+
119
+
120
@dataclass
class ChatMessage:
    """A single entry in the workflow's conversation history.

    Instances are appended to ``ExecutionState.messages`` by the workflow and
    by the ``add_message`` signal, and are flattened to plain dicts by
    ``AgentExecutionWorkflow._messages_to_dict`` for persistence.

    Only ``role``, ``content`` and ``timestamp`` are required; the tool and
    user fields are filled in when they apply to the message.
    """

    role: str  # "user", "assistant", "system", "tool"
    content: str
    timestamp: str
    tool_name: Optional[str] = None
    tool_input: Optional[Dict[str, Any]] = None
    tool_output: Optional[Dict[str, Any]] = None
    message_id: Optional[str] = None  # Unique identifier for deduplication
    user_id: Optional[str] = None  # User who sent the message
    user_name: Optional[str] = None
    user_email: Optional[str] = None
    user_avatar: Optional[str] = None
    # Identifier of the tool execution this message belongs to, used for tool
    # message deduplication. _messages_to_dict already serialises this
    # attribute via getattr(); declaring it here lets a message actually carry
    # the value. It is placed last so existing positional construction of
    # ChatMessage keeps working unchanged.
    tool_execution_id: Optional[str] = None
134
+
135
+
136
@dataclass
class ExecutionState:
    """Current state of the execution, as exposed by the get_state query.

    A single instance is created per workflow run (initially with status
    "pending") and mutated in place as the conversation progresses.
    """
    # Lifecycle label. The workflow sets "pending", "running",
    # "waiting_for_input", "completed", "failed" and "interrupted" itself, and
    # also copies in whatever status the control plane reports after a turn.
    status: str
    # Full conversation so far (user, assistant, system and tool messages).
    messages: List[ChatMessage] = field(default_factory=list)
    # Text of the most recent assistant response.
    current_response: str = ""
    # Populated when the execution fails or is interrupted.
    error_message: Optional[str] = None
    # Token usage accumulated across turns
    # (prompt_tokens / completion_tokens / total_tokens).
    usage: Dict[str, Any] = field(default_factory=dict)
    # Free-form details about the latest turn (model, finish reason, run id, ...).
    metadata: Dict[str, Any] = field(default_factory=dict)
    # True while the workflow is paused waiting for a user message; cleared by
    # the add_message and mark_as_done signals.
    is_waiting_for_input: bool = False
    # Set by the mark_as_done signal to end the conversation loop.
    should_complete: bool = False
147
+
148
+
149
+ @workflow.defn
150
+ class AgentExecutionWorkflow:
151
+ """
152
+ Workflow for executing an agent with LLM with Temporal message passing support.
153
+
154
+ This workflow:
155
+ 1. Updates execution status to running
156
+ 2. Executes the agent's LLM call
157
+ 3. Updates execution with results
158
+ 4. Updates agent status
159
+ 5. Supports queries for real-time state access
160
+ 6. Supports signals for adding followup messages
161
+ """
162
+
163
def __init__(self) -> None:
    """Create the per-run workflow state.

    Sets up the message counters, the lock that guards state mutations in
    the signal handlers, and an ExecutionState starting as "pending".
    """
    # The run loop waits until more messages have arrived than have been
    # answered, so both counters start at zero.
    self._processed_message_count = 0
    self._new_message_count = 0
    self._lock = asyncio.Lock()
    self._state = ExecutionState(status="pending")
169
+
170
def _messages_to_dict(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
    """
    Flatten ChatMessage objects into plain dicts for persistence.

    Each message becomes one dict with a fixed set of keys, giving a clean,
    serializable conversation history that can be stored and reloaded later.

    Args:
        messages: List of ChatMessage objects from workflow state

    Returns:
        List of message dicts ready for persistence, in the same order

    Raises:
        AttributeError: If a message lacks one of the always-present fields
    """
    # Read directly: a message without one of these is malformed.
    always_present = (
        "role",
        "content",
        "timestamp",
        "tool_name",
        "tool_input",
        "tool_output",
    )
    # Read defensively, falling back to None when the attribute is absent.
    # tool_execution_id and message_id are CRITICAL for deduplication.
    may_be_absent = (
        "tool_execution_id",
        "message_id",
        "user_id",
        "user_name",
        "user_email",
        "user_avatar",
    )

    serialized = []
    for entry in messages:
        record = {key: getattr(entry, key) for key in always_present}
        for key in may_be_absent:
            record[key] = getattr(entry, key, None)
        serialized.append(record)
    return serialized
200
+
201
@workflow.query
def get_state(self) -> ExecutionState:
    """Query handler: Get current execution state including messages and status.

    Returns:
        The workflow's own ExecutionState object (not a copy), carrying the
        status, conversation messages, latest response, usage and metadata.
    """
    return self._state
205
+
206
@workflow.signal
async def add_message(self, message: ChatMessage) -> None:
    """
    Signal handler: append a follow-up message to the conversation.

    Clients use this to send messages while the workflow is running.
    Bumping the new-message counter satisfies the condition the run loop
    waits on, so the workflow resumes and processes the message.

    Args:
        message: The message to add to the conversation history
    """
    async with self._lock:
        state = self._state
        state.messages.append(message)
        self._new_message_count += 1
        state.is_waiting_for_input = False

        # Log only a short preview so long prompts don't flood the logs.
        preview = message.content[:100] if message.content else ""
        log_context = {
            "role": message.role,
            "content_preview": preview,
            "total_messages": len(state.messages),
        }
        workflow.logger.info("Message added to conversation", extra=log_context)
225
+
226
@workflow.signal
async def mark_as_done(self) -> None:
    """
    Signal handler: flag the conversation as finished.

    Sent when the user is done with the conversation. Setting
    ``should_complete`` lets the run loop exit so the workflow can finalize.
    """
    async with self._lock:
        state = self._state
        state.is_waiting_for_input = False
        state.should_complete = True
        workflow.logger.info("Workflow marked as done by user")
236
+
237
+ @workflow.run
238
+ async def run(self, input: AgentExecutionInput) -> dict:
239
+ """
240
+ Run the agent execution workflow with Human-in-the-Loop (HITL) pattern.
241
+
242
+ This workflow implements a continuous conversation loop:
243
+ 1. Process the initial user message
244
+ 2. Execute LLM and return response
245
+ 3. Wait for user input (signals)
246
+ 4. Process followup messages in a loop
247
+ 5. Only complete when user explicitly marks as done
248
+
249
+ Args:
250
+ input: Workflow input with execution details
251
+
252
+ Returns:
253
+ Execution result dict with response, usage, etc.
254
+ """
255
+ # Generate execution_id if not provided (for backward compatibility with old schedules)
256
+ execution_id = input.execution_id
257
+ if not execution_id:
258
+ execution_id = workflow.uuid4()
259
+ workflow.logger.info(
260
+ "Generated execution_id for backward compatibility",
261
+ extra={"execution_id": execution_id}
262
+ )
263
+ # Update input object to use generated ID
264
+ input.execution_id = execution_id
265
+
266
+ # Removed: execution start logging (was appearing for all workers, possibly due to Temporal replays)
267
+ # execution_logger.execution_started(
268
+ # input.execution_id,
269
+ # agent_id=input.agent_id,
270
+ # model=input.model_id,
271
+ # runtime=input.runtime_type
272
+ # )
273
+
274
+ workflow.logger.info(
275
+ f"Starting agent execution workflow with HITL pattern",
276
+ extra={
277
+ "execution_id": input.execution_id,
278
+ "agent_id": input.agent_id,
279
+ "organization_id": input.organization_id,
280
+ }
281
+ )
282
+
283
+ # Initialize state with user's initial message
284
+ # CRITICAL: Use real-time timestamp (not workflow.now()) to ensure chronological ordering
285
+ # This prevents timestamp mismatches between initial and follow-up messages
286
+ message_timestamp = input.initial_message_timestamp or workflow.now().isoformat()
287
+
288
+ initial_user_message = ChatMessage(
289
+ role="user",
290
+ content=input.prompt,
291
+ timestamp=message_timestamp,
292
+ message_id=f"{input.execution_id}_user_1", # Generate deterministic ID
293
+ )
294
+ self._state.messages.append(initial_user_message)
295
+ self._state.status = "running"
296
+ self._new_message_count = 1 # Initial message counts as a new message
297
+ self._processed_message_count = 0 # No messages processed yet (no response)
298
+
299
+ try:
300
+ # Step 1: Update execution status to running
301
+ await workflow.execute_activity(
302
+ update_execution_status,
303
+ ActivityUpdateExecutionInput(
304
+ execution_id=input.execution_id,
305
+ status="running",
306
+ started_at=workflow.now().isoformat(),
307
+ execution_metadata={
308
+ "workflow_started": True,
309
+ "has_mcp_servers": bool(input.mcp_servers),
310
+ "mcp_server_count": len(input.mcp_servers) if input.mcp_servers else 0,
311
+ "hitl_enabled": True,
312
+ },
313
+ ),
314
+ start_to_close_timeout=timedelta(seconds=30),
315
+ )
316
+
317
+ # Step 2: Update agent status to running
318
+ await workflow.execute_activity(
319
+ update_agent_status,
320
+ ActivityUpdateAgentInput(
321
+ agent_id=input.agent_id,
322
+ organization_id=input.organization_id,
323
+ status="running",
324
+ last_active_at=workflow.now().isoformat(),
325
+ ),
326
+ start_to_close_timeout=timedelta(seconds=30),
327
+ )
328
+
329
+ # Deprecate old patch: We moved status update BEFORE persistence (was after)
330
+ # This ensures deterministic replay when continuing multi-turn conversations
331
+ workflow.deprecate_patch("status-update-before-persistence")
332
+
333
+ # HITL Conversation Loop - Continue until user marks as done
334
+ conversation_turn = 0
335
+ while not self._state.should_complete:
336
+ conversation_turn += 1
337
+ workflow.logger.info(
338
+ f"Starting conversation turn {conversation_turn}",
339
+ extra={"turn": conversation_turn, "message_count": len(self._state.messages)}
340
+ )
341
+
342
+ # Get the latest user message (last message added)
343
+ latest_message = self._state.messages[-1] if self._state.messages else None
344
+ latest_prompt = latest_message.content if latest_message and latest_message.role == "user" else input.prompt
345
+
346
+ # Extract user message metadata for session persistence deduplication
347
+ user_message_id = latest_message.message_id if latest_message and latest_message.role == "user" else None
348
+ user_id = latest_message.user_id if latest_message and latest_message.role == "user" else None
349
+ user_name = latest_message.user_name if latest_message and latest_message.role == "user" else None
350
+ user_email = latest_message.user_email if latest_message and latest_message.role == "user" else None
351
+ user_avatar = latest_message.user_avatar if latest_message and latest_message.role == "user" else None
352
+
353
+ # Step 3: Publish user message to stream IMMEDIATELY (for turn 1)
354
+ # For follow-up turns, the message is published when received via signal
355
+ # This ensures the initial user message appears in UI before assistant response
356
+ #
357
+ # IMPORTANT: Use workflow patching to handle existing workflows that don't have this activity
358
+ # Existing workflows will skip this during replay; new workflows will execute it
359
+ if conversation_turn == 1 and workflow.patched("publish-user-message-v1"):
360
+ workflow.logger.info(
361
+ f"Publishing initial user message to stream",
362
+ extra={
363
+ "turn": conversation_turn,
364
+ "message_id": user_message_id,
365
+ "execution_id": str(input.execution_id)[:8] if input.execution_id else "unknown"
366
+ }
367
+ )
368
+ await workflow.execute_activity(
369
+ publish_user_message,
370
+ PublishUserMessageInput(
371
+ execution_id=input.execution_id,
372
+ prompt=input.prompt,
373
+ timestamp=initial_user_message.timestamp,
374
+ message_id=user_message_id,
375
+ user_id=input.user_metadata.get("user_id") if input.user_metadata else None,
376
+ user_name=input.user_metadata.get("user_name") if input.user_metadata else None,
377
+ user_email=input.user_metadata.get("user_email") if input.user_metadata else None,
378
+ user_avatar=input.user_metadata.get("user_avatar") if input.user_metadata else None,
379
+ ),
380
+ start_to_close_timeout=timedelta(seconds=10),
381
+ )
382
+
383
+ # Execute using RuntimeFactory (supports both "default" Agno and "claude_code")
384
+ workflow.logger.info(
385
+ f"Executing with runtime: {input.runtime_type}",
386
+ extra={
387
+ "runtime_type": input.runtime_type,
388
+ "turn": conversation_turn,
389
+ "user_message_id": user_message_id # Log for debugging
390
+ }
391
+ )
392
+
393
+ # DEBUG: Log MCP servers in workflow input
394
+ workflow.logger.info(
395
+ f"🔍 DEBUG: Workflow MCP servers",
396
+ extra={
397
+ "mcp_servers_type": str(type(input.mcp_servers)),
398
+ "mcp_servers_count": len(input.mcp_servers) if input.mcp_servers else 0,
399
+ "mcp_server_names": list(input.mcp_servers.keys()) if input.mcp_servers else []
400
+ }
401
+ )
402
+
403
+ # Track turn start time for analytics
404
+ # workflow.time() already returns a float timestamp, not a datetime
405
+ turn_start_time = workflow.time()
406
+
407
+ llm_result = await workflow.execute_activity(
408
+ execute_with_runtime,
409
+ ActivityRuntimeExecuteInput(
410
+ execution_id=input.execution_id,
411
+ agent_id=input.agent_id,
412
+ organization_id=input.organization_id,
413
+ prompt=latest_prompt, # Current turn's prompt
414
+ runtime_type=input.runtime_type,
415
+ system_prompt=input.system_prompt,
416
+ model_id=input.model_id,
417
+ model_config=input.model_config,
418
+ agent_config=input.agent_config,
419
+ mcp_servers=input.mcp_servers,
420
+ conversation_history=[], # Agno manages history via session_id
421
+ user_metadata=input.user_metadata,
422
+ runtime_config={
423
+ "session_id": input.execution_id, # For Agno runtime
424
+ },
425
+ stream=True, # Enable streaming for real-time updates
426
+ conversation_turn=conversation_turn, # Pass turn number for analytics
427
+ # CRITICAL: Pass user message metadata for consistent deduplication
428
+ user_message_id=user_message_id,
429
+ user_id=user_id,
430
+ user_name=user_name,
431
+ user_email=user_email,
432
+ user_avatar=user_avatar,
433
+ ),
434
+ start_to_close_timeout=timedelta(seconds=ACTIVITY_EXECUTION_TIMEOUT_SECONDS), # Configurable, default 24 hours for long-running streaming
435
+ heartbeat_timeout=timedelta(seconds=HEARTBEAT_TIMEOUT_SECONDS), # Configurable, default 30 min for long-running tasks
436
+ retry_policy=RetryPolicy(
437
+ maximum_attempts=3, # Retry automatically 1-3 times
438
+ initial_interval=timedelta(seconds=1),
439
+ maximum_interval=timedelta(seconds=10),
440
+ backoff_coefficient=2.0,
441
+ non_retryable_error_types=["ExecutionNotFound"], # Don't retry if execution deleted
442
+ ),
443
+ )
444
+
445
+ # Submit analytics as separate activity (fire-and-forget)
446
+ # This runs independently and doesn't block workflow progression
447
+ workflow.start_activity(
448
+ submit_runtime_analytics_activity,
449
+ AnalyticsActivityInput(
450
+ execution_id=input.execution_id,
451
+ turn_number=conversation_turn,
452
+ result=llm_result,
453
+ turn_start_time=turn_start_time,
454
+ ),
455
+ start_to_close_timeout=timedelta(seconds=30),
456
+ retry_policy=RetryPolicy(
457
+ maximum_attempts=3,
458
+ initial_interval=timedelta(seconds=2),
459
+ maximum_interval=timedelta(seconds=10),
460
+ backoff_coefficient=2.0,
461
+ non_retryable_error_types=["ValueError", "TypeError"],
462
+ ),
463
+ )
464
+
465
+ # Add tool execution status messages (real-time updates)
466
+ if llm_result.get("tool_execution_messages"):
467
+ async with self._lock:
468
+ for tool_msg in llm_result["tool_execution_messages"]:
469
+ self._state.messages.append(ChatMessage(
470
+ role="system",
471
+ content=tool_msg.get("content", ""),
472
+ timestamp=tool_msg.get("timestamp", workflow.now().isoformat()),
473
+ tool_name=tool_msg.get("tool_name"),
474
+ ))
475
+
476
+ # Add tool messages to state (detailed tool info)
477
+ if llm_result.get("tool_messages"):
478
+ async with self._lock:
479
+ for tool_msg in llm_result["tool_messages"]:
480
+ self._state.messages.append(ChatMessage(
481
+ role="tool",
482
+ content=tool_msg.get("content", ""),
483
+ timestamp=tool_msg.get("timestamp", workflow.now().isoformat()),
484
+ tool_name=tool_msg.get("tool_name"),
485
+ tool_input=tool_msg.get("tool_input"),
486
+ ))
487
+
488
+ # Update state with assistant response
489
+ if llm_result.get("response"):
490
+ async with self._lock:
491
+ self._state.messages.append(ChatMessage(
492
+ role="assistant",
493
+ content=llm_result["response"],
494
+ timestamp=workflow.now().isoformat(),
495
+ ))
496
+ self._state.current_response = llm_result["response"]
497
+ self._processed_message_count += 1
498
+
499
+ # Update usage and metadata (accumulate across turns)
500
+ if llm_result.get("usage"):
501
+ # Accumulate token usage across conversation turns
502
+ current_usage = self._state.usage
503
+ new_usage = llm_result.get("usage", {})
504
+ self._state.usage = {
505
+ "prompt_tokens": current_usage.get("prompt_tokens", 0) + new_usage.get("prompt_tokens", 0),
506
+ "completion_tokens": current_usage.get("completion_tokens", 0) + new_usage.get("completion_tokens", 0),
507
+ "total_tokens": current_usage.get("total_tokens", 0) + new_usage.get("total_tokens", 0),
508
+ }
509
+
510
+ # Update metadata with latest turn info
511
+ self._state.metadata.update({
512
+ "model": llm_result.get("model"),
513
+ "latest_finish_reason": llm_result.get("finish_reason"),
514
+ "mcp_tools_used": self._state.metadata.get("mcp_tools_used", 0) + llm_result.get("mcp_tools_used", 0),
515
+ "latest_run_id": llm_result.get("run_id"),
516
+ "conversation_turns": conversation_turn,
517
+ })
518
+
519
+ # Extract session_id from runtime result for conversation continuity
520
+ # This enables multi-turn conversations in Claude Code runtime
521
+ llm_metadata = llm_result.get("metadata", {})
522
+ if "claude_code_session_id" in llm_metadata:
523
+ # Update input.user_metadata so next turn can resume the session
524
+ if not input.user_metadata:
525
+ input.user_metadata = {}
526
+ session_id_value = llm_metadata["claude_code_session_id"]
527
+ input.user_metadata["claude_code_session_id"] = session_id_value
528
+ workflow.logger.info(
529
+ f"Updated user_metadata with session_id for turn continuity",
530
+ extra={
531
+ "turn": conversation_turn,
532
+ "session_id": session_id_value[:16] if session_id_value else None
533
+ }
534
+ )
535
+
536
+ # Check if LLM call was cancelled (DURABILITY FIX)
537
+ finish_reason = llm_result.get("finish_reason")
538
+ if finish_reason == "cancelled":
539
+ # Execution was cancelled/interrupted - handle gracefully
540
+ workflow.logger.warning(
541
+ f"⚠️ Execution cancelled during turn {conversation_turn}",
542
+ extra={
543
+ "turn": conversation_turn,
544
+ "execution_id": input.execution_id,
545
+ "metadata": llm_result.get("metadata", {}),
546
+ }
547
+ )
548
+
549
+ # Mark as interrupted (not failed) to indicate this is recoverable
550
+ self._state.status = "interrupted"
551
+ self._state.error_message = "Execution was interrupted and can be resumed"
552
+
553
+ # Save any accumulated response before breaking
554
+ if llm_result.get("response"):
555
+ async with self._lock:
556
+ self._state.messages.append(ChatMessage(
557
+ role="assistant",
558
+ content=llm_result["response"],
559
+ timestamp=workflow.now().isoformat(),
560
+ ))
561
+ self._state.current_response = llm_result["response"]
562
+
563
+ # Break but allow workflow to complete gracefully
564
+ break
565
+
566
+ # Check if LLM call failed
567
+ if not llm_result.get("success"):
568
+ self._state.status = "failed"
569
+ # Validate error message is never empty
570
+ error_msg = llm_result.get("error") or "Execution failed with unknown error"
571
+ if not error_msg.strip():
572
+ error_msg = "Execution failed (error details not available)"
573
+ self._state.error_message = error_msg
574
+ break
575
+
576
+ # Wait for control plane to make intelligent state decision
577
+ # The control plane AI analyzes the turn and determines the appropriate state
578
+ workflow.logger.info(
579
+ f"⏳ Waiting for control plane state decision for turn {conversation_turn}",
580
+ extra={"turn": conversation_turn, "execution_id": str(input.execution_id)[:8] if input.execution_id else "unknown"}
581
+ )
582
+
583
+ # Give control plane time to make AI decision (up to 6 seconds with retries)
584
+ max_retries = 3
585
+ retry_delay = 2 # seconds
586
+
587
+ for retry in range(max_retries):
588
+ await asyncio.sleep(retry_delay)
589
+
590
+ # Query execution state from control plane
591
+ try:
592
+ current_execution = await workflow.execute_activity(
593
+ get_execution_details,
594
+ ActivityGetExecutionInput(execution_id=input.execution_id),
595
+ start_to_close_timeout=timedelta(seconds=10),
596
+ )
597
+
598
+ control_plane_status = current_execution.get("status", "unknown")
599
+
600
+ # Check if status has been updated from "running" (indicates AI made a decision)
601
+ if control_plane_status != "running":
602
+ workflow.logger.info(
603
+ f"✅ Control plane decided state: {control_plane_status}",
604
+ extra={
605
+ "execution_id": input.execution_id,
606
+ "turn": conversation_turn,
607
+ "decided_status": control_plane_status,
608
+ "retry": retry + 1
609
+ }
610
+ )
611
+ break
612
+ else:
613
+ if retry < max_retries - 1:
614
+ workflow.logger.info(
615
+ f"⏳ Control plane still processing, retry {retry + 1}/{max_retries}",
616
+ extra={"turn": conversation_turn}
617
+ )
618
+ except Exception as e:
619
+ workflow.logger.warning(
620
+ f"⚠️ Failed to query execution state: {str(e)}",
621
+ extra={"turn": conversation_turn, "retry": retry + 1}
622
+ )
623
+ if retry == max_retries - 1:
624
+ # Final retry failed - default to waiting_for_input (safe fallback)
625
+ control_plane_status = "waiting_for_input"
626
+ workflow.logger.warning(
627
+ "Using safe fallback state: waiting_for_input",
628
+ extra={"turn": conversation_turn}
629
+ )
630
+
631
+ # Update internal state based on control plane decision
632
+ self._state.status = control_plane_status
633
+ self._state.is_waiting_for_input = (control_plane_status == "waiting_for_input")
634
+
635
+ workflow.logger.info(
636
+ f"🎯 State transition complete: {control_plane_status}",
637
+ extra={
638
+ "execution_id": input.execution_id,
639
+ "turn": conversation_turn,
640
+ "status": control_plane_status
641
+ }
642
+ )
643
+
644
+ # Then persist conversation
645
+ workflow.logger.info(
646
+ f"Persisting conversation after turn {conversation_turn}",
647
+ extra={"turn": conversation_turn, "message_count": len(self._state.messages)}
648
+ )
649
+
650
+ try:
651
+ persist_result = await workflow.execute_activity(
652
+ persist_conversation_history,
653
+ ActivityPersistConversationInput(
654
+ execution_id=input.execution_id,
655
+ session_id=input.execution_id,
656
+ messages=self._messages_to_dict(self._state.messages),
657
+ user_id=input.user_metadata.get("user_id") if input.user_metadata else None,
658
+ metadata={
659
+ "agent_id": input.agent_id,
660
+ "organization_id": input.organization_id,
661
+ "conversation_turn": conversation_turn,
662
+ "total_messages": len(self._state.messages),
663
+ },
664
+ ),
665
+ start_to_close_timeout=timedelta(seconds=30),
666
+ )
667
+
668
+ if persist_result.get("success"):
669
+ workflow.logger.info(
670
+ f"✅ Conversation persisted for turn {conversation_turn}",
671
+ extra={
672
+ "turn": conversation_turn,
673
+ "message_count": persist_result.get("message_count", len(self._state.messages))
674
+ }
675
+ )
676
+ else:
677
+ workflow.logger.warning(
678
+ f"⚠️ Persistence returned failure for turn {conversation_turn}",
679
+ extra={
680
+ "turn": conversation_turn,
681
+ "error": persist_result.get("error", "Unknown error")
682
+ }
683
+ )
684
+
685
+ except Exception as persist_error:
686
+ # Log but don't fail the workflow if persistence fails
687
+ error_type = type(persist_error).__name__
688
+ error_msg = str(persist_error) if str(persist_error) else "No error message"
689
+
690
+ workflow.logger.error(
691
+ f"❌ Failed to persist conversation for turn {conversation_turn}",
692
+ extra={
693
+ "turn": conversation_turn,
694
+ "error_type": error_type,
695
+ "error": error_msg[:200], # Truncate long errors
696
+ "message_count": len(self._state.messages),
697
+ }
698
+ )
699
+
700
+ # Handle different states based on control plane decision
701
+ if control_plane_status == "completed":
702
+ workflow.logger.info(
703
+ f"✅ Task completed (AI decision) after turn {conversation_turn}",
704
+ extra={"turn": conversation_turn}
705
+ )
706
+ # Task is complete - exit loop
707
+ break
708
+
709
+ elif control_plane_status == "failed":
710
+ workflow.logger.info(
711
+ f"❌ Task failed (AI decision) after turn {conversation_turn}",
712
+ extra={"turn": conversation_turn}
713
+ )
714
+ # Unrecoverable error - exit loop
715
+ break
716
+
717
+ elif control_plane_status == "waiting_for_input":
718
+ workflow.logger.info(
719
+ f"⏸️ Waiting for user input after turn {conversation_turn}",
720
+ extra={"turn": conversation_turn}
721
+ )
722
+
723
+ # Wait for either:
724
+ # 1. New message from user (add_message signal)
725
+ # 2. User marks as done (mark_as_done signal)
726
+ # 3. Timeout (24 hours for long-running conversations)
727
+ await workflow.wait_condition(
728
+ lambda: self._new_message_count > self._processed_message_count or self._state.should_complete,
729
+ timeout=timedelta(hours=24)
730
+ )
731
+
732
+ # Don't update processed count here - it will be updated after we add the assistant's response
733
+
734
+ if self._state.should_complete:
735
+ workflow.logger.info("User marked workflow as done")
736
+ break
737
+
738
+ # Continue loop to process new message
739
+ self._state.status = "running"
740
+
741
+ elif control_plane_status == "running":
742
+ workflow.logger.info(
743
+ f"▶️ Continuing automatically to next turn {conversation_turn + 1}",
744
+ extra={"turn": conversation_turn}
745
+ )
746
+ # Continue automatically to next turn (no user input needed)
747
+ # Just loop back to execute_agent_llm
748
+ continue
749
+
750
+ elif control_plane_status == "queued":
751
+ workflow.logger.info(
752
+ f"📥 Message queued, continuing to next turn {conversation_turn + 1}",
753
+ extra={"turn": conversation_turn}
754
+ )
755
+ # "queued" means a new message was received and is waiting to be processed
756
+ # Treat same as "running" - continue to next turn automatically
757
+ continue
758
+
759
+ else:
760
+ # Unknown status - default to waiting_for_input (safe fallback)
761
+ workflow.logger.warning(
762
+ f"⚠️ Unknown status '{control_plane_status}', defaulting to waiting_for_input",
763
+ extra={"turn": conversation_turn, "status": control_plane_status}
764
+ )
765
+ self._state.status = "waiting_for_input"
766
+ self._state.is_waiting_for_input = True
767
+
768
+ await workflow.wait_condition(
769
+ lambda: self._new_message_count > self._processed_message_count or self._state.should_complete,
770
+ timeout=timedelta(hours=24)
771
+ )
772
+
773
+ if self._state.should_complete:
774
+ workflow.logger.info("User marked workflow as done")
775
+ break
776
+
777
+ self._state.status = "running"
778
+
779
+ # Conversation complete - finalize workflow
780
+ # DURABILITY FIX: Handle interrupted status separately from failed/completed
781
+ if self._state.status == "interrupted":
782
+ final_status = "interrupted"
783
+ workflow.logger.warning(
784
+ f"⚠️ Workflow interrupted (not failed)",
785
+ extra={
786
+ "execution_id": input.execution_id,
787
+ "conversation_turns": conversation_turn,
788
+ }
789
+ )
790
+ elif self._state.status == "failed":
791
+ final_status = "failed"
792
+ else:
793
+ final_status = "completed"
794
+
795
+ self._state.status = final_status
796
+
797
+ await workflow.execute_activity(
798
+ update_execution_status,
799
+ ActivityUpdateExecutionInput(
800
+ execution_id=input.execution_id,
801
+ status=final_status,
802
+ completed_at=workflow.now().isoformat(),
803
+ response=self._state.current_response,
804
+ error_message=self._state.error_message,
805
+ usage=self._state.usage,
806
+ execution_metadata={
807
+ **self._state.metadata,
808
+ "workflow_completed": True,
809
+ "total_conversation_turns": conversation_turn,
810
+ "was_interrupted": final_status == "interrupted",
811
+ },
812
+ ),
813
+ start_to_close_timeout=timedelta(seconds=30),
814
+ )
815
+
816
+ # Update agent final status
817
+ # DURABILITY FIX: Treat interrupted as a partial success, not a failure
818
+ agent_final_status = "failed" if final_status == "failed" else "completed"
819
+ await workflow.execute_activity(
820
+ update_agent_status,
821
+ ActivityUpdateAgentInput(
822
+ agent_id=input.agent_id,
823
+ organization_id=input.organization_id,
824
+ status=agent_final_status,
825
+ last_active_at=workflow.now().isoformat(),
826
+ error_message=self._state.error_message if final_status == "failed" else None,
827
+ ),
828
+ start_to_close_timeout=timedelta(seconds=30),
829
+ )
830
+
831
+ workflow.logger.info(
832
+ f"Agent execution workflow completed with HITL",
833
+ extra={
834
+ "execution_id": input.execution_id,
835
+ "status": final_status,
836
+ "conversation_turns": conversation_turn,
837
+ }
838
+ )
839
+
840
+ return {
841
+ "success": final_status == "completed",
842
+ "execution_id": input.execution_id,
843
+ "status": final_status,
844
+ "response": self._state.current_response,
845
+ "usage": self._state.usage,
846
+ "conversation_turns": conversation_turn,
847
+ }
848
+
849
+ except Exception as e:
850
+ # Update state with error
851
+ self._state.status = "failed"
852
+ self._state.error_message = str(e)
853
+ self._state.metadata["error_type"] = type(e).__name__
854
+
855
+ # Log failure with clear context
856
+ execution_logger.execution_failed(
857
+ input.execution_id,
858
+ error=str(e),
859
+ error_type=type(e).__name__,
860
+ recoverable=False
861
+ )
862
+
863
+ workflow.logger.error(
864
+ f"Agent execution workflow failed",
865
+ extra={
866
+ "execution_id": input.execution_id,
867
+ "error": str(e),
868
+ }
869
+ )
870
+
871
+ # Update execution as failed
872
+ try:
873
+ await workflow.execute_activity(
874
+ update_execution_status,
875
+ ActivityUpdateExecutionInput(
876
+ execution_id=input.execution_id,
877
+ status="failed",
878
+ completed_at=workflow.now().isoformat(),
879
+ error_message=f"Workflow error: {str(e)}",
880
+ execution_metadata={
881
+ "workflow_error": True,
882
+ "error_type": type(e).__name__,
883
+ },
884
+ ),
885
+ start_to_close_timeout=timedelta(seconds=30),
886
+ )
887
+
888
+ await workflow.execute_activity(
889
+ update_agent_status,
890
+ ActivityUpdateAgentInput(
891
+ agent_id=input.agent_id,
892
+ organization_id=input.organization_id,
893
+ status="failed",
894
+ last_active_at=workflow.now().isoformat(),
895
+ error_message=str(e),
896
+ ),
897
+ start_to_close_timeout=timedelta(seconds=30),
898
+ )
899
+ except Exception as update_error:
900
+ execution_logger.warning(
901
+ input.execution_id,
902
+ f"Could not update execution status after workflow error: {str(update_error)}"
903
+ )
904
+ workflow.logger.error(
905
+ f"Failed to update status after error",
906
+ extra={"error": str(update_error)}
907
+ )
908
+
909
+ raise