kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479) hide show
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,1499 @@
1
+ """Temporal activities for plan orchestration."""
2
+
3
+ import os
4
+ import json
5
+ import httpx
6
+ from typing import Dict, Any, Optional, List
7
+ from datetime import datetime, timezone
8
+ from temporalio import activity
9
+ import structlog
10
+
11
+ from worker_internal.planner.models import (
12
+ CreatePlanExecutionInput,
13
+ UpdatePlanStateInput,
14
+ TaskExecutionResult,
15
+ TaskValidationResult,
16
+ TaskStatus,
17
+ PlanTask,
18
+ TaskRetryContext,
19
+ )
20
+ from worker_internal.planner.event_publisher import publish_plan_event
21
+ from worker_internal.planner.event_models import (
22
+ PlanStartedEvent,
23
+ TaskStartedEvent,
24
+ TaskRunningEvent,
25
+ TaskWaitingForInputEvent,
26
+ TaskCompletedEvent,
27
+ TaskValidationStartedEvent,
28
+ TaskValidationCompleteEvent,
29
+ PlanStatusUpdateEvent,
30
+ TodoListInitializedEvent,
31
+ TodoItemUpdatedEvent,
32
+ TodoItem,
33
+ )
34
+
35
+ logger = structlog.get_logger()
36
+
37
+
38
def extract_user_from_jwt(jwt_token: Optional[str]) -> Optional[str]:
    """
    Read the ``email`` claim out of a JWT without verifying the token.

    The token is decoded with signature verification disabled, so the
    result is informational only and is not proof of identity.

    Args:
        jwt_token: Encoded JWT string; may be None or empty.

    Returns:
        The value of the ``email`` claim, or None when no token was given,
        the claim is absent, or decoding failed for any reason.
    """
    if jwt_token:
        try:
            # PyJWT is imported at call time; an ImportError is handled like
            # any other decoding failure by the handler below.
            import jwt as pyjwt

            claims = pyjwt.decode(jwt_token, options={"verify_signature": False})
            return claims.get("email")
        except Exception as exc:
            logger.warning(f"failed_to_extract_user_from_jwt: {str(exc)}")
    return None
59
+
60
+
61
def build_langfuse_metadata(
    plan_execution_id: str,
    generation_name: str,
    user_id: Optional[str] = None,
    organization_id: Optional[str] = None,
    agent_id: Optional[str] = None,
    task_id: Optional[int] = None,
) -> Dict[str, Any]:
    """
    Assemble the observability context attached to plan-execution LLM calls.

    Every call made for one plan execution carries the same ``session_id``
    (the plan execution ID) so that the calls can be grouped together.

    Args:
        plan_execution_id: Plan execution ID, stored as ``session_id``.
        generation_name: Label for this specific LLM call, stored under both
            ``generation_name`` and ``name``.
        user_id: Raw user identifier; omitted from the result when falsy.
        organization_id: Organization ID; omitted from the result when falsy.
        agent_id: Agent making the call; omitted from the result when falsy.
        task_id: Task the call belongs to; omitted only when None, so a task
            ID of 0 is kept.

    Returns:
        Dict with the keys described above plus a fixed ``trace_name`` of
        ``"plan-execution"``.
    """
    # Raw identity values are passed through unformatted (per the original
    # author's notes, a downstream proxy combines them into "email-org").
    identity = {"user_id": user_id, "organization_id": organization_id}
    metadata: Dict[str, Any] = {key: value for key, value in identity.items() if value}

    # Grouping key and display names for the trace / generation.
    metadata.update(
        session_id=plan_execution_id,
        trace_name="plan-execution",
        generation_name=generation_name,
        name=generation_name,
    )

    # Optional extra context.
    if agent_id:
        metadata["agent_id"] = agent_id
    if task_id is not None:
        metadata["task_id"] = task_id

    return metadata
112
+
113
+
114
@activity.defn
async def publish_event_activity(
    execution_id: str,
    event_type: str,
    event_data: Dict[str, Any],
) -> bool:
    """
    Publish a plan event to Redis on behalf of a workflow.

    The event is wrapped with its type and a UTC timestamp, serialized to
    JSON, pushed onto the list ``plan-execution:{execution_id}:events``
    (trimmed to the 1000 most recent entries, expiring after 3600 seconds)
    and published on the channel ``plan-execution:{execution_id}:stream``.

    Args:
        execution_id: Plan execution ID used to build the Redis key/channel.
        event_type: Event type name stored in the message.
        event_data: JSON-serializable event payload.

    Returns:
        True when the event was written and published; False when no Redis
        client is available or any step raised.
    """
    # Structured fields go through ``extra`` (as the other activities in this
    # module do): the activity logger is a stdlib LoggerAdapter and rejects
    # arbitrary keyword arguments with a TypeError.
    log_fields = {"execution_id": execution_id[:8]}

    try:
        redis_client = get_redis_client()
        if not redis_client:
            activity.logger.warning("redis_not_available", extra=log_fields)
            return False

        message = {
            "event_type": event_type,
            "data": event_data,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        # Serialize to JSON string
        message_json = json.dumps(message)

        list_key = f"plan-execution:{execution_id}:events"
        channel = f"plan-execution:{execution_id}:stream"

        # Newest event first; keep at most 1000 entries and refresh the TTL.
        await redis_client.lpush(list_key, message_json)
        await redis_client.ltrim(list_key, 0, 999)
        await redis_client.expire(list_key, 3600)
        await redis_client.publish(channel, message_json)

        activity.logger.debug(
            "plan_event_published_from_workflow",
            extra={**log_fields, "event_type": event_type},
        )
        return True
    except Exception as e:
        # Best-effort publishing: report failure to the caller instead of
        # failing the activity.
        activity.logger.error(
            "publish_event_failed",
            extra={**log_fields, "error": str(e)},
        )
        return False
153
+
154
+
155
def get_redis_client():
    """
    Return the Redis client used for event publishing.

    Delegates to ``control_plane_api.app.lib.redis_client.get_redis_client``;
    that module is imported when this function is called, not at module load.
    """
    from control_plane_api.app.lib import redis_client as _redis_module

    return _redis_module.get_redis_client()
159
+
160
+
161
def get_control_plane_url() -> str:
    """
    Return the Control Plane API base URL.

    Reads the ``CONTROL_PLANE_URL`` environment variable and falls back to
    ``http://localhost:8000`` when it is not set.
    """
    default_url = "http://localhost:8000"
    return os.environ.get("CONTROL_PLANE_URL", default_url)
164
+
165
+
166
def get_auth_headers(jwt_token: Optional[str] = None) -> Dict[str, str]:
    """
    Build the HTTP headers for a Control Plane API request.

    Args:
        jwt_token: Bearer token; when None or empty no ``Authorization``
            header is added.

    Returns:
        Headers containing a JSON ``Content-Type`` and, when a token was
        supplied, ``Authorization: Bearer <token>``.
    """
    authorization = {"Authorization": f"Bearer {jwt_token}"} if jwt_token else {}
    return {"Content-Type": "application/json", **authorization}
172
+
173
+
174
@activity.defn
async def create_plan_execution(input: CreatePlanExecutionInput) -> Dict[str, Any]:
    """
    Announce the start of a plan execution.

    NOTE: per the original author's note, the API creates the plan execution
    record before starting the workflow. This activity performs no database
    write and no existence check: it logs, publishes a ``plan_started``
    event, and reports success.

    Args:
        input: Plan execution details (execution ID, title, task count and
            agent ID are read).

    Returns:
        ``{"success": True, "plan_execution_id": <execution_id>}``.
    """
    execution_id = input.execution_id

    log_fields = {
        "execution_id": execution_id[:8],
        "title": input.title,
        "total_tasks": input.total_tasks,
    }
    activity.logger.info("plan_execution_already_created_by_api", extra=log_fields)

    # Let subscribers know that the plan has started.
    started_event = PlanStartedEvent(
        execution_id=execution_id,
        title=input.title,
        total_tasks=input.total_tasks,
        agent_id=input.agent_id,
    )
    await publish_plan_event(
        execution_id=execution_id,
        event_type="plan_started",
        event_data=started_event,
    )

    # Nothing to persist here; just report success.
    return {"success": True, "plan_execution_id": execution_id}
205
+
206
+
207
@activity.defn
async def update_plan_state(input: UpdatePlanStateInput) -> Dict[str, Any]:
    """
    Send a partial plan-execution state update to the Control Plane API.

    Only fields of ``input`` that are not None are included in the PATCH
    body. When there is nothing to send, no request is made.

    Args:
        input: Requested state changes plus the plan execution ID.

    Returns:
        ``{"success": True}`` on a 200/201 response,
        ``{"success": True, "message": "No updates to apply"}`` when every
        field was None, or ``{"success": False, "error": <text>}`` on a
        non-2xx response or any exception.
    """
    activity.logger.info(
        f"updating_plan_state: plan_id={input.plan_execution_id[:8]}, status={input.status}, completed={input.completed_tasks}"
    )

    # Fields copied verbatim into the payload, in payload order.
    passthrough_fields = (
        "completed_tasks",
        "failed_tasks",
        "waiting_tasks",
        "dag_state",
        "total_tokens",
        "actual_cost_usd",
    )

    try:
        base_url = get_control_plane_url()

        payload: Dict[str, Any] = {}
        if input.status is not None:
            # Enum-like statuses are sent as their ``value``; anything else as-is.
            payload["status"] = getattr(input.status, "value", input.status)
        for field_name in passthrough_fields:
            field_value = getattr(input, field_name)
            if field_value is not None:
                payload[field_name] = field_value

        if payload:
            endpoint = f"{base_url}/api/v1/tasks/plan/{input.plan_execution_id}"
            async with httpx.AsyncClient(timeout=30.0) as http:
                reply = await http.patch(endpoint, json=payload)

                if reply.status_code in (200, 201):
                    activity.logger.info(f"plan_state_updated: updates={list(payload.keys())}")
                    return {"success": True}

                activity.logger.error(
                    f"failed_to_update_plan_state: status={reply.status_code}, response={reply.text[:200]}"
                )
                return {"success": False, "error": reply.text}

        return {"success": True, "message": "No updates to apply"}

    except Exception as exc:
        activity.logger.error(f"update_plan_state_failed: {str(exc)}")
        return {"success": False, "error": str(exc)}
258
+
259
+
260
def _build_task_prompt(
    task: PlanTask,
    dependency_outputs: Optional[Dict[int, str]],
) -> str:
    """Render the markdown prompt for ``task``, embedding outputs of its dependencies."""
    dependency_context = ""
    if task.dependencies and dependency_outputs:
        sections = ["\n## Outputs from Previous Tasks\n"]
        for dep_task_id in task.dependencies:
            if dep_task_id in dependency_outputs:
                output = dependency_outputs[dep_task_id]
                sections.append(f"\n### Task {dep_task_id} Output:\n```\n{output}\n```\n")
            else:
                sections.append(f"\n### Task {dep_task_id}: Output not available\n")
        sections.append("\n")
        dependency_context = "".join(sections)

    return f"""# Task: {task.title}

## Description
{task.description}

## Detailed Instructions
{task.details}
{dependency_context}
## Test Strategy
{task.test_strategy or 'Complete the task as described and verify the output.'}

## Priority
{task.priority}

## Available Skills
{', '.join(task.skills_to_use) if task.skills_to_use else 'Use any available skills as needed'}

Please complete this task following the instructions above. Be thorough and verify your work.
"""


@activity.defn
async def execute_task_activity(
    task: PlanTask,
    plan_execution_id: str,
    organization_id: str,
    dependency_outputs: Optional[Dict[int, str]] = None,
    jwt_token: Optional[str] = None,
    model_id: Optional[str] = None,
    retry_context: Optional[TaskRetryContext] = None,
    default_worker_queue_id: Optional[str] = None,  # Fallback from workflow input
) -> TaskExecutionResult:
    """
    Execute a plan task by triggering an agent execution and following its event stream.

    The task is turned into a prompt, posted to the Control Plane
    ``/api/v1/agents/{agent_id}/execute`` endpoint, and the resulting
    execution's SSE stream is read until a terminal ``status`` event arrives.
    A ``waiting_for_input`` status is passed to
    ``analyze_task_completion_status`` to decide whether the task is really
    finished or genuinely needs user input. Plan events (``task_started``,
    ``todo_item_updated``, ``task_waiting_for_input`` / ``task_completed``)
    are published along the way.

    Args:
        task: Task to run; supplies ``agent_id`` and ``worker_queue_id``.
        plan_execution_id: Owning plan execution; also sent as ``session_id``.
        organization_id: Organization ID forwarded to the completion analysis.
        dependency_outputs: Outputs of prerequisite tasks keyed by task ID.
        jwt_token: Bearer token for Control Plane requests.
        model_id: Accepted for interface compatibility; not used by this body.
        retry_context: When given, the task is enriched with failure history
            via ``enrich_task_with_retry_context`` before execution.
        default_worker_queue_id: Used when the task has no ``worker_queue_id``.

    Returns:
        TaskExecutionResult describing the outcome. Any exception raised
        after validation is converted into a FAILED result whose ``error``
        holds the exception text.

    Raises:
        ValueError: If the task has no ``agent_id``, or no worker queue ID is
            available from either the task or ``default_worker_queue_id``.
    """
    from worker_internal.planner.retry_logic import enrich_task_with_retry_context

    if retry_context:
        task = enrich_task_with_retry_context(task, retry_context)

    # Use agent_id and worker_queue_id from task, with fallback to workflow-level default
    agent_id = task.agent_id
    worker_queue_id = task.worker_queue_id or default_worker_queue_id

    if not agent_id:
        raise ValueError(f"Task {task.id} missing agent_id")
    if not worker_queue_id:
        raise ValueError(f"Task {task.id} missing worker_queue_id (and no default_worker_queue_id provided)")

    activity.logger.info(
        "executing_task",
        extra={
            "task_id": task.id,
            "task_title": task.title,
            "plan_execution_id": plan_execution_id[:8],
            "has_jwt_token": bool(jwt_token),
            "jwt_token_length": len(jwt_token) if jwt_token else 0,
            "worker_queue_id": worker_queue_id,
            "agent_id": agent_id,
            "dependencies": task.dependencies,
            "has_dependency_outputs": bool(dependency_outputs),
            "is_retry": bool(retry_context),
            "retry_attempt": retry_context.current_attempt if retry_context else 0,
        }
    )

    started_at = datetime.now(timezone.utc)

    try:
        enriched_prompt = _build_task_prompt(task, dependency_outputs)

        # Trigger agent execution via Control Plane API
        control_plane_url = get_control_plane_url()

        final_status = None
        output_chunks: List[str] = []
        final_tokens = 0
        final_cost = 0.0
        final_error = None
        all_events: List[Dict[str, Any]] = []  # Every parsed stream event

        async with httpx.AsyncClient(timeout=600.0) as client:  # 10 min timeout for task execution
            response = await client.post(
                f"{control_plane_url}/api/v1/agents/{agent_id}/execute",
                json={
                    "prompt": enriched_prompt,
                    "worker_queue_id": worker_queue_id,
                    # Don't pass execution_id - let API generate it
                    "user_metadata": {
                        "plan_execution_id": plan_execution_id,
                        "task_id": task.id,
                        "task_title": task.title,
                        "skills_filter": task.skills_to_use,
                        "env_vars_filter": task.env_vars_to_use,
                        "secrets_filter": task.secrets_to_use,
                        "session_id": plan_execution_id,  # For agent worker to use
                    },
                    "runtime_config": {
                        # Same session_id as the plan so agent LLM calls are grouped with it
                        "session_id": plan_execution_id,
                    }
                },
                headers=get_auth_headers(jwt_token),
            )

            if response.status_code not in (200, 201, 202):
                activity.logger.error(
                    f"agent_execution_api_failed: status={response.status_code}, response={response.text[:500]}"
                )
                raise Exception(f"Failed to execute task: {response.text}")

            result = response.json()
            # Use execution_id from API response
            execution_id = result.get("execution_id")
            if not execution_id:
                # Without an ID the stream URL below would be meaningless.
                raise Exception("Agent execution API response did not include an execution_id")
            activity.logger.info(
                f"agent_execution_started: execution_id={execution_id}, workflow_id={result.get('workflow_id')}"
            )

            # Publish task_started event (now we have task_execution_id)
            await publish_plan_event(
                execution_id=plan_execution_id,
                event_type="task_started",
                event_data=TaskStartedEvent(
                    execution_id=plan_execution_id,
                    task_id=task.id,
                    title=task.title,
                    description=task.description,
                    agent_id=agent_id,
                    task_execution_id=execution_id,  # Agent execution ID
                    dependencies=task.dependencies or [],
                )
            )

            # Publish TODO update: pending -> running
            await publish_plan_event(
                execution_id=plan_execution_id,
                event_type="todo_item_updated",
                event_data=TodoItemUpdatedEvent(
                    execution_id=plan_execution_id,
                    task_id=task.id,
                    title=task.title,
                    old_status="pending",
                    new_status="running",
                    message=f"Started executing: {task.title}",
                )
            )

            # Stream execution events instead of polling
            activity.logger.info(f"streaming_task_execution: execution_id={execution_id}, task_id={task.id}")

            async with client.stream(
                "GET",
                f"{control_plane_url}/api/v1/executions/{execution_id}/stream",
                headers=get_auth_headers(jwt_token),
                timeout=600.0,  # 10 min timeout
            ) as stream_response:
                if stream_response.status_code not in (200, 201):
                    raise Exception(f"Failed to stream execution: {stream_response.status_code}")

                current_event = None
                async for line in stream_response.aiter_lines():
                    if not line:
                        continue

                    # Parse SSE format: "event: type\ndata: json"
                    if line.startswith("event: "):
                        current_event = line[7:]  # Get event type
                        continue

                    if not line.startswith("data: "):
                        continue

                    try:
                        data = json.loads(line[6:])  # Remove "data: " prefix
                    except json.JSONDecodeError:
                        continue  # Skip malformed events
                    if not isinstance(data, dict):
                        continue  # Only object payloads carry status/message fields

                    status = data.get("status")

                    # Store event
                    all_events.append({
                        "event": current_event,
                        "data": data,
                        "timestamp": data.get("timestamp", datetime.now(timezone.utc).isoformat())
                    })

                    # Log event
                    activity.logger.info(
                        f"stream_event: event={current_event}, "
                        f"status={status}, task_id={task.id}"
                    )

                    # A terminal status event ends the stream
                    if current_event == "status" and status in (
                        "waiting_for_input", "completed", "success", "failed", "error"
                    ):
                        final_status = status
                        if status in ("failed", "error"):
                            # presumably the payload carries a reason under
                            # "error" or "message" - TODO confirm event schema
                            reported = data.get("error") or data.get("message")
                            final_error = (
                                str(reported) if reported
                                else f"Agent execution ended with status '{status}'"
                            )
                        activity.logger.info(f"✅ Task complete! status={final_status}, task_id={task.id}")
                        break  # Done!

                    # Track message content for summary output (only assistant messages)
                    if current_event in ("message", "message_chunk"):
                        msg_data = data.get("data", {})
                        if not isinstance(msg_data, dict):
                            msg_data = {}
                        role = msg_data.get("role", data.get("role"))
                        content = msg_data.get("content", data.get("content", ""))

                        # Only capture assistant messages, not user prompts
                        if role == "assistant" and content and content != "(no content)":
                            output_chunks.append(content)

        final_output = "".join(output_chunks)

        # Return result based on stream
        completed_at = datetime.now(timezone.utc)

        # Determine task status based on final_status
        if final_status in ("completed", "success"):
            task_status = TaskStatus.SUCCESS
            needs_continuation = False
            user_question = None

        elif final_status == "waiting_for_input":
            # Agent is waiting for user response - use LLM to analyze if task is complete
            activity.logger.info(
                f"analyzing_waiting_for_input_status: task_id={task.id}, analyzing if task is complete or needs user input"
            )

            analysis = await analyze_task_completion_status(
                task,
                final_output,
                all_events,
                plan_execution_id=plan_execution_id,
                organization_id=organization_id,
                user_id=None,
                jwt_token=jwt_token,
            )

            if analysis.get("task_complete", False):
                # Task is actually complete despite waiting_for_input status
                activity.logger.info(
                    f"task_complete_despite_waiting: task_id={task.id}, "
                    f"reasoning={analysis.get('reasoning')}"
                )
                task_status = TaskStatus.SUCCESS
                needs_continuation = False
                user_question = None
            else:
                # Task genuinely needs user input to continue
                activity.logger.info(
                    f"task_needs_user_input: task_id={task.id}, "
                    f"user_question={analysis.get('user_question')}"
                )
                task_status = TaskStatus.WAITING_FOR_INPUT
                needs_continuation = True
                user_question = analysis.get("user_question")

        else:
            # Task failed, errored, or the stream ended without a terminal status
            task_status = TaskStatus.FAILED
            needs_continuation = False
            user_question = None
            if final_error is None:
                final_error = (
                    f"Agent execution ended with status '{final_status}'"
                    if final_status
                    else "Execution stream ended before a terminal status was received"
                )

        # Publish appropriate event based on status
        if task_status == TaskStatus.WAITING_FOR_INPUT:
            await publish_plan_event(
                execution_id=plan_execution_id,
                event_type="task_waiting_for_input",
                event_data=TaskWaitingForInputEvent(
                    execution_id=plan_execution_id,
                    task_id=task.id,
                    question=user_question or "Waiting for user input",
                    task_execution_id=execution_id,
                )
            )
            # Publish TODO update: running -> waiting_for_input
            await publish_plan_event(
                execution_id=plan_execution_id,
                event_type="todo_item_updated",
                event_data=TodoItemUpdatedEvent(
                    execution_id=plan_execution_id,
                    task_id=task.id,
                    title=task.title,
                    old_status="running",
                    new_status="waiting_for_input",
                    message=user_question or "Waiting for user input",
                )
            )
        else:
            succeeded = task_status == TaskStatus.SUCCESS
            # Task completed (success or failed)
            await publish_plan_event(
                execution_id=plan_execution_id,
                event_type="task_completed",
                event_data=TaskCompletedEvent(
                    execution_id=plan_execution_id,
                    task_id=task.id,
                    title=task.title,
                    status="success" if succeeded else "failed",
                    output=final_output[:500] if final_output else "",  # Truncate for event
                    error=final_error,
                    tokens=final_tokens,
                    cost=final_cost,
                )
            )
            # Publish TODO update: running -> completed/failed
            await publish_plan_event(
                execution_id=plan_execution_id,
                event_type="todo_item_updated",
                event_data=TodoItemUpdatedEvent(
                    execution_id=plan_execution_id,
                    task_id=task.id,
                    title=task.title,
                    old_status="running",
                    new_status="completed" if succeeded else "failed",
                    message=f"Task {'completed successfully' if succeeded else 'failed'}",
                )
            )

        return TaskExecutionResult(
            task_id=task.id,
            status=task_status,
            execution_id=execution_id,
            output=final_output,
            events=all_events,  # Include all stream events
            tokens=final_tokens,
            cost=final_cost,
            started_at=started_at,
            completed_at=completed_at,
            error=final_error,
            needs_continuation=needs_continuation,
            user_question=user_question,
        )

    except Exception as e:
        activity.logger.error(
            "execute_task_failed",
            extra={
                "task_id": task.id,
                "error": str(e),
            }
        )

        # Report the failure as a result rather than failing the activity;
        # a synthetic execution ID is used here.
        return TaskExecutionResult(
            task_id=task.id,
            status=TaskStatus.FAILED,
            execution_id=f"{plan_execution_id}-task-{task.id}",
            output="",
            events=[],  # No events on error
            tokens=0,
            cost=0.0,
            started_at=started_at,
            completed_at=datetime.now(timezone.utc),
            error=str(e),
        )
629
+
630
+
631
def _extract_tool_stdout(tool_output: Any) -> Any:
    """Return ``tool_response.stdout`` when ``tool_output`` is a dict-literal string, else the input unchanged."""
    if not (isinstance(tool_output, str) and "stdout" in tool_output):
        return tool_output

    import ast

    try:
        parsed = ast.literal_eval(tool_output)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal; keep the raw output.
        return tool_output

    if isinstance(parsed, dict):
        tool_response = parsed.get("tool_response", {})
        if isinstance(tool_response, dict):
            return tool_response.get("stdout", tool_output)
    return tool_output


def _collect_conversation_turns(events: List[Dict[str, Any]]) -> List[str]:
    """Turn stream events into ``ROLE: text`` turns followed by ``TOOL(name): output`` entries."""
    messages_by_id: Dict[Any, str] = {}  # message_id -> accumulated content
    conversation_order: List[tuple] = []  # (message_id, role) in first-seen order
    tool_executions: List[str] = []

    for event in events:
        event_type = event.get("event")
        payload = event.get("data", {})
        if not isinstance(payload, dict):
            continue

        # Track tool executions
        if event_type == "tool_completed":
            tool_data = payload.get("data", {})
            if not isinstance(tool_data, dict):
                continue
            tool_name = tool_data.get("tool_name", "")
            tool_output = tool_data.get("tool_output", "")
            if tool_name and tool_output:
                tool_executions.append(f"TOOL({tool_name}): {_extract_tool_stdout(tool_output)}")

        # Track messages
        elif event_type in ("message", "message_chunk"):
            # Chunks nest the message under "data"; full messages are read flat.
            # NOTE(review): "message" events that nest their body under "data"
            # would be skipped here - confirm against the stream schema.
            if event_type == "message_chunk" and "data" in payload:
                msg_data = payload.get("data", {})
            else:
                msg_data = payload
            if not isinstance(msg_data, dict):
                continue

            role = msg_data.get("role")
            content = msg_data.get("content", "")
            message_id = msg_data.get("message_id", "")

            # Skip tool messages and empty/no-content
            if role in ("user", "assistant") and content and content != "(no content)":
                if message_id not in messages_by_id:
                    messages_by_id[message_id] = ""
                    conversation_order.append((message_id, role))

                # Accumulate chunks for this message
                messages_by_id[message_id] += content

    conversation_turns: List[str] = []
    for message_id, role in conversation_order:
        content = messages_by_id[message_id].strip()
        if content:
            truncated_content = content if len(content) <= 500 else content[:500] + "..."
            conversation_turns.append(f"{role.upper()}: {truncated_content}")

    # Tool executions are appended after the message turns
    conversation_turns.extend(tool_executions)
    return conversation_turns


@activity.defn
async def analyze_task_completion_status(
    task: PlanTask,
    agent_output: str,
    events: Optional[List[Dict[str, Any]]] = None,
    plan_execution_id: Optional[str] = None,
    organization_id: Optional[str] = None,
    user_id: Optional[str] = None,
    jwt_token: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Decide via an LLM whether a task is complete or still needs user input.

    Used when an agent execution reaches ``waiting_for_input``: the
    conversation is reconstructed from the stream events (falling back to
    ``agent_output`` when no conversation can be extracted) and sent to the
    LiteLLM endpoint together with the task requirement.

    Args:
        task: Task whose requirement is being checked.
        agent_output: Accumulated assistant output, used as fallback text.
        events: Stream events as ``{"event", "data", "timestamp"}`` dicts.
        plan_execution_id: Plan execution ID used for trace grouping.
        organization_id: Organization ID added to the request metadata.
        user_id: User identifier; extracted from ``jwt_token`` when omitted.
        jwt_token: Token from which ``user_id`` may be derived.

    Returns:
        The JSON object produced by the LLM; the prompt requests the keys
        ``task_complete``, ``reasoning``, ``confidence``,
        ``needs_user_input`` and ``user_question``.

    Raises:
        Exception: If the LLM call fails or its reply is not a JSON object;
            the original error is chained as the cause.
    """
    # Extract user_id from JWT if not provided
    if not user_id and jwt_token:
        user_id = extract_user_from_jwt(jwt_token)

    # Extract full conversation from events (all user/assistant messages + tool executions)
    conversation_turns = _collect_conversation_turns(events) if events else []

    if conversation_turns:
        conversation_summary = "\n\n".join(conversation_turns)
        activity.logger.info(
            "extracted_full_conversation_from_events",
            extra={
                "task_id": task.id,
                "total_events": len(events),
                "conversation_turns": len(conversation_turns),
                "conversation_preview": conversation_summary[:400],
            }
        )
    else:
        # No conversation could be extracted (no events, or none with
        # usable messages): use the accumulated output instead.
        conversation_summary = agent_output or ""
        activity.logger.info(
            "no_conversation_in_events_using_accumulated_output",
            extra={
                "task_id": task.id,
                "output_length": len(conversation_summary),
            }
        )

    # Use conversation summary for analysis
    analysis_text = conversation_summary

    activity.logger.info(
        "analyzing_task_completion_status",
        extra={
            "task_id": task.id,
            "task_title": task.title,
            "analysis_text_length": len(analysis_text),
            "analysis_text_preview": analysis_text[:300],
            "using_conversation_summary": bool(conversation_summary),
        }
    )

    try:
        # Build analysis prompt
        analysis_prompt = f"""Analyze this task execution to determine if the task is complete or if it needs user input to continue.

Task Requirement:
Title: {task.title}
Description: {task.description}
Details: {task.details}
Test Strategy: {task.test_strategy or 'Complete the task as described'}

Full Conversation for this Task:
{analysis_text[:10000] if analysis_text else 'No output available'}

Question: Looking at the FULL conversation above, did the agent complete the task requirement, or does it still need more user input?

Analyze the complete conversation flow:
1. What did the task require? (from Description and Details)
2. What has happened in the conversation so far?
3. Has the agent fulfilled the task requirement?
4. Is the LATEST agent message asking for NEW information, or just confirming completion?

Decision Rules:
- **CRITICAL: If the agent explicitly says "completed", "done", "finished" → task_complete=true**
- If the task said "ask user for X, then do Y" AND the conversation shows user provided X AND agent did Y → task_complete=true
- If the task said "ask user" AND agent asked AND user hasn't responded yet → needs_user_input=true
- If agent provided a result/answer that satisfies the task → task_complete=true
- If agent's latest message is asking for the FIRST TIME for input → needs_user_input=true
- If agent already got input and produced a result, even if asking again → task_complete=true (use the result before the repeat)
- **If agent's LAST message confirms completion (not asking a question) → task_complete=true**

Examples:
- Task: "Ask for number, calculate" | Conv: "ASSISTANT: What number? USER: 5 ASSISTANT: Result is 10" → task_complete=true (result: 10)
- Task: "Ask for input" | Conv: "ASSISTANT: What input?" → needs_user_input=true
- Task: "Generate random number" | Conv: "ASSISTANT: Generated 7" → task_complete=true

Respond with ONLY a JSON object (no markdown, no explanation):
{{
"task_complete": true | false,
"reasoning": "brief explanation of your determination",
"confidence": 0.95,
"needs_user_input": true | false,
"user_question": "what the agent is asking for (if needs_user_input=true, otherwise null)"
}}

Guidelines:
- task_complete=true: The task requirement was satisfied, agent produced a result
- task_complete=false: The task is not complete yet
- needs_user_input=true: The agent is explicitly asking for user input/clarification
- needs_user_input=false: The task is complete or failed, no user input needed
"""

        # Use LiteLLM directly with metadata in request body
        litellm_api_base = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
        litellm_api_key = os.getenv("LITELLM_API_KEY")
        model = "kubiya/claude-sonnet-4"

        # Build Langfuse metadata
        metadata_context = build_langfuse_metadata(
            plan_execution_id=plan_execution_id or "unknown",
            generation_name=f"task-{task.id}-completion-analysis",
            user_id=user_id,
            organization_id=organization_id,
            agent_id=task.agent_id,
            task_id=task.id,
        )

        # Format user for LiteLLM (format: email-org)
        user_field = None
        if user_id and organization_id:
            user_field = f"{user_id}-{organization_id}"

        activity.logger.info(
            "calling_llm_for_task_completion_analysis",
            extra={
                "task_id": task.id,
                "plan_execution_id": plan_execution_id[:8] if plan_execution_id else "unknown",
                "generation_name": metadata_context.get("generation_name"),
                "session_id": metadata_context.get("session_id"),
            }
        )

        # Per the original author's notes: no top-level "user" field and no
        # "user_id" in metadata, because the provider rejects e-mail
        # addresses there; only "trace_user_id" carries the user.
        request_body = {
            "model": model,
            "messages": [
                {"role": "user", "content": analysis_prompt}
            ],
            "temperature": 0.0,
            "max_tokens": 500,
            "metadata": {
                "trace_name": metadata_context.get("trace_name"),
                "generation_name": metadata_context.get("generation_name"),
                "trace_id": metadata_context.get("session_id"),
                "session_id": metadata_context.get("session_id"),
                "trace_user_id": user_field,  # For Langfuse only
                "organization_id": organization_id,
                "agent_id": metadata_context.get("agent_id"),
                "task_id": metadata_context.get("task_id"),
            },
        }

        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{litellm_api_base}/v1/chat/completions",
                json=request_body,
                headers={
                    "Authorization": f"Bearer {litellm_api_key}",
                    "Content-Type": "application/json",
                }
            )

        if response.status_code != 200:
            raise Exception(f"LLM analysis failed: {response.status_code} - {response.text}")

        result = response.json()
        content = result['choices'][0]['message']['content']

        # Parse JSON response, tolerating a surrounding markdown code fence
        content = content.strip()
        if content.startswith('```'):
            content = content.split('```')[1]
            if content.startswith('json'):
                content = content[4:]
            content = content.strip()

        analysis_data = json.loads(content)
        if not isinstance(analysis_data, dict):
            # Callers read the result with .get(); anything else is unusable.
            raise ValueError(
                f"LLM analysis returned {type(analysis_data).__name__}, expected a JSON object"
            )

        activity.logger.info(
            "task_completion_analysis_complete",
            extra={
                "task_id": task.id,
                "task_complete": analysis_data.get("task_complete"),
                "needs_user_input": analysis_data.get("needs_user_input"),
                "confidence": analysis_data.get("confidence"),
                "reasoning": analysis_data.get("reasoning"),
                "analyzed_text_preview": analysis_text[:200],
            }
        )

        return analysis_data

    except Exception as e:
        activity.logger.error(
            "task_completion_analysis_failed",
            extra={
                "task_id": task.id,
                "error": str(e),
                "litellm_api_base": os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai"),
                "has_api_key": bool(os.getenv("LITELLM_API_KEY")),
            }
        )
        # Re-raise the exception so we can see what's wrong
        raise Exception(f"Failed to analyze task completion for task {task.id}: {str(e)}") from e
911
+
912
+
913
@activity.defn
async def validate_task_completion(
    task: PlanTask,
    execution_result: TaskExecutionResult,
    plan_execution_id: Optional[str] = None,
    organization_id: Optional[str] = None,
    user_id: Optional[str] = None,
    jwt_token: Optional[str] = None,
) -> TaskValidationResult:
    """
    Ask the LLM proxy to judge whether a task execution really succeeded.

    A prompt is assembled from the task's title, description and test
    strategy together with the first 2000 characters of the execution
    output, the execution status and (if any) the error. It is posted to
    ``{LITELLM_API_BASE}/v1/chat/completions`` and the reply is parsed as a
    JSON object with ``status``, ``reason``, ``confidence`` and
    ``suggestions`` keys.

    Args:
        task: The plan task that was executed.
        execution_result: Outcome of the execution being validated.
        plan_execution_id: Used for tracing metadata; "unknown" when absent.
        organization_id: Forwarded in the request metadata.
        user_id: Acting user; recovered from ``jwt_token`` when not given.
        jwt_token: Optional token from which a user id can be extracted.

    Returns:
        A ``TaskValidationResult``. Unrecognised statuses map to FAILED.
        If anything in the validation itself raises (HTTP error, malformed
        reply, ...), the error is logged and the task is reported as
        SUCCESS with confidence 0.5 - validation is best-effort.
    """
    # No explicit user: try to recover one from the JWT.
    if jwt_token and not user_id:
        user_id = extract_user_from_jwt(jwt_token)

    activity.logger.info(
        "validating_task",
        extra={"task_id": task.id, "task_title": task.title},
    )

    try:
        # Pre-compute the variable fragments so the template below stays readable.
        test_strategy = task.test_strategy or 'Task should be completed as described'
        output_excerpt = (
            execution_result.output[:2000]
            if execution_result.output
            else 'No output available'
        )
        error_line = f"Error: {execution_result.error}" if execution_result.error else ""

        validation_prompt = f"""Analyze this task execution and determine if it completed successfully.

Task: {task.title}

Description: {task.description}

Test Strategy: {test_strategy}

Task Output:
{output_excerpt}

Execution Status: {execution_result.status}
{error_line}

Respond with ONLY a JSON object (no markdown, no explanation):
{{
"status": "success" | "failed" | "pending",
"reason": "brief explanation of why you determined this status",
"confidence": 0.95,
"suggestions": "optional suggestions for improvement or next steps"
}}

Guidelines:
- "success": Task completed and output matches test strategy
- "failed": Task failed, errored, or output doesn't match requirements
- "pending": Task seems incomplete or needs clarification
"""

        # The request goes straight to the LiteLLM proxy so that tracing
        # metadata can travel in the request body.
        proxy_base_url = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
        proxy_api_key = os.getenv("LITELLM_API_KEY")

        trace_context = build_langfuse_metadata(
            plan_execution_id=plan_execution_id or "unknown",
            generation_name=f"task-{task.id}-validation",
            user_id=user_id,
            organization_id=organization_id,
            agent_id=task.agent_id,
            task_id=task.id,
        )

        # Trace user identifier, formatted "<user>-<organization>"; only set
        # when both parts are known.
        user_field = f"{user_id}-{organization_id}" if user_id and organization_id else None

        activity.logger.info(
            "calling_llm_for_task_validation",
            extra={
                "task_id": task.id,
                "plan_execution_id": plan_execution_id[:8] if plan_execution_id else "unknown",
                "generation_name": trace_context.get("generation_name"),
                "session_id": trace_context.get("session_id"),
            },
        )

        # Deliberately no top-level "user" field and no "user_id" in the
        # metadata (the provider rejects e-mail addresses there); only
        # "trace_user_id" carries the identity, for Langfuse.
        payload = {
            "model": "kubiya/claude-sonnet-4",
            "messages": [{"role": "user", "content": validation_prompt}],
            "temperature": 0.0,
            "max_tokens": 500,
            "metadata": {
                "trace_name": trace_context.get("trace_name"),
                "generation_name": trace_context.get("generation_name"),
                "trace_id": trace_context.get("session_id"),
                "session_id": trace_context.get("session_id"),
                "trace_user_id": user_field,
                "organization_id": organization_id,
                "agent_id": trace_context.get("agent_id"),
                "task_id": trace_context.get("task_id"),
            },
        }

        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{proxy_base_url}/v1/chat/completions",
                json=payload,
                headers={
                    "Authorization": f"Bearer {proxy_api_key}",
                    "Content-Type": "application/json",
                },
            )

            if response.status_code != 200:
                raise Exception(f"LLM validation failed: {response.status_code} - {response.text}")

            reply = response.json()['choices'][0]['message']['content'].strip()

            # The model sometimes wraps its JSON in a markdown fence despite
            # the instructions; unwrap it before parsing.
            if reply.startswith('```'):
                reply = reply.split('```')[1]
                if reply.startswith('json'):
                    reply = reply[4:]
            reply = reply.strip()

            verdict = json.loads(reply)

            # Translate the textual verdict into the TaskStatus enum.
            status_by_label = {
                "success": TaskStatus.SUCCESS,
                "failed": TaskStatus.FAILED,
                "pending": TaskStatus.PENDING,
            }
            resolved_status = status_by_label.get(
                verdict.get("status", "failed"),
                TaskStatus.FAILED,
            )

            return TaskValidationResult(
                task_id=task.id,
                status=resolved_status,
                reason=verdict.get("reason", "Validation completed"),
                confidence=verdict.get("confidence", 0.5),
                suggestions=verdict.get("suggestions"),
            )

    except Exception as e:
        activity.logger.error(
            "task_validation_failed",
            extra={"task_id": task.id, "error": str(e)},
        )

        # Best-effort policy: a broken validator must not fail the task.
        return TaskValidationResult(
            task_id=task.id,
            status=TaskStatus.SUCCESS,
            reason=f"Validation failed, assuming success: {str(e)}",
            confidence=0.5,
        )
1081
+
1082
+
1083
def _event_is_newer_than(event_timestamp: object, reference: datetime) -> bool:
    """Return True when an event timestamp is strictly later than *reference*.

    ISO-8601 strings are parsed into aware datetimes before comparing, so a
    ``Z`` suffix, a different UTC offset or a different fractional-second
    width cannot distort the ordering the way a plain string comparison
    does. Naive timestamps are assumed to be UTC. Strings that cannot be
    parsed fall back to the previous lexical comparison; missing or
    non-string timestamps are treated as "not newer".
    """
    if not event_timestamp or not isinstance(event_timestamp, str):
        return False

    normalized = event_timestamp.strip()
    # datetime.fromisoformat() only accepts a trailing "Z" on newer Pythons.
    if normalized.endswith(("Z", "z")):
        normalized = normalized[:-1] + "+00:00"

    try:
        parsed = datetime.fromisoformat(normalized)
    except ValueError:
        return event_timestamp > reference.isoformat()

    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed > reference


@activity.defn
async def continue_task_activity(
    task: PlanTask,
    execution_id: str,
    user_message: str,
    plan_execution_id: str,
    jwt_token: Optional[str] = None,
    model_id: Optional[str] = None,
    organization_id: Optional[str] = None,
) -> TaskExecutionResult:
    """
    Continue a task execution after user provides input.

    When ``user_message`` is non-empty it is posted to the execution's
    ``/message`` endpoint; an empty message means it was already delivered
    elsewhere and the post is skipped. The execution's SSE stream is then
    consumed until a terminal status arrives, or until the agent asks for
    input again *after* this activity started. Assistant message content is
    accumulated as the task output.

    Args:
        task: The plan task being continued.
        execution_id: Id of the existing agent execution.
        user_message: The user's reply; may be empty.
        plan_execution_id: Id of the enclosing plan execution (event target).
        jwt_token: Optional token used to build the auth headers.
        model_id: Accepted for interface compatibility; not used here.
        organization_id: Forwarded to the completion analysis.

    Returns:
        A ``TaskExecutionResult``. The status is SUCCESS, WAITING_FOR_INPUT
        (with ``needs_continuation=True``) or FAILED. Any exception is logged
        and converted into a FAILED result rather than raised.
    """
    activity.logger.info(
        "continuing_task_execution",
        extra={
            "task_id": task.id,
            "execution_id": execution_id,
            "plan_execution_id": plan_execution_id[:8],
            # None-safe: this runs outside the try block below, and a falsy
            # message is an explicitly supported input.
            "message_preview": (user_message or "")[:100],
        }
    )

    started_at = datetime.now(timezone.utc)

    try:
        control_plane_url = get_control_plane_url()

        async with httpx.AsyncClient(timeout=600.0) as client:
            # Step 1: Send user message to continue conversation (only if message provided)
            if user_message:
                message_response = await client.post(
                    f"{control_plane_url}/api/v1/executions/{execution_id}/message",
                    json={"message": user_message},
                    headers=get_auth_headers(jwt_token),
                )

                if message_response.status_code not in (200, 201, 202):
                    raise Exception(f"Failed to send message: {message_response.text}")

                activity.logger.info(
                    f"user_message_sent_to_execution: execution_id={execution_id}"
                )
            else:
                activity.logger.info(
                    f"skipping_message_send_already_sent_by_api: execution_id={execution_id}"
                )

            # Step 2: Continue streaming from the execution
            final_status = None
            final_output = ""
            # NOTE(review): tokens and cost are never updated from the stream,
            # so they are always reported as zero - confirm whether the stream
            # carries usage data that should be captured here.
            final_tokens = 0
            final_cost = 0.0
            final_error = None
            all_events = []
            seen_events_after_message = False  # Track if we've seen NEW events after sending message

            async with client.stream(
                "GET",
                f"{control_plane_url}/api/v1/executions/{execution_id}/stream",
                headers=get_auth_headers(jwt_token),
                timeout=600.0,
            ) as stream_response:
                if stream_response.status_code not in (200, 201):
                    raise Exception(f"Failed to stream execution: {stream_response.status_code}")

                current_event = None
                async for line in stream_response.aiter_lines():
                    if not line:
                        continue

                    # Parse SSE format
                    if line.startswith("event: "):
                        current_event = line[7:]
                        continue

                    if line.startswith("data: "):
                        try:
                            data = json.loads(line[6:])
                        except json.JSONDecodeError:
                            continue

                        # Only JSON objects carry the fields read below; a
                        # stray scalar or array must not fail the whole task.
                        if not isinstance(data, dict):
                            continue

                        status = data.get("status")

                        # Check if this is a NEW event (after our message was sent)
                        if _event_is_newer_than(data.get("timestamp", ""), started_at):
                            seen_events_after_message = True

                        all_events.append({
                            "event": current_event,
                            "data": data,
                            "timestamp": data.get("timestamp", datetime.now(timezone.utc).isoformat())
                        })

                        activity.logger.info(
                            f"stream_event: event={current_event}, status={status}, task_id={task.id}, new={seen_events_after_message}"
                        )

                        # Check for completion (but ignore old waiting_for_input status)
                        if current_event == "status" and status:
                            # During continuation, ignore waiting_for_input unless we've seen new events
                            # This prevents breaking on old cached status
                            if status in ("completed", "success", "failed", "error"):
                                final_status = status
                                activity.logger.info(
                                    f"task_continuation_complete: status={final_status}, task_id={task.id}"
                                )
                                break
                            elif status == "waiting_for_input" and seen_events_after_message:
                                # Agent needs MORE input after our message
                                final_status = status
                                activity.logger.info(
                                    f"task_needs_more_input: status={final_status}, task_id={task.id}"
                                )
                                break

                        # Track assistant messages
                        if current_event in ("message", "message_chunk"):
                            msg_data = data.get("data", {})
                            role = msg_data.get("role", data.get("role"))
                            content = msg_data.get("content", data.get("content", ""))

                            if role == "assistant" and content and content != "(no content)":
                                final_output += content

        # Analyze completion status
        completed_at = datetime.now(timezone.utc)

        if final_status in ("completed", "success"):
            task_status = TaskStatus.SUCCESS
            needs_continuation = False
            user_question = None

        elif final_status == "waiting_for_input":
            # Use LLM analysis again
            activity.logger.info(
                f"re_analyzing_after_user_input: task_id={task.id}, analyzing continuation result"
            )
            analysis = await analyze_task_completion_status(
                task,
                final_output,
                all_events,
                plan_execution_id=plan_execution_id,
                organization_id=organization_id,
                user_id=None,
                jwt_token=jwt_token,
            )

            if analysis.get("task_complete", False):
                task_status = TaskStatus.SUCCESS
                needs_continuation = False
                user_question = None
                activity.logger.info(
                    f"task_complete_after_user_input: task_id={task.id}"
                )
            else:
                # Task still needs more input
                task_status = TaskStatus.WAITING_FOR_INPUT
                needs_continuation = True
                user_question = analysis.get("user_question")
                activity.logger.info(
                    f"task_still_needs_input: task_id={task.id}, question={user_question}"
                )

        else:
            task_status = TaskStatus.FAILED
            needs_continuation = False
            user_question = None
            # Give the failure a reason instead of reporting error=None.
            final_error = (
                f"Execution ended with status: {final_status}"
                if final_status
                else "Execution stream ended without a terminal status"
            )

        # Publish completion events (same as execute_task_activity)
        if task_status == TaskStatus.WAITING_FOR_INPUT:
            await publish_plan_event(
                execution_id=plan_execution_id,
                event_type="task_waiting_for_input",
                event_data=TaskWaitingForInputEvent(
                    execution_id=plan_execution_id,
                    task_id=task.id,
                    question=user_question or "Waiting for user input",
                    task_execution_id=execution_id,
                )
            )
            await publish_plan_event(
                execution_id=plan_execution_id,
                event_type="todo_item_updated",
                event_data=TodoItemUpdatedEvent(
                    execution_id=plan_execution_id,
                    task_id=task.id,
                    title=task.title,
                    old_status="running",
                    new_status="waiting_for_input",
                    message=user_question or "Waiting for user input",
                )
            )
        else:
            # Task completed (success or failed)
            await publish_plan_event(
                execution_id=plan_execution_id,
                event_type="task_completed",
                event_data=TaskCompletedEvent(
                    execution_id=plan_execution_id,
                    task_id=task.id,
                    title=task.title,
                    status="success" if task_status == TaskStatus.SUCCESS else "failed",
                    output=final_output[:500] if final_output else "",
                    error=final_error,
                    tokens=final_tokens,
                    cost=final_cost,
                )
            )
            await publish_plan_event(
                execution_id=plan_execution_id,
                event_type="todo_item_updated",
                event_data=TodoItemUpdatedEvent(
                    execution_id=plan_execution_id,
                    task_id=task.id,
                    title=task.title,
                    old_status="waiting_for_input",  # Was waiting, now completing
                    new_status="completed" if task_status == TaskStatus.SUCCESS else "failed",
                    message=f"Task {'completed successfully' if task_status == TaskStatus.SUCCESS else 'failed'}",
                )
            )

        return TaskExecutionResult(
            task_id=task.id,
            status=task_status,
            execution_id=execution_id,
            output=final_output,
            events=all_events,
            tokens=final_tokens,
            cost=final_cost,
            started_at=started_at,
            completed_at=completed_at,
            error=final_error,
            needs_continuation=needs_continuation,
            user_question=user_question,
        )

    except Exception as e:
        activity.logger.error(
            "continue_task_failed",
            extra={
                "task_id": task.id,
                "execution_id": execution_id,
                "error": str(e),
            }
        )

        # Failures are reported through the result object, not raised.
        return TaskExecutionResult(
            task_id=task.id,
            status=TaskStatus.FAILED,
            execution_id=execution_id,
            output="",
            events=[],
            tokens=0,
            cost=0.0,
            started_at=started_at,
            completed_at=datetime.now(timezone.utc),
            error=str(e),
            needs_continuation=False,
            user_question=None,
        )
1348
+
1349
+
1350
@activity.defn
async def get_task_status_activity(
    task_id: int,
    task_results: Dict[int, TaskExecutionResult],
) -> Dict[str, Any]:
    """
    Report what is known about one task.

    Args:
        task_id: Identifier to look up.
        task_results: Results recorded so far, keyed by task id.

    Returns:
        ``{"found": False, "status": "pending"}`` when no result is recorded
        for ``task_id``; otherwise a dict with ``found=True`` plus the
        result's status value, output, tokens, cost and error.
    """
    # Guard clause: nothing recorded yet means the task is still pending.
    if task_id not in task_results:
        return {
            "found": False,
            "status": "pending",
        }

    recorded = task_results[task_id]
    return {
        "found": True,
        "status": recorded.status.value,
        "output": recorded.output,
        "tokens": recorded.tokens,
        "cost": recorded.cost,
        "error": recorded.error,
    }
1371
+
1372
+
1373
@activity.defn
async def call_llm_activity(
    messages: List[Dict[str, Any]],
    system_prompt: str,
    tools: List[Dict[str, Any]],
    model_id: str,
    plan_execution_id: Optional[str] = None,
    organization_id: Optional[str] = None,
    user_id: Optional[str] = None,
    task_id: Optional[int] = None,
    generation_name: Optional[str] = None,
    jwt_token: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Send an Anthropic-format messages request through the LiteLLM proxy.

    The request is posted to ``{LITELLM_API_BASE}/v1/messages`` with Langfuse
    tracing fields in the ``metadata`` of the request body.

    Args:
        messages: Conversation messages, forwarded unchanged.
        system_prompt: Sent as the request's ``system`` field.
        tools: Tool definitions, forwarded unchanged.
        model_id: Model identifier for the request.
        plan_execution_id: Used for tracing metadata; "unknown" when absent.
        organization_id: Forwarded in the request metadata.
        user_id: Acting user; recovered from ``jwt_token`` when not given.
        task_id: Optional task id for the tracing metadata.
        generation_name: Trace generation name; defaults to
            "plan-orchestrator-llm-call".
        jwt_token: Optional token from which a user id can be extracted.

    Returns:
        ``{"content": <text>, "tool_calls": [...]}`` where ``content`` is the
        concatenation of all text blocks in the response and each tool call
        has ``id``, ``name`` and ``input``.

    Raises:
        Exception: On a non-200 response, or any error raised while making
            the call; the failure is logged and re-raised.
    """
    # Extract user_id from JWT if not provided
    if not user_id and jwt_token:
        user_id = extract_user_from_jwt(jwt_token)

    # Structured fields go through ``extra`` like every other log call in
    # this module; stdlib logging methods reject arbitrary keyword arguments.
    activity.logger.info(
        "calling_anthropic_api",
        extra={
            "model": model_id,
            "message_count": len(messages),
            "tool_count": len(tools),
            "plan_execution_id": plan_execution_id[:8] if plan_execution_id else "unknown",
        },
    )

    try:
        # Use httpx directly to have full control over request with metadata
        litellm_api_base = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
        litellm_api_key = os.getenv("LITELLM_API_KEY")

        # Build Langfuse metadata
        metadata_context = build_langfuse_metadata(
            plan_execution_id=plan_execution_id or "unknown",
            generation_name=generation_name or "plan-orchestrator-llm-call",
            user_id=user_id,
            organization_id=organization_id,
            task_id=task_id,
        )

        # Trace user identifier, formatted "<user>-<organization>"; only set
        # when both parts are known.
        user_field = None
        if user_id and organization_id:
            user_field = f"{user_id}-{organization_id}"

        activity.logger.info(
            "calling_anthropic_with_metadata",
            extra={
                "plan_execution_id": plan_execution_id[:8] if plan_execution_id else "unknown",
                "generation_name": metadata_context.get("generation_name"),
                "session_id": metadata_context.get("session_id"),
            }
        )

        # Build request body in Anthropic format with metadata.
        # Deliberately no top-level "user" field and no "user_id" in the
        # metadata (the provider rejects e-mail addresses there); only
        # "trace_user_id" carries the identity, for Langfuse.
        request_body = {
            "model": model_id,
            "max_tokens": 4096,
            "system": system_prompt,
            "messages": messages,
            "tools": tools,
            "temperature": 0.0,
            "metadata": {
                "trace_name": metadata_context.get("trace_name"),
                "generation_name": metadata_context.get("generation_name"),
                "trace_id": metadata_context.get("session_id"),
                "session_id": metadata_context.get("session_id"),
                "trace_user_id": user_field,  # For Langfuse only
                "organization_id": organization_id,
                "agent_id": metadata_context.get("agent_id"),
                "task_id": metadata_context.get("task_id"),
            },
        }

        async with httpx.AsyncClient(timeout=300.0) as http_client:
            response = await http_client.post(
                f"{litellm_api_base}/v1/messages",
                json=request_body,
                headers={
                    "Authorization": f"Bearer {litellm_api_key}",
                    "Content-Type": "application/json",
                    "anthropic-version": "2023-06-01",
                },
            )

            if response.status_code != 200:
                raise Exception(f"Anthropic API call failed: {response.status_code} - {response.text}")

            result = response.json()

        # Split the response into text and tool calls. Text blocks are
        # concatenated so that none is lost when the response has several.
        tool_calls = []
        text_parts = []

        for block in result.get("content", []):
            block_type = block.get("type")
            if block_type == "text":
                text_parts.append(block.get("text") or "")
            elif block_type == "tool_use":
                tool_calls.append({
                    "id": block.get("id"),
                    "name": block.get("name"),
                    "input": block.get("input", {}),
                })

        content_text = "".join(text_parts)

        activity.logger.info(
            "anthropic_call_complete",
            extra={"tool_calls_count": len(tool_calls)},
        )

        return {
            "content": content_text,
            "tool_calls": tool_calls,
        }

    except Exception as e:
        activity.logger.error(f"anthropic_call_failed: {str(e)}")
        raise