kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479) hide show
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,354 @@
1
+ """
2
+ Trace Retention Service.
3
+
4
+ Handles automatic cleanup of old traces based on configurable retention period.
5
+ Also provides storage statistics per organization.
6
+
7
+ Features:
8
+ - Configurable retention period (default: 30 days)
9
+ - Per-organization storage stats
10
+ - Batch deletion for performance
11
+ - Scheduled cleanup job support
12
+ """
13
+
14
+ import structlog
15
+ from datetime import datetime, timezone, timedelta
16
+ from typing import Optional, Dict, Any
17
+
18
+ from sqlalchemy.orm import Session
19
+ from sqlalchemy import func, delete
20
+
21
+ from control_plane_api.app.config import settings
22
+ from control_plane_api.app.database import get_session_local
23
+ from control_plane_api.app.models.trace import Trace, Span
24
+
25
+ logger = structlog.get_logger()
26
+
27
+ # Configuration
28
+ RETENTION_DAYS = getattr(settings, 'OTEL_LOCAL_STORAGE_RETENTION_DAYS', 30)
29
+ BATCH_SIZE = 1000 # Number of traces to delete per batch
30
+
31
+
32
class TraceRetentionService:
    """Service for managing trace retention and cleanup.

    Besides deleting expired traces, the instance keeps in-process counters
    (``_stats``) about the cleanup runs it performed; they are exposed through
    ``get_service_stats``.
    """

    def __init__(self, retention_days: Optional[int] = None):
        """
        Initialize the retention service.

        Args:
            retention_days: Number of days to retain traces. ``None`` (or any
                other falsy value, including ``0``) falls back to the
                module-level ``RETENTION_DAYS`` default.
        """
        # NOTE: the ``or`` fallback is kept on purpose so that an accidental 0
        # can never turn into "delete every trace".
        self.retention_days = retention_days or RETENTION_DAYS
        self._stats = {
            "last_cleanup": None,
            "traces_deleted": 0,
            "spans_deleted": 0,
            "errors": 0,
        }

    def get_cutoff_date(self) -> datetime:
        """Return the timezone-aware UTC instant before which traces are expired."""
        return datetime.now(timezone.utc) - timedelta(days=self.retention_days)

    async def cleanup_old_traces(
        self,
        organization_id: Optional[str] = None,
        batch_size: int = BATCH_SIZE,
    ) -> Dict[str, Any]:
        """
        Delete traces older than the retention period.

        Traces are loaded and deleted in batches of ``batch_size``; every batch
        is committed on its own, so a failure only rolls back the batch that
        was in flight.

        Args:
            organization_id: Optional org to limit cleanup to
            batch_size: Number of traces to delete per batch

        Returns:
            Dict with ``traces_deleted``, ``spans_deleted`` and the ISO-formatted
            ``cutoff_date`` used for this run.

        Raises:
            Exception: Any error raised while opening the session or deleting a
                batch is logged, counted once in the service stats and re-raised.
        """
        cutoff_date = self.get_cutoff_date()
        total_traces_deleted = 0
        total_spans_deleted = 0

        logger.info(
            "trace_cleanup_starting",
            retention_days=self.retention_days,
            cutoff_date=cutoff_date.isoformat(),
            organization_id=organization_id,
        )

        try:
            SessionLocal = get_session_local()
            session = SessionLocal()

            try:
                while True:
                    # Find old traces to delete
                    query = session.query(Trace).filter(
                        Trace.started_at < cutoff_date
                    )

                    if organization_id:
                        query = query.filter(Trace.organization_id == organization_id)

                    # Load one batch of expired traces
                    traces_to_delete = query.limit(batch_size).all()

                    if not traces_to_delete:
                        break

                    trace_ids = [t.trace_id for t in traces_to_delete]

                    # Count spans being deleted (for stats)
                    span_count = session.query(func.count(Span.id)).filter(
                        Span.trace_id.in_(trace_ids)
                    ).scalar()

                    # Delete traces (cascade will delete spans due to FK)
                    for trace in traces_to_delete:
                        session.delete(trace)

                    session.commit()

                    total_traces_deleted += len(trace_ids)
                    total_spans_deleted += span_count

                    # Record committed work right away so the cumulative
                    # counters stay accurate even if a later batch fails.
                    self._stats["traces_deleted"] += len(trace_ids)
                    self._stats["spans_deleted"] += span_count

                    logger.info(
                        "trace_cleanup_batch_completed",
                        traces_deleted=len(trace_ids),
                        spans_deleted=span_count,
                        total_traces_deleted=total_traces_deleted,
                    )

            except Exception as e:
                session.rollback()
                # The error counter is bumped only in the outer handler;
                # counting here as well would register one failure twice.
                logger.error("trace_cleanup_batch_failed", error=str(e), exc_info=True)
                raise

            finally:
                session.close()

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("trace_cleanup_failed", error=str(e), exc_info=True)
            raise

        # Only a fully successful run updates the "last cleanup" timestamp
        self._stats["last_cleanup"] = datetime.now(timezone.utc).isoformat()

        logger.info(
            "trace_cleanup_completed",
            total_traces_deleted=total_traces_deleted,
            total_spans_deleted=total_spans_deleted,
            retention_days=self.retention_days,
        )

        return {
            "traces_deleted": total_traces_deleted,
            "spans_deleted": total_spans_deleted,
            "cutoff_date": cutoff_date.isoformat(),
        }

    async def get_storage_stats(
        self,
        organization_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Get storage statistics for traces.

        Args:
            organization_id: Optional org to get stats for

        Returns:
            Dict with total counts, a per-status breakdown, the oldest/newest
            trace start times, average duration and span count, and retention
            information (including how many traces are already past the cutoff).

        Raises:
            Exception: Any query error is logged and re-raised.
        """
        try:
            SessionLocal = get_session_local()
            session = SessionLocal()

            try:
                # Base queries
                trace_query = session.query(Trace)
                span_query = session.query(Span)

                if organization_id:
                    trace_query = trace_query.filter(Trace.organization_id == organization_id)
                    span_query = span_query.filter(Span.organization_id == organization_id)

                # Total counts
                total_traces = trace_query.count()
                total_spans = span_query.count()

                # Counts by status
                success_count = trace_query.filter(Trace.status == "success").count()
                error_count = trace_query.filter(Trace.status == "error").count()
                running_count = trace_query.filter(Trace.status == "running").count()

                # Date range
                oldest_trace = trace_query.order_by(Trace.started_at.asc()).first()
                newest_trace = trace_query.order_by(Trace.started_at.desc()).first()

                # Average metrics
                avg_duration = session.query(func.avg(Trace.duration_ms)).filter(
                    Trace.duration_ms.isnot(None)
                )
                avg_span_count = session.query(func.avg(Trace.span_count))

                if organization_id:
                    avg_duration = avg_duration.filter(Trace.organization_id == organization_id)
                    avg_span_count = avg_span_count.filter(Trace.organization_id == organization_id)

                avg_duration_val = avg_duration.scalar()
                avg_span_count_val = avg_span_count.scalar()

                # Retention info
                cutoff_date = self.get_cutoff_date()
                traces_to_expire = trace_query.filter(Trace.started_at < cutoff_date).count()

                return {
                    "total_traces": total_traces,
                    "total_spans": total_spans,
                    "status_breakdown": {
                        "success": success_count,
                        "error": error_count,
                        "running": running_count,
                    },
                    "date_range": {
                        # Guard against a row whose started_at is NULL
                        "oldest": (
                            oldest_trace.started_at.isoformat()
                            if oldest_trace and oldest_trace.started_at
                            else None
                        ),
                        "newest": (
                            newest_trace.started_at.isoformat()
                            if newest_trace and newest_trace.started_at
                            else None
                        ),
                    },
                    "averages": {
                        # Compare with None explicitly: an average of 0 is a
                        # real value, only "no rows" should be reported as None.
                        "duration_ms": (
                            round(avg_duration_val, 2)
                            if avg_duration_val is not None
                            else None
                        ),
                        "span_count": (
                            round(avg_span_count_val, 2)
                            if avg_span_count_val is not None
                            else None
                        ),
                    },
                    "retention": {
                        "retention_days": self.retention_days,
                        "cutoff_date": cutoff_date.isoformat(),
                        "traces_to_expire": traces_to_expire,
                    },
                    "organization_id": organization_id,
                    "retrieved_at": datetime.now(timezone.utc).isoformat(),
                }

            finally:
                session.close()

        except Exception as e:
            logger.error("get_storage_stats_failed", error=str(e), exc_info=True)
            raise

    def get_service_stats(self) -> Dict[str, Any]:
        """Return a copy of the cleanup counters plus the configured retention_days."""
        return {
            **self._stats,
            "retention_days": self.retention_days,
        }
249
+
250
+
251
+ # Singleton instance
252
+ _retention_service: Optional[TraceRetentionService] = None
253
+
254
+
255
def get_retention_service() -> TraceRetentionService:
    """Return the process-wide TraceRetentionService, creating it on first use."""
    global _retention_service
    if _retention_service is not None:
        return _retention_service

    # First call: build the instance with default settings and cache it.
    _retention_service = TraceRetentionService()
    return _retention_service
261
+
262
+
263
async def run_cleanup_job():
    """
    Execute one trace cleanup pass using the shared retention service.

    Intended as the entry point for a scheduled job (e.g., Temporal workflow,
    cron job, or APScheduler).

    Returns:
        The statistics dict produced by ``cleanup_old_traces``.
    """
    outcome = await get_retention_service().cleanup_old_traces()

    # The statistics are forwarded as individual structured log fields.
    logger.info("scheduled_trace_cleanup_completed", **outcome)

    return outcome
279
+
280
+
281
+ # Scheduler configuration
282
+ CLEANUP_INTERVAL_HOURS = 6 # Run cleanup every 6 hours
283
+
284
+
285
async def _retention_scheduler_loop():
    """
    Background loop that runs retention cleanup periodically.

    Each iteration first sleeps for CLEANUP_INTERVAL_HOURS and then runs the
    cleanup job, so the first cleanup happens one full interval after start.
    When a run fails, the error is logged, the loop pauses for 60 seconds and
    then starts the next iteration (which again sleeps the full interval).

    Cancellation is handled at any await point, including the pause after a
    failed run: the loop logs ``retention_scheduler_cancelled`` and returns
    normally.
    """
    import asyncio

    logger.info(
        "retention_scheduler_started",
        interval_hours=CLEANUP_INTERVAL_HOURS,
        retention_days=RETENTION_DAYS,
    )

    try:
        while True:
            try:
                # Wait for the interval
                await asyncio.sleep(CLEANUP_INTERVAL_HOURS * 3600)

                # Run cleanup
                logger.info("retention_cleanup_starting")
                result = await run_cleanup_job()
                logger.info(
                    "retention_cleanup_completed",
                    traces_deleted=result.get("traces_deleted", 0),
                    spans_deleted=result.get("spans_deleted", 0),
                )

            except asyncio.CancelledError:
                # Never treat cancellation as a cleanup failure
                raise
            except Exception as e:
                logger.error(
                    "retention_scheduler_error",
                    error=str(e),
                    exc_info=True,
                )
                # Continue running despite errors. A cancel arriving during
                # this pause is caught by the outer handler below.
                await asyncio.sleep(60)
    except asyncio.CancelledError:
        logger.info("retention_scheduler_cancelled")
325
+
326
+
327
async def start_retention_scheduler():
    """
    Launch the retention scheduler loop as a background asyncio task.

    Returns:
        asyncio.Task: Handle of the scheduler task; keep it and pass it to
        stop_retention_scheduler() on shutdown.
    """
    import asyncio

    return asyncio.create_task(_retention_scheduler_loop())
338
+
339
+
340
async def stop_retention_scheduler(task):
    """
    Stop the retention cleanup scheduler.

    Cancels the task (if it is still running) and waits for it to finish.

    Args:
        task: The scheduler task returned by start_retention_scheduler()
    """
    import asyncio

    if task and not task.done():
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            # CancelledError derives from BaseException (Python 3.8+), so it
            # has to be caught explicitly. Swallow it only when it belongs to
            # the scheduler task; if this coroutine itself is being cancelled,
            # let the cancellation propagate.
            if not task.cancelled():
                raise
        except Exception as e:
            # Shutdown is best-effort, but do not hide unexpected failures.
            logger.warning(
                "retention_scheduler_stop_error",
                error=str(e),
                exc_info=True,
            )

    logger.info("retention_scheduler_stopped")
@@ -0,0 +1,190 @@
1
+ """
2
+ Worker Queue Metrics Service.
3
+
4
+ This service provides business logic for calculating worker queue metrics
5
+ including worker health, task statistics, and performance metrics.
6
+ """
7
+
8
+ import structlog
9
+ from datetime import datetime, timedelta
10
+ from typing import Optional, Dict
11
+ from sqlalchemy.orm import Session
12
+ from sqlalchemy import func
13
+
14
+ from control_plane_api.app.models.worker import WorkerQueue, WorkerHeartbeat
15
+ from control_plane_api.app.models.execution import Execution
16
+ from control_plane_api.app.schemas.worker_queue_observability_schemas import WorkerQueueMetricsResponse
17
+
18
+ logger = structlog.get_logger()
19
+
20
+
21
class WorkerQueueMetricsService:
    """Service for calculating worker queue metrics"""

    def __init__(self, db: Session):
        """Store the SQLAlchemy session used for all metric queries."""
        self.db = db

    async def get_queue_metrics(
        self,
        queue_id: str,
        organization_id: str
    ) -> WorkerQueueMetricsResponse:
        """
        Calculate comprehensive metrics for a worker queue.

        Args:
            queue_id: Worker queue UUID
            organization_id: Organization ID

        Returns:
            WorkerQueueMetricsResponse with calculated metrics

        Raises:
            ValueError: If queue not found or doesn't belong to organization
        """
        # Verify queue exists and belongs to organization
        queue = self.db.query(WorkerQueue).filter(
            WorkerQueue.id == queue_id,
            WorkerQueue.organization_id == organization_id
        ).first()

        if not queue:
            raise ValueError("Worker queue not found")

        # NOTE(review): naive UTC timestamp; presumably the heartbeat and
        # execution columns store naive UTC as well - confirm before switching
        # to timezone-aware datetimes.
        now = datetime.utcnow()

        # Calculate worker status counts
        worker_stats = self._get_worker_status_counts(queue_id, now)

        # Calculate 24h task metrics
        task_metrics = self._get_task_metrics_24h(queue_id, now)

        # Get last activity timestamp
        last_activity = self._get_last_activity(queue_id)

        # Build response
        return WorkerQueueMetricsResponse(
            queue_id=queue_id,
            active_workers=worker_stats["active"],
            idle_workers=worker_stats["idle"],
            busy_workers=worker_stats["busy"],
            total_workers=worker_stats["total"],
            tasks_processed_24h=task_metrics["processed"],
            tasks_failed_24h=task_metrics["failed"],
            tasks_pending=task_metrics["pending"],
            avg_task_duration_ms=task_metrics["avg_duration_ms"],
            error_rate_percent=task_metrics["error_rate"],
            last_error_at=task_metrics["last_error_at"],
            task_queue_backlog=0,  # TODO: Implement Temporal queue metrics
            task_queue_pollers=0,  # TODO: Implement Temporal queue metrics
            last_activity_at=last_activity,
            updated_at=now
        )

    def _get_worker_status_counts(self, queue_id: str, now: datetime) -> Dict[str, int]:
        """
        Get worker status counts from WorkerHeartbeat table.

        Only heartbeats newer than 90 seconds before ``now`` are considered;
        older ones are treated as stale and ignored. ``total`` counts every
        non-stale heartbeat, whatever its status.
        """
        stale_threshold = now - timedelta(seconds=90)

        # Query recent heartbeats
        heartbeats = self.db.query(WorkerHeartbeat).filter(
            WorkerHeartbeat.worker_queue_id == queue_id,
            WorkerHeartbeat.last_heartbeat > stale_threshold
        ).all()

        active = sum(1 for hb in heartbeats if hb.status == "active")
        idle = sum(1 for hb in heartbeats if hb.status == "idle")
        busy = sum(1 for hb in heartbeats if hb.status == "busy")
        total = len(heartbeats)

        logger.info(
            "worker_status_calculated",
            queue_id=queue_id,
            active=active,
            idle=idle,
            busy=busy,
            total=total
        )

        return {
            "active": active,
            "idle": idle,
            "busy": busy,
            "total": total
        }

    def _get_task_metrics_24h(self, queue_id: str, now: datetime) -> Dict:
        """
        Calculate task metrics for the last 24 hours.

        Returns a dict with:
            processed: executions created in the window with status completed/failed
            failed: executions created in the window with status failed
            pending: all pending executions of the queue (not limited to 24h)
            avg_duration_ms: mean completed_at - started_at of completed
                executions in the window, 0 when there are none
            error_rate: failed / processed in percent, 0 when nothing was processed
            last_error_at: completed_at of the most recently completed failed
                execution (not limited to 24h), or None
        """
        twenty_four_hours_ago = now - timedelta(hours=24)

        # Get executions in last 24h
        executions_24h = self.db.query(Execution).filter(
            Execution.worker_queue_id == queue_id,
            Execution.created_at >= twenty_four_hours_ago
        ).all()

        # Count processed and failed tasks
        processed = sum(1 for e in executions_24h if e.status in ("completed", "failed"))
        failed = sum(1 for e in executions_24h if e.status == "failed")

        # Get pending tasks count
        pending = self.db.query(Execution).filter(
            Execution.worker_queue_id == queue_id,
            Execution.status == "pending"
        ).count()

        # Calculate average duration for completed tasks
        completed_executions = [
            e for e in executions_24h
            if e.status == "completed" and e.started_at and e.completed_at
        ]

        if completed_executions:
            total_duration_ms = sum(
                (e.completed_at - e.started_at).total_seconds() * 1000
                for e in completed_executions
            )
            avg_duration_ms = total_duration_ms / len(completed_executions)
        else:
            avg_duration_ms = 0

        # Calculate error rate
        error_rate = (failed / processed * 100) if processed > 0 else 0

        # Get last error timestamp. Rows without completed_at are excluded:
        # some databases (e.g. PostgreSQL) sort NULLs first in DESC order,
        # which would otherwise hide the real timestamp behind a NULL row.
        last_error = self.db.query(Execution).filter(
            Execution.worker_queue_id == queue_id,
            Execution.status == "failed",
            Execution.completed_at.isnot(None)
        ).order_by(Execution.completed_at.desc()).first()

        last_error_at = last_error.completed_at if last_error else None

        logger.info(
            "task_metrics_calculated",
            queue_id=queue_id,
            processed=processed,
            failed=failed,
            pending=pending,
            avg_duration_ms=avg_duration_ms,
            error_rate=error_rate
        )

        return {
            "processed": processed,
            "failed": failed,
            "pending": pending,
            "avg_duration_ms": avg_duration_ms,
            "error_rate": error_rate,
            "last_error_at": last_error_at
        }

    def _get_last_activity(self, queue_id: str) -> Optional[datetime]:
        """Return the most recent heartbeat time of the queue, or None if it has no heartbeats."""
        last_activity = self.db.query(WorkerHeartbeat).filter(
            WorkerHeartbeat.worker_queue_id == queue_id
        ).order_by(WorkerHeartbeat.last_heartbeat.desc()).first()

        return last_activity.last_heartbeat if last_activity else None
@@ -0,0 +1,135 @@
1
+ """
2
+ Workflow Cancellation Manager - handles workflow-specific cancellation without affecting the agent execution.
3
+
4
+ This allows cancelling individual workflow tool calls while the agent continues to run.
5
+ """
6
+
7
+ from typing import Dict, Set, Any, Optional
8
+ from datetime import datetime, timezone
9
+ import structlog
10
+ import threading
11
+
12
+ logger = structlog.get_logger()
13
+
14
+
15
class WorkflowCancellationManager:
    """
    Manages cancellation flags for active workflow executions.

    Workflow tool calls check these flags periodically to see if they should stop,
    without affecting the parent agent/team execution.

    All access to the internal dictionaries is guarded by a single lock.
    """

    def __init__(self):
        # Key: workflow_execution_key (execution_id + workflow_message_id), Value: cancellation time
        self._cancelled_workflows: Dict[str, str] = {}
        # Key: workflow_execution_key, Value: threading.Event for immediate cancellation
        self._cancellation_events: Dict[str, threading.Event] = {}
        self._lock = threading.Lock()

    def _make_key(self, execution_id: str, workflow_message_id: str) -> str:
        """Create a unique key for a workflow execution."""
        return f"{execution_id}:{workflow_message_id}"

    def register_workflow(self, execution_id: str, workflow_message_id: str) -> threading.Event:
        """
        Register a workflow execution and get a cancellation event.

        If cancellation was already requested for this workflow before it was
        registered, the returned event is already set, so the event agrees
        with ``is_cancelled()``. Registering the same workflow again replaces
        the previously stored event.

        Args:
            execution_id: The agent execution ID
            workflow_message_id: The unique workflow message ID

        Returns:
            threading.Event that will be set when cancellation is requested
        """
        with self._lock:
            key = self._make_key(execution_id, workflow_message_id)
            event = threading.Event()
            # A cancel request may arrive before the workflow registers;
            # do not lose it.
            if key in self._cancelled_workflows:
                event.set()
            self._cancellation_events[key] = event

            logger.info(
                "workflow_registered",
                execution_id=execution_id[:8],
                workflow_message_id=workflow_message_id[-12:],
                key=key
            )

            return event

    def request_cancellation(self, execution_id: str, workflow_message_id: str) -> bool:
        """
        Request cancellation of a specific workflow.

        The cancellation flag is recorded even when the workflow has not been
        registered; if an event is registered for it, the event is set
        immediately.

        Args:
            execution_id: The agent execution ID
            workflow_message_id: The unique workflow message ID

        Returns:
            Always True (the request is recorded whether or not the workflow
            is currently registered)
        """
        with self._lock:
            key = self._make_key(execution_id, workflow_message_id)
            self._cancelled_workflows[key] = datetime.now(timezone.utc).isoformat()

            # Signal the cancellation event immediately
            if key in self._cancellation_events:
                self._cancellation_events[key].set()
                logger.info(
                    "workflow_cancellation_event_signaled",
                    execution_id=execution_id[:8],
                    workflow_message_id=workflow_message_id[-12:],
                    key=key
                )

            logger.info(
                "workflow_cancellation_requested",
                execution_id=execution_id[:8],
                workflow_message_id=workflow_message_id[-12:],
                key=key
            )

            return True

    def is_cancelled(self, execution_id: str, workflow_message_id: str) -> bool:
        """
        Check if a workflow has been cancelled.

        Args:
            execution_id: The agent execution ID
            workflow_message_id: The unique workflow message ID

        Returns:
            True if the workflow has been cancelled
        """
        with self._lock:
            key = self._make_key(execution_id, workflow_message_id)
            return key in self._cancelled_workflows

    def clear_cancellation(self, execution_id: str, workflow_message_id: str) -> None:
        """
        Clear the cancellation flag for a workflow (called when workflow completes/fails).

        Removes both the cancellation flag and the registered event, if present.

        Args:
            execution_id: The agent execution ID
            workflow_message_id: The unique workflow message ID
        """
        with self._lock:
            key = self._make_key(execution_id, workflow_message_id)
            self._cancelled_workflows.pop(key, None)
            self._cancellation_events.pop(key, None)
            logger.info(
                "workflow_cancellation_cleared",
                execution_id=execution_id[:8],
                workflow_message_id=workflow_message_id[-12:]
            )

    def get_active_count(self) -> int:
        """Get number of workflows with pending cancellation."""
        with self._lock:
            return len(self._cancelled_workflows)
132
+
133
+
134
+ # Global singleton instance
135
+ workflow_cancellation_manager = WorkflowCancellationManager()