kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479)
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,517 @@
1
+ """
2
+ Prometheus metrics endpoint for Kubiya Control Plane.
3
+
4
+ This router exposes the /metrics endpoint for Prometheus scraping.
5
+ All gauge metrics are calculated on-the-fly from the database when
6
+ the endpoint is called, ensuring fresh data on every scrape.
7
+
8
+ Metrics collected:
9
+ - Active tasks by type and status
10
+ - Task failures by type
11
+ - Execution duration by type
12
+ - Worker queue depth
13
+ - LLM requests, latency, and token usage
14
+ - Tool execution metrics
15
+ - Streaming connections
16
+ - Webhook requests
17
+ - Organization-level metrics (agents, executions)
18
+ - Scheduled jobs status
19
+ """
20
+
21
+ from datetime import datetime, timedelta
22
+ from fastapi import APIRouter, Depends
23
+ from fastapi.responses import Response
24
+ from sqlalchemy.orm import Session
25
+ from sqlalchemy import select, func, and_, case, extract
26
+ import structlog
27
+
28
+ from control_plane_api.app.database import get_db
29
+ from control_plane_api.app.observability.metrics import (
30
+ get_metrics_response,
31
+ # Original
32
+ update_active_tasks,
33
+ update_task_failures,
34
+ # HIGH priority
35
+ update_execution_duration,
36
+ update_worker_queue_depth,
37
+ update_llm_requests,
38
+ update_llm_latency,
39
+ update_llm_tokens,
40
+ # MEDIUM priority
41
+ update_streaming_connections,
42
+ update_tool_execution_duration,
43
+ update_tool_executions,
44
+ update_execution_wait_time,
45
+ update_webhook_requests,
46
+ # LOWER priority
47
+ update_executions_by_org,
48
+ update_agents_active,
49
+ update_scheduled_jobs,
50
+ update_conversation_turns,
51
+ )
52
+
53
# Module-level structured logger, named after this module.
logger = structlog.get_logger(__name__)

# Router that carries the /metrics endpoint; its routes are tagged "monitoring".
router = APIRouter(tags=["monitoring"])

# Constants
# Execution statuses counted as "active" by the active-task collector.
ACTIVE_STATUSES = ['pending', 'queued', 'running', 'waiting_for_input', 'paused']
# Execution types whose counters are reported as zero when a query returns no row for them.
EXECUTION_TYPES = ['agent', 'team', 'workflow']
# Statuses treated as finished when averaging execution duration.
TERMINAL_STATUSES = ['completed', 'failed', 'cancelled']
61
+
62
+
63
+ # ==================== ORIGINAL METRICS ====================
64
+
65
def _collect_active_tasks(db: Session) -> None:
    """Publish the number of active executions per (execution type, status).

    Counts executions whose status is in ``ACTIVE_STATUSES``, grouped by
    execution type and status, and reports each count through
    ``update_active_tasks``. Every ``EXECUTION_TYPES`` x ``ACTIVE_STATUSES``
    combination missing from the result is reported as zero.

    Errors are logged and swallowed so a broken collector does not fail the
    whole scrape; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.execution import Execution

        query = (
            select(
                Execution.execution_type,
                Execution.status,
                func.count(Execution.id).label('count'),
            )
            .where(Execution.status.in_(ACTIVE_STATUSES))
            .group_by(Execution.execution_type, Execution.status)
        )

        reported = set()
        for row in db.execute(query).fetchall():
            # NULL columns are folded into an explicit 'unknown' label.
            execution_type = row.execution_type or 'unknown'
            status = row.status or 'unknown'
            update_active_tasks(execution_type, status, int(row.count))
            reported.add((execution_type, status))

        # Reset combinations that produced no row to zero.
        for execution_type in EXECUTION_TYPES:
            for status in ACTIVE_STATUSES:
                if (execution_type, status) not in reported:
                    update_active_tasks(execution_type, status, 0)

    except Exception as e:
        logger.error("active_tasks_collection_failed", error=str(e))
        # The endpoint hands this same session to the collectors that run
        # next. A failed statement can leave its transaction aborted (e.g.
        # PostgreSQL rejects further statements until a rollback), so reset it.
        db.rollback()
97
+
98
+
99
def _collect_task_failures(db: Session) -> None:
    """Publish the number of failed executions per execution type.

    Counts executions with status ``'failed'`` grouped by execution type and
    reports them through ``update_task_failures``. Types listed in
    ``EXECUTION_TYPES`` that produced no row are reported as zero.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.execution import Execution

        query = (
            select(
                Execution.execution_type,
                func.count(Execution.id).label('count'),
            )
            .where(Execution.status == 'failed')
            .group_by(Execution.execution_type)
        )

        reported = set()
        for row in db.execute(query).fetchall():
            execution_type = row.execution_type or 'unknown'
            update_task_failures(execution_type, int(row.count))
            reported.add(execution_type)

        # Types without any failed execution are reported as zero.
        for execution_type in EXECUTION_TYPES:
            if execution_type not in reported:
                update_task_failures(execution_type, 0)

    except Exception as e:
        logger.error("task_failures_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
127
+
128
+
129
+ # ==================== HIGH PRIORITY METRICS ====================
130
+
131
def _collect_execution_duration(db: Session) -> None:
    """Publish the average execution duration per (execution type, status).

    Considers executions in a ``TERMINAL_STATUSES`` state that have both
    ``started_at`` and ``completed_at`` set and completed within the last
    24 hours. The duration is the difference of the two timestamps' epoch
    values, i.e. seconds, and is reported through ``update_execution_duration``.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.execution import Execution

        # Only executions completed in the last 24h are averaged.
        cutoff = datetime.utcnow() - timedelta(hours=24)

        duration_seconds = (
            extract('epoch', Execution.completed_at)
            - extract('epoch', Execution.started_at)
        )
        query = (
            select(
                Execution.execution_type,
                Execution.status,
                func.avg(duration_seconds).label('avg_duration'),
            )
            .where(
                and_(
                    Execution.status.in_(TERMINAL_STATUSES),
                    Execution.started_at.isnot(None),
                    Execution.completed_at.isnot(None),
                    Execution.completed_at >= cutoff,
                )
            )
            .group_by(Execution.execution_type, Execution.status)
        )

        for row in db.execute(query).fetchall():
            if row.avg_duration is None:
                continue
            update_execution_duration(
                row.execution_type or 'unknown',
                row.status or 'unknown',
                float(row.avg_duration),
            )

    except Exception as e:
        logger.error("execution_duration_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
171
+
172
+
173
def _collect_worker_queue_depth(db: Session) -> None:
    """Publish the number of pending/queued executions per worker queue.

    Counts executions with status ``'pending'`` or ``'queued'`` that have a
    non-NULL ``worker_queue_id`` and reports each queue's count through
    ``update_worker_queue_depth``.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.execution import Execution

        query = (
            select(
                Execution.worker_queue_id,
                func.count(Execution.id).label('count'),
            )
            .where(
                and_(
                    Execution.status.in_(['pending', 'queued']),
                    Execution.worker_queue_id.isnot(None),
                )
            )
            .group_by(Execution.worker_queue_id)
        )

        # NOTE(review): queues with no pending work produce no row, so they
        # are never reported as zero here - confirm whether the metrics
        # module clears old label values between scrapes.
        for row in db.execute(query).fetchall():
            # NULL ids are filtered out by the query; the fallback only
            # covers other falsy ids (e.g. an empty string).
            queue_id = str(row.worker_queue_id) if row.worker_queue_id else 'default'
            update_worker_queue_depth(queue_id, int(row.count))

    except Exception as e:
        logger.error("worker_queue_depth_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
200
+
201
+
202
def _collect_llm_metrics(db: Session) -> None:
    """Publish LLM request counts, average latency and token usage.

    Reads ``ExecutionTurn`` rows created in the last 24 hours, grouped by
    model and by a derived status (``'error'`` when ``error_message`` is set,
    otherwise ``'success'``).

    Request counts are reported per (model, status). Latency and token
    totals are reported per model only, so they are first combined across
    the status groups of each model; reporting them row by row would let
    one status group replace the value of the other (assuming the
    ``update_*`` helpers set a value rather than add to it - TODO confirm).
    Latency is the sample-weighted mean of ``duration_ms``, in seconds.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.analytics import ExecutionTurn

        # Last 24h for performance
        cutoff = datetime.utcnow() - timedelta(hours=24)

        query = (
            select(
                ExecutionTurn.model,
                case(
                    (ExecutionTurn.error_message.isnot(None), 'error'),
                    else_='success',
                ).label('status'),
                func.count(ExecutionTurn.id).label('count'),
                # Sum and non-NULL sample count allow an exact weighted mean
                # once the status groups of a model are merged.
                func.sum(ExecutionTurn.duration_ms).label('total_latency_ms'),
                func.count(ExecutionTurn.duration_ms).label('latency_samples'),
                func.sum(ExecutionTurn.input_tokens).label('input_tokens'),
                func.sum(ExecutionTurn.output_tokens).label('output_tokens'),
            )
            .where(ExecutionTurn.created_at >= cutoff)
            .group_by(ExecutionTurn.model, 'status')
        )

        request_counts = {}  # (model, status) -> request count
        latency_totals = {}  # model -> [total duration in ms, sample count]
        token_totals = {}    # (model, direction) -> token count

        for row in db.execute(query).fetchall():
            model = row.model or 'unknown'

            request_key = (model, row.status)
            request_counts[request_key] = (
                request_counts.get(request_key, 0) + int(row.count)
            )

            if row.latency_samples:
                totals = latency_totals.setdefault(model, [0.0, 0])
                totals[0] += float(row.total_latency_ms or 0)
                totals[1] += int(row.latency_samples)

            for direction, tokens in (
                ('input', row.input_tokens),
                ('output', row.output_tokens),
            ):
                if tokens:
                    token_key = (model, direction)
                    token_totals[token_key] = (
                        token_totals.get(token_key, 0) + int(tokens)
                    )

        for (model, status), count in request_counts.items():
            update_llm_requests(model, status, count)

        for model, (total_ms, samples) in latency_totals.items():
            # As before, a zero/absent latency is not reported.
            if total_ms:
                update_llm_latency(model, total_ms / samples / 1000.0)

        for (model, direction), tokens in token_totals.items():
            update_llm_tokens(model, direction, tokens)

    except Exception as e:
        logger.error("llm_metrics_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
245
+
246
+
247
+ # ==================== MEDIUM PRIORITY METRICS ====================
248
+
249
def _collect_tool_metrics(db: Session) -> None:
    """Publish tool execution counts and average tool duration.

    Reads ``ExecutionToolCall`` rows created in the last 24 hours, grouped by
    tool name and the ``success`` flag. Counts are reported per
    (tool, status) where status is ``'success'`` for a truthy flag and
    ``'failed'`` otherwise. The duration is reported per tool only, so it is
    computed as the sample-weighted mean of ``duration_ms`` over all of the
    tool's groups, converted to seconds.

    Values are combined before reporting because several result rows can map
    to the same labels (success and failed rows of one tool for the duration;
    NULL and false ``success`` for the ``'failed'`` count); reporting row by
    row would let a later row replace an earlier one (assuming the
    ``update_*`` helpers set a value - TODO confirm).

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.analytics import ExecutionToolCall

        cutoff = datetime.utcnow() - timedelta(hours=24)

        query = (
            select(
                ExecutionToolCall.tool_name,
                ExecutionToolCall.success,
                func.count(ExecutionToolCall.id).label('count'),
                func.sum(ExecutionToolCall.duration_ms).label('total_duration_ms'),
                func.count(ExecutionToolCall.duration_ms).label('duration_samples'),
            )
            .where(ExecutionToolCall.created_at >= cutoff)
            .group_by(ExecutionToolCall.tool_name, ExecutionToolCall.success)
        )

        call_counts = {}      # (tool name, status) -> call count
        duration_totals = {}  # tool name -> [total duration in ms, sample count]

        for row in db.execute(query).fetchall():
            tool_name = row.tool_name or 'unknown'
            status = 'success' if row.success else 'failed'

            count_key = (tool_name, status)
            call_counts[count_key] = call_counts.get(count_key, 0) + int(row.count)

            if row.duration_samples:
                totals = duration_totals.setdefault(tool_name, [0.0, 0])
                totals[0] += float(row.total_duration_ms or 0)
                totals[1] += int(row.duration_samples)

        for (tool_name, status), count in call_counts.items():
            update_tool_executions(tool_name, status, count)

        for tool_name, (total_ms, samples) in duration_totals.items():
            # As before, a zero/absent duration is not reported.
            if total_ms:
                update_tool_execution_duration(tool_name, total_ms / samples / 1000.0)

    except Exception as e:
        logger.error("tool_metrics_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
280
+
281
+
282
def _collect_execution_wait_time(db: Session) -> None:
    """Publish the average wait time (created_at to started_at) per type.

    Considers executions with both timestamps set that started within the
    last 24 hours. The wait is the difference of the timestamps' epoch
    values, i.e. seconds. Negative averages are not reported.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.execution import Execution

        cutoff = datetime.utcnow() - timedelta(hours=24)

        wait_seconds = (
            extract('epoch', Execution.started_at)
            - extract('epoch', Execution.created_at)
        )
        query = (
            select(
                Execution.execution_type,
                func.avg(wait_seconds).label('avg_wait'),
            )
            .where(
                and_(
                    Execution.started_at.isnot(None),
                    Execution.created_at.isnot(None),
                    Execution.started_at >= cutoff,
                )
            )
            .group_by(Execution.execution_type)
        )

        for row in db.execute(query).fetchall():
            if row.avg_wait is None:
                continue
            avg_wait = float(row.avg_wait)
            # A negative average means started_at precedes created_at for
            # the group; such values are skipped rather than published.
            if avg_wait >= 0:
                update_execution_wait_time(row.execution_type or 'unknown', avg_wait)

    except Exception as e:
        logger.error("execution_wait_time_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
320
+
321
+
322
def _collect_webhook_metrics(db: Session) -> None:
    """Publish the number of webhook-triggered executions per status.

    Counts executions whose ``trigger_source`` is ``'job_webhook'``, grouped
    by status, and reports them through ``update_webhook_requests``.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.execution import Execution

        query = (
            select(
                Execution.status,
                func.count(Execution.id).label('count'),
            )
            .where(Execution.trigger_source == 'job_webhook')
            .group_by(Execution.status)
        )

        for row in db.execute(query).fetchall():
            update_webhook_requests(row.status or 'unknown', int(row.count))

    except Exception as e:
        logger.error("webhook_metrics_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
343
+
344
+
345
def _collect_streaming_connections(db: Session) -> None:
    """Publish the number of running executions as the streaming-connection count.

    The number of executions with status ``'running'`` is used as a proxy
    for active SSE connections; it is not a count of real connections.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.execution import Execution

        # Running executions are likely streaming
        query = select(func.count(Execution.id)).where(Execution.status == 'running')

        running = db.execute(query).scalar() or 0
        update_streaming_connections(running)

    except Exception as e:
        logger.error("streaming_connections_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
362
+
363
+
364
+ # ==================== LOWER PRIORITY METRICS ====================
365
+
366
def _collect_executions_by_org(db: Session) -> None:
    """Publish total execution counts for the 50 organizations with the most executions.

    Groups executions by ``organization_id``, orders by count descending and
    keeps the first 50 rows to bound the number of label values. Each count
    is reported through ``update_executions_by_org``.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.execution import Execution

        execution_count = func.count(Execution.id)
        query = (
            select(
                Execution.organization_id,
                execution_count.label('count'),
            )
            .group_by(Execution.organization_id)
            .order_by(execution_count.desc())
            .limit(50)  # Limit to prevent high cardinality
        )

        for row in db.execute(query).fetchall():
            # str() keeps non-string ids (e.g. UUID objects) from raising on
            # the slice; the label is capped at 36 characters.
            org_label = str(row.organization_id or 'unknown')[:36]
            update_executions_by_org(org_label, int(row.count))

    except Exception as e:
        logger.error("executions_by_org_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
391
+
392
+
393
def _collect_agents_active(db: Session) -> None:
    """Publish active-agent counts for the 50 organizations with the most active agents.

    Counts agents whose status is ``'active'`` grouped by ``organization_id``,
    orders by count descending and keeps the first 50 rows. Each count is
    reported through ``update_agents_active``.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.agent import Agent

        agent_count = func.count(Agent.id)
        query = (
            select(
                Agent.organization_id,
                agent_count.label('count'),
            )
            .where(Agent.status == 'active')
            .group_by(Agent.organization_id)
            .order_by(agent_count.desc())
            .limit(50)
        )

        for row in db.execute(query).fetchall():
            # str() keeps non-string ids (e.g. UUID objects) from raising on
            # the slice; the label is capped at 36 characters.
            org_label = str(row.organization_id or 'unknown')[:36]
            update_agents_active(org_label, int(row.count))

    except Exception as e:
        logger.error("agents_active_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
418
+
419
+
420
def _collect_scheduled_jobs(db: Session) -> None:
    """Publish the number of jobs per job status.

    Groups ``Job`` rows by status and reports each count through
    ``update_scheduled_jobs``; a NULL status is reported as ``'unknown'``.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.job import Job

        query = (
            select(
                Job.status,
                func.count(Job.id).label('count'),
            )
            .group_by(Job.status)
        )

        for row in db.execute(query).fetchall():
            update_scheduled_jobs(row.status or 'unknown', int(row.count))

    except Exception as e:
        logger.error("scheduled_jobs_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
440
+
441
+
442
def _collect_conversation_turns(db: Session) -> None:
    """Publish the total number of conversation turns per execution type.

    Joins ``ExecutionTurn`` to ``Execution`` on the execution id, counts the
    turns grouped by the execution's type and reports each count through
    ``update_conversation_turns``.

    Errors are logged and swallowed; the session is rolled back afterwards.

    Args:
        db: SQLAlchemy session used to run the query.
    """
    try:
        from control_plane_api.app.models.analytics import ExecutionTurn
        from control_plane_api.app.models.execution import Execution

        query = (
            select(
                Execution.execution_type,
                func.count(ExecutionTurn.id).label('count'),
            )
            .join(Execution, ExecutionTurn.execution_id == Execution.id)
            .group_by(Execution.execution_type)
        )

        for row in db.execute(query).fetchall():
            update_conversation_turns(row.execution_type or 'unknown', int(row.count))

    except Exception as e:
        logger.error("conversation_turns_collection_failed", error=str(e))
        # The session is shared with the collectors that run next; a failed
        # statement can leave its transaction aborted until rolled back.
        db.rollback()
464
+
465
+
466
+ # ==================== ENDPOINT ====================
467
+
468
@router.get("/metrics", include_in_schema=False)
async def prometheus_metrics(db: Session = Depends(get_db)) -> Response:
    """
    Prometheus metrics endpoint.

    Runs every collector against the database and returns the payload
    produced by ``get_metrics_response()`` with caching disabled.

    The collectors issue blocking, synchronous queries, so they are executed
    in a worker thread instead of directly on the event loop; running them
    inline in this coroutine would stall every other request while a scrape
    is in progress.

    Args:
        db: SQLAlchemy session provided by the ``get_db`` dependency.

    Returns:
        The metrics payload, or a 500 plain-text response with a generic
        message when generating it fails (details go to the log only).
    """
    # Function-scope import: leaves the module's import block untouched.
    from fastapi.concurrency import run_in_threadpool

    collectors = (
        # Original metrics
        _collect_active_tasks,
        _collect_task_failures,
        # HIGH priority metrics
        _collect_execution_duration,
        _collect_worker_queue_depth,
        _collect_llm_metrics,
        # MEDIUM priority metrics
        _collect_tool_metrics,
        _collect_execution_wait_time,
        _collect_webhook_metrics,
        _collect_streaming_connections,
        # LOWER priority metrics
        _collect_executions_by_org,
        _collect_agents_active,
        _collect_scheduled_jobs,
        _collect_conversation_turns,
    )

    def _run_collectors() -> None:
        # Sequential on purpose: all collectors share one session.
        for collect in collectors:
            collect(db)

    try:
        await run_in_threadpool(_run_collectors)

        # Generate response
        output, content_type = get_metrics_response()

        return Response(
            content=output,
            media_type=content_type,
            headers={
                "Cache-Control": "no-cache, no-store, must-revalidate",
                "Pragma": "no-cache",
                "Expires": "0",
            },
        )

    except Exception as e:
        logger.error("metrics_endpoint_failed", error=str(e))
        # The exception text is logged above but deliberately kept out of the
        # response body so internal details are not exposed to the client.
        return Response(
            content=b"# Error: metrics collection failed\n",
            media_type="text/plain; charset=utf-8",
            status_code=500,
        )
@@ -0,0 +1,82 @@
1
+ """
2
+ API router for LLM models configuration
3
+ """
4
+ from fastapi import APIRouter
5
+ from typing import List, Optional
6
+ from pydantic import BaseModel
7
+
8
# Router for the LLM model catalogue; every route is mounted under /api/v1/models.
router = APIRouter(prefix="/api/v1/models", tags=["models"])
9
+
10
+
11
class LLMModel(BaseModel):
    """LLM model configuration entry served by the models endpoints."""
    value: str  # model identifier; the entries in this file all start with "kubiya/"
    label: str  # display name, e.g. "Claude Sonnet 4"
    provider: str  # provider name, e.g. "Anthropic" or "OpenAI"
    logo: str  # path of the logo asset, e.g. "/logos/claude-color.svg"
    recommended: bool = False  # the first recommended entry is returned by the /default route
    description: Optional[str] = None  # optional short description of the model
19
+
20
+
21
# Kubiya's supported LLM models
# NOTE: All models must include "kubiya/" prefix for LiteLLM routing
# The list is served as-is by the list route. The /default route returns the
# first entry flagged recommended=True and falls back to the first entry, so
# the order of this list matters.
KUBIYA_LLM_MODELS = [
    LLMModel(
        value="kubiya/claude-sonnet-4",
        label="Claude Sonnet 4",
        provider="Anthropic",
        logo="/logos/claude-color.svg",
        recommended=True,
        description="Most intelligent model with best reasoning capabilities"
    ),
    LLMModel(
        value="kubiya/claude-opus-4",
        label="Claude Opus 4",
        provider="Anthropic",
        logo="/logos/claude-color.svg",
        description="Powerful model for complex tasks requiring deep analysis"
    ),
    LLMModel(
        value="kubiya/gpt-4o",
        label="GPT-4o",
        provider="OpenAI",
        logo="/thirdparty/logos/openai.svg",
        description="Fast and capable model with vision support"
    ),
    LLMModel(
        value="kubiya/gpt-4-turbo",
        label="GPT-4 Turbo",
        provider="OpenAI",
        logo="/thirdparty/logos/openai.svg",
        description="Enhanced GPT-4 with improved speed and capabilities"
    ),
    LLMModel(
        value="kubiya/claude-3-5-sonnet-20241022",
        label="Claude 3.5 Sonnet",
        provider="Anthropic",
        logo="/logos/claude-color.svg",
        description="Previous generation Sonnet with excellent performance"
    ),
]
61
+
62
+
63
@router.get("", response_model=List[LLMModel])
async def list_models():
    """
    Get list of available LLM models.

    Serves the module-level ``KUBIYA_LLM_MODELS`` list unchanged, in its
    declared order.

    Returns:
        List of LLM model configurations with logos and metadata
    """
    return KUBIYA_LLM_MODELS
72
+
73
+
74
@router.get("/default", response_model=LLMModel)
async def get_default_model():
    """
    Return the model clients should use by default.

    Returns:
        The first entry of ``KUBIYA_LLM_MODELS`` flagged as recommended, or
        the first entry of the list when none is flagged.
    """
    # Resolved up front, as the fallback is needed regardless of the scan.
    fallback = KUBIYA_LLM_MODELS[0]
    for candidate in KUBIYA_LLM_MODELS:
        if candidate.recommended:
            return candidate
    return fallback