kubiya-control-plane-api 0.9.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (479) hide show
  1. control_plane_api/LICENSE +676 -0
  2. control_plane_api/README.md +350 -0
  3. control_plane_api/__init__.py +4 -0
  4. control_plane_api/__version__.py +8 -0
  5. control_plane_api/alembic/README +1 -0
  6. control_plane_api/alembic/env.py +121 -0
  7. control_plane_api/alembic/script.py.mako +28 -0
  8. control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
  9. control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
  10. control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
  11. control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
  12. control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
  13. control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
  14. control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
  15. control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
  16. control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
  17. control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
  18. control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
  19. control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
  20. control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
  21. control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
  22. control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
  23. control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
  24. control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
  25. control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
  26. control_plane_api/alembic.ini +148 -0
  27. control_plane_api/api/index.py +12 -0
  28. control_plane_api/app/__init__.py +11 -0
  29. control_plane_api/app/activities/__init__.py +20 -0
  30. control_plane_api/app/activities/agent_activities.py +384 -0
  31. control_plane_api/app/activities/plan_generation_activities.py +499 -0
  32. control_plane_api/app/activities/team_activities.py +424 -0
  33. control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
  34. control_plane_api/app/config/__init__.py +35 -0
  35. control_plane_api/app/config/api_config.py +469 -0
  36. control_plane_api/app/config/config_loader.py +224 -0
  37. control_plane_api/app/config/model_pricing.py +323 -0
  38. control_plane_api/app/config/storage_config.py +159 -0
  39. control_plane_api/app/config.py +115 -0
  40. control_plane_api/app/controllers/__init__.py +0 -0
  41. control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
  42. control_plane_api/app/database.py +135 -0
  43. control_plane_api/app/exceptions.py +408 -0
  44. control_plane_api/app/lib/__init__.py +11 -0
  45. control_plane_api/app/lib/environment.py +65 -0
  46. control_plane_api/app/lib/event_bus/__init__.py +17 -0
  47. control_plane_api/app/lib/event_bus/base.py +136 -0
  48. control_plane_api/app/lib/event_bus/manager.py +335 -0
  49. control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
  50. control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
  51. control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
  52. control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
  53. control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
  54. control_plane_api/app/lib/job_executor.py +330 -0
  55. control_plane_api/app/lib/kubiya_client.py +293 -0
  56. control_plane_api/app/lib/litellm_pricing.py +166 -0
  57. control_plane_api/app/lib/mcp_validation.py +163 -0
  58. control_plane_api/app/lib/nats/__init__.py +13 -0
  59. control_plane_api/app/lib/nats/credentials_manager.py +288 -0
  60. control_plane_api/app/lib/nats/listener.py +374 -0
  61. control_plane_api/app/lib/planning_prompt_builder.py +153 -0
  62. control_plane_api/app/lib/planning_tools/__init__.py +41 -0
  63. control_plane_api/app/lib/planning_tools/agents.py +409 -0
  64. control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
  65. control_plane_api/app/lib/planning_tools/base.py +119 -0
  66. control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
  67. control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
  68. control_plane_api/app/lib/planning_tools/environments.py +218 -0
  69. control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
  70. control_plane_api/app/lib/planning_tools/models.py +93 -0
  71. control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
  72. control_plane_api/app/lib/planning_tools/resources.py +242 -0
  73. control_plane_api/app/lib/planning_tools/teams.py +334 -0
  74. control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
  75. control_plane_api/app/lib/redis_client.py +803 -0
  76. control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
  77. control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
  78. control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
  79. control_plane_api/app/lib/storage/__init__.py +20 -0
  80. control_plane_api/app/lib/storage/base_provider.py +274 -0
  81. control_plane_api/app/lib/storage/provider_factory.py +157 -0
  82. control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
  83. control_plane_api/app/lib/supabase.py +71 -0
  84. control_plane_api/app/lib/supabase_utils.py +138 -0
  85. control_plane_api/app/lib/task_planning/__init__.py +138 -0
  86. control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
  87. control_plane_api/app/lib/task_planning/agents.py +389 -0
  88. control_plane_api/app/lib/task_planning/cache.py +218 -0
  89. control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
  90. control_plane_api/app/lib/task_planning/helpers.py +293 -0
  91. control_plane_api/app/lib/task_planning/hooks.py +474 -0
  92. control_plane_api/app/lib/task_planning/models.py +503 -0
  93. control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
  94. control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
  95. control_plane_api/app/lib/task_planning/runner.py +656 -0
  96. control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
  97. control_plane_api/app/lib/task_planning/workflow.py +424 -0
  98. control_plane_api/app/lib/templating/__init__.py +88 -0
  99. control_plane_api/app/lib/templating/compiler.py +278 -0
  100. control_plane_api/app/lib/templating/engine.py +178 -0
  101. control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
  102. control_plane_api/app/lib/templating/parsers/base.py +96 -0
  103. control_plane_api/app/lib/templating/parsers/env.py +85 -0
  104. control_plane_api/app/lib/templating/parsers/graph.py +112 -0
  105. control_plane_api/app/lib/templating/parsers/secret.py +87 -0
  106. control_plane_api/app/lib/templating/parsers/simple.py +81 -0
  107. control_plane_api/app/lib/templating/resolver.py +366 -0
  108. control_plane_api/app/lib/templating/types.py +214 -0
  109. control_plane_api/app/lib/templating/validator.py +201 -0
  110. control_plane_api/app/lib/temporal_client.py +232 -0
  111. control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
  112. control_plane_api/app/lib/temporal_credentials_service.py +203 -0
  113. control_plane_api/app/lib/validation/__init__.py +24 -0
  114. control_plane_api/app/lib/validation/runtime_validation.py +388 -0
  115. control_plane_api/app/main.py +531 -0
  116. control_plane_api/app/middleware/__init__.py +10 -0
  117. control_plane_api/app/middleware/auth.py +645 -0
  118. control_plane_api/app/middleware/exception_handler.py +267 -0
  119. control_plane_api/app/middleware/prometheus_middleware.py +173 -0
  120. control_plane_api/app/middleware/rate_limiting.py +384 -0
  121. control_plane_api/app/middleware/request_id.py +202 -0
  122. control_plane_api/app/models/__init__.py +40 -0
  123. control_plane_api/app/models/agent.py +90 -0
  124. control_plane_api/app/models/analytics.py +206 -0
  125. control_plane_api/app/models/associations.py +107 -0
  126. control_plane_api/app/models/auth_user.py +73 -0
  127. control_plane_api/app/models/context.py +161 -0
  128. control_plane_api/app/models/custom_integration.py +99 -0
  129. control_plane_api/app/models/environment.py +64 -0
  130. control_plane_api/app/models/execution.py +125 -0
  131. control_plane_api/app/models/execution_transition.py +50 -0
  132. control_plane_api/app/models/job.py +159 -0
  133. control_plane_api/app/models/llm_model.py +78 -0
  134. control_plane_api/app/models/orchestration.py +66 -0
  135. control_plane_api/app/models/plan_execution.py +102 -0
  136. control_plane_api/app/models/presence.py +49 -0
  137. control_plane_api/app/models/project.py +61 -0
  138. control_plane_api/app/models/project_management.py +85 -0
  139. control_plane_api/app/models/session.py +29 -0
  140. control_plane_api/app/models/skill.py +155 -0
  141. control_plane_api/app/models/system_tables.py +43 -0
  142. control_plane_api/app/models/task_planning.py +372 -0
  143. control_plane_api/app/models/team.py +86 -0
  144. control_plane_api/app/models/trace.py +257 -0
  145. control_plane_api/app/models/user_profile.py +54 -0
  146. control_plane_api/app/models/worker.py +221 -0
  147. control_plane_api/app/models/workflow.py +161 -0
  148. control_plane_api/app/models/workspace.py +50 -0
  149. control_plane_api/app/observability/__init__.py +177 -0
  150. control_plane_api/app/observability/context_logging.py +475 -0
  151. control_plane_api/app/observability/decorators.py +337 -0
  152. control_plane_api/app/observability/local_span_processor.py +702 -0
  153. control_plane_api/app/observability/metrics.py +303 -0
  154. control_plane_api/app/observability/middleware.py +246 -0
  155. control_plane_api/app/observability/optional.py +115 -0
  156. control_plane_api/app/observability/tracing.py +382 -0
  157. control_plane_api/app/policies/README.md +149 -0
  158. control_plane_api/app/policies/approved_users.rego +62 -0
  159. control_plane_api/app/policies/business_hours.rego +51 -0
  160. control_plane_api/app/policies/rate_limiting.rego +100 -0
  161. control_plane_api/app/policies/tool_enforcement/README.md +336 -0
  162. control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
  163. control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
  164. control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
  165. control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
  166. control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
  167. control_plane_api/app/policies/tool_restrictions.rego +86 -0
  168. control_plane_api/app/routers/__init__.py +4 -0
  169. control_plane_api/app/routers/agents.py +382 -0
  170. control_plane_api/app/routers/agents_v2.py +1598 -0
  171. control_plane_api/app/routers/analytics.py +1310 -0
  172. control_plane_api/app/routers/auth.py +59 -0
  173. control_plane_api/app/routers/client_config.py +57 -0
  174. control_plane_api/app/routers/context_graph.py +561 -0
  175. control_plane_api/app/routers/context_manager.py +577 -0
  176. control_plane_api/app/routers/custom_integrations.py +490 -0
  177. control_plane_api/app/routers/enforcer.py +132 -0
  178. control_plane_api/app/routers/environment_context.py +252 -0
  179. control_plane_api/app/routers/environments.py +761 -0
  180. control_plane_api/app/routers/execution_environment.py +847 -0
  181. control_plane_api/app/routers/executions/__init__.py +28 -0
  182. control_plane_api/app/routers/executions/router.py +286 -0
  183. control_plane_api/app/routers/executions/services/__init__.py +22 -0
  184. control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
  185. control_plane_api/app/routers/executions/services/status_service.py +420 -0
  186. control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
  187. control_plane_api/app/routers/executions/services/worker_health.py +514 -0
  188. control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
  189. control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
  190. control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
  191. control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
  192. control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
  193. control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
  194. control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
  195. control_plane_api/app/routers/executions.py +4888 -0
  196. control_plane_api/app/routers/health.py +165 -0
  197. control_plane_api/app/routers/health_v2.py +394 -0
  198. control_plane_api/app/routers/integration_templates.py +496 -0
  199. control_plane_api/app/routers/integrations.py +287 -0
  200. control_plane_api/app/routers/jobs.py +1809 -0
  201. control_plane_api/app/routers/metrics.py +517 -0
  202. control_plane_api/app/routers/models.py +82 -0
  203. control_plane_api/app/routers/models_v2.py +628 -0
  204. control_plane_api/app/routers/plan_executions.py +1481 -0
  205. control_plane_api/app/routers/plan_generation_async.py +304 -0
  206. control_plane_api/app/routers/policies.py +669 -0
  207. control_plane_api/app/routers/presence.py +234 -0
  208. control_plane_api/app/routers/projects.py +987 -0
  209. control_plane_api/app/routers/runners.py +379 -0
  210. control_plane_api/app/routers/runtimes.py +172 -0
  211. control_plane_api/app/routers/secrets.py +171 -0
  212. control_plane_api/app/routers/skills.py +1010 -0
  213. control_plane_api/app/routers/skills_definitions.py +140 -0
  214. control_plane_api/app/routers/storage.py +456 -0
  215. control_plane_api/app/routers/task_planning.py +611 -0
  216. control_plane_api/app/routers/task_queues.py +650 -0
  217. control_plane_api/app/routers/team_context.py +274 -0
  218. control_plane_api/app/routers/teams.py +1747 -0
  219. control_plane_api/app/routers/templates.py +248 -0
  220. control_plane_api/app/routers/traces.py +571 -0
  221. control_plane_api/app/routers/websocket_client.py +479 -0
  222. control_plane_api/app/routers/websocket_executions_status.py +437 -0
  223. control_plane_api/app/routers/websocket_gateway.py +323 -0
  224. control_plane_api/app/routers/websocket_traces.py +576 -0
  225. control_plane_api/app/routers/worker_queues.py +2555 -0
  226. control_plane_api/app/routers/worker_websocket.py +419 -0
  227. control_plane_api/app/routers/workers.py +1004 -0
  228. control_plane_api/app/routers/workflows.py +204 -0
  229. control_plane_api/app/runtimes/__init__.py +6 -0
  230. control_plane_api/app/runtimes/validation.py +344 -0
  231. control_plane_api/app/schemas/__init__.py +1 -0
  232. control_plane_api/app/schemas/job_schemas.py +302 -0
  233. control_plane_api/app/schemas/mcp_schemas.py +311 -0
  234. control_plane_api/app/schemas/template_schemas.py +133 -0
  235. control_plane_api/app/schemas/trace_schemas.py +168 -0
  236. control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
  237. control_plane_api/app/services/__init__.py +1 -0
  238. control_plane_api/app/services/agno_planning_strategy.py +233 -0
  239. control_plane_api/app/services/agno_service.py +838 -0
  240. control_plane_api/app/services/claude_code_planning_service.py +203 -0
  241. control_plane_api/app/services/context_graph_client.py +224 -0
  242. control_plane_api/app/services/custom_integration_service.py +415 -0
  243. control_plane_api/app/services/integration_resolution_service.py +345 -0
  244. control_plane_api/app/services/litellm_service.py +394 -0
  245. control_plane_api/app/services/plan_generator.py +79 -0
  246. control_plane_api/app/services/planning_strategy.py +66 -0
  247. control_plane_api/app/services/planning_strategy_factory.py +118 -0
  248. control_plane_api/app/services/policy_service.py +615 -0
  249. control_plane_api/app/services/state_transition_service.py +755 -0
  250. control_plane_api/app/services/storage_service.py +593 -0
  251. control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
  252. control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
  253. control_plane_api/app/services/trace_retention.py +354 -0
  254. control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
  255. control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
  256. control_plane_api/app/services/workflow_operations_service.py +611 -0
  257. control_plane_api/app/skills/__init__.py +100 -0
  258. control_plane_api/app/skills/base.py +239 -0
  259. control_plane_api/app/skills/builtin/__init__.py +37 -0
  260. control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
  261. control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
  262. control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
  263. control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
  264. control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
  265. control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
  266. control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
  267. control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
  268. control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
  269. control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
  270. control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
  271. control_plane_api/app/skills/builtin/docker/skill.py +104 -0
  272. control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
  273. control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
  274. control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
  275. control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
  276. control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
  277. control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
  278. control_plane_api/app/skills/builtin/python/__init__.py +4 -0
  279. control_plane_api/app/skills/builtin/python/skill.py +92 -0
  280. control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
  281. control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
  282. control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
  283. control_plane_api/app/skills/builtin/shell/skill.py +161 -0
  284. control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
  285. control_plane_api/app/skills/builtin/slack/skill.py +302 -0
  286. control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
  287. control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
  288. control_plane_api/app/skills/business_intelligence.py +189 -0
  289. control_plane_api/app/skills/config.py +63 -0
  290. control_plane_api/app/skills/loaders/__init__.py +14 -0
  291. control_plane_api/app/skills/loaders/base.py +73 -0
  292. control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
  293. control_plane_api/app/skills/registry.py +125 -0
  294. control_plane_api/app/utils/helpers.py +12 -0
  295. control_plane_api/app/utils/workflow_executor.py +354 -0
  296. control_plane_api/app/workflows/__init__.py +11 -0
  297. control_plane_api/app/workflows/agent_execution.py +520 -0
  298. control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
  299. control_plane_api/app/workflows/namespace_provisioning.py +326 -0
  300. control_plane_api/app/workflows/plan_generation.py +254 -0
  301. control_plane_api/app/workflows/team_execution.py +442 -0
  302. control_plane_api/scripts/seed_models.py +240 -0
  303. control_plane_api/scripts/validate_existing_tool_names.py +492 -0
  304. control_plane_api/shared/__init__.py +8 -0
  305. control_plane_api/shared/version.py +17 -0
  306. control_plane_api/test_deduplication.py +274 -0
  307. control_plane_api/test_executor_deduplication_e2e.py +309 -0
  308. control_plane_api/test_job_execution_e2e.py +283 -0
  309. control_plane_api/test_real_integration.py +193 -0
  310. control_plane_api/version.py +38 -0
  311. control_plane_api/worker/__init__.py +0 -0
  312. control_plane_api/worker/activities/__init__.py +0 -0
  313. control_plane_api/worker/activities/agent_activities.py +1585 -0
  314. control_plane_api/worker/activities/approval_activities.py +234 -0
  315. control_plane_api/worker/activities/job_activities.py +199 -0
  316. control_plane_api/worker/activities/runtime_activities.py +1167 -0
  317. control_plane_api/worker/activities/skill_activities.py +282 -0
  318. control_plane_api/worker/activities/team_activities.py +479 -0
  319. control_plane_api/worker/agent_runtime_server.py +370 -0
  320. control_plane_api/worker/binary_manager.py +333 -0
  321. control_plane_api/worker/config/__init__.py +31 -0
  322. control_plane_api/worker/config/worker_config.py +273 -0
  323. control_plane_api/worker/control_plane_client.py +1491 -0
  324. control_plane_api/worker/examples/analytics_integration_example.py +362 -0
  325. control_plane_api/worker/health_monitor.py +159 -0
  326. control_plane_api/worker/metrics.py +237 -0
  327. control_plane_api/worker/models/__init__.py +1 -0
  328. control_plane_api/worker/models/error_events.py +105 -0
  329. control_plane_api/worker/models/inputs.py +89 -0
  330. control_plane_api/worker/runtimes/__init__.py +35 -0
  331. control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
  332. control_plane_api/worker/runtimes/agno/__init__.py +34 -0
  333. control_plane_api/worker/runtimes/agno/config.py +248 -0
  334. control_plane_api/worker/runtimes/agno/hooks.py +385 -0
  335. control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
  336. control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
  337. control_plane_api/worker/runtimes/agno/utils.py +163 -0
  338. control_plane_api/worker/runtimes/base.py +979 -0
  339. control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
  340. control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
  341. control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
  342. control_plane_api/worker/runtimes/claude_code/config.py +829 -0
  343. control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
  344. control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
  345. control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
  346. control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
  347. control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
  348. control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
  349. control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
  350. control_plane_api/worker/runtimes/factory.py +173 -0
  351. control_plane_api/worker/runtimes/model_utils.py +107 -0
  352. control_plane_api/worker/runtimes/validation.py +93 -0
  353. control_plane_api/worker/services/__init__.py +1 -0
  354. control_plane_api/worker/services/agent_communication_tools.py +908 -0
  355. control_plane_api/worker/services/agent_executor.py +485 -0
  356. control_plane_api/worker/services/agent_executor_v2.py +793 -0
  357. control_plane_api/worker/services/analytics_collector.py +457 -0
  358. control_plane_api/worker/services/analytics_service.py +464 -0
  359. control_plane_api/worker/services/approval_tools.py +310 -0
  360. control_plane_api/worker/services/approval_tools_agno.py +207 -0
  361. control_plane_api/worker/services/cancellation_manager.py +177 -0
  362. control_plane_api/worker/services/code_ingestion_tools.py +465 -0
  363. control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
  364. control_plane_api/worker/services/data_visualization.py +834 -0
  365. control_plane_api/worker/services/event_publisher.py +531 -0
  366. control_plane_api/worker/services/jira_tools.py +257 -0
  367. control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
  368. control_plane_api/worker/services/runtime_analytics.py +328 -0
  369. control_plane_api/worker/services/session_service.py +365 -0
  370. control_plane_api/worker/services/skill_context_enhancement.py +181 -0
  371. control_plane_api/worker/services/skill_factory.py +471 -0
  372. control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
  373. control_plane_api/worker/services/team_executor.py +715 -0
  374. control_plane_api/worker/services/team_executor_v2.py +1866 -0
  375. control_plane_api/worker/services/tool_enforcement.py +254 -0
  376. control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
  377. control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
  378. control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
  379. control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
  380. control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
  381. control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
  382. control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
  383. control_plane_api/worker/services/workflow_executor/models.py +142 -0
  384. control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
  385. control_plane_api/worker/skills/__init__.py +12 -0
  386. control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
  387. control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
  388. control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
  389. control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
  390. control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
  391. control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
  392. control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
  393. control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
  394. control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
  395. control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
  396. control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
  397. control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
  398. control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
  399. control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
  400. control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
  401. control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
  402. control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
  403. control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
  404. control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
  405. control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
  406. control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
  407. control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
  408. control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
  409. control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
  410. control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
  411. control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
  412. control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
  413. control_plane_api/worker/skills/loaders/__init__.py +5 -0
  414. control_plane_api/worker/skills/loaders/base.py +23 -0
  415. control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
  416. control_plane_api/worker/skills/registry.py +208 -0
  417. control_plane_api/worker/tests/__init__.py +1 -0
  418. control_plane_api/worker/tests/conftest.py +12 -0
  419. control_plane_api/worker/tests/e2e/__init__.py +0 -0
  420. control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
  421. control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
  422. control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
  423. control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
  424. control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
  425. control_plane_api/worker/tests/integration/__init__.py +0 -0
  426. control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
  427. control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
  428. control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
  429. control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
  430. control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
  431. control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
  432. control_plane_api/worker/tests/unit/__init__.py +0 -0
  433. control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
  434. control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
  435. control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
  436. control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
  437. control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
  438. control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
  439. control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
  440. control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
  441. control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
  442. control_plane_api/worker/utils/__init__.py +1 -0
  443. control_plane_api/worker/utils/chunk_batcher.py +330 -0
  444. control_plane_api/worker/utils/environment.py +65 -0
  445. control_plane_api/worker/utils/error_publisher.py +260 -0
  446. control_plane_api/worker/utils/event_batcher.py +256 -0
  447. control_plane_api/worker/utils/logging_config.py +335 -0
  448. control_plane_api/worker/utils/logging_helper.py +326 -0
  449. control_plane_api/worker/utils/parameter_validator.py +120 -0
  450. control_plane_api/worker/utils/retry_utils.py +60 -0
  451. control_plane_api/worker/utils/streaming_utils.py +665 -0
  452. control_plane_api/worker/utils/tool_validation.py +332 -0
  453. control_plane_api/worker/utils/workspace_manager.py +163 -0
  454. control_plane_api/worker/websocket_client.py +393 -0
  455. control_plane_api/worker/worker.py +1297 -0
  456. control_plane_api/worker/workflows/__init__.py +0 -0
  457. control_plane_api/worker/workflows/agent_execution.py +909 -0
  458. control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
  459. control_plane_api/worker/workflows/team_execution.py +611 -0
  460. kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
  461. kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
  462. kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
  463. kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
  464. kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
  465. kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
  466. scripts/__init__.py +1 -0
  467. scripts/migrations.py +39 -0
  468. scripts/seed_worker_queues.py +128 -0
  469. scripts/setup_agent_runtime.py +142 -0
  470. worker_internal/__init__.py +1 -0
  471. worker_internal/planner/__init__.py +1 -0
  472. worker_internal/planner/activities.py +1499 -0
  473. worker_internal/planner/agent_tools.py +197 -0
  474. worker_internal/planner/event_models.py +148 -0
  475. worker_internal/planner/event_publisher.py +67 -0
  476. worker_internal/planner/models.py +199 -0
  477. worker_internal/planner/retry_logic.py +134 -0
  478. worker_internal/planner/worker.py +300 -0
  479. worker_internal/planner/workflows.py +970 -0
@@ -0,0 +1,1702 @@
1
+ """
2
+ Local FastAPI proxy for Claude Code SDK to inject Langfuse metadata.
3
+
4
+ This proxy runs in the same process as the worker and intercepts requests
5
+ from Claude Code SDK to add missing metadata before forwarding to the real
6
+ LiteLLM proxy.
7
+
8
+ Architecture:
9
+ Claude Code SDK → Local Proxy (adds metadata) → Real LiteLLM Proxy → Langfuse
10
+
11
+ The proxy:
12
+ 1. Receives requests from Claude Code SDK
13
+ 2. Extracts execution context from thread-local cache
14
+ 3. Injects Langfuse metadata (trace_name, trace_user_id, session_id, etc.)
15
+ 4. Forwards request to real LiteLLM proxy
16
+ 5. Returns response back to Claude Code SDK
17
+ """
18
+
19
+ import asyncio
20
+ import json
21
+ import os
22
+ import re
23
+ import threading
24
+ import time
25
+ from typing import Dict, Any, Optional, List, Tuple
26
+ import structlog
27
+ from contextlib import asynccontextmanager
28
+
29
+ from fastapi import FastAPI, Request, Response, HTTPException
30
+ from fastapi.responses import StreamingResponse
31
+ import httpx
32
+ import uvicorn
33
+
34
+ logger = structlog.get_logger(__name__)
35
+
36
+
37
# Cache for available models from upstream LiteLLM proxy.
# Module-level shared state: written by fetch_available_models() and read by
# get_cached_available_models(); entries expire after _available_models_cache_ttl.
_available_models_cache: Optional[Dict[str, Any]] = None
_available_models_cache_time: float = 0
_available_models_cache_ttl: int = 300  # 5 minutes
41
+
42
+
43
async def fetch_available_models(
    litellm_base_url: str,
    litellm_api_key: str,
    timeout: float = 10.0,
    max_retries: int = 3,
    retry_delay: float = 1.0,
) -> List[str]:
    """
    Fetch available models from the upstream LiteLLM proxy with retry logic.

    Results are stored in the module-level cache for
    ``_available_models_cache_ttl`` seconds; a fresh cache hit short-circuits
    the network call entirely. Only transient failures (HTTP 502/503/504,
    connect/timeout errors) are retried, with exponential backoff starting at
    ``retry_delay``. Any terminal failure yields an empty list.

    Args:
        litellm_base_url: Base URL of LiteLLM proxy
        litellm_api_key: API key for authentication
        timeout: Request timeout in seconds
        max_retries: Maximum number of retry attempts
        retry_delay: Initial delay between retries (doubles each retry)

    Returns:
        List of available model IDs (empty on failure)
    """
    global _available_models_cache, _available_models_cache_time

    # Check cache first
    now = time.time()
    if _available_models_cache is not None and (now - _available_models_cache_time) < _available_models_cache_ttl:
        logger.debug(
            "using_cached_available_models",
            model_count=len(_available_models_cache.get("models", [])),
            cache_age_seconds=int(now - _available_models_cache_time),
        )
        return _available_models_cache.get("models", [])

    last_error = None
    current_delay = retry_delay

    for attempt in range(max_retries):
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                response = await client.get(
                    f"{litellm_base_url.rstrip('/')}/v1/models",
                    headers={"Authorization": f"Bearer {litellm_api_key}"},
                )

                if response.status_code == 200:
                    data = response.json()
                    # LiteLLM returns {"data": [{"id": "model-name", ...}, ...], "object": "list"}
                    models = []
                    if "data" in data and isinstance(data["data"], list):
                        models = [m.get("id") for m in data["data"] if m.get("id")]

                    # Update cache
                    _available_models_cache = {"models": models}
                    _available_models_cache_time = time.time()

                    logger.info(
                        "fetched_available_models_from_upstream",
                        model_count=len(models),
                        models=models[:10] if len(models) > 10 else models,
                        litellm_base_url=litellm_base_url,
                        attempt=attempt + 1,
                    )
                    return models

                elif response.status_code in (502, 503, 504):
                    # Transient errors - retry with backoff
                    last_error = f"HTTP {response.status_code}"
                    logger.warning(
                        "transient_error_fetching_models",
                        status_code=response.status_code,
                        attempt=attempt + 1,
                        max_retries=max_retries,
                        retry_delay=current_delay,
                    )
                else:
                    # Non-retryable error (e.g. auth / 4xx) - give up immediately
                    logger.warning(
                        "failed_to_fetch_models_from_upstream",
                        status_code=response.status_code,
                        response_text=response.text[:500] if response.text else "",
                        litellm_base_url=litellm_base_url,
                    )
                    return []

        except (httpx.ConnectError, httpx.TimeoutException) as e:
            # Network-level failures count as transient and are retried
            last_error = str(e)
            logger.warning(
                "connection_error_fetching_models",
                error=str(e),
                error_type=type(e).__name__,
                attempt=attempt + 1,
                max_retries=max_retries,
                retry_delay=current_delay,
            )

        except Exception as e:
            # Unexpected error - don't retry
            logger.error(
                "unexpected_error_fetching_models",
                error=str(e),
                error_type=type(e).__name__,
                litellm_base_url=litellm_base_url,
                exc_info=True,
            )
            return []

        # Wait before retry (with exponential backoff)
        if attempt < max_retries - 1:
            await asyncio.sleep(current_delay)
            current_delay *= 2  # Exponential backoff

    # All retries exhausted
    logger.error(
        "all_retries_exhausted_fetching_models",
        last_error=last_error,
        max_retries=max_retries,
        litellm_base_url=litellm_base_url,
    )
    return []
161
+
162
+
163
def get_cached_available_models() -> List[str]:
    """Return the cached model list without any network access.

    Synchronous accessor for the module-level cache populated by
    fetch_available_models(); intended for non-async call sites.
    Returns an empty list when the cache is unpopulated or older
    than the configured TTL.
    """
    cache = _available_models_cache
    if cache is None:
        return []
    cache_age = time.time() - _available_models_cache_time
    if cache_age >= _available_models_cache_ttl:
        return []
    return cache.get("models", [])
175
+
176
+
177
def _normalize_model_name(model: str) -> str:
    """Normalize a model identifier for cross-provider comparison.

    Strips, in order: one provider prefix (bedrock/, anthropic/, ...),
    a Bedrock cross-region prefix (us., eu., ...), one in-name vendor
    prefix (anthropic., meta., ...), and trailing version / date-version
    suffixes (-v1:0, -20240620-v1:0, -20240620). The result is lowercase.

    Args:
        model: Model name to normalize

    Returns:
        Normalized model name
    """
    name = model.strip().lower()

    def drop_first_prefix(value: str, prefixes) -> str:
        # Remove only the first matching prefix (order matters: longest first).
        for prefix in prefixes:
            if value.startswith(prefix):
                return value[len(prefix):]
        return value

    name = drop_first_prefix(name, (
        "bedrock/converse/", "bedrock/invoke/",  # More specific first
        "bedrock/", "anthropic/", "openai/", "azure/", "vertex_ai/",
        "kubiya/", "litellm/",
    ))

    # Bedrock cross-region prefix: two lowercase letters followed by a dot.
    name = re.sub(r'^[a-z]{2}\.', '', name)

    name = drop_first_prefix(name, (
        "anthropic.", "meta.", "amazon.", "mistral.", "ai21.", "cohere.",
    ))

    # Trailing version markers, applied in the same order as before:
    # -vN:M, then -YYYYMMDD-vN:M, then bare -YYYYMMDD.
    for suffix in (r'-v\d+:\d+$', r'-\d{8}-v\d+:\d+$', r'-\d{8}$'):
        name = re.sub(suffix, '', name)

    return name
224
+
225
+
226
def _calculate_model_similarity(requested: str, available: str) -> float:
    """Score how closely two model names match after normalization.

    Matching strategies, strongest first: exact normalized match (1.0),
    requested contained in available (0.9), available contained in
    requested (0.85), shared well-known model family (0.8), then a
    scaled token-overlap score (< 0.5).

    Args:
        requested: Requested model name
        available: Available model name

    Returns:
        Similarity score between 0.0 and 1.0
    """
    lhs = _normalize_model_name(requested)
    rhs = _normalize_model_name(available)

    if lhs == rhs:
        return 1.0

    # Containment in either direction, after normalization.
    if lhs in rhs:
        return 0.9
    if rhs in lhs:
        return 0.85

    # Well-known family names: "claude-sonnet-4" should match
    # "claude-sonnet-4-20250115" even when containment fails.
    known_families = (
        "claude-sonnet-4", "claude-4-sonnet", "claude-sonnet-4-5",
        "claude-3-5-sonnet", "claude-3-sonnet", "claude-3-haiku", "claude-3-opus",
        "claude-instant", "claude-v2",
        "gpt-4", "gpt-4o", "gpt-3.5",
        "llama-3", "llama-2",
        "mistral", "mixtral",
    )
    if any(family in lhs and family in rhs for family in known_families):
        return 0.8

    # Last resort: proportion of shared words (split on '-' and '.').
    def tokenize(name: str) -> set:
        return set(name.replace("-", " ").replace(".", " ").split())

    lhs_tokens = tokenize(lhs)
    rhs_tokens = tokenize(rhs)
    shared = lhs_tokens & rhs_tokens
    if shared:
        return 0.5 * len(shared) / max(len(lhs_tokens), len(rhs_tokens))

    return 0.0
276
+
277
+
278
def validate_and_resolve_model(
    requested_model: str,
    available_models: List[str],
    default_fallback: Optional[str] = None,
) -> Tuple[str, bool]:
    """
    Validate requested model against available models and resolve fallback.

    Resolution order: exact match, case-insensitive match, similarity match
    (score >= 0.7), same-family fallback, then default_fallback or the first
    available model.

    Handles complex model naming patterns including:
    - Bedrock: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0
    - Cross-region: bedrock/us.anthropic.claude-sonnet-4-20250115-v1:0
    - Simple: claude-sonnet-4, gpt-4o
    - Provider-prefixed: kubiya/claude-sonnet-4, anthropic/claude-3-sonnet

    Args:
        requested_model: The model requested by the user/agent
        available_models: List of available model IDs from upstream
        default_fallback: Default model to use if no match found

    Returns:
        Tuple of (resolved_model, was_fallback_used). Note that a similarity
        match also reports True, since the returned id differs from the request.
    """
    if not available_models:
        # No available models list - can't validate, use as-is
        logger.warning(
            "cannot_validate_model_no_available_models",
            requested_model=requested_model,
            note="Proceeding with requested model without validation"
        )
        return requested_model, False

    # Exact match (case-sensitive)
    if requested_model in available_models:
        return requested_model, False

    # Case-insensitive exact match
    requested_lower = requested_model.lower()
    for available in available_models:
        if available.lower() == requested_lower:
            logger.info(
                "model_case_insensitive_match",
                requested_model=requested_model,
                matched_model=available,
            )
            return available, False

    # Find best match using similarity scoring
    best_match = None
    best_score = 0.0

    for available in available_models:
        score = _calculate_model_similarity(requested_model, available)
        if score > best_score:
            best_score = score
            best_match = available

    # Accept match if score is above threshold
    if best_score >= 0.7:
        logger.info(
            "model_similarity_match_found",
            requested_model=requested_model,
            matched_model=best_match,
            similarity_score=best_score,
            note="Found similar model via smart matching"
        )
        return best_match, True

    # Log detailed match attempts for debugging
    logger.warning(
        "model_no_good_match_found",
        requested_model=requested_model,
        requested_normalized=_normalize_model_name(requested_model),
        best_candidate=best_match,
        best_score=best_score,
        available_models_sample=available_models[:5] if len(available_models) > 5 else available_models,
        available_count=len(available_models),
    )

    # No good match - find best fallback from same provider/family
    # Priority: same family > default_fallback > first available
    fallback = _find_same_family_fallback(requested_model, available_models)
    if not fallback:
        # available_models is non-empty here (early return above), so the
        # final requested_model arm is purely defensive.
        fallback = default_fallback or (available_models[0] if available_models else requested_model)

    logger.warning(
        "model_not_found_using_fallback",
        requested_model=requested_model,
        fallback_model=fallback,
        available_models=available_models[:10] if len(available_models) > 10 else available_models,
        note="Requested model not available, using same-family fallback if available"
    )
    return fallback, True
370
+
371
+
372
def _find_same_family_fallback(requested_model: str, available_models: List[str]) -> Optional[str]:
    """Pick a fallback model from the same provider family as the request.

    Claude requests prefer other Claude models, GPT requests other GPT
    models, and so on. Among same-family candidates the one with the
    highest capability score wins (ties keep listing order).

    Args:
        requested_model: The requested model name
        available_models: List of available models

    Returns:
        Best same-family fallback, or None when the family is unknown or
        has no available members.
    """
    requested_lower = requested_model.lower()

    # (family name, substrings identifying membership); order matters -
    # the first family whose pattern appears in the request wins.
    family_table = [
        ("claude", ["claude", "anthropic"]),
        ("gpt", ["gpt-4", "gpt-3", "gpt4", "gpt3", "openai"]),
        ("llama", ["llama", "meta"]),
        ("mistral", ["mistral", "mixtral"]),
        ("deepseek", ["deepseek"]),
    ]

    # Work out which family the request belongs to.
    matched_family = None
    matched_patterns: List[str] = []
    for family, patterns in family_table:
        if any(pattern in requested_lower for pattern in patterns):
            matched_family = family
            matched_patterns = patterns
            break

    if matched_family is None:
        return None

    # Collect available models of that family, scored by capability
    # (sonnet over haiku, gpt-4 over gpt-3.5, ...).
    candidates = []
    for candidate in available_models:
        candidate_lower = candidate.lower()
        if any(pattern in candidate_lower for pattern in matched_patterns):
            candidates.append((candidate, _calculate_model_capability_score(candidate)))

    if not candidates:
        return None

    # Stable sort by score, best first.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    chosen = candidates[0][0]

    logger.info(
        "same_family_fallback_found",
        requested_model=requested_model,
        requested_family=matched_family,
        fallback_model=chosen,
        same_family_options=[name for name, _ in candidates],
    )

    return chosen
450
+
451
+
452
def _calculate_model_capability_score(model: str) -> int:
    """Rank a model's capability for fallback preference.

    Higher score means a more capable model (preferred when choosing a
    same-family fallback). Unknown models score 50.

    Args:
        model: Model name

    Returns:
        Capability score (higher is more capable)
    """
    haystack = model.lower()

    # Ordered (substrings, score) rules; the first hit wins, preserving
    # the precedence of the original if/elif ladder (e.g. "opus" beats
    # any later pattern, "gpt-4o" is checked before "gpt-4").
    scoring_rules = (
        (("opus",), 100),           # Claude: opus > sonnet > haiku
        (("sonnet",), 80),
        (("haiku",), 60),
        (("gpt-4o",), 95),          # OpenAI: 4o > 4 > 3.5
        (("gpt-4",), 90),
        (("gpt-3.5", "gpt-35"), 70),
        (("llama-3", "llama3"), 75),  # Llama: newer generation preferred
        (("llama-2", "llama2"), 65),
        (("deepseek-r1",), 85),
        (("deepseek-v3",), 80),
        (("deepseek",), 70),
        (("mixtral",), 75),
        (("mistral",), 70),
    )
    for needles, score in scoring_rules:
        if any(needle in haystack for needle in needles):
            return score

    return 50  # Default for unknown models
501
+
502
+
503
# Process-wide storage for execution context metadata, shared across threads
# via a lock (the "thread-local" wording of the old comment was a misnomer).
# This allows the proxy request handlers to look up execution metadata.
class ExecutionContextStore:
    """
    Thread-safe storage for execution context metadata with TTL and proactive cleanup.

    Features:
    - TTL-based expiration (default 3600s)
    - Proactive cleanup timer (runs every 60s)
    - Circuit breaker to prevent runaway memory growth
    - Thread-safe operations
    """

    def __init__(self, ttl_seconds: int = 3600, max_contexts: int = 1000):
        """
        Args:
            ttl_seconds: Age after which a stored context counts as expired.
            max_contexts: Circuit-breaker threshold; set_context refuses new
                entries at this size, and the cleanup worker force-evicts
                the oldest half when the store grows beyond it.
        """
        self._contexts: Dict[str, Dict[str, Any]] = {}  # execution_id -> context payload
        self._context_timestamps: Dict[str, float] = {}  # execution_id -> store time (epoch seconds)
        self._ttl_seconds = ttl_seconds
        self._max_contexts = max_contexts  # Circuit breaker threshold
        self._current_execution: Optional[str] = None  # most recently stored execution id
        self._lock = threading.Lock()

        # Proactive cleanup timer
        self._cleanup_timer: Optional[threading.Timer] = None
        self._cleanup_interval = 60  # Run cleanup every 60 seconds
        self._start_proactive_cleanup()

    def _start_proactive_cleanup(self):
        """Start (or re-arm) the one-shot cleanup timer; daemon so it never blocks exit."""
        self._cleanup_timer = threading.Timer(
            self._cleanup_interval,
            self._proactive_cleanup_worker
        )
        self._cleanup_timer.daemon = True
        self._cleanup_timer.start()
        logger.debug("proactive_cleanup_timer_started", interval=self._cleanup_interval)

    def _proactive_cleanup_worker(self):
        """Worker that runs periodic cleanup, then reschedules itself."""
        try:
            self._cleanup_expired()

            # Check circuit breaker (snapshot the count under the lock;
            # _force_cleanup re-acquires the lock, so call it outside).
            with self._lock:
                context_count = len(self._contexts)

            if context_count > self._max_contexts:
                logger.error(
                    "context_store_circuit_breaker_triggered",
                    context_count=context_count,
                    max_contexts=self._max_contexts,
                    action="forcing_aggressive_cleanup"
                )
                # Aggressive cleanup: remove oldest 50%
                self._force_cleanup(keep_ratio=0.5)

        except Exception as e:
            logger.error(
                "proactive_cleanup_error",
                error=str(e),
                error_type=type(e).__name__,
                exc_info=True
            )
        finally:
            # Reschedule timer
            # NOTE(review): this re-arms even if shutdown() already cancelled
            # the previous timer; a worker that is mid-run will schedule one
            # more cycle after shutdown - confirm this is acceptable.
            self._start_proactive_cleanup()

    def _force_cleanup(self, keep_ratio: float = 0.5):
        """
        Force cleanup of oldest contexts when circuit breaker triggers.

        Args:
            keep_ratio: Ratio of newest contexts to keep (0.5 = keep newest 50%)
        """
        with self._lock:
            if not self._contexts:
                return

            # Sort by timestamp (oldest first)
            sorted_ids = sorted(
                self._context_timestamps.items(),
                key=lambda x: x[1]
            )

            # Calculate how many to remove
            keep_count = int(len(sorted_ids) * keep_ratio)
            to_remove = sorted_ids[:len(sorted_ids) - keep_count]

            # Remove oldest contexts
            removed_count = 0
            for exec_id, _ in to_remove:
                self._contexts.pop(exec_id, None)
                self._context_timestamps.pop(exec_id, None)
                if self._current_execution == exec_id:
                    self._current_execution = None
                removed_count += 1

            logger.warning(
                "forced_cleanup_completed",
                removed=removed_count,
                remaining=len(self._contexts),
                keep_ratio=keep_ratio
            )

    def set_context(self, execution_id: str, context: Dict[str, Any]):
        """Store execution context for an execution ID with timestamp.

        Also marks this execution as the "current" one. Raises RuntimeError
        when the store is at capacity (circuit breaker) rather than growing
        without bound.
        """
        with self._lock:
            # Check circuit breaker before adding
            if len(self._contexts) >= self._max_contexts:
                logger.error(
                    "context_store_at_capacity",
                    current_count=len(self._contexts),
                    max_contexts=self._max_contexts,
                    action="rejecting_new_context"
                )
                raise RuntimeError(
                    f"Context store at capacity ({self._max_contexts}). "
                    "System may be leaking contexts or under high load."
                )

            self._contexts[execution_id] = context
            self._context_timestamps[execution_id] = time.time()
            self._current_execution = execution_id
            logger.debug(
                "execution_context_stored",
                execution_id=execution_id[:8] if len(execution_id) >= 8 else execution_id,
                total_contexts=len(self._contexts),
                has_user_id=bool(context.get("user_id")),
                has_session_id=bool(context.get("session_id")),
            )

    def get_context(self, execution_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """
        Retrieve execution context for an execution ID if not expired.

        If execution_id is None, returns the current active execution context.
        An expired entry is removed lazily here and None is returned.
        """
        with self._lock:
            target_id = execution_id if execution_id else self._current_execution
            if not target_id:
                return None

            # Check if expired
            timestamp = self._context_timestamps.get(target_id)
            if timestamp and (time.time() - timestamp) > self._ttl_seconds:
                # Expired - remove and return None
                self._contexts.pop(target_id, None)
                self._context_timestamps.pop(target_id, None)
                logger.debug("execution_context_expired", execution_id=target_id[:8] if len(target_id) >= 8 else target_id)
                return None

            return self._contexts.get(target_id)

    def get_current_execution_id(self) -> Optional[str]:
        """Get the current active execution ID (last one stored), or None."""
        with self._lock:
            return self._current_execution

    def get_any_valid_execution_id(self) -> Optional[str]:
        """
        Get any valid (non-expired) execution ID.

        This is a fallback when _current_execution is None but there are still
        valid contexts available. Useful for sub-agent requests that arrive
        after _current_execution has been overwritten by concurrent executions.

        Returns the most recently set context's execution ID.
        """
        with self._lock:
            if not self._contexts:
                return None

            now = time.time()
            # Find the most recent non-expired context
            valid_contexts = [
                (exec_id, ts) for exec_id, ts in self._context_timestamps.items()
                if (now - ts) <= self._ttl_seconds and exec_id in self._contexts
            ]

            if not valid_contexts:
                return None

            # Return the most recently set context
            most_recent = max(valid_contexts, key=lambda x: x[1])
            logger.debug(
                "using_fallback_execution_context",
                execution_id=most_recent[0][:8] if len(most_recent[0]) >= 8 else most_recent[0],
                total_valid_contexts=len(valid_contexts),
            )
            return most_recent[0]

    def clear_context(self, execution_id: str):
        """Clear execution context after execution completes (no-op if absent)."""
        with self._lock:
            if execution_id in self._contexts:
                del self._contexts[execution_id]
                self._context_timestamps.pop(execution_id, None)
                if self._current_execution == execution_id:
                    self._current_execution = None
                logger.debug(
                    "execution_context_cleared",
                    execution_id=execution_id[:8] if len(execution_id) >= 8 else execution_id,
                    remaining_contexts=len(self._contexts)
                )

    def _cleanup_expired(self) -> None:
        """Remove contexts older than TTL (called by the periodic worker)."""
        now = time.time()
        with self._lock:
            expired_ids = [
                exec_id for exec_id, timestamp in self._context_timestamps.items()
                if (now - timestamp) > self._ttl_seconds
            ]

            if expired_ids:
                for exec_id in expired_ids:
                    self._contexts.pop(exec_id, None)
                    self._context_timestamps.pop(exec_id, None)
                    if self._current_execution == exec_id:
                        self._current_execution = None

                logger.info(
                    "expired_contexts_cleaned",
                    removed=len(expired_ids),
                    remaining=len(self._contexts)
                )

    def get_stats(self) -> Dict[str, Any]:
        """Get context store statistics (counts, limits, and entry age range)."""
        with self._lock:
            now = time.time()
            ages = [now - ts for ts in self._context_timestamps.values()]
            return {
                'total_contexts': len(self._contexts),
                'max_contexts': self._max_contexts,
                'ttl_seconds': self._ttl_seconds,
                'oldest_age_seconds': int(max(ages)) if ages else 0,
                'newest_age_seconds': int(min(ages)) if ages else 0,
            }

    def shutdown(self):
        """Stop proactive cleanup timer (cancels the currently armed timer)."""
        if self._cleanup_timer:
            self._cleanup_timer.cancel()
            logger.info("context_store_cleanup_timer_stopped")
747
+
748
+
749
# Global context store singleton, created at import time (also starts the
# store's daemon cleanup timer).
_context_store = ExecutionContextStore()
751
+
752
+
753
class ContextCleanupScheduler:
    """Schedules delayed context cleanup without blocking.

    Each execution_id has at most one pending cleanup task; rescheduling
    cancels the previous one. When no asyncio event loop is running in the
    calling thread, cleanup degrades to an immediate synchronous clear.
    The store's TTL remains the safety net in all cases.
    """

    def __init__(self):
        # execution_id -> pending asyncio cleanup task
        self._pending_cleanups: Dict[str, asyncio.Task] = {}
        # Guards _pending_cleanups; schedule_cleanup may be called from
        # worker threads while tasks complete on the event loop.
        self._lock = threading.Lock()

    def schedule_cleanup(
        self,
        execution_id: str,
        delay_seconds: float,
        store: 'ExecutionContextStore'
    ):
        """Schedule cleanup after delay (non-blocking).

        Args:
            execution_id: Execution whose context should be cleared.
            delay_seconds: Delay before clearing.
            store: Context store to clear the context from.
        """
        with self._lock:
            # Cancel existing cleanup if rescheduling
            if execution_id in self._pending_cleanups:
                self._pending_cleanups[execution_id].cancel()

            try:
                # BUGFIX: use get_running_loop() instead of get_event_loop().
                # get_event_loop() is deprecated outside a running loop and can
                # return a loop that is NOT running; create_task() on such a
                # loop produces a task that never executes, silently skipping
                # the cleanup. get_running_loop() raises RuntimeError when no
                # loop is active, which correctly routes us to the
                # immediate-cleanup fallback below.
                loop = asyncio.get_running_loop()
                task = loop.create_task(
                    self._delayed_cleanup(execution_id, delay_seconds, store)
                )
                self._pending_cleanups[execution_id] = task
            except RuntimeError:
                # No running event loop - fallback to immediate cleanup
                store.clear_context(execution_id)

    async def _delayed_cleanup(
        self,
        execution_id: str,
        delay_seconds: float,
        store: 'ExecutionContextStore'
    ):
        """Internal: wait delay_seconds, then clear the context."""
        try:
            await asyncio.sleep(delay_seconds)
            store.clear_context(execution_id)
        except asyncio.CancelledError:
            pass  # Cleanup was cancelled (rescheduled or shut down)
        except Exception as e:
            # Log but don't crash - TTL will handle it
            logger.warning(
                "context_cleanup_error",
                execution_id=execution_id[:8] if len(execution_id) >= 8 else execution_id,
                error=str(e)
            )
        finally:
            # Forget the bookkeeping entry regardless of outcome.
            with self._lock:
                self._pending_cleanups.pop(execution_id, None)
805
+
806
+
807
# Global cleanup scheduler singleton used to defer context removal.
_cleanup_scheduler = ContextCleanupScheduler()
809
+
810
+
811
def _hash_user_id(user_id: str, organization_id: str) -> str:
    """Derive a stable, non-PII identifier from a user id and organization id.

    The Anthropic API rejects email addresses in ``metadata.user_id``, so the
    (possibly email-shaped) user id is combined with the organization id and
    hashed instead.

    Args:
        user_id: User ID (may be an email address)
        organization_id: Organization ID

    Returns:
        First 16 hex characters of the SHA-256 digest of "<user>-<org>".
    """
    import hashlib

    digest = hashlib.sha256(f"{user_id}-{organization_id}".encode()).hexdigest()
    return digest[:16]
828
+
829
+
830
def build_langfuse_metadata(context: Dict[str, Any]) -> Dict[str, Any]:
    """Build Langfuse tracking metadata from an execution context.

    Mirrors the metadata shape used by the agno runtime so traces from both
    code paths look identical in Langfuse.

    Args:
        context: Execution context (user_id, organization_id, session_id,
            agent_id, agent_name, model_id, plus optional naming overrides).

    Returns:
        Metadata dict for Langfuse tracking.
    """
    user_id = context.get("user_id")
    organization_id = context.get("organization_id")
    session_id = context.get("session_id")

    # Naming fields default to "agent-chat" so both agent-chat and plan
    # execution can share this proxy while supplying custom names if needed.
    metadata: Dict[str, Any] = {
        "name": context.get("name", "agent-chat"),
        "trace_name": context.get("trace_name", "agent-chat"),
        "generation_name": context.get("generation_name", "agent-chat"),
    }

    # Anthropic rejects email-shaped values in user_id ("user_id appears to
    # contain an email address"), so only a hashed identifier goes into the
    # user-facing fields - and only when both inputs are present.
    if user_id and organization_id:
        anonymized = _hash_user_id(user_id, organization_id)
        metadata["trace_user_id"] = anonymized
        metadata["user_id"] = anonymized

    # Use session_id as trace_id so all turns of a conversation group together.
    if session_id:
        metadata["trace_id"] = session_id
        metadata["session_id"] = session_id

    # Remaining fields never reach Anthropic, so raw values are safe here.
    optional_fields = {
        "agent_id": context.get("agent_id"),
        "agent_name": context.get("agent_name"),
        "user_email": user_id,  # original id kept for Langfuse-internal tracking
        "organization_id": organization_id,
        "model": context.get("model_id"),
    }
    for key, value in optional_fields.items():
        if value:
            metadata[key] = value

    return metadata
882
+
883
+
884
+ class LiteLLMProxyApp:
885
+ """FastAPI application for LiteLLM proxy with metadata injection."""
886
+
887
+ def __init__(self, litellm_base_url: str, litellm_api_key: str):
888
+ """
889
+ Initialize the proxy application.
890
+
891
+ Args:
892
+ litellm_base_url: Base URL of the real LiteLLM proxy
893
+ litellm_api_key: API key for LiteLLM proxy
894
+ """
895
+ self.litellm_base_url = litellm_base_url.rstrip('/')
896
+ self.litellm_api_key = litellm_api_key
897
+ self.client = None # Will be lazily initialized per request
898
+ self._client_lock = None # Asyncio lock for thread-safe client creation
899
+
900
+ # Create FastAPI app WITHOUT lifespan
901
+ # Reason: httpx clients must be created in the same event loop where they're used
902
+ # When uvicorn runs in a background thread, it has its own event loop
903
+ # Creating the client in a different loop causes ConnectError
904
+ self.app = FastAPI(
905
+ title="Claude Code LiteLLM Proxy",
906
+ description="Local proxy to inject Langfuse metadata for Claude Code SDK",
907
+ )
908
+
909
+ # Register routes
910
+ self._register_routes()
911
+
912
+ def _register_routes(self):
913
+ """Register all proxy routes."""
914
+
915
+ @self.app.get("/health")
916
+ async def health_check():
917
+ """Health check endpoint."""
918
+ return {"status": "healthy", "service": "claude-code-litellm-proxy"}
919
+
920
+ @self.app.post("/v1/messages")
921
+ async def proxy_messages(request: Request):
922
+ """
923
+ Proxy endpoint for Anthropic Messages API format.
924
+
925
+ This is the main endpoint used by Claude Code SDK.
926
+ We keep the Anthropic format by forwarding to /v1/messages.
927
+ """
928
+ # Keep Anthropic format - forward to /v1/messages
929
+ return await self._proxy_request(request, "/v1/messages")
930
+
931
+ @self.app.post("/v1/chat/completions")
932
+ async def proxy_chat_completions(request: Request):
933
+ """
934
+ Proxy endpoint for OpenAI Chat Completions API format.
935
+
936
+ Fallback for OpenAI-format requests.
937
+ """
938
+ return await self._proxy_request(request, "/v1/chat/completions")
939
+
940
+ async def _get_client(self) -> httpx.AsyncClient:
941
+ """
942
+ Get or create the httpx client in the current event loop.
943
+
944
+ This ensures the client is created in the same event loop where it will be used,
945
+ avoiding ConnectError when uvicorn runs in a background thread.
946
+
947
+ Returns:
948
+ httpx.AsyncClient instance
949
+ """
950
+ if self.client is None:
951
+ # Initialize lock if needed (must be done in async context)
952
+ if self._client_lock is None:
953
+ self._client_lock = asyncio.Lock()
954
+
955
+ async with self._client_lock:
956
+ # Double-check after acquiring lock
957
+ if self.client is None:
958
+ logger.info(
959
+ "initializing_httpx_client_in_current_event_loop",
960
+ litellm_base_url=self.litellm_base_url,
961
+ )
962
+ # Create client with explicit settings for reliability
963
+ # VERY long timeouts to handle long-running streaming LLM operations
964
+ # For streaming workflows, the read timeout needs to be very generous
965
+ # since the connection may be open for hours while streaming responses
966
+ self.client = httpx.AsyncClient(
967
+ timeout=httpx.Timeout(
968
+ connect=30.0, # Connection timeout (reasonable for initial connection)
969
+ read=86400.0, # Read timeout (24 hours for long streaming operations)
970
+ write=300.0, # Write timeout (5 minutes for large payloads)
971
+ pool=300.0, # Pool timeout (5 minutes to avoid pool exhaustion)
972
+ ),
973
+ limits=httpx.Limits(
974
+ max_keepalive_connections=50, # Increased for better reuse
975
+ max_connections=200, # Increased for high concurrency
976
+ ),
977
+ follow_redirects=True,
978
+ )
979
+ return self.client
980
+
981
+ async def cleanup(self):
982
+ """Clean up HTTP client resources."""
983
+ if self.client is not None:
984
+ try:
985
+ await self.client.aclose()
986
+ logger.info("httpx_client_closed")
987
+ self.client = None
988
+ except Exception as e:
989
+ logger.error(
990
+ "httpx_client_close_failed",
991
+ error=str(e),
992
+ error_type=type(e).__name__
993
+ )
994
+
995
+ async def _proxy_request(self, request: Request, path: str) -> Response:
996
+ """
997
+ Proxy a request to the real LiteLLM proxy with metadata injection.
998
+
999
+ Args:
1000
+ request: Incoming FastAPI request
1001
+ path: API path to forward to
1002
+
1003
+ Returns:
1004
+ Response from LiteLLM proxy
1005
+ """
1006
+ # Get or create client in current event loop
1007
+ client = await self._get_client()
1008
+
1009
+ try:
1010
+ # Parse request body
1011
+ body = await request.json()
1012
+
1013
+ # CRITICAL: Override model if KUBIYA_MODEL_OVERRIDE is set
1014
+ # This ensures the explicit model from CLI --model flag takes precedence
1015
+ model_override = os.environ.get("KUBIYA_MODEL_OVERRIDE")
1016
+ if model_override:
1017
+ original_model = body.get("model")
1018
+ body["model"] = model_override
1019
+ logger.info(
1020
+ "model_override_applied_in_proxy",
1021
+ original_model=original_model,
1022
+ overridden_model=model_override,
1023
+ path=path,
1024
+ note="CLI --model flag or KUBIYA_MODEL env var is active"
1025
+ )
1026
+
1027
+ # Model validation: Only validate when using a LOCAL/CUSTOM LiteLLM proxy
1028
+ # This prevents "Invalid model name" errors when the configured model doesn't exist
1029
+ # on a local proxy. Skip validation for the default Kubiya proxy which supports all models.
1030
+ #
1031
+ # Validation is enabled when ANY of these conditions are true:
1032
+ # - KUBIYA_ENABLE_LOCAL_PROXY is set (using local LiteLLM proxy)
1033
+ # - LITELLM_API_BASE is set to a non-default URL (custom proxy)
1034
+ # - KUBIYA_FORCE_MODEL_VALIDATION is set (explicit opt-in)
1035
+ default_proxy_url = "https://llm-proxy.kubiya.ai"
1036
+ is_local_proxy = os.environ.get("KUBIYA_ENABLE_LOCAL_PROXY", "").lower() in ("true", "1", "yes")
1037
+ is_custom_proxy = self.litellm_base_url and self.litellm_base_url.rstrip('/') != default_proxy_url
1038
+ force_validation = os.environ.get("KUBIYA_FORCE_MODEL_VALIDATION", "").lower() in ("true", "1", "yes")
1039
+ should_validate_model = is_local_proxy or is_custom_proxy or force_validation
1040
+
1041
+ requested_model = body.get("model")
1042
+ if requested_model and should_validate_model:
1043
+ # Fetch available models from upstream (uses cache)
1044
+ available_models = await fetch_available_models(
1045
+ self.litellm_base_url,
1046
+ self.litellm_api_key,
1047
+ )
1048
+
1049
+ if available_models:
1050
+ resolved_model, used_fallback = validate_and_resolve_model(
1051
+ requested_model,
1052
+ available_models,
1053
+ )
1054
+
1055
+ if used_fallback:
1056
+ body["model"] = resolved_model
1057
+ logger.warning(
1058
+ "model_resolved_with_fallback",
1059
+ original_model=requested_model,
1060
+ resolved_model=resolved_model,
1061
+ available_models_count=len(available_models),
1062
+ path=path,
1063
+ note="Original model not available, using fallback"
1064
+ )
1065
+ else:
1066
+ logger.warning(
1067
+ "skipping_model_validation",
1068
+ model=requested_model,
1069
+ path=path,
1070
+ note="Could not fetch available models from upstream, proceeding without validation"
1071
+ )
1072
+ elif requested_model and not should_validate_model:
1073
+ logger.debug(
1074
+ "model_validation_skipped",
1075
+ model=requested_model,
1076
+ litellm_base_url=self.litellm_base_url,
1077
+ note="Using default Kubiya proxy, model validation not needed"
1078
+ )
1079
+
1080
+ # Extract execution_id from custom header, or use current execution
1081
+ execution_id = request.headers.get("X-Execution-ID")
1082
+
1083
+ if not execution_id:
1084
+ # Try to get current execution ID
1085
+ execution_id = _context_store.get_current_execution_id()
1086
+
1087
+ if not execution_id:
1088
+ # Fallback: try to get any valid execution context
1089
+ # This handles sub-agent requests when _current_execution was overwritten
1090
+ execution_id = _context_store.get_any_valid_execution_id()
1091
+ if execution_id:
1092
+ logger.debug(
1093
+ "using_fallback_execution_id",
1094
+ execution_id=execution_id[:8] if execution_id else None,
1095
+ path=path,
1096
+ )
1097
+
1098
+ if not execution_id:
1099
+ # Still no execution_id - this is unexpected but not fatal
1100
+ # Log at debug level since this may happen during proxy startup/shutdown
1101
+ logger.debug(
1102
+ "no_execution_id_available",
1103
+ path=path,
1104
+ note="Cannot inject Langfuse metadata - no execution context found"
1105
+ )
1106
+
1107
+ if execution_id:
1108
+ # Get execution context and build metadata
1109
+ context = _context_store.get_context(execution_id)
1110
+
1111
+ if context:
1112
+ metadata = build_langfuse_metadata(context)
1113
+
1114
+ # For Anthropic format, we need to be more explicit with Langfuse fields
1115
+ # LiteLLM looks for specific fields in specific places
1116
+
1117
+ # 1. Set 'user' at top level (works with both formats)
1118
+ body["user"] = metadata.get("trace_user_id")
1119
+
1120
+ # 2. Initialize metadata dict
1121
+ if "metadata" not in body:
1122
+ body["metadata"] = {}
1123
+
1124
+ # 3. Put Langfuse fields with explicit naming that LiteLLM recognizes
1125
+ # Based on LiteLLM source, these specific keys are extracted for Langfuse
1126
+ body["metadata"]["generation_name"] = metadata.get("generation_name", "agent-chat")
1127
+ body["metadata"]["trace_name"] = metadata.get("trace_name", "agent-chat")
1128
+ body["metadata"]["trace_id"] = metadata.get("trace_id")
1129
+ body["metadata"]["session_id"] = metadata.get("session_id")
1130
+ body["metadata"]["trace_user_id"] = metadata.get("trace_user_id")
1131
+ body["metadata"]["user_id"] = metadata.get("trace_user_id")
1132
+
1133
+ # Additional context metadata
1134
+ body["metadata"]["agent_id"] = metadata.get("agent_id")
1135
+ body["metadata"]["agent_name"] = metadata.get("agent_name")
1136
+ body["metadata"]["organization_id"] = metadata.get("organization_id")
1137
+ body["metadata"]["user_email"] = metadata.get("user_email")
1138
+ body["metadata"]["model"] = metadata.get("model")
1139
+
1140
+ logger.debug(
1141
+ "metadata_injected_into_request",
1142
+ execution_id=execution_id[:8],
1143
+ path=path,
1144
+ user_field=body.get("user"),
1145
+ metadata_keys=list(metadata.keys()),
1146
+ trace_user_id=metadata.get("trace_user_id"),
1147
+ trace_id=metadata.get("trace_id"),
1148
+ session_id=metadata.get("session_id"),
1149
+ trace_name=metadata.get("trace_name"),
1150
+ )
1151
+ else:
1152
+ logger.warning(
1153
+ "no_context_found_for_execution",
1154
+ execution_id=execution_id[:8] if execution_id else "unknown",
1155
+ path=path,
1156
+ )
1157
+
1158
+ # Build forwarding URL (keep same endpoint - don't convert formats)
1159
+ forward_url = f"{self.litellm_base_url}{path}"
1160
+
1161
+ # Prepare headers
1162
+ headers = {
1163
+ "Authorization": f"Bearer {self.litellm_api_key}",
1164
+ "Content-Type": "application/json",
1165
+ }
1166
+
1167
+ # Add Langfuse metadata as custom headers (LiteLLM recognizes these)
1168
+ # Can be disabled via KUBIYA_DISABLE_LANGFUSE_HEADERS=true for local proxies
1169
+ # that don't support Langfuse or have incompatible versions
1170
+ langfuse_headers_enabled = os.environ.get("KUBIYA_DISABLE_LANGFUSE_HEADERS", "").lower() not in ("true", "1", "yes")
1171
+
1172
+ if execution_id and langfuse_headers_enabled:
1173
+ context = _context_store.get_context(execution_id)
1174
+ if context:
1175
+ metadata = build_langfuse_metadata(context)
1176
+
1177
+ # LiteLLM extracts Langfuse fields from these custom headers
1178
+ # IMPORTANT: Header values MUST be str, never None
1179
+ # Use `or ""` to handle both missing keys AND keys with None values
1180
+ headers["X-Langfuse-Trace-Id"] = metadata.get("trace_id") or ""
1181
+ headers["X-Langfuse-Session-Id"] = metadata.get("session_id") or ""
1182
+ headers["X-Langfuse-User-Id"] = metadata.get("trace_user_id") or ""
1183
+ headers["X-Langfuse-Trace-Name"] = metadata.get("trace_name") or "agent-chat"
1184
+
1185
+ # Additional metadata as JSON in custom header
1186
+ extra_metadata = {
1187
+ "agent_id": metadata.get("agent_id"),
1188
+ "agent_name": metadata.get("agent_name"),
1189
+ "organization_id": metadata.get("organization_id"),
1190
+ "user_email": metadata.get("user_email"),
1191
+ }
1192
+ headers["X-Langfuse-Metadata"] = json.dumps(extra_metadata)
1193
+
1194
+ logger.debug(
1195
+ "langfuse_headers_added",
1196
+ execution_id=execution_id[:8],
1197
+ trace_id=metadata.get("trace_id", ""),
1198
+ session_id=metadata.get("session_id", ""),
1199
+ )
1200
+ elif not langfuse_headers_enabled:
1201
+ logger.debug(
1202
+ "langfuse_headers_disabled",
1203
+ note="KUBIYA_DISABLE_LANGFUSE_HEADERS is set, skipping Langfuse header injection"
1204
+ )
1205
+
1206
+ # Copy relevant headers from original request
1207
+ for header in ["X-Request-ID", "User-Agent"]:
1208
+ if header.lower() in request.headers:
1209
+ headers[header] = request.headers[header.lower()]
1210
+
1211
+ # Check if streaming is requested
1212
+ is_streaming = body.get("stream", False)
1213
+
1214
+ if is_streaming:
1215
+ # Handle streaming response
1216
+ logger.info(
1217
+ "starting_streaming_request",
1218
+ url=forward_url,
1219
+ model=body.get("model", "unknown"),
1220
+ execution_id=execution_id[:8] if execution_id else "unknown",
1221
+ )
1222
+ return await self._proxy_streaming_request(client, forward_url, body, headers)
1223
+ else:
1224
+ # Handle non-streaming response
1225
+ response = await client.post(
1226
+ forward_url,
1227
+ json=body,
1228
+ headers=headers,
1229
+ )
1230
+
1231
+ logger.debug(
1232
+ "litellm_request_completed",
1233
+ status_code=response.status_code,
1234
+ path=path,
1235
+ execution_id=execution_id[:8] if execution_id else None,
1236
+ )
1237
+
1238
+ return Response(
1239
+ content=response.content,
1240
+ status_code=response.status_code,
1241
+ headers=dict(response.headers),
1242
+ )
1243
+
1244
+ except httpx.ConnectError as e:
1245
+ logger.error(
1246
+ "litellm_proxy_connection_error",
1247
+ error=str(e),
1248
+ error_type=type(e).__name__,
1249
+ path=path,
1250
+ forward_url=forward_url,
1251
+ litellm_base_url=self.litellm_base_url,
1252
+ message="Failed to connect to LiteLLM proxy - check network connectivity and URL",
1253
+ )
1254
+ raise HTTPException(
1255
+ status_code=502,
1256
+ detail=f"Failed to connect to LiteLLM proxy at {self.litellm_base_url}: {str(e)}"
1257
+ )
1258
+
1259
+ except httpx.HTTPError as e:
1260
+ logger.error(
1261
+ "litellm_proxy_http_error",
1262
+ error=str(e),
1263
+ error_type=type(e).__name__,
1264
+ path=path,
1265
+ forward_url=forward_url,
1266
+ )
1267
+ raise HTTPException(status_code=502, detail=f"Proxy error: {str(e)}")
1268
+
1269
+ except Exception as e:
1270
+ logger.error(
1271
+ "litellm_proxy_error",
1272
+ error=str(e),
1273
+ error_type=type(e).__name__,
1274
+ path=path,
1275
+ exc_info=True,
1276
+ )
1277
+ raise HTTPException(status_code=500, detail=f"Internal proxy error: {str(e)}")
1278
+
1279
    async def _proxy_streaming_request(
        self, client: httpx.AsyncClient, url: str, body: Dict[str, Any], headers: Dict[str, str]
    ) -> StreamingResponse:
        """
        Proxy a streaming request to LiteLLM with robust error handling.

        Errors are reported as SSE ``data:`` events inside the stream rather
        than raised: once streaming has begun the HTTP status line has already
        been sent, so yielding an error event is the only way to inform the
        client without tearing the connection down.

        Args:
            client: httpx AsyncClient instance
            url: Forward URL
            body: Request body (already has model override/metadata applied)
            headers: Request headers

        Returns:
            StreamingResponse that forwards chunks from LiteLLM
        """
        async def stream_generator():
            """Generator that yields chunks from LiteLLM with error handling."""
            try:
                # Use VERY long timeout for streaming to ensure long operations work
                # Streaming responses can take hours for complex workflows
                stream_timeout = httpx.Timeout(
                    connect=30.0,  # Connection timeout (reasonable for initial connection)
                    read=86400.0,  # Read timeout (24 hours for long streaming operations)
                    write=300.0,  # Write timeout (5 minutes for large payloads)
                    pool=300.0,  # Pool timeout (5 minutes to avoid pool exhaustion)
                )
                async with client.stream(
                    "POST",
                    url,
                    json=body,
                    headers=headers,
                    timeout=stream_timeout,
                ) as response:
                    # Check for HTTP errors before streaming
                    if response.status_code >= 400:
                        # stream() does not buffer the body, so it must be
                        # read explicitly before it can be logged/forwarded
                        error_text = await response.aread()
                        logger.error(
                            "litellm_streaming_http_error",
                            status_code=response.status_code,
                            error=error_text.decode('utf-8', errors='ignore')[:500],
                            url=url,
                        )
                        # Yield error message as SSE event
                        error_msg = f"data: {{\"error\": \"HTTP {response.status_code}: {error_text.decode('utf-8', errors='ignore')[:200]}\"}}\n\n"
                        yield error_msg.encode('utf-8')
                        return

                    # Stream chunks upstream -> client as raw bytes
                    async for chunk in response.aiter_bytes():
                        yield chunk

            except httpx.ConnectError as e:
                logger.error(
                    "litellm_streaming_connection_error",
                    error=str(e),
                    url=url,
                    message="Failed to connect to LiteLLM proxy during streaming",
                )
                # Yield error as SSE event instead of crashing
                error_msg = f"data: {{\"error\": \"Connection failed: {str(e)}\"}}\n\n"
                yield error_msg.encode('utf-8')

            except httpx.TimeoutException as e:
                # Capture detailed timeout info (httpx timeout exceptions often
                # stringify to an empty message)
                error_detail = str(e) or repr(e) or "No error details available"
                logger.error(
                    "litellm_streaming_timeout",
                    error=error_detail,
                    error_type=type(e).__name__,
                    error_args=getattr(e, 'args', []),
                    url=url,
                    model=body.get("model", "unknown"),
                    message="Request timed out during streaming",
                    note="Check network connectivity to LLM proxy or increase timeouts"
                )
                error_msg = f"data: {{\"error\": \"Request timed out ({type(e).__name__}): {error_detail}\"}}\n\n"
                yield error_msg.encode('utf-8')

            except httpx.HTTPError as e:
                # Any other transport-level httpx failure
                logger.error(
                    "litellm_streaming_http_error_general",
                    error=str(e),
                    error_type=type(e).__name__,
                    url=url,
                )
                error_msg = f"data: {{\"error\": \"HTTP error: {str(e)}\"}}\n\n"
                yield error_msg.encode('utf-8')

            except Exception as e:
                # Last-resort catch: never let an exception escape the
                # generator mid-stream
                logger.error(
                    "litellm_streaming_unexpected_error",
                    error=str(e),
                    error_type=type(e).__name__,
                    url=url,
                    exc_info=True,
                )
                error_msg = f"data: {{\"error\": \"Unexpected error: {str(e)}\"}}\n\n"
                yield error_msg.encode('utf-8')

        return StreamingResponse(
            stream_generator(),
            media_type="text/event-stream",
        )
1385
+
1386
+
1387
class LiteLLMProxyServer:
    """Manager for running the LiteLLM proxy server in the same process.

    Runs uvicorn in a daemon thread with its own event loop, and exposes
    start/stop lifecycle control plus the bound base URL.
    """

    def __init__(self, port: int = 0):
        """
        Initialize the proxy server.

        Args:
            port: Port to listen on (0 = auto-assign random port)
        """
        self.port = port
        self.actual_port: Optional[int] = None
        self.server_thread: Optional[threading.Thread] = None
        self.app: Optional["LiteLLMProxyApp"] = None
        # uvicorn.Server instance, set by _run_server(); stop() flips its
        # should_exit flag to terminate serve() gracefully.
        self._uvicorn_server = None
        self._started = threading.Event()
        self._shutdown = threading.Event()

    def start(self) -> int:
        """
        Start the proxy server in a background thread.

        Returns:
            The actual port the server is listening on

        Raises:
            RuntimeError: If LITELLM_API_KEY is not set, or the server does
                not answer its health check within 10 seconds
        """
        # Get LiteLLM configuration
        litellm_base_url = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
        litellm_api_key = os.getenv("LITELLM_API_KEY")

        # Check for model override
        model_override = os.getenv("KUBIYA_MODEL_OVERRIDE")

        logger.info(
            "litellm_proxy_server_initializing",
            litellm_base_url=litellm_base_url,
            model_override=model_override,
            has_model_override=bool(model_override),
            note="Model override will be applied to ALL requests" if model_override else "No model override active"
        )

        if not litellm_api_key:
            raise RuntimeError("LITELLM_API_KEY not set")

        # Create proxy app
        self.app = LiteLLMProxyApp(litellm_base_url, litellm_api_key)

        # Auto-assign port if needed.
        # NOTE(review): the probe socket is closed before uvicorn binds, so
        # another process could grab the port in between (TOCTOU). Acceptable
        # for a localhost helper, but worth knowing when debugging bind errors.
        if self.port == 0:
            import socket
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind(('127.0.0.1', 0))
                s.listen(1)
                self.actual_port = s.getsockname()[1]
        else:
            self.actual_port = self.port

        # Start server in background thread
        self.server_thread = threading.Thread(
            target=self._run_server,
            daemon=True,
            name="LiteLLMProxyServer"
        )
        self.server_thread.start()

        # Wait for server to become ready by polling the health endpoint
        import time
        import httpx
        max_wait = 10  # seconds
        start_time = time.time()

        while time.time() - start_time < max_wait:
            try:
                # Try to connect to health endpoint
                with httpx.Client(timeout=1.0) as client:
                    response = client.get(f"http://127.0.0.1:{self.actual_port}/health")
                    if response.status_code == 200:
                        self._started.set()
                        logger.info(
                            "litellm_proxy_server_started",
                            port=self.actual_port,
                            url=f"http://127.0.0.1:{self.actual_port}",
                        )
                        return self.actual_port
            except Exception:
                # Server not ready yet, wait and retry
                time.sleep(0.1)
                continue

        # Timeout waiting for server
        raise RuntimeError("LiteLLM proxy server failed to start within 10 seconds")

    def _run_server(self):
        """Run the uvicorn server (called in background thread)."""
        loop = None  # guard: finally must not reference an unbound name
        try:
            # Create a dedicated event loop for this thread
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

            # Create uvicorn config
            config = uvicorn.Config(
                self.app.app,
                host="127.0.0.1",
                port=self.actual_port,
                log_level="error",
                access_log=False,
                loop=loop,
            )
            server = uvicorn.Server(config)
            # Expose the server object so stop() can set should_exit and
            # actually end serve(); previously nothing signaled uvicorn, so
            # the thread never stopped and stop() always hit its join timeout.
            self._uvicorn_server = server

            # Run server until should_exit is set (or the process exits)
            loop.run_until_complete(server.serve())

        except Exception as e:
            logger.error(
                "litellm_proxy_server_error",
                error=str(e),
                error_type=type(e).__name__,
                exc_info=True,
            )
        finally:
            # Cleanup HTTP client in the loop where it was created
            if loop is not None:
                if self.app and self.app.client:
                    try:
                        loop.run_until_complete(self.app.cleanup())
                    except Exception as cleanup_error:
                        logger.error(
                            "proxy_app_cleanup_failed",
                            error=str(cleanup_error)
                        )

                # Close event loop
                try:
                    loop.close()
                except Exception as loop_error:
                    logger.error("event_loop_close_failed", error=str(loop_error))

            self._shutdown.set()

    def stop(self):
        """Stop the proxy server and cleanup resources."""
        logger.info("stopping_litellm_proxy_server")
        self._shutdown.set()

        # Signal uvicorn to leave serve(); without this the background thread
        # never terminates and join() below always times out.
        if self._uvicorn_server is not None:
            self._uvicorn_server.should_exit = True

        # Give server time to shutdown gracefully
        if self.server_thread:
            self.server_thread.join(timeout=10)

            if self.server_thread.is_alive():
                logger.warning(
                    "proxy_server_thread_still_alive",
                    note="Daemon thread will be terminated by Python at exit"
                )
            else:
                logger.info("proxy_server_thread_stopped")

        logger.info("litellm_proxy_server_stopped")

    def get_base_url(self) -> str:
        """Get the base URL of the proxy server.

        Raises:
            RuntimeError: If the server has not been started yet.
        """
        if not self.actual_port:
            raise RuntimeError("Server not started")
        return f"http://127.0.0.1:{self.actual_port}"
1551
+
1552
+
1553
# Singleton instance
# Process-wide proxy server shared by all callers; creation is serialized
# by _proxy_lock (see get_proxy_server).
_proxy_server: Optional[LiteLLMProxyServer] = None
_proxy_lock = threading.Lock()
1556
+
1557
+
1558
def get_proxy_server() -> LiteLLMProxyServer:
    """
    Return the process-wide proxy server, creating and starting it on first use.

    Returns:
        LiteLLMProxyServer instance
    """
    global _proxy_server

    with _proxy_lock:
        if _proxy_server is not None:
            return _proxy_server
        server = LiteLLMProxyServer(port=0)  # port 0 -> auto-assign
        server.start()
        _proxy_server = server
        return _proxy_server
1573
+
1574
+
1575
def set_execution_context(execution_id: str, context: Dict[str, Any]):
    """
    Register the context used for Langfuse metadata injection.

    Must be called before a Claude Code execution starts so the proxy can
    tag that execution's LLM requests.

    Args:
        execution_id: Execution ID
        context: Context dict with user_id, session_id, agent_id, etc.
    """
    _context_store.set_context(execution_id, context)
1586
+
1587
+
1588
def clear_execution_context(
    execution_id: str,
    immediate: bool = False,
    delay_seconds: float = 5.0
):
    """
    Drop the stored context once an execution has finished.

    Args:
        execution_id: Execution ID
        immediate: If True, clear right away; otherwise schedule delayed cleanup.
        delay_seconds: Grace period before cleanup (only when immediate=False)
    """
    if immediate:
        _context_store.clear_context(execution_id)
        return
    # Delayed cleanup lets in-flight requests still resolve their context.
    _cleanup_scheduler.schedule_cleanup(
        execution_id,
        delay_seconds,
        _context_store
    )
1609
+
1610
+
1611
def get_proxy_base_url() -> str:
    """
    Return the base URL of the local proxy server, starting it if needed.

    Returns:
        Base URL (e.g., "http://127.0.0.1:8080")
    """
    return get_proxy_server().get_base_url()
1622
+
1623
+
1624
def list_available_models_sync(timeout: float = 10.0) -> List[str]:
    """
    Synchronously fetch the model IDs exposed by the upstream LiteLLM proxy.

    Handy for CLI/debugging to show what models are available.

    Args:
        timeout: Request timeout in seconds

    Returns:
        List of available model IDs (empty list on any failure)
    """
    base_url = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
    api_key = os.getenv("LITELLM_API_KEY")

    if not api_key:
        logger.warning("cannot_list_models_no_api_key")
        return []

    try:
        with httpx.Client(timeout=timeout) as http_client:
            resp = http_client.get(
                f"{base_url.rstrip('/')}/v1/models",
                headers={"Authorization": f"Bearer {api_key}"},
            )

            if resp.status_code != 200:
                logger.warning(
                    "failed_to_list_models_sync",
                    status_code=resp.status_code,
                    litellm_base_url=base_url,
                )
                return []

            payload = resp.json()
            entries = payload.get("data")
            model_ids = (
                [m.get("id") for m in entries if m.get("id")]
                if isinstance(entries, list)
                else []
            )

            logger.info(
                "listed_available_models_sync",
                model_count=len(model_ids),
                models=model_ids,
                litellm_base_url=base_url,
            )
            return model_ids

    except Exception as exc:
        logger.warning(
            "error_listing_models_sync",
            error=str(exc),
            error_type=type(exc).__name__,
            litellm_base_url=base_url,
        )
        return []
1679
+
1680
+
1681
def print_available_models():
    """
    Print available models to stdout for debugging.

    Useful for CLI troubleshooting when model errors occur.
    """
    models = list_available_models_sync()
    proxy_url = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
    divider = "=" * 60

    print(f"\n{divider}")
    print("Available Models from LiteLLM Proxy")
    print(f"Proxy URL: {proxy_url}")
    print(divider)

    if not models:
        print(" No models available or failed to fetch models.")
        print(" Check LITELLM_API_BASE and LITELLM_API_KEY environment variables.")
    else:
        for idx, model_id in enumerate(models, 1):
            print(f" {idx}. {model_id}")

    print(f"{divider}\n")