kubiya-control-plane-api 0.9.15__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- control_plane_api/LICENSE +676 -0
- control_plane_api/README.md +350 -0
- control_plane_api/__init__.py +4 -0
- control_plane_api/__version__.py +8 -0
- control_plane_api/alembic/README +1 -0
- control_plane_api/alembic/env.py +121 -0
- control_plane_api/alembic/script.py.mako +28 -0
- control_plane_api/alembic/versions/2613c65c3dbe_initial_database_setup.py +32 -0
- control_plane_api/alembic/versions/2df520d4927d_merge_heads.py +28 -0
- control_plane_api/alembic/versions/43abf98d6a01_add_paused_status_to_executions.py +73 -0
- control_plane_api/alembic/versions/6289854264cb_merge_multiple_heads.py +28 -0
- control_plane_api/alembic/versions/6a4d4dc3d8dc_generate_execution_transitions.py +50 -0
- control_plane_api/alembic/versions/87d11cf0a783_add_disconnected_status_to_worker_.py +44 -0
- control_plane_api/alembic/versions/add_ephemeral_queue_support.py +85 -0
- control_plane_api/alembic/versions/add_model_type_to_llm_models.py +31 -0
- control_plane_api/alembic/versions/add_plan_executions_table.py +114 -0
- control_plane_api/alembic/versions/add_trace_span_tables.py +154 -0
- control_plane_api/alembic/versions/add_user_info_to_traces.py +36 -0
- control_plane_api/alembic/versions/adjusting_foreign_keys.py +32 -0
- control_plane_api/alembic/versions/b4983d976db2_initial_tables.py +1128 -0
- control_plane_api/alembic/versions/d181a3b40e71_rename_custom_metadata_to_metadata_in_.py +50 -0
- control_plane_api/alembic/versions/df9117888e82_add_missing_columns.py +82 -0
- control_plane_api/alembic/versions/f25de6ad895a_missing_migrations.py +34 -0
- control_plane_api/alembic/versions/f71305fb69b9_fix_ephemeral_queue_deletion_foreign_key.py +54 -0
- control_plane_api/alembic/versions/mark_local_exec_queues_as_ephemeral.py +68 -0
- control_plane_api/alembic.ini +148 -0
- control_plane_api/api/index.py +12 -0
- control_plane_api/app/__init__.py +11 -0
- control_plane_api/app/activities/__init__.py +20 -0
- control_plane_api/app/activities/agent_activities.py +384 -0
- control_plane_api/app/activities/plan_generation_activities.py +499 -0
- control_plane_api/app/activities/team_activities.py +424 -0
- control_plane_api/app/activities/temporal_cloud_activities.py +588 -0
- control_plane_api/app/config/__init__.py +35 -0
- control_plane_api/app/config/api_config.py +469 -0
- control_plane_api/app/config/config_loader.py +224 -0
- control_plane_api/app/config/model_pricing.py +323 -0
- control_plane_api/app/config/storage_config.py +159 -0
- control_plane_api/app/config.py +115 -0
- control_plane_api/app/controllers/__init__.py +0 -0
- control_plane_api/app/controllers/execution_environment_controller.py +1315 -0
- control_plane_api/app/database.py +135 -0
- control_plane_api/app/exceptions.py +408 -0
- control_plane_api/app/lib/__init__.py +11 -0
- control_plane_api/app/lib/environment.py +65 -0
- control_plane_api/app/lib/event_bus/__init__.py +17 -0
- control_plane_api/app/lib/event_bus/base.py +136 -0
- control_plane_api/app/lib/event_bus/manager.py +335 -0
- control_plane_api/app/lib/event_bus/providers/__init__.py +6 -0
- control_plane_api/app/lib/event_bus/providers/http_provider.py +166 -0
- control_plane_api/app/lib/event_bus/providers/nats_provider.py +324 -0
- control_plane_api/app/lib/event_bus/providers/redis_provider.py +233 -0
- control_plane_api/app/lib/event_bus/providers/websocket_provider.py +497 -0
- control_plane_api/app/lib/job_executor.py +330 -0
- control_plane_api/app/lib/kubiya_client.py +293 -0
- control_plane_api/app/lib/litellm_pricing.py +166 -0
- control_plane_api/app/lib/mcp_validation.py +163 -0
- control_plane_api/app/lib/nats/__init__.py +13 -0
- control_plane_api/app/lib/nats/credentials_manager.py +288 -0
- control_plane_api/app/lib/nats/listener.py +374 -0
- control_plane_api/app/lib/planning_prompt_builder.py +153 -0
- control_plane_api/app/lib/planning_tools/__init__.py +41 -0
- control_plane_api/app/lib/planning_tools/agents.py +409 -0
- control_plane_api/app/lib/planning_tools/agno_toolkit.py +836 -0
- control_plane_api/app/lib/planning_tools/base.py +119 -0
- control_plane_api/app/lib/planning_tools/cognitive_memory_tools.py +403 -0
- control_plane_api/app/lib/planning_tools/context_graph_tools.py +545 -0
- control_plane_api/app/lib/planning_tools/environments.py +218 -0
- control_plane_api/app/lib/planning_tools/knowledge.py +204 -0
- control_plane_api/app/lib/planning_tools/models.py +93 -0
- control_plane_api/app/lib/planning_tools/planning_service.py +646 -0
- control_plane_api/app/lib/planning_tools/resources.py +242 -0
- control_plane_api/app/lib/planning_tools/teams.py +334 -0
- control_plane_api/app/lib/policy_enforcer_client.py +1016 -0
- control_plane_api/app/lib/redis_client.py +803 -0
- control_plane_api/app/lib/sqlalchemy_utils.py +486 -0
- control_plane_api/app/lib/state_transition_tools/__init__.py +7 -0
- control_plane_api/app/lib/state_transition_tools/execution_context.py +388 -0
- control_plane_api/app/lib/storage/__init__.py +20 -0
- control_plane_api/app/lib/storage/base_provider.py +274 -0
- control_plane_api/app/lib/storage/provider_factory.py +157 -0
- control_plane_api/app/lib/storage/vercel_blob_provider.py +468 -0
- control_plane_api/app/lib/supabase.py +71 -0
- control_plane_api/app/lib/supabase_utils.py +138 -0
- control_plane_api/app/lib/task_planning/__init__.py +138 -0
- control_plane_api/app/lib/task_planning/agent_factory.py +308 -0
- control_plane_api/app/lib/task_planning/agents.py +389 -0
- control_plane_api/app/lib/task_planning/cache.py +218 -0
- control_plane_api/app/lib/task_planning/entity_resolver.py +273 -0
- control_plane_api/app/lib/task_planning/helpers.py +293 -0
- control_plane_api/app/lib/task_planning/hooks.py +474 -0
- control_plane_api/app/lib/task_planning/models.py +503 -0
- control_plane_api/app/lib/task_planning/plan_validator.py +166 -0
- control_plane_api/app/lib/task_planning/planning_workflow.py +2911 -0
- control_plane_api/app/lib/task_planning/runner.py +656 -0
- control_plane_api/app/lib/task_planning/streaming_hook.py +213 -0
- control_plane_api/app/lib/task_planning/workflow.py +424 -0
- control_plane_api/app/lib/templating/__init__.py +88 -0
- control_plane_api/app/lib/templating/compiler.py +278 -0
- control_plane_api/app/lib/templating/engine.py +178 -0
- control_plane_api/app/lib/templating/parsers/__init__.py +29 -0
- control_plane_api/app/lib/templating/parsers/base.py +96 -0
- control_plane_api/app/lib/templating/parsers/env.py +85 -0
- control_plane_api/app/lib/templating/parsers/graph.py +112 -0
- control_plane_api/app/lib/templating/parsers/secret.py +87 -0
- control_plane_api/app/lib/templating/parsers/simple.py +81 -0
- control_plane_api/app/lib/templating/resolver.py +366 -0
- control_plane_api/app/lib/templating/types.py +214 -0
- control_plane_api/app/lib/templating/validator.py +201 -0
- control_plane_api/app/lib/temporal_client.py +232 -0
- control_plane_api/app/lib/temporal_credentials_cache.py +178 -0
- control_plane_api/app/lib/temporal_credentials_service.py +203 -0
- control_plane_api/app/lib/validation/__init__.py +24 -0
- control_plane_api/app/lib/validation/runtime_validation.py +388 -0
- control_plane_api/app/main.py +531 -0
- control_plane_api/app/middleware/__init__.py +10 -0
- control_plane_api/app/middleware/auth.py +645 -0
- control_plane_api/app/middleware/exception_handler.py +267 -0
- control_plane_api/app/middleware/prometheus_middleware.py +173 -0
- control_plane_api/app/middleware/rate_limiting.py +384 -0
- control_plane_api/app/middleware/request_id.py +202 -0
- control_plane_api/app/models/__init__.py +40 -0
- control_plane_api/app/models/agent.py +90 -0
- control_plane_api/app/models/analytics.py +206 -0
- control_plane_api/app/models/associations.py +107 -0
- control_plane_api/app/models/auth_user.py +73 -0
- control_plane_api/app/models/context.py +161 -0
- control_plane_api/app/models/custom_integration.py +99 -0
- control_plane_api/app/models/environment.py +64 -0
- control_plane_api/app/models/execution.py +125 -0
- control_plane_api/app/models/execution_transition.py +50 -0
- control_plane_api/app/models/job.py +159 -0
- control_plane_api/app/models/llm_model.py +78 -0
- control_plane_api/app/models/orchestration.py +66 -0
- control_plane_api/app/models/plan_execution.py +102 -0
- control_plane_api/app/models/presence.py +49 -0
- control_plane_api/app/models/project.py +61 -0
- control_plane_api/app/models/project_management.py +85 -0
- control_plane_api/app/models/session.py +29 -0
- control_plane_api/app/models/skill.py +155 -0
- control_plane_api/app/models/system_tables.py +43 -0
- control_plane_api/app/models/task_planning.py +372 -0
- control_plane_api/app/models/team.py +86 -0
- control_plane_api/app/models/trace.py +257 -0
- control_plane_api/app/models/user_profile.py +54 -0
- control_plane_api/app/models/worker.py +221 -0
- control_plane_api/app/models/workflow.py +161 -0
- control_plane_api/app/models/workspace.py +50 -0
- control_plane_api/app/observability/__init__.py +177 -0
- control_plane_api/app/observability/context_logging.py +475 -0
- control_plane_api/app/observability/decorators.py +337 -0
- control_plane_api/app/observability/local_span_processor.py +702 -0
- control_plane_api/app/observability/metrics.py +303 -0
- control_plane_api/app/observability/middleware.py +246 -0
- control_plane_api/app/observability/optional.py +115 -0
- control_plane_api/app/observability/tracing.py +382 -0
- control_plane_api/app/policies/README.md +149 -0
- control_plane_api/app/policies/approved_users.rego +62 -0
- control_plane_api/app/policies/business_hours.rego +51 -0
- control_plane_api/app/policies/rate_limiting.rego +100 -0
- control_plane_api/app/policies/tool_enforcement/README.md +336 -0
- control_plane_api/app/policies/tool_enforcement/bash_command_validation.rego +71 -0
- control_plane_api/app/policies/tool_enforcement/business_hours_enforcement.rego +82 -0
- control_plane_api/app/policies/tool_enforcement/mcp_tool_allowlist.rego +58 -0
- control_plane_api/app/policies/tool_enforcement/production_safeguards.rego +80 -0
- control_plane_api/app/policies/tool_enforcement/role_based_tool_access.rego +44 -0
- control_plane_api/app/policies/tool_restrictions.rego +86 -0
- control_plane_api/app/routers/__init__.py +4 -0
- control_plane_api/app/routers/agents.py +382 -0
- control_plane_api/app/routers/agents_v2.py +1598 -0
- control_plane_api/app/routers/analytics.py +1310 -0
- control_plane_api/app/routers/auth.py +59 -0
- control_plane_api/app/routers/client_config.py +57 -0
- control_plane_api/app/routers/context_graph.py +561 -0
- control_plane_api/app/routers/context_manager.py +577 -0
- control_plane_api/app/routers/custom_integrations.py +490 -0
- control_plane_api/app/routers/enforcer.py +132 -0
- control_plane_api/app/routers/environment_context.py +252 -0
- control_plane_api/app/routers/environments.py +761 -0
- control_plane_api/app/routers/execution_environment.py +847 -0
- control_plane_api/app/routers/executions/__init__.py +28 -0
- control_plane_api/app/routers/executions/router.py +286 -0
- control_plane_api/app/routers/executions/services/__init__.py +22 -0
- control_plane_api/app/routers/executions/services/demo_worker_health.py +156 -0
- control_plane_api/app/routers/executions/services/status_service.py +420 -0
- control_plane_api/app/routers/executions/services/test_worker_health.py +480 -0
- control_plane_api/app/routers/executions/services/worker_health.py +514 -0
- control_plane_api/app/routers/executions/streaming/__init__.py +22 -0
- control_plane_api/app/routers/executions/streaming/deduplication.py +352 -0
- control_plane_api/app/routers/executions/streaming/event_buffer.py +353 -0
- control_plane_api/app/routers/executions/streaming/event_formatter.py +964 -0
- control_plane_api/app/routers/executions/streaming/history_loader.py +588 -0
- control_plane_api/app/routers/executions/streaming/live_source.py +693 -0
- control_plane_api/app/routers/executions/streaming/streamer.py +849 -0
- control_plane_api/app/routers/executions.py +4888 -0
- control_plane_api/app/routers/health.py +165 -0
- control_plane_api/app/routers/health_v2.py +394 -0
- control_plane_api/app/routers/integration_templates.py +496 -0
- control_plane_api/app/routers/integrations.py +287 -0
- control_plane_api/app/routers/jobs.py +1809 -0
- control_plane_api/app/routers/metrics.py +517 -0
- control_plane_api/app/routers/models.py +82 -0
- control_plane_api/app/routers/models_v2.py +628 -0
- control_plane_api/app/routers/plan_executions.py +1481 -0
- control_plane_api/app/routers/plan_generation_async.py +304 -0
- control_plane_api/app/routers/policies.py +669 -0
- control_plane_api/app/routers/presence.py +234 -0
- control_plane_api/app/routers/projects.py +987 -0
- control_plane_api/app/routers/runners.py +379 -0
- control_plane_api/app/routers/runtimes.py +172 -0
- control_plane_api/app/routers/secrets.py +171 -0
- control_plane_api/app/routers/skills.py +1010 -0
- control_plane_api/app/routers/skills_definitions.py +140 -0
- control_plane_api/app/routers/storage.py +456 -0
- control_plane_api/app/routers/task_planning.py +611 -0
- control_plane_api/app/routers/task_queues.py +650 -0
- control_plane_api/app/routers/team_context.py +274 -0
- control_plane_api/app/routers/teams.py +1747 -0
- control_plane_api/app/routers/templates.py +248 -0
- control_plane_api/app/routers/traces.py +571 -0
- control_plane_api/app/routers/websocket_client.py +479 -0
- control_plane_api/app/routers/websocket_executions_status.py +437 -0
- control_plane_api/app/routers/websocket_gateway.py +323 -0
- control_plane_api/app/routers/websocket_traces.py +576 -0
- control_plane_api/app/routers/worker_queues.py +2555 -0
- control_plane_api/app/routers/worker_websocket.py +419 -0
- control_plane_api/app/routers/workers.py +1004 -0
- control_plane_api/app/routers/workflows.py +204 -0
- control_plane_api/app/runtimes/__init__.py +6 -0
- control_plane_api/app/runtimes/validation.py +344 -0
- control_plane_api/app/schemas/__init__.py +1 -0
- control_plane_api/app/schemas/job_schemas.py +302 -0
- control_plane_api/app/schemas/mcp_schemas.py +311 -0
- control_plane_api/app/schemas/template_schemas.py +133 -0
- control_plane_api/app/schemas/trace_schemas.py +168 -0
- control_plane_api/app/schemas/worker_queue_observability_schemas.py +165 -0
- control_plane_api/app/services/__init__.py +1 -0
- control_plane_api/app/services/agno_planning_strategy.py +233 -0
- control_plane_api/app/services/agno_service.py +838 -0
- control_plane_api/app/services/claude_code_planning_service.py +203 -0
- control_plane_api/app/services/context_graph_client.py +224 -0
- control_plane_api/app/services/custom_integration_service.py +415 -0
- control_plane_api/app/services/integration_resolution_service.py +345 -0
- control_plane_api/app/services/litellm_service.py +394 -0
- control_plane_api/app/services/plan_generator.py +79 -0
- control_plane_api/app/services/planning_strategy.py +66 -0
- control_plane_api/app/services/planning_strategy_factory.py +118 -0
- control_plane_api/app/services/policy_service.py +615 -0
- control_plane_api/app/services/state_transition_service.py +755 -0
- control_plane_api/app/services/storage_service.py +593 -0
- control_plane_api/app/services/temporal_cloud_provisioning.py +150 -0
- control_plane_api/app/services/toolsets/context_graph_skill.py +432 -0
- control_plane_api/app/services/trace_retention.py +354 -0
- control_plane_api/app/services/worker_queue_metrics_service.py +190 -0
- control_plane_api/app/services/workflow_cancellation_manager.py +135 -0
- control_plane_api/app/services/workflow_operations_service.py +611 -0
- control_plane_api/app/skills/__init__.py +100 -0
- control_plane_api/app/skills/base.py +239 -0
- control_plane_api/app/skills/builtin/__init__.py +37 -0
- control_plane_api/app/skills/builtin/agent_communication/__init__.py +8 -0
- control_plane_api/app/skills/builtin/agent_communication/skill.py +246 -0
- control_plane_api/app/skills/builtin/code_ingestion/__init__.py +4 -0
- control_plane_api/app/skills/builtin/code_ingestion/skill.py +267 -0
- control_plane_api/app/skills/builtin/cognitive_memory/__init__.py +4 -0
- control_plane_api/app/skills/builtin/cognitive_memory/skill.py +174 -0
- control_plane_api/app/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/app/skills/builtin/contextual_awareness/skill.py +387 -0
- control_plane_api/app/skills/builtin/data_visualization/__init__.py +4 -0
- control_plane_api/app/skills/builtin/data_visualization/skill.py +154 -0
- control_plane_api/app/skills/builtin/docker/__init__.py +4 -0
- control_plane_api/app/skills/builtin/docker/skill.py +104 -0
- control_plane_api/app/skills/builtin/file_generation/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_generation/skill.py +94 -0
- control_plane_api/app/skills/builtin/file_system/__init__.py +4 -0
- control_plane_api/app/skills/builtin/file_system/skill.py +110 -0
- control_plane_api/app/skills/builtin/knowledge_api/__init__.py +5 -0
- control_plane_api/app/skills/builtin/knowledge_api/skill.py +124 -0
- control_plane_api/app/skills/builtin/python/__init__.py +4 -0
- control_plane_api/app/skills/builtin/python/skill.py +92 -0
- control_plane_api/app/skills/builtin/remote_filesystem/__init__.py +5 -0
- control_plane_api/app/skills/builtin/remote_filesystem/skill.py +170 -0
- control_plane_api/app/skills/builtin/shell/__init__.py +4 -0
- control_plane_api/app/skills/builtin/shell/skill.py +161 -0
- control_plane_api/app/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/app/skills/builtin/slack/skill.py +302 -0
- control_plane_api/app/skills/builtin/workflow_executor/__init__.py +4 -0
- control_plane_api/app/skills/builtin/workflow_executor/skill.py +469 -0
- control_plane_api/app/skills/business_intelligence.py +189 -0
- control_plane_api/app/skills/config.py +63 -0
- control_plane_api/app/skills/loaders/__init__.py +14 -0
- control_plane_api/app/skills/loaders/base.py +73 -0
- control_plane_api/app/skills/loaders/filesystem_loader.py +199 -0
- control_plane_api/app/skills/registry.py +125 -0
- control_plane_api/app/utils/helpers.py +12 -0
- control_plane_api/app/utils/workflow_executor.py +354 -0
- control_plane_api/app/workflows/__init__.py +11 -0
- control_plane_api/app/workflows/agent_execution.py +520 -0
- control_plane_api/app/workflows/agent_execution_with_skills.py +223 -0
- control_plane_api/app/workflows/namespace_provisioning.py +326 -0
- control_plane_api/app/workflows/plan_generation.py +254 -0
- control_plane_api/app/workflows/team_execution.py +442 -0
- control_plane_api/scripts/seed_models.py +240 -0
- control_plane_api/scripts/validate_existing_tool_names.py +492 -0
- control_plane_api/shared/__init__.py +8 -0
- control_plane_api/shared/version.py +17 -0
- control_plane_api/test_deduplication.py +274 -0
- control_plane_api/test_executor_deduplication_e2e.py +309 -0
- control_plane_api/test_job_execution_e2e.py +283 -0
- control_plane_api/test_real_integration.py +193 -0
- control_plane_api/version.py +38 -0
- control_plane_api/worker/__init__.py +0 -0
- control_plane_api/worker/activities/__init__.py +0 -0
- control_plane_api/worker/activities/agent_activities.py +1585 -0
- control_plane_api/worker/activities/approval_activities.py +234 -0
- control_plane_api/worker/activities/job_activities.py +199 -0
- control_plane_api/worker/activities/runtime_activities.py +1167 -0
- control_plane_api/worker/activities/skill_activities.py +282 -0
- control_plane_api/worker/activities/team_activities.py +479 -0
- control_plane_api/worker/agent_runtime_server.py +370 -0
- control_plane_api/worker/binary_manager.py +333 -0
- control_plane_api/worker/config/__init__.py +31 -0
- control_plane_api/worker/config/worker_config.py +273 -0
- control_plane_api/worker/control_plane_client.py +1491 -0
- control_plane_api/worker/examples/analytics_integration_example.py +362 -0
- control_plane_api/worker/health_monitor.py +159 -0
- control_plane_api/worker/metrics.py +237 -0
- control_plane_api/worker/models/__init__.py +1 -0
- control_plane_api/worker/models/error_events.py +105 -0
- control_plane_api/worker/models/inputs.py +89 -0
- control_plane_api/worker/runtimes/__init__.py +35 -0
- control_plane_api/worker/runtimes/agent_runtime/runtime.py +485 -0
- control_plane_api/worker/runtimes/agno/__init__.py +34 -0
- control_plane_api/worker/runtimes/agno/config.py +248 -0
- control_plane_api/worker/runtimes/agno/hooks.py +385 -0
- control_plane_api/worker/runtimes/agno/mcp_builder.py +195 -0
- control_plane_api/worker/runtimes/agno/runtime.py +1063 -0
- control_plane_api/worker/runtimes/agno/utils.py +163 -0
- control_plane_api/worker/runtimes/base.py +979 -0
- control_plane_api/worker/runtimes/claude_code/__init__.py +38 -0
- control_plane_api/worker/runtimes/claude_code/cleanup.py +184 -0
- control_plane_api/worker/runtimes/claude_code/client_pool.py +529 -0
- control_plane_api/worker/runtimes/claude_code/config.py +829 -0
- control_plane_api/worker/runtimes/claude_code/hooks.py +482 -0
- control_plane_api/worker/runtimes/claude_code/litellm_proxy.py +1702 -0
- control_plane_api/worker/runtimes/claude_code/mcp_builder.py +467 -0
- control_plane_api/worker/runtimes/claude_code/mcp_discovery.py +558 -0
- control_plane_api/worker/runtimes/claude_code/runtime.py +1546 -0
- control_plane_api/worker/runtimes/claude_code/tool_mapper.py +403 -0
- control_plane_api/worker/runtimes/claude_code/utils.py +149 -0
- control_plane_api/worker/runtimes/factory.py +173 -0
- control_plane_api/worker/runtimes/model_utils.py +107 -0
- control_plane_api/worker/runtimes/validation.py +93 -0
- control_plane_api/worker/services/__init__.py +1 -0
- control_plane_api/worker/services/agent_communication_tools.py +908 -0
- control_plane_api/worker/services/agent_executor.py +485 -0
- control_plane_api/worker/services/agent_executor_v2.py +793 -0
- control_plane_api/worker/services/analytics_collector.py +457 -0
- control_plane_api/worker/services/analytics_service.py +464 -0
- control_plane_api/worker/services/approval_tools.py +310 -0
- control_plane_api/worker/services/approval_tools_agno.py +207 -0
- control_plane_api/worker/services/cancellation_manager.py +177 -0
- control_plane_api/worker/services/code_ingestion_tools.py +465 -0
- control_plane_api/worker/services/contextual_awareness_tools.py +405 -0
- control_plane_api/worker/services/data_visualization.py +834 -0
- control_plane_api/worker/services/event_publisher.py +531 -0
- control_plane_api/worker/services/jira_tools.py +257 -0
- control_plane_api/worker/services/remote_filesystem_tools.py +498 -0
- control_plane_api/worker/services/runtime_analytics.py +328 -0
- control_plane_api/worker/services/session_service.py +365 -0
- control_plane_api/worker/services/skill_context_enhancement.py +181 -0
- control_plane_api/worker/services/skill_factory.py +471 -0
- control_plane_api/worker/services/system_prompt_enhancement.py +410 -0
- control_plane_api/worker/services/team_executor.py +715 -0
- control_plane_api/worker/services/team_executor_v2.py +1866 -0
- control_plane_api/worker/services/tool_enforcement.py +254 -0
- control_plane_api/worker/services/workflow_executor/__init__.py +52 -0
- control_plane_api/worker/services/workflow_executor/event_processor.py +287 -0
- control_plane_api/worker/services/workflow_executor/event_publisher.py +210 -0
- control_plane_api/worker/services/workflow_executor/executors/__init__.py +15 -0
- control_plane_api/worker/services/workflow_executor/executors/base.py +270 -0
- control_plane_api/worker/services/workflow_executor/executors/json_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/executors/python_executor.py +50 -0
- control_plane_api/worker/services/workflow_executor/models.py +142 -0
- control_plane_api/worker/services/workflow_executor_tools.py +1748 -0
- control_plane_api/worker/skills/__init__.py +12 -0
- control_plane_api/worker/skills/builtin/context_graph_search/README.md +213 -0
- control_plane_api/worker/skills/builtin/context_graph_search/__init__.py +5 -0
- control_plane_api/worker/skills/builtin/context_graph_search/agno_impl.py +808 -0
- control_plane_api/worker/skills/builtin/context_graph_search/skill.yaml +67 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/contextual_awareness/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/data_visualization/agno_impl.py +18 -0
- control_plane_api/worker/skills/builtin/data_visualization/skill.yaml +84 -0
- control_plane_api/worker/skills/builtin/docker/agno_impl.py +65 -0
- control_plane_api/worker/skills/builtin/docker/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/file_generation/agno_impl.py +47 -0
- control_plane_api/worker/skills/builtin/file_generation/skill.yaml +64 -0
- control_plane_api/worker/skills/builtin/file_system/agno_impl.py +32 -0
- control_plane_api/worker/skills/builtin/file_system/skill.yaml +54 -0
- control_plane_api/worker/skills/builtin/knowledge_api/__init__.py +4 -0
- control_plane_api/worker/skills/builtin/knowledge_api/agno_impl.py +50 -0
- control_plane_api/worker/skills/builtin/knowledge_api/skill.yaml +66 -0
- control_plane_api/worker/skills/builtin/python/agno_impl.py +25 -0
- control_plane_api/worker/skills/builtin/python/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/schema_fix_mixin.py +260 -0
- control_plane_api/worker/skills/builtin/shell/agno_impl.py +31 -0
- control_plane_api/worker/skills/builtin/shell/skill.yaml +60 -0
- control_plane_api/worker/skills/builtin/slack/__init__.py +3 -0
- control_plane_api/worker/skills/builtin/slack/agno_impl.py +1282 -0
- control_plane_api/worker/skills/builtin/slack/skill.yaml +276 -0
- control_plane_api/worker/skills/builtin/workflow_executor/agno_impl.py +62 -0
- control_plane_api/worker/skills/builtin/workflow_executor/skill.yaml +79 -0
- control_plane_api/worker/skills/loaders/__init__.py +5 -0
- control_plane_api/worker/skills/loaders/base.py +23 -0
- control_plane_api/worker/skills/loaders/filesystem_loader.py +357 -0
- control_plane_api/worker/skills/registry.py +208 -0
- control_plane_api/worker/tests/__init__.py +1 -0
- control_plane_api/worker/tests/conftest.py +12 -0
- control_plane_api/worker/tests/e2e/__init__.py +0 -0
- control_plane_api/worker/tests/e2e/test_context_graph_real_api.py +338 -0
- control_plane_api/worker/tests/e2e/test_context_graph_templates_e2e.py +523 -0
- control_plane_api/worker/tests/e2e/test_enforcement_e2e.py +344 -0
- control_plane_api/worker/tests/e2e/test_execution_flow.py +571 -0
- control_plane_api/worker/tests/e2e/test_single_execution_mode.py +656 -0
- control_plane_api/worker/tests/integration/__init__.py +0 -0
- control_plane_api/worker/tests/integration/test_builtin_skills_fixes.py +245 -0
- control_plane_api/worker/tests/integration/test_context_graph_search_integration.py +365 -0
- control_plane_api/worker/tests/integration/test_control_plane_integration.py +308 -0
- control_plane_api/worker/tests/integration/test_hook_enforcement_integration.py +579 -0
- control_plane_api/worker/tests/integration/test_scheduled_job_workflow.py +237 -0
- control_plane_api/worker/tests/integration/test_system_prompt_enhancement_integration.py +343 -0
- control_plane_api/worker/tests/unit/__init__.py +0 -0
- control_plane_api/worker/tests/unit/test_builtin_skill_autoload.py +396 -0
- control_plane_api/worker/tests/unit/test_context_graph_search.py +450 -0
- control_plane_api/worker/tests/unit/test_context_graph_templates.py +403 -0
- control_plane_api/worker/tests/unit/test_control_plane_client.py +401 -0
- control_plane_api/worker/tests/unit/test_control_plane_client_jobs.py +345 -0
- control_plane_api/worker/tests/unit/test_job_activities.py +353 -0
- control_plane_api/worker/tests/unit/test_skill_context_enhancement.py +321 -0
- control_plane_api/worker/tests/unit/test_system_prompt_enhancement.py +415 -0
- control_plane_api/worker/tests/unit/test_tool_enforcement.py +324 -0
- control_plane_api/worker/utils/__init__.py +1 -0
- control_plane_api/worker/utils/chunk_batcher.py +330 -0
- control_plane_api/worker/utils/environment.py +65 -0
- control_plane_api/worker/utils/error_publisher.py +260 -0
- control_plane_api/worker/utils/event_batcher.py +256 -0
- control_plane_api/worker/utils/logging_config.py +335 -0
- control_plane_api/worker/utils/logging_helper.py +326 -0
- control_plane_api/worker/utils/parameter_validator.py +120 -0
- control_plane_api/worker/utils/retry_utils.py +60 -0
- control_plane_api/worker/utils/streaming_utils.py +665 -0
- control_plane_api/worker/utils/tool_validation.py +332 -0
- control_plane_api/worker/utils/workspace_manager.py +163 -0
- control_plane_api/worker/websocket_client.py +393 -0
- control_plane_api/worker/worker.py +1297 -0
- control_plane_api/worker/workflows/__init__.py +0 -0
- control_plane_api/worker/workflows/agent_execution.py +909 -0
- control_plane_api/worker/workflows/scheduled_job_wrapper.py +332 -0
- control_plane_api/worker/workflows/team_execution.py +611 -0
- kubiya_control_plane_api-0.9.15.dist-info/METADATA +354 -0
- kubiya_control_plane_api-0.9.15.dist-info/RECORD +479 -0
- kubiya_control_plane_api-0.9.15.dist-info/WHEEL +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/entry_points.txt +5 -0
- kubiya_control_plane_api-0.9.15.dist-info/licenses/LICENSE +676 -0
- kubiya_control_plane_api-0.9.15.dist-info/top_level.txt +3 -0
- scripts/__init__.py +1 -0
- scripts/migrations.py +39 -0
- scripts/seed_worker_queues.py +128 -0
- scripts/setup_agent_runtime.py +142 -0
- worker_internal/__init__.py +1 -0
- worker_internal/planner/__init__.py +1 -0
- worker_internal/planner/activities.py +1499 -0
- worker_internal/planner/agent_tools.py +197 -0
- worker_internal/planner/event_models.py +148 -0
- worker_internal/planner/event_publisher.py +67 -0
- worker_internal/planner/models.py +199 -0
- worker_internal/planner/retry_logic.py +134 -0
- worker_internal/planner/worker.py +300 -0
- worker_internal/planner/workflows.py +970 -0
control_plane_api/worker/runtimes/claude_code/litellm_proxy.py
@@ -0,0 +1,1702 @@
"""
Local FastAPI proxy for Claude Code SDK to inject Langfuse metadata.

This proxy runs in the same process as the worker and intercepts requests
from Claude Code SDK to add missing metadata before forwarding to the real
LiteLLM proxy.

Architecture:
    Claude Code SDK → Local Proxy (adds metadata) → Real LiteLLM Proxy → Langfuse

The proxy:
1. Receives requests from Claude Code SDK
2. Extracts execution context from thread-local cache
3. Injects Langfuse metadata (trace_name, trace_user_id, session_id, etc.)
4. Forwards request to real LiteLLM proxy
5. Returns response back to Claude Code SDK
"""

import asyncio
import json
import os
import re
import threading
import time
from typing import Dict, Any, Optional, List, Tuple
import structlog
from contextlib import asynccontextmanager

from fastapi import FastAPI, Request, Response, HTTPException
from fastapi.responses import StreamingResponse
import httpx
import uvicorn

logger = structlog.get_logger(__name__)


# Cache for available models from upstream LiteLLM proxy
_available_models_cache: Optional[Dict[str, Any]] = None
_available_models_cache_time: float = 0
_available_models_cache_ttl: int = 300  # 5 minutes


async def fetch_available_models(
    litellm_base_url: str,
    litellm_api_key: str,
    timeout: float = 10.0,
    max_retries: int = 3,
    retry_delay: float = 1.0,
) -> List[str]:
    """
    Fetch available models from the upstream LiteLLM proxy with retry logic.

    Args:
        litellm_base_url: Base URL of LiteLLM proxy
        litellm_api_key: API key for authentication
        timeout: Request timeout in seconds
        max_retries: Maximum number of retry attempts
        retry_delay: Initial delay between retries (doubles each retry)

    Returns:
        List of available model IDs
    """
    global _available_models_cache, _available_models_cache_time

    # Check cache first
    now = time.time()
    if _available_models_cache is not None and (now - _available_models_cache_time) < _available_models_cache_ttl:
        logger.debug(
            "using_cached_available_models",
            model_count=len(_available_models_cache.get("models", [])),
            cache_age_seconds=int(now - _available_models_cache_time),
        )
        return _available_models_cache.get("models", [])

    last_error = None
    current_delay = retry_delay

    for attempt in range(max_retries):
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                response = await client.get(
                    f"{litellm_base_url.rstrip('/')}/v1/models",
                    headers={"Authorization": f"Bearer {litellm_api_key}"},
                )

                if response.status_code == 200:
                    data = response.json()
                    # LiteLLM returns {"data": [{"id": "model-name", ...}, ...], "object": "list"}
                    models = []
                    if "data" in data and isinstance(data["data"], list):
                        models = [m.get("id") for m in data["data"] if m.get("id")]

                    # Update cache
                    _available_models_cache = {"models": models}
                    _available_models_cache_time = time.time()

                    logger.info(
                        "fetched_available_models_from_upstream",
                        model_count=len(models),
                        models=models[:10] if len(models) > 10 else models,
                        litellm_base_url=litellm_base_url,
                        attempt=attempt + 1,
                    )
                    return models

                elif response.status_code in (502, 503, 504):
                    # Transient errors - retry
                    last_error = f"HTTP {response.status_code}"
                    logger.warning(
                        "transient_error_fetching_models",
                        status_code=response.status_code,
                        attempt=attempt + 1,
                        max_retries=max_retries,
                        retry_delay=current_delay,
                    )
                else:
                    # Non-retryable error
                    logger.warning(
                        "failed_to_fetch_models_from_upstream",
                        status_code=response.status_code,
                        response_text=response.text[:500] if response.text else "",
                        litellm_base_url=litellm_base_url,
                    )
                    return []

        except (httpx.ConnectError, httpx.TimeoutException) as e:
            last_error = str(e)
            logger.warning(
                "connection_error_fetching_models",
                error=str(e),
                error_type=type(e).__name__,
                attempt=attempt + 1,
                max_retries=max_retries,
                retry_delay=current_delay,
            )

        except Exception as e:
            # Unexpected error - don't retry
            logger.error(
                "unexpected_error_fetching_models",
                error=str(e),
                error_type=type(e).__name__,
                litellm_base_url=litellm_base_url,
                exc_info=True,
            )
            return []

        # Wait before retry (with exponential backoff)
        if attempt < max_retries - 1:
            await asyncio.sleep(current_delay)
            current_delay *= 2  # Exponential backoff

    # All retries exhausted
    logger.error(
        "all_retries_exhausted_fetching_models",
        last_error=last_error,
        max_retries=max_retries,
        litellm_base_url=litellm_base_url,
    )
    return []

def get_cached_available_models() -> List[str]:
    """
    Get cached available models (synchronous, for use in non-async contexts).

    Returns empty list if cache is not populated.
    """
    global _available_models_cache, _available_models_cache_time

    now = time.time()
    if _available_models_cache is not None and (now - _available_models_cache_time) < _available_models_cache_ttl:
        return _available_models_cache.get("models", [])
    return []


def _normalize_model_name(model: str) -> str:
    """
    Normalize model name for comparison.

    Handles various model naming patterns:
    - Provider prefixes: bedrock/, anthropic/, openai/, azure/, etc.
    - Region prefixes for Bedrock: us., eu., ap., etc.
    - Version suffixes: -v1:0, -20240620-v1:0, etc.

    Args:
        model: Model name to normalize

    Returns:
        Normalized model name (lowercase, without common prefixes/suffixes)
    """
    normalized = model.lower().strip()

    # Remove common provider prefixes (order matters - longest first)
    provider_prefixes = [
        "bedrock/converse/", "bedrock/invoke/",  # More specific first
        "bedrock/", "anthropic/", "openai/", "azure/", "vertex_ai/",
        "kubiya/", "litellm/",
    ]
    for prefix in provider_prefixes:
        if normalized.startswith(prefix):
            normalized = normalized[len(prefix):]
            break

    # Remove Bedrock region prefixes (us., eu., ap., etc.)
    # Pattern: XX. where XX is 2 lowercase letters
    normalized = re.sub(r'^[a-z]{2}\.', '', normalized)

    # Remove provider prefixes within model name (anthropic., meta., etc.)
    inner_prefixes = ["anthropic.", "meta.", "amazon.", "mistral.", "ai21.", "cohere."]
    for prefix in inner_prefixes:
        if normalized.startswith(prefix):
            normalized = normalized[len(prefix):]
            break

    # Remove version suffixes like -v1:0, -v2:0, etc.
    normalized = re.sub(r'-v\d+:\d+$', '', normalized)

    # Remove date-version suffixes like -20240620-v1:0
    normalized = re.sub(r'-\d{8}-v\d+:\d+$', '', normalized)
    normalized = re.sub(r'-\d{8}$', '', normalized)

    return normalized


def _calculate_model_similarity(requested: str, available: str) -> float:
    """
    Calculate similarity score between two model names.

    Higher score means better match. Uses normalized names and
    considers various matching strategies.

    Args:
        requested: Requested model name
        available: Available model name

    Returns:
        Similarity score (0.0 to 1.0)
    """
    req_norm = _normalize_model_name(requested)
    avail_norm = _normalize_model_name(available)

    # Exact match after normalization
    if req_norm == avail_norm:
        return 1.0

    # One contains the other (after normalization)
    if req_norm in avail_norm:
        return 0.9
    if avail_norm in req_norm:
        return 0.85

    # Check for key model family matches
    # e.g., "claude-sonnet-4" should match "claude-sonnet-4-20250115"
    model_families = [
        "claude-sonnet-4", "claude-4-sonnet", "claude-sonnet-4-5",
        "claude-3-5-sonnet", "claude-3-sonnet", "claude-3-haiku", "claude-3-opus",
        "claude-instant", "claude-v2",
        "gpt-4", "gpt-4o", "gpt-3.5",
        "llama-3", "llama-2",
        "mistral", "mixtral",
    ]

    for family in model_families:
        if family in req_norm and family in avail_norm:
            return 0.8

    # Partial word overlap
    req_parts = set(req_norm.replace("-", " ").replace(".", " ").split())
    avail_parts = set(avail_norm.replace("-", " ").replace(".", " ").split())
    overlap = req_parts & avail_parts
    if overlap:
        return 0.5 * len(overlap) / max(len(req_parts), len(avail_parts))

    return 0.0

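As a minimal illustration (not part of the packaged file), and assuming the module imports under its package path, the normalization helpers above reduce the naming patterns mentioned in their docstrings like so:

# Illustrative only; hypothetical inputs, behaviour as defined by the helpers above.
from control_plane_api.worker.runtimes.claude_code.litellm_proxy import (
    _normalize_model_name,
    _calculate_model_similarity,
)

_normalize_model_name("bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0")
# -> "claude-3-5-sonnet" (provider, region, date and version parts stripped)
_normalize_model_name("kubiya/claude-sonnet-4")
# -> "claude-sonnet-4"
_calculate_model_similarity("claude-sonnet-4", "bedrock/us.anthropic.claude-sonnet-4-20250115-v1:0")
# -> 1.0 (exact match after normalization)
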
def validate_and_resolve_model(
    requested_model: str,
    available_models: List[str],
    default_fallback: str = None,
) -> Tuple[str, bool]:
    """
    Validate requested model against available models and resolve fallback.

    Handles complex model naming patterns including:
    - Bedrock: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0
    - Cross-region: bedrock/us.anthropic.claude-sonnet-4-20250115-v1:0
    - Simple: claude-sonnet-4, gpt-4o
    - Provider-prefixed: kubiya/claude-sonnet-4, anthropic/claude-3-sonnet

    Args:
        requested_model: The model requested by the user/agent
        available_models: List of available model IDs from upstream
        default_fallback: Default model to use if no match found

    Returns:
        Tuple of (resolved_model, was_fallback_used)
    """
    if not available_models:
        # No available models list - can't validate, use as-is
        logger.warning(
            "cannot_validate_model_no_available_models",
            requested_model=requested_model,
            note="Proceeding with requested model without validation"
        )
        return requested_model, False

    # Exact match (case-sensitive)
    if requested_model in available_models:
        return requested_model, False

    # Case-insensitive exact match
    requested_lower = requested_model.lower()
    for available in available_models:
        if available.lower() == requested_lower:
            logger.info(
                "model_case_insensitive_match",
                requested_model=requested_model,
                matched_model=available,
            )
            return available, False

    # Find best match using similarity scoring
    best_match = None
    best_score = 0.0

    for available in available_models:
        score = _calculate_model_similarity(requested_model, available)
        if score > best_score:
            best_score = score
            best_match = available

    # Accept match if score is above threshold
    if best_score >= 0.7:
        logger.info(
            "model_similarity_match_found",
            requested_model=requested_model,
            matched_model=best_match,
            similarity_score=best_score,
            note="Found similar model via smart matching"
        )
        return best_match, True

    # Log detailed match attempts for debugging
    logger.warning(
        "model_no_good_match_found",
        requested_model=requested_model,
        requested_normalized=_normalize_model_name(requested_model),
        best_candidate=best_match,
        best_score=best_score,
        available_models_sample=available_models[:5] if len(available_models) > 5 else available_models,
        available_count=len(available_models),
    )

    # No good match - find best fallback from same provider/family
    # Priority: same family > default_fallback > first available
    fallback = _find_same_family_fallback(requested_model, available_models)
    if not fallback:
        fallback = default_fallback or (available_models[0] if available_models else requested_model)

    logger.warning(
        "model_not_found_using_fallback",
        requested_model=requested_model,
        fallback_model=fallback,
        available_models=available_models[:10] if len(available_models) > 10 else available_models,
        note="Requested model not available, using same-family fallback if available"
    )
    return fallback, True


def _find_same_family_fallback(requested_model: str, available_models: List[str]) -> Optional[str]:
    """
    Find a fallback model from the same provider/family.

    For Claude models, prefer other Claude models.
    For GPT models, prefer other GPT models.
    For Llama models, prefer other Llama models.
    etc.

    Args:
        requested_model: The requested model name
        available_models: List of available models

    Returns:
        Best same-family fallback, or None if no match
    """
    requested_lower = requested_model.lower()

    # Define model families and their identifying patterns
    # Order matters - more specific patterns first
    model_families = [
        # Claude family
        ("claude", ["claude", "anthropic"]),
        # OpenAI family
        ("gpt", ["gpt-4", "gpt-3", "gpt4", "gpt3", "openai"]),
        # Llama family
        ("llama", ["llama", "meta"]),
        # Mistral family
        ("mistral", ["mistral", "mixtral"]),
        # DeepSeek family
        ("deepseek", ["deepseek"]),
    ]

    # Determine which family the requested model belongs to
    requested_family = None
    for family_name, patterns in model_families:
        for pattern in patterns:
            if pattern in requested_lower:
                requested_family = family_name
                break
        if requested_family:
            break

    if not requested_family:
        return None

    # Find available models from the same family
    # Score them by how well they match
    same_family_models = []
    for available in available_models:
        available_lower = available.lower()
        for family_name, patterns in model_families:
            if family_name == requested_family:
                for pattern in patterns:
                    if pattern in available_lower:
                        # Calculate a preference score
                        # Prefer models with more capability (sonnet > haiku, etc.)
                        score = _calculate_model_capability_score(available)
                        same_family_models.append((available, score))
                        break
                break

    if not same_family_models:
        return None

    # Sort by capability score (descending) and return the best
    same_family_models.sort(key=lambda x: x[1], reverse=True)
    best_fallback = same_family_models[0][0]

    logger.info(
        "same_family_fallback_found",
        requested_model=requested_model,
        requested_family=requested_family,
        fallback_model=best_fallback,
        same_family_options=[m[0] for m in same_family_models],
    )

    return best_fallback


def _calculate_model_capability_score(model: str) -> int:
    """
    Calculate a capability score for model preference.
    Higher score = more capable model (preferred for fallback).

    Args:
        model: Model name

    Returns:
        Capability score (higher is more capable)
    """
    model_lower = model.lower()
    score = 0

    # Claude models - prefer opus > sonnet > haiku
    if "opus" in model_lower:
        score = 100
    elif "sonnet" in model_lower:
        score = 80
    elif "haiku" in model_lower:
        score = 60
    # GPT models - prefer gpt-4 > gpt-3.5
    elif "gpt-4o" in model_lower:
        score = 95
    elif "gpt-4" in model_lower:
        score = 90
    elif "gpt-3.5" in model_lower or "gpt-35" in model_lower:
        score = 70
    # Llama models - prefer larger
    elif "llama-3" in model_lower or "llama3" in model_lower:
        score = 75
    elif "llama-2" in model_lower or "llama2" in model_lower:
        score = 65
    # DeepSeek
    elif "deepseek-r1" in model_lower:
        score = 85
    elif "deepseek-v3" in model_lower:
        score = 80
    elif "deepseek" in model_lower:
        score = 70
    # Mistral
    elif "mixtral" in model_lower:
        score = 75
    elif "mistral" in model_lower:
        score = 70
    else:
        score = 50  # Default for unknown models

    return score

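A minimal usage sketch of the resolution flow above (not part of the packaged file); the upstream model IDs are hypothetical, and the outcomes follow the matching and fallback rules shown in the code:

# Illustrative only; hypothetical upstream model list.
available = [
    "bedrock/us.anthropic.claude-sonnet-4-20250115-v1:0",
    "bedrock/anthropic.claude-3-haiku-20240307-v1:0",
    "gpt-4o",
]
model, used_fallback = validate_and_resolve_model("claude-sonnet-4", available)
# -> ("bedrock/us.anthropic.claude-sonnet-4-20250115-v1:0", True) via similarity matching
model, used_fallback = validate_and_resolve_model("claude-2.0", available, default_fallback="gpt-4o")
# -> resolves to the Claude Sonnet entry: a same-family fallback is preferred over the default
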
# Thread-local storage for execution context
|
|
504
|
+
# This allows us to access execution metadata from the proxy
|
|
505
|
+
class ExecutionContextStore:
|
|
506
|
+
"""
|
|
507
|
+
Thread-safe storage for execution context metadata with TTL and proactive cleanup.
|
|
508
|
+
|
|
509
|
+
Features:
|
|
510
|
+
- TTL-based expiration (default 3600s)
|
|
511
|
+
- Proactive cleanup timer (runs every 60s)
|
|
512
|
+
- Circuit breaker to prevent runaway memory growth
|
|
513
|
+
- Thread-safe operations
|
|
514
|
+
"""
|
|
515
|
+
|
|
516
|
+
def __init__(self, ttl_seconds: int = 3600, max_contexts: int = 1000):
|
|
517
|
+
self._contexts: Dict[str, Dict[str, Any]] = {}
|
|
518
|
+
self._context_timestamps: Dict[str, float] = {}
|
|
519
|
+
self._ttl_seconds = ttl_seconds
|
|
520
|
+
self._max_contexts = max_contexts # Circuit breaker threshold
|
|
521
|
+
self._current_execution: Optional[str] = None
|
|
522
|
+
self._lock = threading.Lock()
|
|
523
|
+
|
|
524
|
+
# Proactive cleanup timer
|
|
525
|
+
self._cleanup_timer: Optional[threading.Timer] = None
|
|
526
|
+
self._cleanup_interval = 60 # Run cleanup every 60 seconds
|
|
527
|
+
self._start_proactive_cleanup()
|
|
528
|
+
|
|
529
|
+
def _start_proactive_cleanup(self):
|
|
530
|
+
"""Start periodic cleanup timer."""
|
|
531
|
+
self._cleanup_timer = threading.Timer(
|
|
532
|
+
self._cleanup_interval,
|
|
533
|
+
self._proactive_cleanup_worker
|
|
534
|
+
)
|
|
535
|
+
self._cleanup_timer.daemon = True
|
|
536
|
+
self._cleanup_timer.start()
|
|
537
|
+
logger.debug("proactive_cleanup_timer_started", interval=self._cleanup_interval)
|
|
538
|
+
|
|
539
|
+
def _proactive_cleanup_worker(self):
|
|
540
|
+
"""Worker that runs periodic cleanup."""
|
|
541
|
+
try:
|
|
542
|
+
self._cleanup_expired()
|
|
543
|
+
|
|
544
|
+
# Check circuit breaker
|
|
545
|
+
with self._lock:
|
|
546
|
+
context_count = len(self._contexts)
|
|
547
|
+
|
|
548
|
+
if context_count > self._max_contexts:
|
|
549
|
+
logger.error(
|
|
550
|
+
"context_store_circuit_breaker_triggered",
|
|
551
|
+
context_count=context_count,
|
|
552
|
+
max_contexts=self._max_contexts,
|
|
553
|
+
action="forcing_aggressive_cleanup"
|
|
554
|
+
)
|
|
555
|
+
# Aggressive cleanup: remove oldest 50%
|
|
556
|
+
self._force_cleanup(keep_ratio=0.5)
|
|
557
|
+
|
|
558
|
+
except Exception as e:
|
|
559
|
+
logger.error(
|
|
560
|
+
"proactive_cleanup_error",
|
|
561
|
+
error=str(e),
|
|
562
|
+
error_type=type(e).__name__,
|
|
563
|
+
exc_info=True
|
|
564
|
+
)
|
|
565
|
+
finally:
|
|
566
|
+
# Reschedule timer
|
|
567
|
+
self._start_proactive_cleanup()
|
|
568
|
+
|
|
569
|
+
def _force_cleanup(self, keep_ratio: float = 0.5):
|
|
570
|
+
"""
|
|
571
|
+
Force cleanup of oldest contexts when circuit breaker triggers.
|
|
572
|
+
|
|
573
|
+
Args:
|
|
574
|
+
keep_ratio: Ratio of newest contexts to keep (0.5 = keep newest 50%)
|
|
575
|
+
"""
|
|
576
|
+
with self._lock:
|
|
577
|
+
if not self._contexts:
|
|
578
|
+
return
|
|
579
|
+
|
|
580
|
+
# Sort by timestamp (oldest first)
|
|
581
|
+
sorted_ids = sorted(
|
|
582
|
+
self._context_timestamps.items(),
|
|
583
|
+
key=lambda x: x[1]
|
|
584
|
+
)
|
|
585
|
+
|
|
586
|
+
# Calculate how many to remove
|
|
587
|
+
keep_count = int(len(sorted_ids) * keep_ratio)
|
|
588
|
+
            to_remove = sorted_ids[:len(sorted_ids) - keep_count]

            # Remove oldest contexts
            removed_count = 0
            for exec_id, _ in to_remove:
                self._contexts.pop(exec_id, None)
                self._context_timestamps.pop(exec_id, None)
                if self._current_execution == exec_id:
                    self._current_execution = None
                removed_count += 1

            logger.warning(
                "forced_cleanup_completed",
                removed=removed_count,
                remaining=len(self._contexts),
                keep_ratio=keep_ratio
            )

    def set_context(self, execution_id: str, context: Dict[str, Any]):
        """Store execution context for an execution ID with timestamp."""
        with self._lock:
            # Check circuit breaker before adding
            if len(self._contexts) >= self._max_contexts:
                logger.error(
                    "context_store_at_capacity",
                    current_count=len(self._contexts),
                    max_contexts=self._max_contexts,
                    action="rejecting_new_context"
                )
                raise RuntimeError(
                    f"Context store at capacity ({self._max_contexts}). "
                    "System may be leaking contexts or under high load."
                )

            self._contexts[execution_id] = context
            self._context_timestamps[execution_id] = time.time()
            self._current_execution = execution_id
            logger.debug(
                "execution_context_stored",
                execution_id=execution_id[:8] if len(execution_id) >= 8 else execution_id,
                total_contexts=len(self._contexts),
                has_user_id=bool(context.get("user_id")),
                has_session_id=bool(context.get("session_id")),
            )

    def get_context(self, execution_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """
        Retrieve execution context for an execution ID if not expired.

        If execution_id is None, returns the current active execution context.
        """
        with self._lock:
            target_id = execution_id if execution_id else self._current_execution
            if not target_id:
                return None

            # Check if expired
            timestamp = self._context_timestamps.get(target_id)
            if timestamp and (time.time() - timestamp) > self._ttl_seconds:
                # Expired - remove and return None
                self._contexts.pop(target_id, None)
                self._context_timestamps.pop(target_id, None)
                logger.debug("execution_context_expired", execution_id=target_id[:8] if len(target_id) >= 8 else target_id)
                return None

            return self._contexts.get(target_id)

    def get_current_execution_id(self) -> Optional[str]:
        """Get the current active execution ID."""
        with self._lock:
            return self._current_execution

    def get_any_valid_execution_id(self) -> Optional[str]:
        """
        Get any valid (non-expired) execution ID.

        This is a fallback when _current_execution is None but there are still
        valid contexts available. Useful for sub-agent requests that arrive
        after _current_execution has been overwritten by concurrent executions.

        Returns the most recently set context's execution ID.
        """
        with self._lock:
            if not self._contexts:
                return None

            now = time.time()
            # Find the most recent non-expired context
            valid_contexts = [
                (exec_id, ts) for exec_id, ts in self._context_timestamps.items()
                if (now - ts) <= self._ttl_seconds and exec_id in self._contexts
            ]

            if not valid_contexts:
                return None

            # Return the most recently set context
            most_recent = max(valid_contexts, key=lambda x: x[1])
            logger.debug(
                "using_fallback_execution_context",
                execution_id=most_recent[0][:8] if len(most_recent[0]) >= 8 else most_recent[0],
                total_valid_contexts=len(valid_contexts),
            )
            return most_recent[0]

    def clear_context(self, execution_id: str):
        """Clear execution context after execution completes."""
        with self._lock:
            if execution_id in self._contexts:
                del self._contexts[execution_id]
                self._context_timestamps.pop(execution_id, None)
                if self._current_execution == execution_id:
                    self._current_execution = None
                logger.debug(
                    "execution_context_cleared",
                    execution_id=execution_id[:8] if len(execution_id) >= 8 else execution_id,
                    remaining_contexts=len(self._contexts)
                )

    def _cleanup_expired(self) -> None:
        """Remove contexts older than TTL."""
        now = time.time()
        with self._lock:
            expired_ids = [
                exec_id for exec_id, timestamp in self._context_timestamps.items()
                if (now - timestamp) > self._ttl_seconds
            ]

            if expired_ids:
                for exec_id in expired_ids:
                    self._contexts.pop(exec_id, None)
                    self._context_timestamps.pop(exec_id, None)
                    if self._current_execution == exec_id:
                        self._current_execution = None

                logger.info(
                    "expired_contexts_cleaned",
                    removed=len(expired_ids),
                    remaining=len(self._contexts)
                )

    def get_stats(self) -> Dict[str, Any]:
        """Get context store statistics."""
        with self._lock:
            now = time.time()
            ages = [now - ts for ts in self._context_timestamps.values()]
            return {
                'total_contexts': len(self._contexts),
                'max_contexts': self._max_contexts,
                'ttl_seconds': self._ttl_seconds,
                'oldest_age_seconds': int(max(ages)) if ages else 0,
                'newest_age_seconds': int(min(ages)) if ages else 0,
            }

    def shutdown(self):
        """Stop proactive cleanup timer."""
        if self._cleanup_timer:
            self._cleanup_timer.cancel()
            logger.info("context_store_cleanup_timer_stopped")


# Global context store
_context_store = ExecutionContextStore()
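
A minimal usage sketch of the store above (illustrative only, not part of the package); the execution ID and context fields are made-up placeholder values.

store = ExecutionContextStore()
store.set_context(
    "exec-1234abcd",  # placeholder execution ID
    {"user_id": "alice@example.com", "session_id": "sess-42", "agent_id": "agent-7"},
)
ctx = store.get_context("exec-1234abcd")     # returns the stored dict while within TTL
assert ctx is not None and ctx["session_id"] == "sess-42"
print(store.get_stats())                     # total_contexts, max_contexts, ttl_seconds, ages
store.clear_context("exec-1234abcd")         # explicit cleanup once the execution finishes
store.shutdown()                             # stop the proactive cleanup timer
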
class ContextCleanupScheduler:
    """Schedules delayed context cleanup without blocking."""

    def __init__(self):
        self._pending_cleanups: Dict[str, asyncio.Task] = {}
        self._lock = threading.Lock()

    def schedule_cleanup(
        self,
        execution_id: str,
        delay_seconds: float,
        store: 'ExecutionContextStore'
    ):
        """Schedule cleanup after delay (non-blocking)."""
        with self._lock:
            # Cancel existing cleanup if rescheduling
            if execution_id in self._pending_cleanups:
                self._pending_cleanups[execution_id].cancel()

            # Create background task for delayed cleanup
            try:
                loop = asyncio.get_event_loop()
                task = loop.create_task(
                    self._delayed_cleanup(execution_id, delay_seconds, store)
                )
                self._pending_cleanups[execution_id] = task
            except RuntimeError:
                # No event loop - fallback to immediate cleanup
                store.clear_context(execution_id)

    async def _delayed_cleanup(
        self,
        execution_id: str,
        delay_seconds: float,
        store: 'ExecutionContextStore'
    ):
        """Internal: Wait then clear context."""
        try:
            await asyncio.sleep(delay_seconds)
            store.clear_context(execution_id)
        except asyncio.CancelledError:
            pass  # Cleanup was cancelled
        except Exception as e:
            # Log but don't crash - TTL will handle it
            logger.warning(
                "context_cleanup_error",
                execution_id=execution_id[:8] if len(execution_id) >= 8 else execution_id,
                error=str(e)
            )
        finally:
            with self._lock:
                self._pending_cleanups.pop(execution_id, None)


# Global cleanup scheduler
_cleanup_scheduler = ContextCleanupScheduler()
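
A small sketch of the scheduler's behaviour (illustrative, assuming a running event loop); outside a loop, schedule_cleanup falls back to clearing the context immediately.

import asyncio

async def _demo():  # hypothetical driver, not part of the module
    store = ExecutionContextStore()
    scheduler = ContextCleanupScheduler()
    store.set_context("exec-demo", {"user_id": "alice@example.com"})
    scheduler.schedule_cleanup("exec-demo", delay_seconds=0.1, store=store)
    await asyncio.sleep(0.2)                       # give the delayed task time to run
    assert store.get_context("exec-demo") is None  # context cleared after the delay

asyncio.run(_demo())
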
def _hash_user_id(user_id: str, organization_id: str) -> str:
    """
    Hash user_id to avoid sending email addresses to Anthropic API.

    Anthropic API rejects email addresses in metadata.user_id.
    We hash the email with org to create a unique, non-PII identifier.

    Args:
        user_id: User ID (may be email address)
        organization_id: Organization ID

    Returns:
        Hashed user identifier (SHA256, first 16 chars)
    """
    import hashlib
    combined = f"{user_id}-{organization_id}"
    return hashlib.sha256(combined.encode()).hexdigest()[:16]
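
For illustration, the helper above is deterministic for a given user/org pair; the values below are made up.

h1 = _hash_user_id("alice@example.com", "org-123")
h2 = _hash_user_id("alice@example.com", "org-123")
assert h1 == h2 and len(h1) == 16   # stable, truncated SHA-256 hex digest
assert "@" not in h1                # hex output, so no email/PII is forwarded upstream
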
def build_langfuse_metadata(context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build Langfuse metadata from execution context.

    Matches the format used by agno runtime for consistency.

    Args:
        context: Execution context with user_id, session_id, agent_id, etc.

    Returns:
        Metadata dict for Langfuse tracking
    """
    metadata = {}

    user_id = context.get("user_id")
    organization_id = context.get("organization_id")
    session_id = context.get("session_id")
    agent_id = context.get("agent_id")
    agent_name = context.get("agent_name")
    model_id = context.get("model_id")

    # Langfuse naming fields - use custom values if provided, otherwise default to "agent-chat"
    # This allows both agent-chat and plan execution to use the same proxy
    metadata["name"] = context.get("name", "agent-chat")
    metadata["trace_name"] = context.get("trace_name", "agent-chat")
    metadata["generation_name"] = context.get("generation_name", "agent-chat")

    # Hash user_id to avoid sending email addresses to Anthropic API
    # Anthropic rejects: "user_id appears to contain an email address"
    if user_id and organization_id:
        hashed_user_id = _hash_user_id(user_id, organization_id)
        metadata["trace_user_id"] = hashed_user_id
        metadata["user_id"] = hashed_user_id

    # Use session_id as trace_id to group conversation turns
    if session_id:
        metadata["trace_id"] = session_id
        metadata["session_id"] = session_id

    # Additional metadata (these are safe - not sent to Anthropic)
    if agent_id:
        metadata["agent_id"] = agent_id
    if agent_name:
        metadata["agent_name"] = agent_name
    if user_id:
        metadata["user_email"] = user_id  # Keep original for Langfuse internal tracking
    if organization_id:
        metadata["organization_id"] = organization_id
    if model_id:
        metadata["model"] = model_id

    return metadata
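
A quick sketch of the mapping performed above with a made-up context; the keys follow the function, the concrete values are illustrative.

ctx = {
    "user_id": "alice@example.com",
    "organization_id": "org-123",
    "session_id": "sess-42",
    "agent_id": "agent-7",
    "model_id": "claude-sonnet-4",   # illustrative model name
}
md = build_langfuse_metadata(ctx)
assert md["trace_name"] == "agent-chat"                                 # default naming
assert md["trace_id"] == "sess-42"                                      # session groups turns
assert md["user_id"] == _hash_user_id("alice@example.com", "org-123")   # hashed, not the email
assert md["user_email"] == "alice@example.com"                          # kept for Langfuse only
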
class LiteLLMProxyApp:
    """FastAPI application for LiteLLM proxy with metadata injection."""

    def __init__(self, litellm_base_url: str, litellm_api_key: str):
        """
        Initialize the proxy application.

        Args:
            litellm_base_url: Base URL of the real LiteLLM proxy
            litellm_api_key: API key for LiteLLM proxy
        """
        self.litellm_base_url = litellm_base_url.rstrip('/')
        self.litellm_api_key = litellm_api_key
        self.client = None  # Will be lazily initialized per request
        self._client_lock = None  # Asyncio lock for thread-safe client creation

        # Create FastAPI app WITHOUT lifespan
        # Reason: httpx clients must be created in the same event loop where they're used
        # When uvicorn runs in a background thread, it has its own event loop
        # Creating the client in a different loop causes ConnectError
        self.app = FastAPI(
            title="Claude Code LiteLLM Proxy",
            description="Local proxy to inject Langfuse metadata for Claude Code SDK",
        )

        # Register routes
        self._register_routes()

    def _register_routes(self):
        """Register all proxy routes."""

        @self.app.get("/health")
        async def health_check():
            """Health check endpoint."""
            return {"status": "healthy", "service": "claude-code-litellm-proxy"}

        @self.app.post("/v1/messages")
        async def proxy_messages(request: Request):
            """
            Proxy endpoint for Anthropic Messages API format.

            This is the main endpoint used by Claude Code SDK.
            We keep the Anthropic format by forwarding to /v1/messages.
            """
            # Keep Anthropic format - forward to /v1/messages
            return await self._proxy_request(request, "/v1/messages")

        @self.app.post("/v1/chat/completions")
        async def proxy_chat_completions(request: Request):
            """
            Proxy endpoint for OpenAI Chat Completions API format.

            Fallback for OpenAI-format requests.
            """
            return await self._proxy_request(request, "/v1/chat/completions")

    async def _get_client(self) -> httpx.AsyncClient:
        """
        Get or create the httpx client in the current event loop.

        This ensures the client is created in the same event loop where it will be used,
        avoiding ConnectError when uvicorn runs in a background thread.

        Returns:
            httpx.AsyncClient instance
        """
        if self.client is None:
            # Initialize lock if needed (must be done in async context)
            if self._client_lock is None:
                self._client_lock = asyncio.Lock()

            async with self._client_lock:
                # Double-check after acquiring lock
                if self.client is None:
                    logger.info(
                        "initializing_httpx_client_in_current_event_loop",
                        litellm_base_url=self.litellm_base_url,
                    )
                    # Create client with explicit settings for reliability
                    # VERY long timeouts to handle long-running streaming LLM operations
                    # For streaming workflows, the read timeout needs to be very generous
                    # since the connection may be open for hours while streaming responses
                    self.client = httpx.AsyncClient(
                        timeout=httpx.Timeout(
                            connect=30.0,  # Connection timeout (reasonable for initial connection)
                            read=86400.0,  # Read timeout (24 hours for long streaming operations)
                            write=300.0,  # Write timeout (5 minutes for large payloads)
                            pool=300.0,  # Pool timeout (5 minutes to avoid pool exhaustion)
                        ),
                        limits=httpx.Limits(
                            max_keepalive_connections=50,  # Increased for better reuse
                            max_connections=200,  # Increased for high concurrency
                        ),
                        follow_redirects=True,
                    )
        return self.client

    async def cleanup(self):
        """Clean up HTTP client resources."""
        if self.client is not None:
            try:
                await self.client.aclose()
                logger.info("httpx_client_closed")
                self.client = None
            except Exception as e:
                logger.error(
                    "httpx_client_close_failed",
                    error=str(e),
                    error_type=type(e).__name__
                )
    async def _proxy_request(self, request: Request, path: str) -> Response:
        """
        Proxy a request to the real LiteLLM proxy with metadata injection.

        Args:
            request: Incoming FastAPI request
            path: API path to forward to

        Returns:
            Response from LiteLLM proxy
        """
        # Get or create client in current event loop
        client = await self._get_client()

        try:
            # Parse request body
            body = await request.json()

            # CRITICAL: Override model if KUBIYA_MODEL_OVERRIDE is set
            # This ensures the explicit model from CLI --model flag takes precedence
            model_override = os.environ.get("KUBIYA_MODEL_OVERRIDE")
            if model_override:
                original_model = body.get("model")
                body["model"] = model_override
                logger.info(
                    "model_override_applied_in_proxy",
                    original_model=original_model,
                    overridden_model=model_override,
                    path=path,
                    note="CLI --model flag or KUBIYA_MODEL env var is active"
                )

            # Model validation: Only validate when using a LOCAL/CUSTOM LiteLLM proxy
            # This prevents "Invalid model name" errors when the configured model doesn't exist
            # on a local proxy. Skip validation for the default Kubiya proxy which supports all models.
            #
            # Validation is enabled when ANY of these conditions are true:
            # - KUBIYA_ENABLE_LOCAL_PROXY is set (using local LiteLLM proxy)
            # - LITELLM_API_BASE is set to a non-default URL (custom proxy)
            # - KUBIYA_FORCE_MODEL_VALIDATION is set (explicit opt-in)
            default_proxy_url = "https://llm-proxy.kubiya.ai"
            is_local_proxy = os.environ.get("KUBIYA_ENABLE_LOCAL_PROXY", "").lower() in ("true", "1", "yes")
            is_custom_proxy = self.litellm_base_url and self.litellm_base_url.rstrip('/') != default_proxy_url
            force_validation = os.environ.get("KUBIYA_FORCE_MODEL_VALIDATION", "").lower() in ("true", "1", "yes")
            should_validate_model = is_local_proxy or is_custom_proxy or force_validation

            requested_model = body.get("model")
            if requested_model and should_validate_model:
                # Fetch available models from upstream (uses cache)
                available_models = await fetch_available_models(
                    self.litellm_base_url,
                    self.litellm_api_key,
                )

                if available_models:
                    resolved_model, used_fallback = validate_and_resolve_model(
                        requested_model,
                        available_models,
                    )

                    if used_fallback:
                        body["model"] = resolved_model
                        logger.warning(
                            "model_resolved_with_fallback",
                            original_model=requested_model,
                            resolved_model=resolved_model,
                            available_models_count=len(available_models),
                            path=path,
                            note="Original model not available, using fallback"
                        )
                else:
                    logger.warning(
                        "skipping_model_validation",
                        model=requested_model,
                        path=path,
                        note="Could not fetch available models from upstream, proceeding without validation"
                    )
            elif requested_model and not should_validate_model:
                logger.debug(
                    "model_validation_skipped",
                    model=requested_model,
                    litellm_base_url=self.litellm_base_url,
                    note="Using default Kubiya proxy, model validation not needed"
                )

            # Extract execution_id from custom header, or use current execution
            execution_id = request.headers.get("X-Execution-ID")

            if not execution_id:
                # Try to get current execution ID
                execution_id = _context_store.get_current_execution_id()

            if not execution_id:
                # Fallback: try to get any valid execution context
                # This handles sub-agent requests when _current_execution was overwritten
                execution_id = _context_store.get_any_valid_execution_id()
                if execution_id:
                    logger.debug(
                        "using_fallback_execution_id",
                        execution_id=execution_id[:8] if execution_id else None,
                        path=path,
                    )

            if not execution_id:
                # Still no execution_id - this is unexpected but not fatal
                # Log at debug level since this may happen during proxy startup/shutdown
                logger.debug(
                    "no_execution_id_available",
                    path=path,
                    note="Cannot inject Langfuse metadata - no execution context found"
                )

            if execution_id:
                # Get execution context and build metadata
                context = _context_store.get_context(execution_id)

                if context:
                    metadata = build_langfuse_metadata(context)

                    # For Anthropic format, we need to be more explicit with Langfuse fields
                    # LiteLLM looks for specific fields in specific places

                    # 1. Set 'user' at top level (works with both formats)
                    body["user"] = metadata.get("trace_user_id")

                    # 2. Initialize metadata dict
                    if "metadata" not in body:
                        body["metadata"] = {}

                    # 3. Put Langfuse fields with explicit naming that LiteLLM recognizes
                    # Based on LiteLLM source, these specific keys are extracted for Langfuse
                    body["metadata"]["generation_name"] = metadata.get("generation_name", "agent-chat")
                    body["metadata"]["trace_name"] = metadata.get("trace_name", "agent-chat")
                    body["metadata"]["trace_id"] = metadata.get("trace_id")
                    body["metadata"]["session_id"] = metadata.get("session_id")
                    body["metadata"]["trace_user_id"] = metadata.get("trace_user_id")
                    body["metadata"]["user_id"] = metadata.get("trace_user_id")

                    # Additional context metadata
                    body["metadata"]["agent_id"] = metadata.get("agent_id")
                    body["metadata"]["agent_name"] = metadata.get("agent_name")
                    body["metadata"]["organization_id"] = metadata.get("organization_id")
                    body["metadata"]["user_email"] = metadata.get("user_email")
                    body["metadata"]["model"] = metadata.get("model")

                    logger.debug(
                        "metadata_injected_into_request",
                        execution_id=execution_id[:8],
                        path=path,
                        user_field=body.get("user"),
                        metadata_keys=list(metadata.keys()),
                        trace_user_id=metadata.get("trace_user_id"),
                        trace_id=metadata.get("trace_id"),
                        session_id=metadata.get("session_id"),
                        trace_name=metadata.get("trace_name"),
                    )
                else:
                    logger.warning(
                        "no_context_found_for_execution",
                        execution_id=execution_id[:8] if execution_id else "unknown",
                        path=path,
                    )

            # Build forwarding URL (keep same endpoint - don't convert formats)
            forward_url = f"{self.litellm_base_url}{path}"

            # Prepare headers
            headers = {
                "Authorization": f"Bearer {self.litellm_api_key}",
                "Content-Type": "application/json",
            }

            # Add Langfuse metadata as custom headers (LiteLLM recognizes these)
            # Can be disabled via KUBIYA_DISABLE_LANGFUSE_HEADERS=true for local proxies
            # that don't support Langfuse or have incompatible versions
            langfuse_headers_enabled = os.environ.get("KUBIYA_DISABLE_LANGFUSE_HEADERS", "").lower() not in ("true", "1", "yes")

            if execution_id and langfuse_headers_enabled:
                context = _context_store.get_context(execution_id)
                if context:
                    metadata = build_langfuse_metadata(context)

                    # LiteLLM extracts Langfuse fields from these custom headers
                    # IMPORTANT: Header values MUST be str, never None
                    # Use `or ""` to handle both missing keys AND keys with None values
                    headers["X-Langfuse-Trace-Id"] = metadata.get("trace_id") or ""
                    headers["X-Langfuse-Session-Id"] = metadata.get("session_id") or ""
                    headers["X-Langfuse-User-Id"] = metadata.get("trace_user_id") or ""
                    headers["X-Langfuse-Trace-Name"] = metadata.get("trace_name") or "agent-chat"

                    # Additional metadata as JSON in custom header
                    extra_metadata = {
                        "agent_id": metadata.get("agent_id"),
                        "agent_name": metadata.get("agent_name"),
                        "organization_id": metadata.get("organization_id"),
                        "user_email": metadata.get("user_email"),
                    }
                    headers["X-Langfuse-Metadata"] = json.dumps(extra_metadata)

                    logger.debug(
                        "langfuse_headers_added",
                        execution_id=execution_id[:8],
                        trace_id=metadata.get("trace_id", ""),
                        session_id=metadata.get("session_id", ""),
                    )
            elif not langfuse_headers_enabled:
                logger.debug(
                    "langfuse_headers_disabled",
                    note="KUBIYA_DISABLE_LANGFUSE_HEADERS is set, skipping Langfuse header injection"
                )

            # Copy relevant headers from original request
            for header in ["X-Request-ID", "User-Agent"]:
                if header.lower() in request.headers:
                    headers[header] = request.headers[header.lower()]

            # Check if streaming is requested
            is_streaming = body.get("stream", False)

            if is_streaming:
                # Handle streaming response
                logger.info(
                    "starting_streaming_request",
                    url=forward_url,
                    model=body.get("model", "unknown"),
                    execution_id=execution_id[:8] if execution_id else "unknown",
                )
                return await self._proxy_streaming_request(client, forward_url, body, headers)
            else:
                # Handle non-streaming response
                response = await client.post(
                    forward_url,
                    json=body,
                    headers=headers,
                )

                logger.debug(
                    "litellm_request_completed",
                    status_code=response.status_code,
                    path=path,
                    execution_id=execution_id[:8] if execution_id else None,
                )

                return Response(
                    content=response.content,
                    status_code=response.status_code,
                    headers=dict(response.headers),
                )

        except httpx.ConnectError as e:
            logger.error(
                "litellm_proxy_connection_error",
                error=str(e),
                error_type=type(e).__name__,
                path=path,
                forward_url=forward_url,
                litellm_base_url=self.litellm_base_url,
                message="Failed to connect to LiteLLM proxy - check network connectivity and URL",
            )
            raise HTTPException(
                status_code=502,
                detail=f"Failed to connect to LiteLLM proxy at {self.litellm_base_url}: {str(e)}"
            )

        except httpx.HTTPError as e:
            logger.error(
                "litellm_proxy_http_error",
                error=str(e),
                error_type=type(e).__name__,
                path=path,
                forward_url=forward_url,
            )
            raise HTTPException(status_code=502, detail=f"Proxy error: {str(e)}")

        except Exception as e:
            logger.error(
                "litellm_proxy_error",
                error=str(e),
                error_type=type(e).__name__,
                path=path,
                exc_info=True,
            )
            raise HTTPException(status_code=500, detail=f"Internal proxy error: {str(e)}")
    async def _proxy_streaming_request(
        self, client: httpx.AsyncClient, url: str, body: Dict[str, Any], headers: Dict[str, str]
    ) -> StreamingResponse:
        """
        Proxy a streaming request to LiteLLM with robust error handling.

        Args:
            client: httpx AsyncClient instance
            url: Forward URL
            body: Request body
            headers: Request headers

        Returns:
            StreamingResponse that forwards chunks from LiteLLM

        Raises:
            HTTPException: On connection or streaming errors
        """
        async def stream_generator():
            """Generator that yields chunks from LiteLLM with error handling."""
            try:
                # Use VERY long timeout for streaming to ensure long operations work
                # Streaming responses can take hours for complex workflows
                stream_timeout = httpx.Timeout(
                    connect=30.0,  # Connection timeout (reasonable for initial connection)
                    read=86400.0,  # Read timeout (24 hours for long streaming operations)
                    write=300.0,  # Write timeout (5 minutes for large payloads)
                    pool=300.0,  # Pool timeout (5 minutes to avoid pool exhaustion)
                )
                async with client.stream(
                    "POST",
                    url,
                    json=body,
                    headers=headers,
                    timeout=stream_timeout,
                ) as response:
                    # Check for HTTP errors before streaming
                    if response.status_code >= 400:
                        error_text = await response.aread()
                        logger.error(
                            "litellm_streaming_http_error",
                            status_code=response.status_code,
                            error=error_text.decode('utf-8', errors='ignore')[:500],
                            url=url,
                        )
                        # Yield error message as SSE event
                        error_msg = f"data: {{\"error\": \"HTTP {response.status_code}: {error_text.decode('utf-8', errors='ignore')[:200]}\"}}\n\n"
                        yield error_msg.encode('utf-8')
                        return

                    # Stream chunks
                    async for chunk in response.aiter_bytes():
                        yield chunk

            except httpx.ConnectError as e:
                logger.error(
                    "litellm_streaming_connection_error",
                    error=str(e),
                    url=url,
                    message="Failed to connect to LiteLLM proxy during streaming",
                )
                # Yield error as SSE event instead of crashing
                error_msg = f"data: {{\"error\": \"Connection failed: {str(e)}\"}}\n\n"
                yield error_msg.encode('utf-8')

            except httpx.TimeoutException as e:
                # Capture detailed timeout info
                error_detail = str(e) or repr(e) or "No error details available"
                logger.error(
                    "litellm_streaming_timeout",
                    error=error_detail,
                    error_type=type(e).__name__,
                    error_args=getattr(e, 'args', []),
                    url=url,
                    model=body.get("model", "unknown"),
                    message="Request timed out during streaming",
                    note="Check network connectivity to LLM proxy or increase timeouts"
                )
                error_msg = f"data: {{\"error\": \"Request timed out ({type(e).__name__}): {error_detail}\"}}\n\n"
                yield error_msg.encode('utf-8')

            except httpx.HTTPError as e:
                logger.error(
                    "litellm_streaming_http_error_general",
                    error=str(e),
                    error_type=type(e).__name__,
                    url=url,
                )
                error_msg = f"data: {{\"error\": \"HTTP error: {str(e)}\"}}\n\n"
                yield error_msg.encode('utf-8')

            except Exception as e:
                logger.error(
                    "litellm_streaming_unexpected_error",
                    error=str(e),
                    error_type=type(e).__name__,
                    url=url,
                    exc_info=True,
                )
                error_msg = f"data: {{\"error\": \"Unexpected error: {str(e)}\"}}\n\n"
                yield error_msg.encode('utf-8')

        return StreamingResponse(
            stream_generator(),
            media_type="text/event-stream",
        )
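
A hedged client-side sketch of calling the routes registered above once a server is running; the port, model name, and execution ID are placeholders, and the X-Execution-ID header is optional because the proxy falls back to the current or most recent stored context.

import httpx

base_url = "http://127.0.0.1:8080"   # placeholder; normally obtained from get_proxy_base_url()
payload = {
    "model": "claude-sonnet-4",      # illustrative model name
    "max_tokens": 64,
    "messages": [{"role": "user", "content": "ping"}],
}
resp = httpx.post(
    f"{base_url}/v1/messages",
    json=payload,
    headers={"X-Execution-ID": "exec-1234abcd"},  # optional: ties the call to a stored context
    timeout=60.0,
)
print(resp.status_code)
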
class LiteLLMProxyServer:
    """Manager for running the LiteLLM proxy server in the same process."""

    def __init__(self, port: int = 0):
        """
        Initialize the proxy server.

        Args:
            port: Port to listen on (0 = auto-assign random port)
        """
        self.port = port
        self.actual_port: Optional[int] = None
        self.server_thread: Optional[threading.Thread] = None
        self.app: Optional[LiteLLMProxyApp] = None
        self._started = threading.Event()
        self._shutdown = threading.Event()

    def start(self) -> int:
        """
        Start the proxy server in a background thread.

        Returns:
            The actual port the server is listening on

        Raises:
            RuntimeError: If server fails to start
        """
        # Get LiteLLM configuration
        litellm_base_url = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
        litellm_api_key = os.getenv("LITELLM_API_KEY")

        # Check for model override
        model_override = os.getenv("KUBIYA_MODEL_OVERRIDE")

        logger.info(
            "litellm_proxy_server_initializing",
            litellm_base_url=litellm_base_url,
            model_override=model_override,
            has_model_override=bool(model_override),
            note="Model override will be applied to ALL requests" if model_override else "No model override active"
        )

        if not litellm_api_key:
            raise RuntimeError("LITELLM_API_KEY not set")

        # Create proxy app
        self.app = LiteLLMProxyApp(litellm_base_url, litellm_api_key)

        # Auto-assign port if needed
        if self.port == 0:
            import socket
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind(('127.0.0.1', 0))
                s.listen(1)
                self.actual_port = s.getsockname()[1]
        else:
            self.actual_port = self.port

        # Start server in background thread
        self.server_thread = threading.Thread(
            target=self._run_server,
            daemon=True,
            name="LiteLLMProxyServer"
        )
        self.server_thread.start()

        # Wait for server to become ready by checking health endpoint
        import time
        import httpx
        max_wait = 10  # seconds
        start_time = time.time()

        while time.time() - start_time < max_wait:
            try:
                # Try to connect to health endpoint
                with httpx.Client(timeout=1.0) as client:
                    response = client.get(f"http://127.0.0.1:{self.actual_port}/health")
                    if response.status_code == 200:
                        self._started.set()
                        logger.info(
                            "litellm_proxy_server_started",
                            port=self.actual_port,
                            url=f"http://127.0.0.1:{self.actual_port}",
                        )
                        return self.actual_port
            except Exception:
                # Server not ready yet, wait and retry
                time.sleep(0.1)
                continue

        # Timeout waiting for server
        raise RuntimeError("LiteLLM proxy server failed to start within 10 seconds")

    def _run_server(self):
        """Run the uvicorn server (called in background thread)."""
        try:
            # Create event loop for this thread
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

            # Create uvicorn config
            config = uvicorn.Config(
                self.app.app,
                host="127.0.0.1",
                port=self.actual_port,
                log_level="error",
                access_log=False,
                loop=loop,
            )
            server = uvicorn.Server(config)

            # Run server
            loop.run_until_complete(server.serve())

        except Exception as e:
            logger.error(
                "litellm_proxy_server_error",
                error=str(e),
                error_type=type(e).__name__,
                exc_info=True,
            )
        finally:
            # Cleanup HTTP client
            if self.app and self.app.client:
                try:
                    loop.run_until_complete(self.app.cleanup())
                except Exception as cleanup_error:
                    logger.error(
                        "proxy_app_cleanup_failed",
                        error=str(cleanup_error)
                    )

            # Close event loop
            try:
                loop.close()
            except Exception as loop_error:
                logger.error("event_loop_close_failed", error=str(loop_error))

            self._shutdown.set()

    def stop(self):
        """Stop the proxy server and cleanup resources."""
        logger.info("stopping_litellm_proxy_server")
        self._shutdown.set()

        # Give server time to shutdown gracefully
        if self.server_thread:
            self.server_thread.join(timeout=10)

            if self.server_thread.is_alive():
                logger.warning(
                    "proxy_server_thread_still_alive",
                    note="Daemon thread will be terminated by Python at exit"
                )
            else:
                logger.info("proxy_server_thread_stopped")

        logger.info("litellm_proxy_server_stopped")

    def get_base_url(self) -> str:
        """Get the base URL of the proxy server."""
        if not self.actual_port:
            raise RuntimeError("Server not started")
        return f"http://127.0.0.1:{self.actual_port}"
# Singleton instance
_proxy_server: Optional[LiteLLMProxyServer] = None
_proxy_lock = threading.Lock()


def get_proxy_server() -> LiteLLMProxyServer:
    """
    Get or create the singleton proxy server instance.

    Returns:
        LiteLLMProxyServer instance
    """
    global _proxy_server

    with _proxy_lock:
        if _proxy_server is None:
            _proxy_server = LiteLLMProxyServer(port=0)  # Auto-assign port
            _proxy_server.start()

        return _proxy_server


def set_execution_context(execution_id: str, context: Dict[str, Any]):
    """
    Store execution context for metadata injection.

    Call this before starting a Claude Code execution.

    Args:
        execution_id: Execution ID
        context: Context dict with user_id, session_id, agent_id, etc.
    """
    _context_store.set_context(execution_id, context)


def clear_execution_context(
    execution_id: str,
    immediate: bool = False,
    delay_seconds: float = 5.0
):
    """
    Clear execution context after execution completes.

    Args:
        execution_id: Execution ID
        immediate: If True, clear immediately. If False, schedule delayed cleanup.
        delay_seconds: Delay before cleanup (only if immediate=False)
    """
    if immediate:
        _context_store.clear_context(execution_id)
    else:
        _cleanup_scheduler.schedule_cleanup(
            execution_id,
            delay_seconds,
            _context_store
        )
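
A short lifecycle sketch for the two module-level helpers above; the IDs and context fields are placeholders.

set_execution_context(
    "exec-1234abcd",
    {
        "user_id": "alice@example.com",
        "organization_id": "org-123",
        "session_id": "sess-42",
        "agent_id": "agent-7",
    },
)
try:
    ...  # run the execution whose LLM calls go through the proxy
finally:
    # Delayed cleanup (default 5s) keeps the context available for trailing
    # sub-agent requests; pass immediate=True to drop it right away.
    clear_execution_context("exec-1234abcd")
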
def get_proxy_base_url() -> str:
    """
    Get the base URL of the local proxy server.

    Starts the server if not already running.

    Returns:
        Base URL (e.g., "http://127.0.0.1:8080")
    """
    server = get_proxy_server()
    return server.get_base_url()
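
A minimal bootstrap sketch using the helper above; the API key is a placeholder and must be real before the first call, which lazily starts the singleton server.

import os

os.environ.setdefault("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
os.environ.setdefault("LITELLM_API_KEY", "sk-placeholder")   # placeholder key
base_url = get_proxy_base_url()   # e.g. "http://127.0.0.1:<auto-assigned-port>"
print(base_url)
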
def list_available_models_sync(timeout: float = 10.0) -> List[str]:
    """
    Synchronously fetch and return available models from upstream LiteLLM proxy.

    This is useful for CLI/debugging to show what models are available.

    Args:
        timeout: Request timeout in seconds

    Returns:
        List of available model IDs
    """
    litellm_base_url = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")
    litellm_api_key = os.getenv("LITELLM_API_KEY")

    if not litellm_api_key:
        logger.warning("cannot_list_models_no_api_key")
        return []

    try:
        with httpx.Client(timeout=timeout) as client:
            response = client.get(
                f"{litellm_base_url.rstrip('/')}/v1/models",
                headers={"Authorization": f"Bearer {litellm_api_key}"},
            )

            if response.status_code == 200:
                data = response.json()
                models = []
                if "data" in data and isinstance(data["data"], list):
                    models = [m.get("id") for m in data["data"] if m.get("id")]

                logger.info(
                    "listed_available_models_sync",
                    model_count=len(models),
                    models=models,
                    litellm_base_url=litellm_base_url,
                )
                return models
            else:
                logger.warning(
                    "failed_to_list_models_sync",
                    status_code=response.status_code,
                    litellm_base_url=litellm_base_url,
                )
                return []

    except Exception as e:
        logger.warning(
            "error_listing_models_sync",
            error=str(e),
            error_type=type(e).__name__,
            litellm_base_url=litellm_base_url,
        )
        return []


def print_available_models():
    """
    Print available models to stdout for debugging.

    Useful for CLI troubleshooting when model errors occur.
    """
    models = list_available_models_sync()
    litellm_base_url = os.getenv("LITELLM_API_BASE", "https://llm-proxy.kubiya.ai")

    print(f"\n{'='*60}")
    print(f"Available Models from LiteLLM Proxy")
    print(f"Proxy URL: {litellm_base_url}")
    print(f"{'='*60}")

    if models:
        for i, model in enumerate(models, 1):
            print(f" {i}. {model}")
    else:
        print(" No models available or failed to fetch models.")
        print(" Check LITELLM_API_BASE and LITELLM_API_KEY environment variables.")

    print(f"{'='*60}\n")
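
For troubleshooting, the two helpers above can be called directly; a hedged sketch, with the timeout value chosen arbitrarily.

models = list_available_models_sync(timeout=5.0)
print(f"{len(models)} models available from the upstream proxy")
print_available_models()   # same list, printed with the proxy URL banner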