ccproxy-api 0.1.6-py3-none-any.whl → 0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
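For context on how a listing like the one below can be produced: a wheel is an ordinary zip archive, so two released versions can be compared locally with nothing but the Python standard library. The sketch below is an approximation, not the registry's own tooling; it assumes you have already downloaded both wheels (for example with pip download ccproxy-api==0.1.6 --no-deps) into the current directory, and its per-file +added/-removed counts may differ slightly from the numbers shown here (a rename, for instance, appears as one full removal plus one full addition rather than a {old → new} entry).

# Sketch: approximate the per-file change counts for two locally downloaded
# wheels. Wheel archives are plain zip files; binary members are skipped.
import difflib
import zipfile

OLD = "ccproxy_api-0.1.6-py3-none-any.whl"  # assumed local filenames
NEW = "ccproxy_api-0.2.0-py3-none-any.whl"

def read_wheel(path: str) -> dict[str, list[str]]:
    """Map each text member of the archive to its lines."""
    files: dict[str, list[str]] = {}
    with zipfile.ZipFile(path) as zf:
        for name in zf.namelist():
            try:
                files[name] = zf.read(name).decode("utf-8").splitlines()
            except UnicodeDecodeError:
                pass  # skip non-text members in this sketch
    return files

old, new = read_wheel(OLD), read_wheel(NEW)
for name in sorted(old.keys() | new.keys()):
    added = removed = 0
    for line in difflib.unified_diff(
        old.get(name, []), new.get(name, []), lineterm=""
    ):
        if line.startswith("+") and not line.startswith("+++"):
            added += 1
        elif line.startswith("-") and not line.startswith("---"):
            removed += 1
    if added or removed:
        print(f"{name} +{added} -{removed}")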
- ccproxy/api/__init__.py +1 -15
- ccproxy/api/app.py +439 -212
- ccproxy/api/bootstrap.py +30 -0
- ccproxy/api/decorators.py +85 -0
- ccproxy/api/dependencies.py +145 -176
- ccproxy/api/format_validation.py +54 -0
- ccproxy/api/middleware/cors.py +6 -3
- ccproxy/api/middleware/errors.py +402 -530
- ccproxy/api/middleware/hooks.py +563 -0
- ccproxy/api/middleware/normalize_headers.py +59 -0
- ccproxy/api/middleware/request_id.py +35 -16
- ccproxy/api/middleware/streaming_hooks.py +292 -0
- ccproxy/api/routes/__init__.py +5 -14
- ccproxy/api/routes/health.py +39 -672
- ccproxy/api/routes/plugins.py +277 -0
- ccproxy/auth/__init__.py +2 -19
- ccproxy/auth/bearer.py +25 -15
- ccproxy/auth/dependencies.py +123 -157
- ccproxy/auth/exceptions.py +0 -12
- ccproxy/auth/manager.py +35 -49
- ccproxy/auth/managers/__init__.py +10 -0
- ccproxy/auth/managers/base.py +523 -0
- ccproxy/auth/managers/base_enhanced.py +63 -0
- ccproxy/auth/managers/token_snapshot.py +77 -0
- ccproxy/auth/models/base.py +65 -0
- ccproxy/auth/models/credentials.py +40 -0
- ccproxy/auth/oauth/__init__.py +4 -18
- ccproxy/auth/oauth/base.py +533 -0
- ccproxy/auth/oauth/cli_errors.py +37 -0
- ccproxy/auth/oauth/flows.py +430 -0
- ccproxy/auth/oauth/protocol.py +366 -0
- ccproxy/auth/oauth/registry.py +408 -0
- ccproxy/auth/oauth/router.py +396 -0
- ccproxy/auth/oauth/routes.py +186 -113
- ccproxy/auth/oauth/session.py +151 -0
- ccproxy/auth/oauth/templates.py +342 -0
- ccproxy/auth/storage/__init__.py +2 -5
- ccproxy/auth/storage/base.py +279 -5
- ccproxy/auth/storage/generic.py +134 -0
- ccproxy/cli/__init__.py +1 -2
- ccproxy/cli/_settings_help.py +351 -0
- ccproxy/cli/commands/auth.py +1519 -793
- ccproxy/cli/commands/config/commands.py +209 -276
- ccproxy/cli/commands/plugins.py +669 -0
- ccproxy/cli/commands/serve.py +75 -810
- ccproxy/cli/commands/status.py +254 -0
- ccproxy/cli/decorators.py +83 -0
- ccproxy/cli/helpers.py +22 -60
- ccproxy/cli/main.py +359 -10
- ccproxy/cli/options/claude_options.py +0 -25
- ccproxy/config/__init__.py +7 -11
- ccproxy/config/core.py +227 -0
- ccproxy/config/env_generator.py +232 -0
- ccproxy/config/runtime.py +67 -0
- ccproxy/config/security.py +36 -3
- ccproxy/config/settings.py +382 -441
- ccproxy/config/toml_generator.py +299 -0
- ccproxy/config/utils.py +452 -0
- ccproxy/core/__init__.py +7 -271
- ccproxy/{_version.py → core/_version.py} +16 -3
- ccproxy/core/async_task_manager.py +516 -0
- ccproxy/core/async_utils.py +47 -14
- ccproxy/core/auth/__init__.py +6 -0
- ccproxy/core/constants.py +16 -50
- ccproxy/core/errors.py +53 -0
- ccproxy/core/id_utils.py +20 -0
- ccproxy/core/interfaces.py +16 -123
- ccproxy/core/logging.py +473 -18
- ccproxy/core/plugins/__init__.py +77 -0
- ccproxy/core/plugins/cli_discovery.py +211 -0
- ccproxy/core/plugins/declaration.py +455 -0
- ccproxy/core/plugins/discovery.py +604 -0
- ccproxy/core/plugins/factories.py +967 -0
- ccproxy/core/plugins/hooks/__init__.py +30 -0
- ccproxy/core/plugins/hooks/base.py +58 -0
- ccproxy/core/plugins/hooks/events.py +46 -0
- ccproxy/core/plugins/hooks/implementations/__init__.py +16 -0
- ccproxy/core/plugins/hooks/implementations/formatters/__init__.py +11 -0
- ccproxy/core/plugins/hooks/implementations/formatters/json.py +552 -0
- ccproxy/core/plugins/hooks/implementations/formatters/raw.py +370 -0
- ccproxy/core/plugins/hooks/implementations/http_tracer.py +431 -0
- ccproxy/core/plugins/hooks/layers.py +44 -0
- ccproxy/core/plugins/hooks/manager.py +186 -0
- ccproxy/core/plugins/hooks/registry.py +139 -0
- ccproxy/core/plugins/hooks/thread_manager.py +203 -0
- ccproxy/core/plugins/hooks/types.py +22 -0
- ccproxy/core/plugins/interfaces.py +416 -0
- ccproxy/core/plugins/loader.py +166 -0
- ccproxy/core/plugins/middleware.py +233 -0
- ccproxy/core/plugins/models.py +59 -0
- ccproxy/core/plugins/protocol.py +180 -0
- ccproxy/core/plugins/runtime.py +519 -0
- ccproxy/{observability/context.py → core/request_context.py} +137 -94
- ccproxy/core/status_report.py +211 -0
- ccproxy/core/transformers.py +13 -8
- ccproxy/data/claude_headers_fallback.json +558 -0
- ccproxy/data/codex_headers_fallback.json +121 -0
- ccproxy/http/__init__.py +30 -0
- ccproxy/http/base.py +95 -0
- ccproxy/http/client.py +323 -0
- ccproxy/http/hooks.py +642 -0
- ccproxy/http/pool.py +279 -0
- ccproxy/llms/formatters/__init__.py +7 -0
- ccproxy/llms/formatters/anthropic_to_openai/__init__.py +55 -0
- ccproxy/llms/formatters/anthropic_to_openai/errors.py +65 -0
- ccproxy/llms/formatters/anthropic_to_openai/requests.py +356 -0
- ccproxy/llms/formatters/anthropic_to_openai/responses.py +153 -0
- ccproxy/llms/formatters/anthropic_to_openai/streams.py +1546 -0
- ccproxy/llms/formatters/base.py +140 -0
- ccproxy/llms/formatters/base_model.py +33 -0
- ccproxy/llms/formatters/common/__init__.py +51 -0
- ccproxy/llms/formatters/common/identifiers.py +48 -0
- ccproxy/llms/formatters/common/streams.py +254 -0
- ccproxy/llms/formatters/common/thinking.py +74 -0
- ccproxy/llms/formatters/common/usage.py +135 -0
- ccproxy/llms/formatters/constants.py +55 -0
- ccproxy/llms/formatters/context.py +116 -0
- ccproxy/llms/formatters/mapping.py +33 -0
- ccproxy/llms/formatters/openai_to_anthropic/__init__.py +55 -0
- ccproxy/llms/formatters/openai_to_anthropic/_helpers.py +141 -0
- ccproxy/llms/formatters/openai_to_anthropic/errors.py +53 -0
- ccproxy/llms/formatters/openai_to_anthropic/requests.py +674 -0
- ccproxy/llms/formatters/openai_to_anthropic/responses.py +285 -0
- ccproxy/llms/formatters/openai_to_anthropic/streams.py +530 -0
- ccproxy/llms/formatters/openai_to_openai/__init__.py +53 -0
- ccproxy/llms/formatters/openai_to_openai/_helpers.py +325 -0
- ccproxy/llms/formatters/openai_to_openai/errors.py +6 -0
- ccproxy/llms/formatters/openai_to_openai/requests.py +388 -0
- ccproxy/llms/formatters/openai_to_openai/responses.py +594 -0
- ccproxy/llms/formatters/openai_to_openai/streams.py +1832 -0
- ccproxy/llms/formatters/utils.py +306 -0
- ccproxy/llms/models/__init__.py +9 -0
- ccproxy/llms/models/anthropic.py +619 -0
- ccproxy/llms/models/openai.py +844 -0
- ccproxy/llms/streaming/__init__.py +26 -0
- ccproxy/llms/streaming/accumulators.py +1074 -0
- ccproxy/llms/streaming/formatters.py +251 -0
- ccproxy/{adapters/openai/streaming.py → llms/streaming/processors.py} +193 -240
- ccproxy/models/__init__.py +8 -159
- ccproxy/models/detection.py +92 -193
- ccproxy/models/provider.py +75 -0
- ccproxy/plugins/access_log/README.md +32 -0
- ccproxy/plugins/access_log/__init__.py +20 -0
- ccproxy/plugins/access_log/config.py +33 -0
- ccproxy/plugins/access_log/formatter.py +126 -0
- ccproxy/plugins/access_log/hook.py +763 -0
- ccproxy/plugins/access_log/logger.py +254 -0
- ccproxy/plugins/access_log/plugin.py +137 -0
- ccproxy/plugins/access_log/writer.py +109 -0
- ccproxy/plugins/analytics/README.md +24 -0
- ccproxy/plugins/analytics/__init__.py +1 -0
- ccproxy/plugins/analytics/config.py +5 -0
- ccproxy/plugins/analytics/ingest.py +85 -0
- ccproxy/plugins/analytics/models.py +97 -0
- ccproxy/plugins/analytics/plugin.py +121 -0
- ccproxy/plugins/analytics/routes.py +163 -0
- ccproxy/plugins/analytics/service.py +284 -0
- ccproxy/plugins/claude_api/README.md +29 -0
- ccproxy/plugins/claude_api/__init__.py +10 -0
- ccproxy/plugins/claude_api/adapter.py +829 -0
- ccproxy/plugins/claude_api/config.py +52 -0
- ccproxy/plugins/claude_api/detection_service.py +461 -0
- ccproxy/plugins/claude_api/health.py +175 -0
- ccproxy/plugins/claude_api/hooks.py +284 -0
- ccproxy/plugins/claude_api/models.py +256 -0
- ccproxy/plugins/claude_api/plugin.py +298 -0
- ccproxy/plugins/claude_api/routes.py +118 -0
- ccproxy/plugins/claude_api/streaming_metrics.py +68 -0
- ccproxy/plugins/claude_api/tasks.py +84 -0
- ccproxy/plugins/claude_sdk/README.md +35 -0
- ccproxy/plugins/claude_sdk/__init__.py +80 -0
- ccproxy/plugins/claude_sdk/adapter.py +749 -0
- ccproxy/plugins/claude_sdk/auth.py +57 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/client.py +63 -39
- ccproxy/plugins/claude_sdk/config.py +210 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/converter.py +6 -6
- ccproxy/plugins/claude_sdk/detection_service.py +163 -0
- ccproxy/{services/claude_sdk_service.py → plugins/claude_sdk/handler.py} +123 -304
- ccproxy/plugins/claude_sdk/health.py +113 -0
- ccproxy/plugins/claude_sdk/hooks.py +115 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/manager.py +42 -32
- ccproxy/{claude_sdk → plugins/claude_sdk}/message_queue.py +8 -8
- ccproxy/{models/claude_sdk.py → plugins/claude_sdk/models.py} +64 -16
- ccproxy/plugins/claude_sdk/options.py +154 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/parser.py +23 -5
- ccproxy/plugins/claude_sdk/plugin.py +269 -0
- ccproxy/plugins/claude_sdk/routes.py +104 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/session_client.py +124 -12
- ccproxy/plugins/claude_sdk/session_pool.py +700 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/stream_handle.py +48 -43
- ccproxy/{claude_sdk → plugins/claude_sdk}/stream_worker.py +22 -18
- ccproxy/{claude_sdk → plugins/claude_sdk}/streaming.py +50 -16
- ccproxy/plugins/claude_sdk/tasks.py +97 -0
- ccproxy/plugins/claude_shared/README.md +18 -0
- ccproxy/plugins/claude_shared/__init__.py +12 -0
- ccproxy/plugins/claude_shared/model_defaults.py +171 -0
- ccproxy/plugins/codex/README.md +35 -0
- ccproxy/plugins/codex/__init__.py +6 -0
- ccproxy/plugins/codex/adapter.py +635 -0
- ccproxy/{config/codex.py → plugins/codex/config.py} +78 -12
- ccproxy/plugins/codex/detection_service.py +544 -0
- ccproxy/plugins/codex/health.py +162 -0
- ccproxy/plugins/codex/hooks.py +263 -0
- ccproxy/plugins/codex/model_defaults.py +39 -0
- ccproxy/plugins/codex/models.py +263 -0
- ccproxy/plugins/codex/plugin.py +275 -0
- ccproxy/plugins/codex/routes.py +129 -0
- ccproxy/plugins/codex/streaming_metrics.py +324 -0
- ccproxy/plugins/codex/tasks.py +106 -0
- ccproxy/plugins/codex/utils/__init__.py +1 -0
- ccproxy/plugins/codex/utils/sse_parser.py +106 -0
- ccproxy/plugins/command_replay/README.md +34 -0
- ccproxy/plugins/command_replay/__init__.py +17 -0
- ccproxy/plugins/command_replay/config.py +133 -0
- ccproxy/plugins/command_replay/formatter.py +432 -0
- ccproxy/plugins/command_replay/hook.py +294 -0
- ccproxy/plugins/command_replay/plugin.py +161 -0
- ccproxy/plugins/copilot/README.md +39 -0
- ccproxy/plugins/copilot/__init__.py +11 -0
- ccproxy/plugins/copilot/adapter.py +465 -0
- ccproxy/plugins/copilot/config.py +155 -0
- ccproxy/plugins/copilot/data/copilot_fallback.json +41 -0
- ccproxy/plugins/copilot/detection_service.py +255 -0
- ccproxy/plugins/copilot/manager.py +275 -0
- ccproxy/plugins/copilot/model_defaults.py +284 -0
- ccproxy/plugins/copilot/models.py +148 -0
- ccproxy/plugins/copilot/oauth/__init__.py +16 -0
- ccproxy/plugins/copilot/oauth/client.py +494 -0
- ccproxy/plugins/copilot/oauth/models.py +385 -0
- ccproxy/plugins/copilot/oauth/provider.py +602 -0
- ccproxy/plugins/copilot/oauth/storage.py +170 -0
- ccproxy/plugins/copilot/plugin.py +360 -0
- ccproxy/plugins/copilot/routes.py +294 -0
- ccproxy/plugins/credential_balancer/README.md +124 -0
- ccproxy/plugins/credential_balancer/__init__.py +6 -0
- ccproxy/plugins/credential_balancer/config.py +270 -0
- ccproxy/plugins/credential_balancer/factory.py +415 -0
- ccproxy/plugins/credential_balancer/hook.py +51 -0
- ccproxy/plugins/credential_balancer/manager.py +587 -0
- ccproxy/plugins/credential_balancer/plugin.py +146 -0
- ccproxy/plugins/dashboard/README.md +25 -0
- ccproxy/plugins/dashboard/__init__.py +1 -0
- ccproxy/plugins/dashboard/config.py +8 -0
- ccproxy/plugins/dashboard/plugin.py +71 -0
- ccproxy/plugins/dashboard/routes.py +67 -0
- ccproxy/plugins/docker/README.md +32 -0
- ccproxy/{docker → plugins/docker}/__init__.py +3 -0
- ccproxy/{docker → plugins/docker}/adapter.py +108 -10
- ccproxy/plugins/docker/config.py +82 -0
- ccproxy/{docker → plugins/docker}/docker_path.py +4 -3
- ccproxy/{docker → plugins/docker}/middleware.py +2 -2
- ccproxy/plugins/docker/plugin.py +198 -0
- ccproxy/{docker → plugins/docker}/stream_process.py +3 -3
- ccproxy/plugins/duckdb_storage/README.md +26 -0
- ccproxy/plugins/duckdb_storage/__init__.py +1 -0
- ccproxy/plugins/duckdb_storage/config.py +22 -0
- ccproxy/plugins/duckdb_storage/plugin.py +128 -0
- ccproxy/plugins/duckdb_storage/routes.py +51 -0
- ccproxy/plugins/duckdb_storage/storage.py +633 -0
- ccproxy/plugins/max_tokens/README.md +38 -0
- ccproxy/plugins/max_tokens/__init__.py +12 -0
- ccproxy/plugins/max_tokens/adapter.py +235 -0
- ccproxy/plugins/max_tokens/config.py +86 -0
- ccproxy/plugins/max_tokens/models.py +53 -0
- ccproxy/plugins/max_tokens/plugin.py +200 -0
- ccproxy/plugins/max_tokens/service.py +271 -0
- ccproxy/plugins/max_tokens/token_limits.json +54 -0
- ccproxy/plugins/metrics/README.md +35 -0
- ccproxy/plugins/metrics/__init__.py +10 -0
- ccproxy/{observability/metrics.py → plugins/metrics/collector.py} +20 -153
- ccproxy/plugins/metrics/config.py +85 -0
- ccproxy/plugins/metrics/grafana/dashboards/ccproxy-dashboard.json +1720 -0
- ccproxy/plugins/metrics/hook.py +403 -0
- ccproxy/plugins/metrics/plugin.py +268 -0
- ccproxy/{observability → plugins/metrics}/pushgateway.py +57 -59
- ccproxy/plugins/metrics/routes.py +107 -0
- ccproxy/plugins/metrics/tasks.py +117 -0
- ccproxy/plugins/oauth_claude/README.md +35 -0
- ccproxy/plugins/oauth_claude/__init__.py +14 -0
- ccproxy/plugins/oauth_claude/client.py +270 -0
- ccproxy/plugins/oauth_claude/config.py +84 -0
- ccproxy/plugins/oauth_claude/manager.py +482 -0
- ccproxy/plugins/oauth_claude/models.py +266 -0
- ccproxy/plugins/oauth_claude/plugin.py +149 -0
- ccproxy/plugins/oauth_claude/provider.py +571 -0
- ccproxy/plugins/oauth_claude/storage.py +212 -0
- ccproxy/plugins/oauth_codex/README.md +38 -0
- ccproxy/plugins/oauth_codex/__init__.py +14 -0
- ccproxy/plugins/oauth_codex/client.py +224 -0
- ccproxy/plugins/oauth_codex/config.py +95 -0
- ccproxy/plugins/oauth_codex/manager.py +256 -0
- ccproxy/plugins/oauth_codex/models.py +239 -0
- ccproxy/plugins/oauth_codex/plugin.py +146 -0
- ccproxy/plugins/oauth_codex/provider.py +574 -0
- ccproxy/plugins/oauth_codex/storage.py +92 -0
- ccproxy/plugins/permissions/README.md +28 -0
- ccproxy/plugins/permissions/__init__.py +22 -0
- ccproxy/plugins/permissions/config.py +28 -0
- ccproxy/{cli/commands/permission_handler.py → plugins/permissions/handlers/cli.py} +49 -25
- ccproxy/plugins/permissions/handlers/protocol.py +33 -0
- ccproxy/plugins/permissions/handlers/terminal.py +675 -0
- ccproxy/{api/routes → plugins/permissions}/mcp.py +34 -7
- ccproxy/{models/permissions.py → plugins/permissions/models.py} +65 -1
- ccproxy/plugins/permissions/plugin.py +153 -0
- ccproxy/{api/routes/permissions.py → plugins/permissions/routes.py} +20 -16
- ccproxy/{api/services/permission_service.py → plugins/permissions/service.py} +65 -11
- ccproxy/{api → plugins/permissions}/ui/permission_handler_protocol.py +1 -1
- ccproxy/{api → plugins/permissions}/ui/terminal_permission_handler.py +66 -10
- ccproxy/plugins/pricing/README.md +34 -0
- ccproxy/plugins/pricing/__init__.py +6 -0
- ccproxy/{pricing → plugins/pricing}/cache.py +7 -6
- ccproxy/{config/pricing.py → plugins/pricing/config.py} +32 -6
- ccproxy/plugins/pricing/exceptions.py +35 -0
- ccproxy/plugins/pricing/loader.py +440 -0
- ccproxy/{pricing → plugins/pricing}/models.py +13 -23
- ccproxy/plugins/pricing/plugin.py +169 -0
- ccproxy/plugins/pricing/service.py +191 -0
- ccproxy/plugins/pricing/tasks.py +300 -0
- ccproxy/{pricing → plugins/pricing}/updater.py +86 -72
- ccproxy/plugins/pricing/utils.py +99 -0
- ccproxy/plugins/request_tracer/README.md +40 -0
- ccproxy/plugins/request_tracer/__init__.py +7 -0
- ccproxy/plugins/request_tracer/config.py +120 -0
- ccproxy/plugins/request_tracer/hook.py +415 -0
- ccproxy/plugins/request_tracer/plugin.py +255 -0
- ccproxy/scheduler/__init__.py +2 -14
- ccproxy/scheduler/core.py +26 -41
- ccproxy/scheduler/manager.py +63 -107
- ccproxy/scheduler/registry.py +6 -32
- ccproxy/scheduler/tasks.py +346 -314
- ccproxy/services/__init__.py +0 -1
- ccproxy/services/adapters/__init__.py +11 -0
- ccproxy/services/adapters/base.py +123 -0
- ccproxy/services/adapters/chain_composer.py +88 -0
- ccproxy/services/adapters/chain_validation.py +44 -0
- ccproxy/services/adapters/chat_accumulator.py +200 -0
- ccproxy/services/adapters/delta_utils.py +142 -0
- ccproxy/services/adapters/format_adapter.py +136 -0
- ccproxy/services/adapters/format_context.py +11 -0
- ccproxy/services/adapters/format_registry.py +158 -0
- ccproxy/services/adapters/http_adapter.py +1045 -0
- ccproxy/services/adapters/mock_adapter.py +118 -0
- ccproxy/services/adapters/protocols.py +35 -0
- ccproxy/services/adapters/simple_converters.py +571 -0
- ccproxy/services/auth_registry.py +180 -0
- ccproxy/services/cache/__init__.py +6 -0
- ccproxy/services/cache/response_cache.py +261 -0
- ccproxy/services/cli_detection.py +437 -0
- ccproxy/services/config/__init__.py +6 -0
- ccproxy/services/config/proxy_configuration.py +111 -0
- ccproxy/services/container.py +256 -0
- ccproxy/services/factories.py +380 -0
- ccproxy/services/handler_config.py +76 -0
- ccproxy/services/interfaces.py +298 -0
- ccproxy/services/mocking/__init__.py +6 -0
- ccproxy/services/mocking/mock_handler.py +291 -0
- ccproxy/services/tracing/__init__.py +7 -0
- ccproxy/services/tracing/interfaces.py +61 -0
- ccproxy/services/tracing/null_tracer.py +57 -0
- ccproxy/streaming/__init__.py +23 -0
- ccproxy/streaming/buffer.py +1056 -0
- ccproxy/streaming/deferred.py +897 -0
- ccproxy/streaming/handler.py +117 -0
- ccproxy/streaming/interfaces.py +77 -0
- ccproxy/streaming/simple_adapter.py +39 -0
- ccproxy/streaming/sse.py +109 -0
- ccproxy/streaming/sse_parser.py +127 -0
- ccproxy/templates/__init__.py +6 -0
- ccproxy/templates/plugin_scaffold.py +695 -0
- ccproxy/testing/endpoints/__init__.py +33 -0
- ccproxy/testing/endpoints/cli.py +215 -0
- ccproxy/testing/endpoints/config.py +874 -0
- ccproxy/testing/endpoints/console.py +57 -0
- ccproxy/testing/endpoints/models.py +100 -0
- ccproxy/testing/endpoints/runner.py +1903 -0
- ccproxy/testing/endpoints/tools.py +308 -0
- ccproxy/testing/mock_responses.py +70 -1
- ccproxy/testing/response_handlers.py +20 -0
- ccproxy/utils/__init__.py +0 -6
- ccproxy/utils/binary_resolver.py +476 -0
- ccproxy/utils/caching.py +327 -0
- ccproxy/utils/cli_logging.py +101 -0
- ccproxy/utils/command_line.py +251 -0
- ccproxy/utils/headers.py +228 -0
- ccproxy/utils/model_mapper.py +120 -0
- ccproxy/utils/startup_helpers.py +95 -342
- ccproxy/utils/version_checker.py +279 -6
- ccproxy_api-0.2.0.dist-info/METADATA +212 -0
- ccproxy_api-0.2.0.dist-info/RECORD +417 -0
- {ccproxy_api-0.1.6.dist-info → ccproxy_api-0.2.0.dist-info}/WHEEL +1 -1
- ccproxy_api-0.2.0.dist-info/entry_points.txt +24 -0
- ccproxy/__init__.py +0 -4
- ccproxy/adapters/__init__.py +0 -11
- ccproxy/adapters/base.py +0 -80
- ccproxy/adapters/codex/__init__.py +0 -11
- ccproxy/adapters/openai/__init__.py +0 -42
- ccproxy/adapters/openai/adapter.py +0 -953
- ccproxy/adapters/openai/models.py +0 -412
- ccproxy/adapters/openai/response_adapter.py +0 -355
- ccproxy/adapters/openai/response_models.py +0 -178
- ccproxy/api/middleware/headers.py +0 -49
- ccproxy/api/middleware/logging.py +0 -180
- ccproxy/api/middleware/request_content_logging.py +0 -297
- ccproxy/api/middleware/server_header.py +0 -58
- ccproxy/api/responses.py +0 -89
- ccproxy/api/routes/claude.py +0 -371
- ccproxy/api/routes/codex.py +0 -1231
- ccproxy/api/routes/metrics.py +0 -1029
- ccproxy/api/routes/proxy.py +0 -211
- ccproxy/api/services/__init__.py +0 -6
- ccproxy/auth/conditional.py +0 -84
- ccproxy/auth/credentials_adapter.py +0 -93
- ccproxy/auth/models.py +0 -118
- ccproxy/auth/oauth/models.py +0 -48
- ccproxy/auth/openai/__init__.py +0 -13
- ccproxy/auth/openai/credentials.py +0 -166
- ccproxy/auth/openai/oauth_client.py +0 -334
- ccproxy/auth/openai/storage.py +0 -184
- ccproxy/auth/storage/json_file.py +0 -158
- ccproxy/auth/storage/keyring.py +0 -189
- ccproxy/claude_sdk/__init__.py +0 -18
- ccproxy/claude_sdk/options.py +0 -194
- ccproxy/claude_sdk/session_pool.py +0 -550
- ccproxy/cli/docker/__init__.py +0 -34
- ccproxy/cli/docker/adapter_factory.py +0 -157
- ccproxy/cli/docker/params.py +0 -274
- ccproxy/config/auth.py +0 -153
- ccproxy/config/claude.py +0 -348
- ccproxy/config/cors.py +0 -79
- ccproxy/config/discovery.py +0 -95
- ccproxy/config/docker_settings.py +0 -264
- ccproxy/config/observability.py +0 -158
- ccproxy/config/reverse_proxy.py +0 -31
- ccproxy/config/scheduler.py +0 -108
- ccproxy/config/server.py +0 -86
- ccproxy/config/validators.py +0 -231
- ccproxy/core/codex_transformers.py +0 -389
- ccproxy/core/http.py +0 -328
- ccproxy/core/http_transformers.py +0 -812
- ccproxy/core/proxy.py +0 -143
- ccproxy/core/validators.py +0 -288
- ccproxy/models/errors.py +0 -42
- ccproxy/models/messages.py +0 -269
- ccproxy/models/requests.py +0 -107
- ccproxy/models/responses.py +0 -270
- ccproxy/models/types.py +0 -102
- ccproxy/observability/__init__.py +0 -51
- ccproxy/observability/access_logger.py +0 -457
- ccproxy/observability/sse_events.py +0 -303
- ccproxy/observability/stats_printer.py +0 -753
- ccproxy/observability/storage/__init__.py +0 -1
- ccproxy/observability/storage/duckdb_simple.py +0 -677
- ccproxy/observability/storage/models.py +0 -70
- ccproxy/observability/streaming_response.py +0 -107
- ccproxy/pricing/__init__.py +0 -19
- ccproxy/pricing/loader.py +0 -251
- ccproxy/services/claude_detection_service.py +0 -269
- ccproxy/services/codex_detection_service.py +0 -263
- ccproxy/services/credentials/__init__.py +0 -55
- ccproxy/services/credentials/config.py +0 -105
- ccproxy/services/credentials/manager.py +0 -561
- ccproxy/services/credentials/oauth_client.py +0 -481
- ccproxy/services/proxy_service.py +0 -1827
- ccproxy/static/.keep +0 -0
- ccproxy/utils/cost_calculator.py +0 -210
- ccproxy/utils/disconnection_monitor.py +0 -83
- ccproxy/utils/model_mapping.py +0 -199
- ccproxy/utils/models_provider.py +0 -150
- ccproxy/utils/simple_request_logger.py +0 -284
- ccproxy/utils/streaming_metrics.py +0 -199
- ccproxy_api-0.1.6.dist-info/METADATA +0 -615
- ccproxy_api-0.1.6.dist-info/RECORD +0 -189
- ccproxy_api-0.1.6.dist-info/entry_points.txt +0 -4
- /ccproxy/{api/middleware/auth.py → auth/models/__init__.py} +0 -0
- /ccproxy/{claude_sdk → plugins/claude_sdk}/exceptions.py +0 -0
- /ccproxy/{docker → plugins/docker}/models.py +0 -0
- /ccproxy/{docker → plugins/docker}/protocol.py +0 -0
- /ccproxy/{docker → plugins/docker}/validators.py +0 -0
- /ccproxy/{auth/oauth/storage.py → plugins/permissions/handlers/__init__.py} +0 -0
- /ccproxy/{api → plugins/permissions}/ui/__init__.py +0 -0
- {ccproxy_api-0.1.6.dist-info → ccproxy_api-0.2.0.dist-info}/licenses/LICENSE +0 -0
--- ccproxy/services/proxy_service.py
+++ /dev/null
@@ -1,1827 +0,0 @@
|
|
|
1
|
-
"""Proxy service for orchestrating Claude API requests with business logic."""
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import json
|
|
5
|
-
import os
|
|
6
|
-
import random
|
|
7
|
-
import time
|
|
8
|
-
from collections.abc import AsyncGenerator
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from typing import TYPE_CHECKING, Any
|
|
11
|
-
|
|
12
|
-
import httpx
|
|
13
|
-
import structlog
|
|
14
|
-
from fastapi import HTTPException, Request
|
|
15
|
-
from fastapi.responses import StreamingResponse
|
|
16
|
-
from starlette.responses import Response
|
|
17
|
-
from typing_extensions import TypedDict
|
|
18
|
-
|
|
19
|
-
from ccproxy.config.settings import Settings
|
|
20
|
-
from ccproxy.core.codex_transformers import CodexRequestTransformer
|
|
21
|
-
from ccproxy.core.http import BaseProxyClient
|
|
22
|
-
from ccproxy.core.http_transformers import (
|
|
23
|
-
HTTPRequestTransformer,
|
|
24
|
-
HTTPResponseTransformer,
|
|
25
|
-
)
|
|
26
|
-
from ccproxy.observability import (
|
|
27
|
-
PrometheusMetrics,
|
|
28
|
-
get_metrics,
|
|
29
|
-
request_context,
|
|
30
|
-
timed_operation,
|
|
31
|
-
)
|
|
32
|
-
from ccproxy.observability.access_logger import log_request_access
|
|
33
|
-
from ccproxy.observability.streaming_response import StreamingResponseWithLogging
|
|
34
|
-
from ccproxy.services.credentials.manager import CredentialsManager
|
|
35
|
-
from ccproxy.testing import RealisticMockResponseGenerator
|
|
36
|
-
from ccproxy.utils.simple_request_logger import (
|
|
37
|
-
append_streaming_log,
|
|
38
|
-
write_request_log,
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
if TYPE_CHECKING:
|
|
43
|
-
from ccproxy.observability.context import RequestContext
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class RequestData(TypedDict):
|
|
47
|
-
"""Typed structure for transformed request data."""
|
|
48
|
-
|
|
49
|
-
method: str
|
|
50
|
-
url: str
|
|
51
|
-
headers: dict[str, str]
|
|
52
|
-
body: bytes | None
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class ResponseData(TypedDict):
|
|
56
|
-
"""Typed structure for transformed response data."""
|
|
57
|
-
|
|
58
|
-
status_code: int
|
|
59
|
-
headers: dict[str, str]
|
|
60
|
-
body: bytes
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
logger = structlog.get_logger(__name__)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
class ProxyService:
|
|
67
|
-
"""Claude-specific proxy orchestration with business logic.
|
|
68
|
-
|
|
69
|
-
This service orchestrates the complete proxy flow including:
|
|
70
|
-
- Authentication management
|
|
71
|
-
- Request/response transformations
|
|
72
|
-
- Metrics collection (future)
|
|
73
|
-
- Error handling and logging
|
|
74
|
-
|
|
75
|
-
Pure HTTP forwarding is delegated to BaseProxyClient.
|
|
76
|
-
"""
|
|
77
|
-
|
|
78
|
-
SENSITIVE_HEADERS = {"authorization", "x-api-key", "cookie", "set-cookie"}
|
|
79
|
-
|
|
80
|
-
def __init__(
|
|
81
|
-
self,
|
|
82
|
-
proxy_client: BaseProxyClient,
|
|
83
|
-
credentials_manager: CredentialsManager,
|
|
84
|
-
settings: Settings,
|
|
85
|
-
proxy_mode: str = "full",
|
|
86
|
-
target_base_url: str = "https://api.anthropic.com",
|
|
87
|
-
metrics: PrometheusMetrics | None = None,
|
|
88
|
-
app_state: Any = None,
|
|
89
|
-
) -> None:
|
|
90
|
-
"""Initialize the proxy service.
|
|
91
|
-
|
|
92
|
-
Args:
|
|
93
|
-
proxy_client: HTTP client for pure forwarding
|
|
94
|
-
credentials_manager: Authentication manager
|
|
95
|
-
settings: Application settings
|
|
96
|
-
proxy_mode: Transformation mode - "minimal" or "full"
|
|
97
|
-
target_base_url: Base URL for the target API
|
|
98
|
-
metrics: Prometheus metrics collector (optional)
|
|
99
|
-
app_state: FastAPI app state for accessing detection data
|
|
100
|
-
"""
|
|
101
|
-
self.proxy_client = proxy_client
|
|
102
|
-
self.credentials_manager = credentials_manager
|
|
103
|
-
self.settings = settings
|
|
104
|
-
self.proxy_mode = proxy_mode
|
|
105
|
-
self.target_base_url = target_base_url.rstrip("/")
|
|
106
|
-
self.metrics = metrics or get_metrics()
|
|
107
|
-
self.app_state = app_state
|
|
108
|
-
|
|
109
|
-
# Create concrete transformers
|
|
110
|
-
self.request_transformer = HTTPRequestTransformer()
|
|
111
|
-
self.response_transformer = HTTPResponseTransformer()
|
|
112
|
-
self.codex_transformer = CodexRequestTransformer()
|
|
113
|
-
|
|
114
|
-
# Create OpenAI adapter for stream transformation
|
|
115
|
-
from ccproxy.adapters.openai.adapter import OpenAIAdapter
|
|
116
|
-
|
|
117
|
-
self.openai_adapter = OpenAIAdapter()
|
|
118
|
-
|
|
119
|
-
# Create mock response generator for bypass mode
|
|
120
|
-
self.mock_generator = RealisticMockResponseGenerator()
|
|
121
|
-
|
|
122
|
-
# Cache environment-based configuration
|
|
123
|
-
self._proxy_url = self._init_proxy_url()
|
|
124
|
-
self._ssl_context = self._init_ssl_context()
|
|
125
|
-
self._verbose_streaming = (
|
|
126
|
-
os.environ.get("CCPROXY_VERBOSE_STREAMING", "false").lower() == "true"
|
|
127
|
-
)
|
|
128
|
-
self._verbose_api = (
|
|
129
|
-
os.environ.get("CCPROXY_VERBOSE_API", "false").lower() == "true"
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
def _init_proxy_url(self) -> str | None:
|
|
133
|
-
"""Initialize proxy URL from environment variables."""
|
|
134
|
-
# Check for standard proxy environment variables
|
|
135
|
-
# For HTTPS requests, prioritize HTTPS_PROXY
|
|
136
|
-
https_proxy = os.environ.get("HTTPS_PROXY") or os.environ.get("https_proxy")
|
|
137
|
-
all_proxy = os.environ.get("ALL_PROXY")
|
|
138
|
-
http_proxy = os.environ.get("HTTP_PROXY") or os.environ.get("http_proxy")
|
|
139
|
-
|
|
140
|
-
proxy_url = https_proxy or all_proxy or http_proxy
|
|
141
|
-
|
|
142
|
-
if proxy_url:
|
|
143
|
-
logger.debug("proxy_configured", proxy_url=proxy_url)
|
|
144
|
-
|
|
145
|
-
return proxy_url
|
|
146
|
-
|
|
147
|
-
def _init_ssl_context(self) -> str | bool:
|
|
148
|
-
"""Initialize SSL context configuration from environment variables."""
|
|
149
|
-
# Check for custom CA bundle
|
|
150
|
-
ca_bundle = os.environ.get("REQUESTS_CA_BUNDLE") or os.environ.get(
|
|
151
|
-
"SSL_CERT_FILE"
|
|
152
|
-
)
|
|
153
|
-
|
|
154
|
-
# Check if SSL verification should be disabled (NOT RECOMMENDED)
|
|
155
|
-
ssl_verify = os.environ.get("SSL_VERIFY", "true").lower()
|
|
156
|
-
|
|
157
|
-
if ca_bundle and Path(ca_bundle).exists():
|
|
158
|
-
logger.info("ca_bundle_configured", ca_bundle=ca_bundle)
|
|
159
|
-
return ca_bundle
|
|
160
|
-
elif ssl_verify in ("false", "0", "no"):
|
|
161
|
-
logger.warning("ssl_verification_disabled")
|
|
162
|
-
return False
|
|
163
|
-
else:
|
|
164
|
-
logger.debug("ssl_verification_default")
|
|
165
|
-
return True
|
|
166
|
-
|
|
167
|
-
async def handle_request(
|
|
168
|
-
self,
|
|
169
|
-
method: str,
|
|
170
|
-
path: str,
|
|
171
|
-
headers: dict[str, str],
|
|
172
|
-
body: bytes | None = None,
|
|
173
|
-
query_params: dict[str, str | list[str]] | None = None,
|
|
174
|
-
timeout: float = 240.0,
|
|
175
|
-
request: Request | None = None, # Optional FastAPI Request object
|
|
176
|
-
) -> tuple[int, dict[str, str], bytes] | StreamingResponse:
|
|
177
|
-
"""Handle a proxy request with full business logic orchestration.
|
|
178
|
-
|
|
179
|
-
Args:
|
|
180
|
-
method: HTTP method
|
|
181
|
-
path: Request path (without /unclaude prefix)
|
|
182
|
-
headers: Request headers
|
|
183
|
-
body: Request body
|
|
184
|
-
query_params: Query parameters
|
|
185
|
-
timeout: Request timeout in seconds
|
|
186
|
-
request: Optional FastAPI Request object for accessing request context
|
|
187
|
-
|
|
188
|
-
Returns:
|
|
189
|
-
Tuple of (status_code, headers, body) or StreamingResponse for streaming
|
|
190
|
-
|
|
191
|
-
Raises:
|
|
192
|
-
HTTPException: If request fails
|
|
193
|
-
"""
|
|
194
|
-
# Extract request metadata
|
|
195
|
-
model, streaming = self._extract_request_metadata(body)
|
|
196
|
-
endpoint = path.split("/")[-1] if path else "unknown"
|
|
197
|
-
|
|
198
|
-
# Use existing context from request if available, otherwise create new one
|
|
199
|
-
if request and hasattr(request, "state") and hasattr(request.state, "context"):
|
|
200
|
-
# Use existing context from middleware
|
|
201
|
-
ctx = request.state.context
|
|
202
|
-
# Add service-specific metadata
|
|
203
|
-
ctx.add_metadata(
|
|
204
|
-
endpoint=endpoint,
|
|
205
|
-
model=model,
|
|
206
|
-
streaming=streaming,
|
|
207
|
-
service_type="proxy_service",
|
|
208
|
-
)
|
|
209
|
-
# Create a context manager that preserves the existing context's lifecycle
|
|
210
|
-
# This ensures __aexit__ is called for proper access logging
|
|
211
|
-
from contextlib import asynccontextmanager
|
|
212
|
-
|
|
213
|
-
@asynccontextmanager
|
|
214
|
-
async def existing_context_manager() -> AsyncGenerator[Any, None]:
|
|
215
|
-
try:
|
|
216
|
-
yield ctx
|
|
217
|
-
finally:
|
|
218
|
-
# Let the existing context handle its own lifecycle
|
|
219
|
-
# The middleware or parent context will call __aexit__
|
|
220
|
-
pass
|
|
221
|
-
|
|
222
|
-
context_manager: Any = existing_context_manager()
|
|
223
|
-
else:
|
|
224
|
-
# Create new context for observability
|
|
225
|
-
context_manager = request_context(
|
|
226
|
-
method=method,
|
|
227
|
-
path=path,
|
|
228
|
-
endpoint=endpoint,
|
|
229
|
-
model=model,
|
|
230
|
-
streaming=streaming,
|
|
231
|
-
service_type="proxy_service",
|
|
232
|
-
metrics=self.metrics,
|
|
233
|
-
)
|
|
234
|
-
|
|
235
|
-
async with context_manager as ctx:
|
|
236
|
-
try:
|
|
237
|
-
# 1. Authentication - get access token
|
|
238
|
-
async with timed_operation("oauth_token", ctx.request_id):
|
|
239
|
-
logger.debug("oauth_token_retrieval_start")
|
|
240
|
-
access_token = await self._get_access_token()
|
|
241
|
-
|
|
242
|
-
# 2. Request transformation
|
|
243
|
-
async with timed_operation("request_transform", ctx.request_id):
|
|
244
|
-
injection_mode = (
|
|
245
|
-
self.settings.claude.system_prompt_injection_mode.value
|
|
246
|
-
)
|
|
247
|
-
logger.debug(
|
|
248
|
-
"request_transform_start",
|
|
249
|
-
system_prompt_injection_mode=injection_mode,
|
|
250
|
-
)
|
|
251
|
-
transformed_request = (
|
|
252
|
-
await self.request_transformer.transform_proxy_request(
|
|
253
|
-
method,
|
|
254
|
-
path,
|
|
255
|
-
headers,
|
|
256
|
-
body,
|
|
257
|
-
query_params,
|
|
258
|
-
access_token,
|
|
259
|
-
self.target_base_url,
|
|
260
|
-
self.app_state,
|
|
261
|
-
injection_mode,
|
|
262
|
-
)
|
|
263
|
-
)
|
|
264
|
-
|
|
265
|
-
# 3. Check for bypass header to skip upstream forwarding
|
|
266
|
-
bypass_upstream = (
|
|
267
|
-
headers.get("X-CCProxy-Bypass-Upstream", "").lower() == "true"
|
|
268
|
-
)
|
|
269
|
-
|
|
270
|
-
if bypass_upstream:
|
|
271
|
-
logger.debug("bypassing_upstream_forwarding_due_to_header")
|
|
272
|
-
# Determine message type from request body for realistic response generation
|
|
273
|
-
message_type = self._extract_message_type_from_body(body)
|
|
274
|
-
|
|
275
|
-
# Check if this will be a streaming response
|
|
276
|
-
should_stream = streaming or self._should_stream_response(
|
|
277
|
-
transformed_request["headers"]
|
|
278
|
-
)
|
|
279
|
-
|
|
280
|
-
# Determine response format based on original request path
|
|
281
|
-
is_openai_format = self.response_transformer._is_openai_request(
|
|
282
|
-
path
|
|
283
|
-
)
|
|
284
|
-
|
|
285
|
-
if should_stream:
|
|
286
|
-
return await self._generate_bypass_streaming_response(
|
|
287
|
-
model, is_openai_format, ctx, message_type
|
|
288
|
-
)
|
|
289
|
-
else:
|
|
290
|
-
return await self._generate_bypass_standard_response(
|
|
291
|
-
model, is_openai_format, ctx, message_type
|
|
292
|
-
)
|
|
293
|
-
|
|
294
|
-
# 3. Forward request using proxy client
|
|
295
|
-
logger.debug("request_forwarding_start", url=transformed_request["url"])
|
|
296
|
-
|
|
297
|
-
# Check if this will be a streaming response
|
|
298
|
-
should_stream = streaming or self._should_stream_response(
|
|
299
|
-
transformed_request["headers"]
|
|
300
|
-
)
|
|
301
|
-
|
|
302
|
-
if should_stream:
|
|
303
|
-
logger.debug("streaming_response_detected")
|
|
304
|
-
return await self._handle_streaming_request(
|
|
305
|
-
transformed_request, path, timeout, ctx
|
|
306
|
-
)
|
|
307
|
-
else:
|
|
308
|
-
logger.debug("non_streaming_response_detected")
|
|
309
|
-
|
|
310
|
-
# Log the outgoing request if verbose API logging is enabled
|
|
311
|
-
await self._log_verbose_api_request(transformed_request, ctx)
|
|
312
|
-
|
|
313
|
-
# Handle regular request
|
|
314
|
-
async with timed_operation("api_call", ctx.request_id) as api_op:
|
|
315
|
-
start_time = time.perf_counter()
|
|
316
|
-
|
|
317
|
-
(
|
|
318
|
-
status_code,
|
|
319
|
-
response_headers,
|
|
320
|
-
response_body,
|
|
321
|
-
) = await self.proxy_client.forward(
|
|
322
|
-
method=transformed_request["method"],
|
|
323
|
-
url=transformed_request["url"],
|
|
324
|
-
headers=transformed_request["headers"],
|
|
325
|
-
body=transformed_request["body"],
|
|
326
|
-
timeout=timeout,
|
|
327
|
-
)
|
|
328
|
-
|
|
329
|
-
end_time = time.perf_counter()
|
|
330
|
-
api_duration = end_time - start_time
|
|
331
|
-
api_op["duration_seconds"] = api_duration
|
|
332
|
-
|
|
333
|
-
# Log the received response if verbose API logging is enabled
|
|
334
|
-
await self._log_verbose_api_response(
|
|
335
|
-
status_code, response_headers, response_body, ctx
|
|
336
|
-
)
|
|
337
|
-
|
|
338
|
-
# 4. Response transformation
|
|
339
|
-
async with timed_operation("response_transform", ctx.request_id):
|
|
340
|
-
logger.debug("response_transform_start")
|
|
341
|
-
# For error responses, transform to OpenAI format if needed
|
|
342
|
-
transformed_response: ResponseData
|
|
343
|
-
if status_code >= 400:
|
|
344
|
-
logger.info(
|
|
345
|
-
"upstream_error_received",
|
|
346
|
-
status_code=status_code,
|
|
347
|
-
has_body=bool(response_body),
|
|
348
|
-
content_length=len(response_body) if response_body else 0,
|
|
349
|
-
)
|
|
350
|
-
|
|
351
|
-
# Use transformer to handle error transformation (including OpenAI format)
|
|
352
|
-
transformed_response = (
|
|
353
|
-
await self.response_transformer.transform_proxy_response(
|
|
354
|
-
status_code,
|
|
355
|
-
response_headers,
|
|
356
|
-
response_body,
|
|
357
|
-
path,
|
|
358
|
-
self.proxy_mode,
|
|
359
|
-
)
|
|
360
|
-
)
|
|
361
|
-
else:
|
|
362
|
-
transformed_response = (
|
|
363
|
-
await self.response_transformer.transform_proxy_response(
|
|
364
|
-
status_code,
|
|
365
|
-
response_headers,
|
|
366
|
-
response_body,
|
|
367
|
-
path,
|
|
368
|
-
self.proxy_mode,
|
|
369
|
-
)
|
|
370
|
-
)
|
|
371
|
-
|
|
372
|
-
# 5. Extract response metrics using direct JSON parsing
|
|
373
|
-
tokens_input = tokens_output = cache_read_tokens = (
|
|
374
|
-
cache_write_tokens
|
|
375
|
-
) = cost_usd = None
|
|
376
|
-
if transformed_response["body"]:
|
|
377
|
-
try:
|
|
378
|
-
response_data = json.loads(
|
|
379
|
-
transformed_response["body"].decode("utf-8")
|
|
380
|
-
)
|
|
381
|
-
usage = response_data.get("usage", {})
|
|
382
|
-
tokens_input = usage.get("input_tokens")
|
|
383
|
-
tokens_output = usage.get("output_tokens")
|
|
384
|
-
cache_read_tokens = usage.get("cache_read_input_tokens")
|
|
385
|
-
cache_write_tokens = usage.get("cache_creation_input_tokens")
|
|
386
|
-
|
|
387
|
-
# Calculate cost including cache tokens if we have tokens and model
|
|
388
|
-
from ccproxy.utils.cost_calculator import calculate_token_cost
|
|
389
|
-
|
|
390
|
-
cost_usd = calculate_token_cost(
|
|
391
|
-
tokens_input,
|
|
392
|
-
tokens_output,
|
|
393
|
-
model,
|
|
394
|
-
cache_read_tokens,
|
|
395
|
-
cache_write_tokens,
|
|
396
|
-
)
|
|
397
|
-
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
398
|
-
pass # Keep all values as None if parsing fails
|
|
399
|
-
|
|
400
|
-
# 6. Update context with response data
|
|
401
|
-
ctx.add_metadata(
|
|
402
|
-
status_code=status_code,
|
|
403
|
-
tokens_input=tokens_input,
|
|
404
|
-
tokens_output=tokens_output,
|
|
405
|
-
cache_read_tokens=cache_read_tokens,
|
|
406
|
-
cache_write_tokens=cache_write_tokens,
|
|
407
|
-
cost_usd=cost_usd,
|
|
408
|
-
)
|
|
409
|
-
|
|
410
|
-
return (
|
|
411
|
-
transformed_response["status_code"],
|
|
412
|
-
transformed_response["headers"],
|
|
413
|
-
transformed_response["body"],
|
|
414
|
-
)
|
|
415
|
-
|
|
416
|
-
except Exception as e:
|
|
417
|
-
ctx.add_metadata(error=e)
|
|
418
|
-
raise
|
|
419
|
-
|
|
420
|
-
async def handle_codex_request(
|
|
421
|
-
self,
|
|
422
|
-
method: str,
|
|
423
|
-
path: str,
|
|
424
|
-
session_id: str,
|
|
425
|
-
access_token: str,
|
|
426
|
-
request: Request,
|
|
427
|
-
settings: Settings,
|
|
428
|
-
) -> StreamingResponse | Response:
|
|
429
|
-
"""Handle OpenAI Codex proxy request with request/response capture.
|
|
430
|
-
|
|
431
|
-
Args:
|
|
432
|
-
method: HTTP method
|
|
433
|
-
path: Request path (e.g., "/responses" or "/{session_id}/responses")
|
|
434
|
-
session_id: Resolved session ID
|
|
435
|
-
access_token: OpenAI access token
|
|
436
|
-
request: FastAPI request object
|
|
437
|
-
settings: Application settings
|
|
438
|
-
|
|
439
|
-
Returns:
|
|
440
|
-
StreamingResponse or regular Response
|
|
441
|
-
"""
|
|
442
|
-
try:
|
|
443
|
-
# Read request body - check if already stored by middleware
|
|
444
|
-
if hasattr(request.state, "body"):
|
|
445
|
-
body = request.state.body
|
|
446
|
-
else:
|
|
447
|
-
body = await request.body()
|
|
448
|
-
|
|
449
|
-
# Parse request data to capture the instructions field and other metadata
|
|
450
|
-
request_data = None
|
|
451
|
-
try:
|
|
452
|
-
request_data = json.loads(body.decode("utf-8")) if body else {}
|
|
453
|
-
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
|
454
|
-
request_data = {}
|
|
455
|
-
logger.warning(
|
|
456
|
-
"codex_json_decode_failed",
|
|
457
|
-
error=str(e),
|
|
458
|
-
body_preview=body[:100].decode("utf-8", errors="replace")
|
|
459
|
-
if body
|
|
460
|
-
else None,
|
|
461
|
-
body_length=len(body) if body else 0,
|
|
462
|
-
)
|
|
463
|
-
|
|
464
|
-
# Parse request to extract account_id from token if available
|
|
465
|
-
import jwt
|
|
466
|
-
|
|
467
|
-
account_id = "unknown"
|
|
468
|
-
try:
|
|
469
|
-
decoded = jwt.decode(access_token, options={"verify_signature": False})
|
|
470
|
-
account_id = decoded.get(
|
|
471
|
-
"org_id", decoded.get("sub", decoded.get("account_id", "unknown"))
|
|
472
|
-
)
|
|
473
|
-
except Exception:
|
|
474
|
-
pass
|
|
475
|
-
|
|
476
|
-
# Get Codex detection data from app state
|
|
477
|
-
codex_detection_data = None
|
|
478
|
-
if self.app_state and hasattr(self.app_state, "codex_detection_data"):
|
|
479
|
-
codex_detection_data = self.app_state.codex_detection_data
|
|
480
|
-
|
|
481
|
-
# Use CodexRequestTransformer to build request
|
|
482
|
-
original_headers = dict(request.headers)
|
|
483
|
-
transformed_request = await self.codex_transformer.transform_codex_request(
|
|
484
|
-
method=method,
|
|
485
|
-
path=path,
|
|
486
|
-
headers=original_headers,
|
|
487
|
-
body=body,
|
|
488
|
-
access_token=access_token,
|
|
489
|
-
session_id=session_id,
|
|
490
|
-
account_id=account_id,
|
|
491
|
-
codex_detection_data=codex_detection_data,
|
|
492
|
-
target_base_url=settings.codex.base_url,
|
|
493
|
-
)
|
|
494
|
-
|
|
495
|
-
target_url = transformed_request["url"]
|
|
496
|
-
headers = transformed_request["headers"]
|
|
497
|
-
transformed_body = transformed_request["body"] or body
|
|
498
|
-
|
|
499
|
-
# Parse transformed body for logging
|
|
500
|
-
transformed_request_data = request_data
|
|
501
|
-
if transformed_body and transformed_body != body:
|
|
502
|
-
try:
|
|
503
|
-
transformed_request_data = json.loads(
|
|
504
|
-
transformed_body.decode("utf-8")
|
|
505
|
-
)
|
|
506
|
-
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
507
|
-
transformed_request_data = request_data
|
|
508
|
-
|
|
509
|
-
# Generate request ID for logging
|
|
510
|
-
from uuid import uuid4
|
|
511
|
-
|
|
512
|
-
request_id = f"codex_{uuid4().hex[:8]}"
|
|
513
|
-
|
|
514
|
-
# Log Codex request (including instructions field and headers)
|
|
515
|
-
await self._log_codex_request(
|
|
516
|
-
request_id=request_id,
|
|
517
|
-
method=method,
|
|
518
|
-
url=target_url,
|
|
519
|
-
headers=headers,
|
|
520
|
-
body_data=transformed_request_data,
|
|
521
|
-
session_id=session_id,
|
|
522
|
-
)
|
|
523
|
-
|
|
524
|
-
# Check if user explicitly requested streaming (from original request)
|
|
525
|
-
user_requested_streaming = self.codex_transformer._is_streaming_request(
|
|
526
|
-
body
|
|
527
|
-
)
|
|
528
|
-
|
|
529
|
-
# Forward request to ChatGPT backend
|
|
530
|
-
if user_requested_streaming:
|
|
531
|
-
# Handle streaming request with proper context management
|
|
532
|
-
# First, collect the response to check for errors
|
|
533
|
-
collected_chunks = []
|
|
534
|
-
chunk_count = 0
|
|
535
|
-
total_bytes = 0
|
|
536
|
-
response_status_code = 200
|
|
537
|
-
response_headers = {}
|
|
538
|
-
|
|
539
|
-
async def stream_codex_response() -> AsyncGenerator[bytes, None]:
|
|
540
|
-
nonlocal \
|
|
541
|
-
collected_chunks, \
|
|
542
|
-
chunk_count, \
|
|
543
|
-
total_bytes, \
|
|
544
|
-
response_status_code, \
|
|
545
|
-
response_headers
|
|
546
|
-
|
|
547
|
-
logger.debug(
|
|
548
|
-
"proxy_service_streaming_started",
|
|
549
|
-
request_id=request_id,
|
|
550
|
-
session_id=session_id,
|
|
551
|
-
)
|
|
552
|
-
|
|
553
|
-
async with (
|
|
554
|
-
httpx.AsyncClient(timeout=240.0) as client,
|
|
555
|
-
client.stream(
|
|
556
|
-
method=method,
|
|
557
|
-
url=target_url,
|
|
558
|
-
headers=headers,
|
|
559
|
-
content=transformed_body,
|
|
560
|
-
) as response,
|
|
561
|
-
):
|
|
562
|
-
# Capture response info for error checking
|
|
563
|
-
response_status_code = response.status_code
|
|
564
|
-
response_headers = dict(response.headers)
|
|
565
|
-
|
|
566
|
-
# Log response headers for streaming
|
|
567
|
-
await self._log_codex_response_headers(
|
|
568
|
-
request_id=request_id,
|
|
569
|
-
status_code=response.status_code,
|
|
570
|
-
headers=dict(response.headers),
|
|
571
|
-
stream_type="codex_sse",
|
|
572
|
-
)
|
|
573
|
-
|
|
574
|
-
# Check if upstream actually returned streaming
|
|
575
|
-
content_type = response.headers.get("content-type", "")
|
|
576
|
-
is_streaming = "text/event-stream" in content_type
|
|
577
|
-
|
|
578
|
-
if not is_streaming:
|
|
579
|
-
logger.warning(
|
|
580
|
-
"codex_expected_streaming_but_got_regular",
|
|
581
|
-
content_type=content_type,
|
|
582
|
-
status_code=response.status_code,
|
|
583
|
-
)
|
|
584
|
-
|
|
585
|
-
async for chunk in response.aiter_bytes():
|
|
586
|
-
chunk_count += 1
|
|
587
|
-
chunk_size = len(chunk)
|
|
588
|
-
total_bytes += chunk_size
|
|
589
|
-
collected_chunks.append(chunk)
|
|
590
|
-
|
|
591
|
-
logger.debug(
|
|
592
|
-
"proxy_service_streaming_chunk",
|
|
593
|
-
request_id=request_id,
|
|
594
|
-
chunk_number=chunk_count,
|
|
595
|
-
chunk_size=chunk_size,
|
|
596
|
-
total_bytes=total_bytes,
|
|
597
|
-
)
|
|
598
|
-
|
|
599
|
-
yield chunk
|
|
600
|
-
|
|
601
|
-
logger.debug(
|
|
602
|
-
"proxy_service_streaming_complete",
|
|
603
|
-
request_id=request_id,
|
|
604
|
-
total_chunks=chunk_count,
|
|
605
|
-
total_bytes=total_bytes,
|
|
606
|
-
)
|
|
607
|
-
|
|
608
|
-
# Log the complete stream data after streaming finishes
|
|
609
|
-
await self._log_codex_streaming_complete(
|
|
610
|
-
request_id=request_id,
|
|
611
|
-
chunks=collected_chunks,
|
|
612
|
-
)
|
|
613
|
-
|
|
614
|
-
# Execute the stream generator to collect the response
|
|
615
|
-
generator_chunks = []
|
|
616
|
-
async for chunk in stream_codex_response():
|
|
617
|
-
generator_chunks.append(chunk)
|
|
618
|
-
|
|
619
|
-
# Now check if this should be an error response
|
|
620
|
-
content_type = response_headers.get("content-type", "")
|
|
621
|
-
if (
|
|
622
|
-
response_status_code >= 400
|
|
623
|
-
and "text/event-stream" not in content_type
|
|
624
|
-
):
|
|
625
|
-
# Return error as regular Response with proper status code
|
|
626
|
-
error_content = b"".join(collected_chunks)
|
|
627
|
-
logger.warning(
|
|
628
|
-
"codex_returning_error_as_regular_response",
|
|
629
|
-
status_code=response_status_code,
|
|
630
|
-
content_type=content_type,
|
|
631
|
-
content_preview=error_content[:200].decode(
|
|
632
|
-
"utf-8", errors="replace"
|
|
633
|
-
),
|
|
634
|
-
)
|
|
635
|
-
return Response(
|
|
636
|
-
content=error_content,
|
|
637
|
-
status_code=response_status_code,
|
|
638
|
-
headers=response_headers,
|
|
639
|
-
)
|
|
640
|
-
|
|
641
|
-
# Return normal streaming response
|
|
642
|
-
async def replay_stream() -> AsyncGenerator[bytes, None]:
|
|
643
|
-
for chunk in generator_chunks:
|
|
644
|
-
yield chunk
|
|
645
|
-
|
|
646
|
-
# Forward upstream headers but filter out incompatible ones for streaming
|
|
647
|
-
streaming_headers = dict(response_headers)
|
|
648
|
-
# Remove headers that conflict with streaming responses
|
|
649
|
-
streaming_headers.pop("content-length", None)
|
|
650
|
-
streaming_headers.pop("content-encoding", None)
|
|
651
|
-
streaming_headers.pop("date", None)
|
|
652
|
-
# Set streaming-specific headers
|
|
653
|
-
streaming_headers.update(
|
|
654
|
-
{
|
|
655
|
-
"content-type": "text/event-stream",
|
|
656
|
-
"cache-control": "no-cache",
|
|
657
|
-
"connection": "keep-alive",
|
|
658
|
-
}
|
|
659
|
-
)
|
|
660
|
-
|
|
661
|
-
return StreamingResponse(
|
|
662
|
-
replay_stream(),
|
|
663
|
-
media_type="text/event-stream",
|
|
664
|
-
headers=streaming_headers,
|
|
665
|
-
)
|
|
666
|
-
else:
|
|
667
|
-
# Handle non-streaming request
|
|
668
|
-
async with httpx.AsyncClient(timeout=240.0) as client:
|
|
669
|
-
response = await client.request(
|
|
670
|
-
method=method,
|
|
671
|
-
url=target_url,
|
|
672
|
-
headers=headers,
|
|
673
|
-
content=transformed_body,
|
|
674
|
-
)
|
|
675
|
-
|
|
676
|
-
# Check if upstream response is streaming (shouldn't happen)
|
|
677
|
-
content_type = response.headers.get("content-type", "")
|
|
678
|
-
transfer_encoding = response.headers.get("transfer-encoding", "")
|
|
679
|
-
upstream_is_streaming = "text/event-stream" in content_type or (
|
|
680
|
-
transfer_encoding == "chunked" and content_type == ""
|
|
681
|
-
)
|
|
682
|
-
|
|
683
|
-
logger.debug(
|
|
684
|
-
"codex_response_non_streaming",
|
|
685
|
-
content_type=content_type,
|
|
686
|
-
user_requested_streaming=user_requested_streaming,
|
|
687
|
-
upstream_is_streaming=upstream_is_streaming,
|
|
688
|
-
transfer_encoding=transfer_encoding,
|
|
689
|
-
)
|
|
690
|
-
|
|
691
|
-
if upstream_is_streaming:
|
|
692
|
-
# Upstream is streaming but user didn't request streaming
|
|
693
|
-
# Collect all streaming data and return as JSON
|
|
694
|
-
logger.debug(
|
|
695
|
-
"converting_upstream_stream_to_json", request_id=request_id
|
|
696
|
-
)
|
|
697
|
-
|
|
698
|
-
collected_chunks = []
|
|
699
|
-
async for chunk in response.aiter_bytes():
|
|
700
|
-
collected_chunks.append(chunk)
|
|
701
|
-
|
|
702
|
-
# Combine all chunks
|
|
703
|
-
full_content = b"".join(collected_chunks)
|
|
704
|
-
|
|
705
|
-
# Try to parse the streaming data and extract the final response
|
|
706
|
-
try:
|
|
707
|
-
# Parse SSE data to extract JSON response
|
|
708
|
-
content_str = full_content.decode("utf-8")
|
|
709
|
-
lines = content_str.strip().split("\n")
|
|
710
|
-
|
|
711
|
-
# Look for the last data line with JSON content
|
|
712
|
-
final_json = None
|
|
713
|
-
for line in reversed(lines):
|
|
714
|
-
if line.startswith("data: ") and not line.endswith(
|
|
715
|
-
"[DONE]"
|
|
716
|
-
):
|
|
717
|
-
try:
|
|
718
|
-
json_str = line[6:] # Remove "data: " prefix
|
|
719
|
-
final_json = json.loads(json_str)
|
|
720
|
-
break
|
|
721
|
-
except json.JSONDecodeError:
|
|
722
|
-
continue
|
|
723
|
-
|
|
724
|
-
if final_json:
|
|
725
|
-
response_content = json.dumps(final_json).encode(
|
|
726
|
-
"utf-8"
|
|
727
|
-
)
|
|
728
|
-
else:
|
|
729
|
-
# Fallback: return the raw content
|
|
730
|
-
response_content = full_content
|
|
731
|
-
|
|
732
|
-
except (UnicodeDecodeError, json.JSONDecodeError):
|
|
733
|
-
# Fallback: return raw content
|
|
734
|
-
response_content = full_content
|
|
735
|
-
|
|
736
|
-
# Log the complete response
|
|
737
|
-
try:
|
|
738
|
-
response_data = json.loads(response_content.decode("utf-8"))
|
|
739
|
-
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
740
|
-
response_data = {
|
|
741
|
-
"raw_content": response_content.decode(
|
|
742
|
-
"utf-8", errors="replace"
|
|
743
|
-
)
|
|
744
|
-
}
|
|
745
|
-
|
|
746
|
-
await self._log_codex_response(
|
|
747
|
-
request_id=request_id,
|
|
748
|
-
status_code=response.status_code,
|
|
749
|
-
headers=dict(response.headers),
|
|
750
|
-
body_data=response_data,
|
|
751
|
-
)
|
|
752
|
-
|
|
753
|
-
# Return as JSON response
|
|
754
|
-
return Response(
|
|
755
|
-
content=response_content,
|
|
756
|
-
status_code=response.status_code,
|
|
757
|
-
headers={
|
|
758
|
-
"content-type": "application/json",
|
|
759
|
-
"content-length": str(len(response_content)),
|
|
760
|
-
},
|
|
761
|
-
media_type="application/json",
|
|
762
|
-
)
|
|
763
|
-
else:
|
|
764
|
-
# For regular non-streaming responses
|
|
765
|
-
response_data = None
|
|
766
|
-
try:
|
|
767
|
-
response_data = (
|
|
768
|
-
json.loads(response.content.decode("utf-8"))
|
|
769
|
-
if response.content
|
|
770
|
-
else {}
|
|
771
|
-
)
|
|
772
|
-
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
773
|
-
response_data = {
|
|
774
|
-
"raw_content": response.content.decode(
|
|
775
|
-
"utf-8", errors="replace"
|
|
776
|
-
)
|
|
777
|
-
}
|
|
778
|
-
|
|
779
|
-
await self._log_codex_response(
|
|
780
|
-
request_id=request_id,
|
|
781
|
-
status_code=response.status_code,
|
|
782
|
-
headers=dict(response.headers),
|
|
783
|
-
body_data=response_data,
|
|
784
|
-
)
|
|
785
|
-
|
|
786
|
-
# Return regular response
|
|
787
|
-
return Response(
|
|
788
|
-
content=response.content,
|
|
789
|
-
status_code=response.status_code,
|
|
790
|
-
headers=dict(response.headers),
|
|
791
|
-
media_type=response.headers.get("content-type"),
|
|
792
|
-
)
|
|
793
|
-
|
|
794
|
-
except Exception as e:
|
|
795
|
-
logger.error("Codex request failed", error=str(e), session_id=session_id)
|
|
796
|
-
raise
|
|
797
|
-
|
|
798
|
-
async def _get_access_token(self) -> str:
|
|
799
|
-
"""Get access token for upstream authentication.
|
|
800
|
-
|
|
801
|
-
Uses OAuth credentials from Claude CLI for upstream authentication.
|
|
802
|
-
|
|
803
|
-
NOTE: The SECURITY__AUTH_TOKEN is only for authenticating incoming requests,
|
|
804
|
-
not for upstream authentication.
|
|
805
|
-
|
|
806
|
-
Returns:
|
|
807
|
-
Valid access token
|
|
808
|
-
|
|
809
|
-
Raises:
|
|
810
|
-
HTTPException: If no valid token is available
|
|
811
|
-
"""
|
|
812
|
-
# Always use OAuth credentials for upstream authentication
|
|
813
|
-
# The SECURITY__AUTH_TOKEN is only for client authentication, not upstream
|
|
814
|
-
try:
|
|
815
|
-
access_token = await self.credentials_manager.get_access_token()
|
|
816
|
-
if not access_token:
|
|
817
|
-
logger.error("oauth_token_unavailable")
|
|
818
|
-
|
|
819
|
-
# Try to get more details about credential status
|
|
820
|
-
try:
|
|
821
|
-
validation = await self.credentials_manager.validate()
|
|
822
|
-
|
|
823
|
-
if (
|
|
824
|
-
validation.valid
|
|
825
|
-
and validation.expired
|
|
826
|
-
and validation.credentials
|
|
827
|
-
):
|
|
828
|
-
logger.debug(
|
|
829
|
-
"oauth_token_expired",
|
|
830
|
-
expired_at=str(
|
|
831
|
-
validation.credentials.claude_ai_oauth.expires_at
|
|
832
|
-
),
|
|
833
|
-
)
|
|
834
|
-
except Exception as e:
|
|
835
|
-
logger.debug(
|
|
836
|
-
"credential_check_failed",
|
|
837
|
-
error=str(e),
|
|
838
|
-
exc_info=True,
|
|
839
|
-
)
|
|
840
|
-
|
|
841
|
-
raise HTTPException(
|
|
842
|
-
status_code=401,
|
|
843
|
-
detail="No valid OAuth credentials found. Please run 'ccproxy auth login'.",
|
|
844
|
-
)
|
|
845
|
-
|
|
846
|
-
logger.debug("oauth_token_retrieved")
|
|
847
|
-
return access_token
|
|
848
|
-
|
|
849
|
-
except HTTPException:
|
|
850
|
-
raise
|
|
851
|
-
except Exception as e:
|
|
852
|
-
logger.error("oauth_token_retrieval_failed", error=str(e), exc_info=True)
|
|
853
|
-
raise HTTPException(
|
|
854
|
-
status_code=401,
|
|
855
|
-
detail="Authentication failed",
|
|
856
|
-
) from e
|
|
857
|
-
|
|
858
|
-
def _redact_headers(self, headers: dict[str, str]) -> dict[str, str]:
|
|
859
|
-
"""Redact sensitive information from headers for safe logging."""
|
|
860
|
-
return {
|
|
861
|
-
k: "[REDACTED]" if k.lower() in self.SENSITIVE_HEADERS else v
|
|
862
|
-
for k, v in headers.items()
|
|
863
|
-
}
|
|
864
|
-
|
|
865
|
-
-    async def _log_verbose_api_request(
-        self, request_data: RequestData, ctx: "RequestContext"
-    ) -> None:
-        """Log details of an outgoing API request if verbose logging is enabled."""
-        if not self._verbose_api:
-            return
-
-        body = request_data.get("body")
-        body_preview = ""
-        full_body = None
-        if body:
-            try:
-                full_body = body.decode("utf-8", errors="replace")
-                # Truncate at 1024 chars for readability
-                body_preview = full_body[:1024]
-                # Try to parse as JSON for better formatting
-                try:
-                    import json
-
-                    full_body = json.loads(full_body)
-                except json.JSONDecodeError:
-                    pass  # Keep as string
-            except Exception:
-                body_preview = f"<binary data of length {len(body)}>"
-
-        logger.info(
-            "verbose_api_request",
-            method=request_data["method"],
-            url=request_data["url"],
-            headers=self._redact_headers(request_data["headers"]),
-            body_size=len(body) if body else 0,
-            body_preview=body_preview,
-        )
-
-        # Use new request logging system
-        request_id = ctx.request_id
-        timestamp = ctx.get_log_timestamp_prefix()
-        await write_request_log(
-            request_id=request_id,
-            log_type="upstream_request",
-            data={
-                "method": request_data["method"],
-                "url": request_data["url"],
-                "headers": dict(request_data["headers"]),  # Don't redact in file
-                "body": full_body,
-            },
-            timestamp=timestamp,
-        )
-
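The body handling above follows a decode-then-parse fallback: decode bytes leniently, keep a truncated preview for console logs, and upgrade the full body to structured JSON only when it parses. A minimal sketch of that pattern:

```python
import json


def preview_body(body: bytes, limit: int = 1024) -> tuple[str, object]:
    """Return (truncated preview, full body as parsed JSON if possible, else str)."""
    text = body.decode("utf-8", errors="replace")  # lenient decode, never raises
    preview = text[:limit]
    try:
        return preview, json.loads(text)  # structured form for file logs
    except json.JSONDecodeError:
        return preview, text  # keep as plain string


print(preview_body(b'{"model": "claude-3-5-sonnet-20241022", "stream": true}'))
```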
-    async def _log_verbose_api_response(
-        self,
-        status_code: int,
-        headers: dict[str, str],
-        body: bytes,
-        ctx: "RequestContext",
-    ) -> None:
-        """Log details of a received API response if verbose logging is enabled."""
-        if not self._verbose_api:
-            return
-
-        body_preview = ""
-        if body:
-            try:
-                # Truncate at 1024 chars for readability
-                body_preview = body.decode("utf-8", errors="replace")[:1024]
-            except Exception:
-                body_preview = f"<binary data of length {len(body)}>"
-
-        logger.info(
-            "verbose_api_response",
-            status_code=status_code,
-            headers=self._redact_headers(headers),
-            body_size=len(body),
-            body_preview=body_preview,
-        )
-
-        # Use new request logging system
-        full_body = None
-        if body:
-            try:
-                full_body_str = body.decode("utf-8", errors="replace")
-                # Try to parse as JSON for better formatting
-                try:
-                    full_body = json.loads(full_body_str)
-                except json.JSONDecodeError:
-                    full_body = full_body_str
-            except Exception:
-                full_body = f"<binary data of length {len(body)}>"
-
-        # Use new request logging system
-        request_id = ctx.request_id
-        timestamp = ctx.get_log_timestamp_prefix()
-        await write_request_log(
-            request_id=request_id,
-            log_type="upstream_response",
-            data={
-                "status_code": status_code,
-                "headers": dict(headers),  # Don't redact in file
-                "body": full_body,
-            },
-            timestamp=timestamp,
-        )
-
-    async def _log_codex_request(
-        self,
-        request_id: str,
-        method: str,
-        url: str,
-        headers: dict[str, str],
-        body_data: dict[str, Any] | None,
-        session_id: str,
-    ) -> None:
-        """Log outgoing Codex request preserving instructions field exactly."""
-        if not self._verbose_api:
-            return
-
-        # Log to console with redacted headers
-        logger.info(
-            "verbose_codex_request",
-            request_id=request_id,
-            method=method,
-            url=url,
-            headers=self._redact_headers(headers),
-            session_id=session_id,
-            instructions_preview=(
-                body_data.get("instructions", "")[:100] + "..."
-                if body_data and body_data.get("instructions")
-                else None
-            ),
-        )
-
-        # Save complete request to file (without redaction)
-        timestamp = time.strftime("%Y%m%d_%H%M%S")
-        await write_request_log(
-            request_id=request_id,
-            log_type="codex_request",
-            data={
-                "method": method,
-                "url": url,
-                "headers": dict(headers),
-                "body": body_data,
-            },
-            timestamp=timestamp,
-        )
-
-    async def _log_codex_response(
-        self,
-        request_id: str,
-        status_code: int,
-        headers: dict[str, str],
-        body_data: dict[str, Any] | None,
-    ) -> None:
-        """Log complete non-streaming Codex response."""
-        if not self._verbose_api:
-            return
-
-        # Log to console with redacted headers
-        logger.info(
-            "verbose_codex_response",
-            request_id=request_id,
-            status_code=status_code,
-            headers=self._redact_headers(headers),
-            response_type="non_streaming",
-        )
-
-        # Save complete response to file
-        timestamp = time.strftime("%Y%m%d_%H%M%S")
-        await write_request_log(
-            request_id=request_id,
-            log_type="codex_response",
-            data={
-                "status_code": status_code,
-                "headers": dict(headers),
-                "body": body_data,
-            },
-            timestamp=timestamp,
-        )
-
-    async def _log_codex_response_headers(
-        self,
-        request_id: str,
-        status_code: int,
-        headers: dict[str, str],
-        stream_type: str,
-    ) -> None:
-        """Log streaming Codex response headers."""
-        if not self._verbose_api:
-            return
-
-        # Log to console with redacted headers
-        logger.info(
-            "verbose_codex_response_headers",
-            request_id=request_id,
-            status_code=status_code,
-            headers=self._redact_headers(headers),
-            stream_type=stream_type,
-        )
-
-        # Save response headers to file
-        timestamp = time.strftime("%Y%m%d_%H%M%S")
-        await write_request_log(
-            request_id=request_id,
-            log_type="codex_response_headers",
-            data={
-                "status_code": status_code,
-                "headers": dict(headers),
-                "stream_type": stream_type,
-            },
-            timestamp=timestamp,
-        )
-
-    async def _log_codex_streaming_complete(
-        self,
-        request_id: str,
-        chunks: list[bytes],
-    ) -> None:
-        """Log complete streaming data after stream finishes."""
-        if not self._verbose_api:
-            return
-
-        # Combine chunks and decode for analysis
-        complete_data = b"".join(chunks)
-        try:
-            decoded_data = complete_data.decode("utf-8", errors="replace")
-        except Exception:
-            decoded_data = f"<binary data of length {len(complete_data)}>"
-
-        # Log to console with preview
-        logger.info(
-            "verbose_codex_streaming_complete",
-            request_id=request_id,
-            total_bytes=len(complete_data),
-            chunk_count=len(chunks),
-            data_preview=decoded_data[:200] + "..."
-            if len(decoded_data) > 200
-            else decoded_data,
-        )
-
-        # Save complete streaming data to file
-        timestamp = time.strftime("%Y%m%d_%H%M%S")
-        await write_request_log(
-            request_id=request_id,
-            log_type="codex_streaming_complete",
-            data={
-                "total_bytes": len(complete_data),
-                "chunk_count": len(chunks),
-                "complete_data": decoded_data,
-            },
-            timestamp=timestamp,
-        )
-
-    def _should_stream_response(self, headers: dict[str, str]) -> bool:
-        """Check if response should be streamed based on request headers.
-
-        Args:
-            headers: Request headers
-
-        Returns:
-            True if response should be streamed
-        """
-        # Check if client requested streaming
-        accept_header = headers.get("accept", "").lower()
-        should_stream = (
-            "text/event-stream" in accept_header or "stream" in accept_header
-        )
-        logger.debug(
-            "stream_check_completed",
-            accept_header=accept_header,
-            should_stream=should_stream,
-        )
-        return should_stream
-
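Note that the streaming decision is a plain substring test on the Accept header, so any value containing "stream" opts in. A compact standalone version:

```python
def should_stream(headers: dict[str, str]) -> bool:
    # Substring test: "text/event-stream", or any Accept containing "stream"
    accept = headers.get("accept", "").lower()
    return "text/event-stream" in accept or "stream" in accept


print(should_stream({"accept": "text/event-stream"}))  # True
print(should_stream({"accept": "application/json"}))   # False
```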
-    def _extract_request_metadata(self, body: bytes | None) -> tuple[str | None, bool]:
-        """Extract model and streaming flag from request body.
-
-        Args:
-            body: Request body
-
-        Returns:
-            Tuple of (model, streaming)
-        """
-        if not body:
-            return None, False
-
-        try:
-            body_data = json.loads(body.decode("utf-8"))
-            model = body_data.get("model")
-            streaming = body_data.get("stream", False)
-            return model, streaming
-        except (json.JSONDecodeError, UnicodeDecodeError):
-            return None, False
-
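The metadata extraction is deliberately defensive: a missing, non-UTF-8, or malformed body degrades to `(None, False)` rather than raising. For example:

```python
import json


def extract_request_metadata(body: bytes | None) -> tuple[str | None, bool]:
    # Any failure to decode or parse falls back to (None, False)
    if not body:
        return None, False
    try:
        data = json.loads(body.decode("utf-8"))
        return data.get("model"), data.get("stream", False)
    except (json.JSONDecodeError, UnicodeDecodeError):
        return None, False


print(extract_request_metadata(b'{"model": "claude-3-5-sonnet-20241022", "stream": true}'))
# ('claude-3-5-sonnet-20241022', True)
print(extract_request_metadata(b"not json"))
# (None, False)
```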
-    async def _handle_streaming_request(
-        self,
-        request_data: RequestData,
-        original_path: str,
-        timeout: float,
-        ctx: "RequestContext",
-    ) -> StreamingResponse | tuple[int, dict[str, str], bytes]:
-        """Handle streaming request with transformation.
-
-        Args:
-            request_data: Transformed request data
-            original_path: Original request path for context
-            timeout: Request timeout
-            ctx: Request context for observability
-
-        Returns:
-            StreamingResponse or error response tuple
-        """
-        # Log the outgoing request if verbose API logging is enabled
-        await self._log_verbose_api_request(request_data, ctx)
-
-        # First, make the request and check for errors before streaming
-        proxy_url = self._proxy_url
-        verify = self._ssl_context
-
-        async with httpx.AsyncClient(
-            timeout=timeout, proxy=proxy_url, verify=verify
-        ) as client:
-            # Start the request to get headers
-            response = await client.send(
-                client.build_request(
-                    method=request_data["method"],
-                    url=request_data["url"],
-                    headers=request_data["headers"],
-                    content=request_data["body"],
-                ),
-                stream=True,
-            )
-
-            # Check for errors before starting to stream
-            if response.status_code >= 400:
-                error_content = await response.aread()
-
-                # Log the full error response body
-                await self._log_verbose_api_response(
-                    response.status_code, dict(response.headers), error_content, ctx
-                )
-
-                logger.info(
-                    "streaming_error_received",
-                    status_code=response.status_code,
-                    error_detail=error_content.decode("utf-8", errors="replace"),
-                )
-
-                # Use transformer to handle error transformation (including OpenAI format)
-                transformed_error_response = (
-                    await self.response_transformer.transform_proxy_response(
-                        response.status_code,
-                        dict(response.headers),
-                        error_content,
-                        original_path,
-                        self.proxy_mode,
-                    )
-                )
-                transformed_error_body = transformed_error_response["body"]
-
-                # Update context with error status
-                ctx.add_metadata(status_code=response.status_code)
-
-                # Log access log for error
-                from ccproxy.observability.access_logger import log_request_access
-
-                await log_request_access(
-                    context=ctx,
-                    status_code=response.status_code,
-                    method=request_data["method"],
-                    metrics=self.metrics,
-                )
-
-                # Return error as regular response
-                return (
-                    response.status_code,
-                    dict(response.headers),
-                    transformed_error_body,
-                )
-
-        # If no error, proceed with streaming
-        # Make initial request to get headers
-        proxy_url = self._proxy_url
-        verify = self._ssl_context
-
-        response_headers = {}
-        response_status = 200
-
-        async with httpx.AsyncClient(
-            timeout=timeout, proxy=proxy_url, verify=verify
-        ) as client:
-            # Make initial request to capture headers
-            initial_response = await client.send(
-                client.build_request(
-                    method=request_data["method"],
-                    url=request_data["url"],
-                    headers=request_data["headers"],
-                    content=request_data["body"],
-                ),
-                stream=True,
-            )
-            response_status = initial_response.status_code
-            response_headers = dict(initial_response.headers)
-
-            # Close the initial response since we'll make a new one in the generator
-            await initial_response.aclose()
-
-        # Initialize streaming metrics collector
-        from ccproxy.utils.streaming_metrics import StreamingMetricsCollector
-
-        metrics_collector = StreamingMetricsCollector(request_id=ctx.request_id)
-
-        async def stream_generator() -> AsyncGenerator[bytes, None]:
-            try:
-                logger.debug(
-                    "stream_generator_start",
-                    method=request_data["method"],
-                    url=request_data["url"],
-                    headers=request_data["headers"],
-                )
-
-                # Use httpx directly for streaming since we need the stream context manager
-                # Get proxy and SSL settings from cached configuration
-                proxy_url = self._proxy_url
-                verify = self._ssl_context
-
-                start_time = time.perf_counter()
-                async with (
-                    httpx.AsyncClient(
-                        timeout=timeout, proxy=proxy_url, verify=verify
-                    ) as client,
-                    client.stream(
-                        method=request_data["method"],
-                        url=request_data["url"],
-                        headers=request_data["headers"],
-                        content=request_data["body"],
-                    ) as response,
-                ):
-                    end_time = time.perf_counter()
-                    proxy_api_call_ms = (end_time - start_time) * 1000
-                    logger.debug(
-                        "stream_response_received",
-                        status_code=response.status_code,
-                        headers=dict(response.headers),
-                    )
-
-                    # Log initial stream response headers if verbose
-                    if self._verbose_api:
-                        logger.info(
-                            "verbose_api_stream_response_start",
-                            status_code=response.status_code,
-                            headers=self._redact_headers(dict(response.headers)),
-                        )
-
-                    # Store response status and headers
-                    nonlocal response_status, response_headers
-                    response_status = response.status_code
-                    response_headers = dict(response.headers)
-
-                    # Log upstream response headers for streaming
-                    if self._verbose_api:
-                        request_id = ctx.request_id
-                        timestamp = ctx.get_log_timestamp_prefix()
-                        await write_request_log(
-                            request_id=request_id,
-                            log_type="upstream_response_headers",
-                            data={
-                                "status_code": response.status_code,
-                                "headers": dict(response.headers),
-                                "stream_type": "anthropic_sse"
-                                if not self.response_transformer._is_openai_request(
-                                    original_path
-                                )
-                                else "openai_sse",
-                            },
-                            timestamp=timestamp,
-                        )
-
-                    # Transform streaming response
-                    is_openai = self.response_transformer._is_openai_request(
-                        original_path
-                    )
-                    logger.debug(
-                        "openai_format_check", is_openai=is_openai, path=original_path
-                    )
-
-                    if is_openai:
-                        # Transform Anthropic SSE to OpenAI SSE format using adapter
-                        logger.debug("sse_transform_start", path=original_path)
-
-                        # Get timestamp once for all streaming chunks
-                        request_id = ctx.request_id
-                        timestamp = ctx.get_log_timestamp_prefix()
-
-                        async for (
-                            transformed_chunk
-                        ) in self._transform_anthropic_to_openai_stream(
-                            response, original_path
-                        ):
-                            # Log transformed streaming chunk
-                            await append_streaming_log(
-                                request_id=request_id,
-                                log_type="upstream_streaming",
-                                data=transformed_chunk,
-                                timestamp=timestamp,
-                            )
-
-                            logger.debug(
-                                "transformed_chunk_yielded",
-                                chunk_size=len(transformed_chunk),
-                            )
-                            yield transformed_chunk
-                    else:
-                        # Stream as-is for Anthropic endpoints
-                        logger.debug("anthropic_streaming_start")
-                        chunk_count = 0
-                        content_block_delta_count = 0
-
-                        # Use cached verbose streaming configuration
-                        verbose_streaming = self._verbose_streaming
-
-                        # Get timestamp once for all streaming chunks
-                        request_id = ctx.request_id
-                        timestamp = ctx.get_log_timestamp_prefix()
-
-                        async for chunk in response.aiter_bytes():
-                            if chunk:
-                                chunk_count += 1
-
-                                # Log raw streaming chunk
-                                await append_streaming_log(
-                                    request_id=request_id,
-                                    log_type="upstream_streaming",
-                                    data=chunk,
-                                    timestamp=timestamp,
-                                )
-
-                                # Compact logging for content_block_delta events
-                                chunk_str = chunk.decode("utf-8", errors="replace")
-
-                                # Extract token metrics from streaming events
-                                is_final = metrics_collector.process_chunk(chunk_str)
-
-                                # If this is the final chunk with complete metrics, update context and record metrics
-                                if is_final:
-                                    model = ctx.metadata.get("model")
-                                    cost_usd = metrics_collector.calculate_final_cost(
-                                        model
-                                    )
-                                    final_metrics = metrics_collector.get_metrics()
-
-                                    # Update context with final metrics
-                                    ctx.add_metadata(
-                                        status_code=response_status,
-                                        tokens_input=final_metrics["tokens_input"],
-                                        tokens_output=final_metrics["tokens_output"],
-                                        cache_read_tokens=final_metrics[
-                                            "cache_read_tokens"
-                                        ],
-                                        cache_write_tokens=final_metrics[
-                                            "cache_write_tokens"
-                                        ],
-                                        cost_usd=cost_usd,
-                                    )
-
-                                    # Access logging is now handled by StreamingResponseWithLogging
-
-                                if (
-                                    "content_block_delta" in chunk_str
-                                    and not verbose_streaming
-                                ):
-                                    content_block_delta_count += 1
-                                    # Only log every 10th content_block_delta or when we start/end
-                                    if content_block_delta_count == 1:
-                                        logger.debug("content_block_delta_start")
-                                    elif content_block_delta_count % 10 == 0:
-                                        logger.debug(
-                                            "content_block_delta_progress",
-                                            count=content_block_delta_count,
-                                        )
-                                elif (
-                                    verbose_streaming
-                                    or "content_block_delta" not in chunk_str
-                                ):
-                                    # Log non-content_block_delta events normally, or everything if verbose mode
-                                    logger.debug(
-                                        "chunk_yielded",
-                                        chunk_number=chunk_count,
-                                        chunk_size=len(chunk),
-                                        chunk_preview=chunk[:100].decode(
-                                            "utf-8", errors="replace"
-                                        ),
-                                    )
-
-                                yield chunk
-
-                        # Final summary for content_block_delta events
-                        if content_block_delta_count > 0 and not verbose_streaming:
-                            logger.debug(
-                                "content_block_delta_completed",
-                                total_count=content_block_delta_count,
-                            )
-
-            except Exception as e:
-                logger.exception("streaming_error", error=str(e), exc_info=True)
-                error_message = f'data: {{"error": "Streaming error: {str(e)}"}}\n\n'
-                yield error_message.encode("utf-8")
-
-        # Always use upstream headers as base
-        final_headers = response_headers.copy()
-
-        # Remove headers that can cause conflicts
-        final_headers.pop(
-            "date", None
-        )  # Remove upstream date header to avoid conflicts
-
-        # Ensure critical headers for streaming
-        final_headers["Cache-Control"] = "no-cache"
-        final_headers["Connection"] = "keep-alive"
-
-        # Set content-type if not already set by upstream
-        if "content-type" not in final_headers:
-            final_headers["content-type"] = "text/event-stream"
-
-        return StreamingResponseWithLogging(
-            content=stream_generator(),
-            request_context=ctx,
-            metrics=self.metrics,
-            status_code=response_status,
-            headers=final_headers,
-        )
-
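The pre-flight pattern used above — open the upstream response with `stream=True`, inspect the status code, and only commit to SSE once it is below 400 — can be reduced to a few lines of httpx. A minimal sketch, with a placeholder URL and no retry or transformation logic:

```python
import asyncio

import httpx


async def preflight_then_stream(url: str) -> None:
    """Open a streaming response, but only relay it if the status is OK."""
    async with httpx.AsyncClient(timeout=30.0) as client:
        request = client.build_request("GET", url)
        response = await client.send(request, stream=True)
        try:
            if response.status_code >= 400:
                # Drain the body so the error payload can be logged/returned
                body = await response.aread()
                print("upstream error:", response.status_code, body[:200])
                return
            # Status is good: relay bytes as they arrive
            async for chunk in response.aiter_bytes():
                print("chunk of", len(chunk), "bytes")
        finally:
            await response.aclose()


# Point at a real SSE endpoint before running:
# asyncio.run(preflight_then_stream("https://example.com/events"))
```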
-    async def _transform_anthropic_to_openai_stream(
-        self, response: httpx.Response, original_path: str
-    ) -> AsyncGenerator[bytes, None]:
-        """Transform Anthropic SSE stream to OpenAI SSE format using adapter.
-
-        Args:
-            response: Streaming response from Anthropic
-            original_path: Original request path for context
-
-        Yields:
-            Transformed OpenAI SSE format chunks
-        """
-
-        # Parse SSE chunks from response into dict stream
-        async def sse_to_dict_stream() -> AsyncGenerator[dict[str, object], None]:
-            chunk_count = 0
-            async for line in response.aiter_lines():
-                if line.startswith("data: "):
-                    data_str = line[6:].strip()
-                    if data_str and data_str != "[DONE]":
-                        try:
-                            chunk_data = json.loads(data_str)
-                            chunk_count += 1
-                            logger.debug(
-                                "proxy_anthropic_chunk_received",
-                                chunk_count=chunk_count,
-                                chunk_type=chunk_data.get("type"),
-                                chunk=chunk_data,
-                            )
-                            yield chunk_data
-                        except json.JSONDecodeError:
-                            logger.warning("sse_parse_failed", data=data_str)
-                            continue
-
-        # Transform using OpenAI adapter and format back to SSE
-        async for openai_chunk in self.openai_adapter.adapt_stream(
-            sse_to_dict_stream()
-        ):
-            sse_line = f"data: {json.dumps(openai_chunk)}\n\n"
-            yield sse_line.encode("utf-8")
-
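The inner parser strips the `data: ` prefix, skips the `[DONE]` sentinel, and JSON-decodes each payload; the outer loop re-serializes adapted chunks back to SSE wire format. A minimal synchronous sketch of that roundtrip (the real code iterates `response.aiter_lines()`):

```python
import json

lines = [
    'data: {"type": "message_start"}',
    'data: {"type": "content_block_delta", "delta": {"text": "Hi"}}',
    "data: [DONE]",
]

events = []
for line in lines:
    if line.startswith("data: "):
        payload = line[6:].strip()
        if payload and payload != "[DONE]":  # sentinel terminates the stream
            events.append(json.loads(payload))

# Re-serialize back to SSE wire format
for event in events:
    print(f"data: {json.dumps(event)}\n")
```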
-    def _extract_message_type_from_body(self, body: bytes | None) -> str:
-        """Extract message type from request body for realistic response generation."""
-        if not body:
-            return "short"
-
-        try:
-            body_data = json.loads(body.decode("utf-8"))
-            # Check if tools are present - indicates tool use
-            if body_data.get("tools"):
-                return "tool_use"
-
-            # Check message content length to determine type
-            messages = body_data.get("messages", [])
-            if messages:
-                content = str(messages[-1].get("content", ""))
-                if len(content) > 200:
-                    return "long"
-                elif len(content) < 50:
-                    return "short"
-                else:
-                    return "medium"
-        except (json.JSONDecodeError, UnicodeDecodeError):
-            pass
-
-        return "short"
-
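The classifier gives tool definitions priority, then buckets the last message's content length: under 50 characters is "short", over 200 is "long", anything between is "medium". A quick demonstration of the thresholds:

```python
import json


def classify(body: bytes) -> str:
    data = json.loads(body.decode("utf-8"))
    if data.get("tools"):
        return "tool_use"  # presence of tools wins over length
    messages = data.get("messages", [])
    if messages:
        content = str(messages[-1].get("content", ""))
        if len(content) > 200:
            return "long"
        if len(content) < 50:
            return "short"
        return "medium"
    return "short"


print(classify(json.dumps({"messages": [{"content": "hi"}]}).encode()))       # short
print(classify(json.dumps({"messages": [{"content": "x" * 100}]}).encode()))  # medium
print(classify(json.dumps({"tools": [{}], "messages": []}).encode()))         # tool_use
```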
-    async def _generate_bypass_standard_response(
-        self,
-        model: str | None,
-        is_openai_format: bool,
-        ctx: "RequestContext",
-        message_type: str = "short",
-    ) -> tuple[int, dict[str, str], bytes]:
-        """Generate realistic mock standard response."""
-
-        # Check if we should simulate an error
-        if self.mock_generator.should_simulate_error():
-            error_response, status_code = self.mock_generator.generate_error_response(
-                "openai" if is_openai_format else "anthropic"
-            )
-            response_body = json.dumps(error_response).encode()
-            return status_code, {"content-type": "application/json"}, response_body
-
-        # Generate realistic content and token counts
-        content, input_tokens, output_tokens = (
-            self.mock_generator.generate_response_content(
-                message_type, model or "claude-3-5-sonnet-20241022"
-            )
-        )
-        cache_read_tokens, cache_write_tokens = (
-            self.mock_generator.generate_cache_tokens()
-        )
-
-        # Simulate realistic latency
-        latency_ms = random.randint(*self.mock_generator.config.base_latency_ms)
-        await asyncio.sleep(latency_ms / 1000.0)
-
-        # Always start with Anthropic format
-        request_id = f"msg_test_{ctx.request_id}_{random.randint(1000, 9999)}"
-        content_list: list[dict[str, Any]] = [{"type": "text", "text": content}]
-        anthropic_response = {
-            "id": request_id,
-            "type": "message",
-            "role": "assistant",
-            "content": content_list,
-            "model": model or "claude-3-5-sonnet-20241022",
-            "stop_reason": "end_turn",
-            "stop_sequence": None,
-            "usage": {
-                "input_tokens": input_tokens,
-                "output_tokens": output_tokens,
-                "cache_creation_input_tokens": cache_write_tokens,
-                "cache_read_input_tokens": cache_read_tokens,
-            },
-        }
-
-        # Add tool use if appropriate
-        if message_type == "tool_use":
-            content_list.insert(
-                0,
-                {
-                    "type": "tool_use",
-                    "id": f"toolu_{random.randint(10000, 99999)}",
-                    "name": "calculator",
-                    "input": {"expression": "23 * 45"},
-                },
-            )
-
-        if is_openai_format:
-            # Transform to OpenAI format using existing adapter
-            openai_response = self.openai_adapter.adapt_response(anthropic_response)
-            response_body = json.dumps(openai_response).encode()
-        else:
-            response_body = json.dumps(anthropic_response).encode()
-
-        headers = {
-            "content-type": "application/json",
-            "content-length": str(len(response_body)),
-        }
-
-        # Update context with realistic metrics
-        cost_usd = self.mock_generator.calculate_realistic_cost(
-            input_tokens,
-            output_tokens,
-            model or "claude-3-5-sonnet-20241022",
-            cache_read_tokens,
-            cache_write_tokens,
-        )
-
-        ctx.add_metadata(
-            status_code=200,
-            tokens_input=input_tokens,
-            tokens_output=output_tokens,
-            cache_read_tokens=cache_read_tokens,
-            cache_write_tokens=cache_write_tokens,
-            cost_usd=cost_usd,
-        )
-
-        # Log comprehensive access log (includes Prometheus metrics)
-        await log_request_access(
-            context=ctx,
-            status_code=200,
-            method="POST",
-            metrics=self.metrics,
-        )
-
-        return 200, headers, response_body
-
-    async def _generate_bypass_streaming_response(
-        self,
-        model: str | None,
-        is_openai_format: bool,
-        ctx: "RequestContext",
-        message_type: str = "short",
-    ) -> StreamingResponse:
-        """Generate realistic mock streaming response."""
-
-        # Generate content and tokens
-        content, input_tokens, output_tokens = (
-            self.mock_generator.generate_response_content(
-                message_type, model or "claude-3-5-sonnet-20241022"
-            )
-        )
-        cache_read_tokens, cache_write_tokens = (
-            self.mock_generator.generate_cache_tokens()
-        )
-
-        async def realistic_mock_stream_generator() -> AsyncGenerator[bytes, None]:
-            request_id = f"msg_test_{ctx.request_id}_{random.randint(1000, 9999)}"
-
-            if is_openai_format:
-                # Generate OpenAI-style streaming
-                chunks = await self._generate_realistic_openai_stream(
-                    request_id,
-                    model or "claude-3-5-sonnet-20241022",
-                    content,
-                    input_tokens,
-                    output_tokens,
-                )
-            else:
-                # Generate Anthropic-style streaming
-                chunks = self.mock_generator.generate_realistic_anthropic_stream(
-                    request_id,
-                    model or "claude-3-5-sonnet-20241022",
-                    content,
-                    input_tokens,
-                    output_tokens,
-                    cache_read_tokens,
-                    cache_write_tokens,
-                )
-
-            # Simulate realistic token generation rate
-            tokens_per_second = self.mock_generator.config.token_generation_rate
-
-            for i, chunk in enumerate(chunks):
-                # Realistic delay based on token generation rate
-                if i > 0:  # Don't delay the first chunk
-                    # Estimate tokens in this chunk and calculate delay
-                    chunk_tokens = len(str(chunk)) // 4  # Rough estimate
-                    delay_seconds = chunk_tokens / tokens_per_second
-                    # Add some randomness
-                    delay_seconds *= random.uniform(0.5, 1.5)
-                    await asyncio.sleep(max(0.01, delay_seconds))
-
-                yield f"data: {json.dumps(chunk)}\n\n".encode()
-
-            yield b"data: [DONE]\n\n"
-
-        headers = {
-            "content-type": "text/event-stream",
-            "cache-control": "no-cache",
-            "connection": "keep-alive",
-        }
-
-        # Update context with realistic metrics
-        cost_usd = self.mock_generator.calculate_realistic_cost(
-            input_tokens,
-            output_tokens,
-            model or "claude-3-5-sonnet-20241022",
-            cache_read_tokens,
-            cache_write_tokens,
-        )
-
-        ctx.add_metadata(
-            status_code=200,
-            tokens_input=input_tokens,
-            tokens_output=output_tokens,
-            cache_read_tokens=cache_read_tokens,
-            cache_write_tokens=cache_write_tokens,
-            cost_usd=cost_usd,
-        )
-
-        return StreamingResponseWithLogging(
-            content=realistic_mock_stream_generator(),
-            request_context=ctx,
-            metrics=self.metrics,
-            headers=headers,
-        )
-
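The pacing model estimates roughly one token per four characters of the serialized chunk, divides by the configured generation rate, and jitters the result by plus or minus 50% with a 10 ms floor. A worked sketch of the same arithmetic, assuming an illustrative rate of 50 tokens/s (the real value comes from the mock generator's config):

```python
import json
import random

TOKENS_PER_SECOND = 50  # assumed; stands in for config.token_generation_rate

chunk = {"type": "content_block_delta", "delta": {"text": "Hello there"}}
chunk_tokens = len(str(chunk)) // 4        # rough 4-chars-per-token estimate
delay = chunk_tokens / TOKENS_PER_SECOND   # nominal pacing delay
delay *= random.uniform(0.5, 1.5)          # +/-50% jitter
delay = max(0.01, delay)                   # floor at 10 ms

print(f"emit {json.dumps(chunk)!r} after {delay:.3f}s")
```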
-    async def _generate_realistic_openai_stream(
-        self,
-        request_id: str,
-        model: str,
-        content: str,
-        input_tokens: int,
-        output_tokens: int,
-    ) -> list[dict[str, Any]]:
-        """Generate realistic OpenAI streaming chunks by converting Anthropic format."""
-
-        # Generate Anthropic chunks first
-        anthropic_chunks = self.mock_generator.generate_realistic_anthropic_stream(
-            request_id, model, content, input_tokens, output_tokens, 0, 0
-        )
-
-        # Convert to OpenAI format using the adapter
-        openai_chunks = []
-        for chunk in anthropic_chunks:
-            # Use the OpenAI adapter to convert each chunk
-            # This is a simplified conversion - in practice, you'd need a full streaming adapter
-            if chunk.get("type") == "message_start":
-                openai_chunks.append(
-                    {
-                        "id": f"chatcmpl-{request_id}",
-                        "object": "chat.completion.chunk",
-                        "created": int(time.time()),
-                        "model": model,
-                        "choices": [
-                            {
-                                "index": 0,
-                                "delta": {"role": "assistant", "content": ""},
-                                "finish_reason": None,
-                            }
-                        ],
-                    }
-                )
-            elif chunk.get("type") == "content_block_delta":
-                delta_text = chunk.get("delta", {}).get("text", "")
-                openai_chunks.append(
-                    {
-                        "id": f"chatcmpl-{request_id}",
-                        "object": "chat.completion.chunk",
-                        "created": int(time.time()),
-                        "model": model,
-                        "choices": [
-                            {
-                                "index": 0,
-                                "delta": {"content": delta_text},
-                                "finish_reason": None,
-                            }
-                        ],
-                    }
-                )
-            elif chunk.get("type") == "message_stop":
-                openai_chunks.append(
-                    {
-                        "id": f"chatcmpl-{request_id}",
-                        "object": "chat.completion.chunk",
-                        "created": int(time.time()),
-                        "model": model,
-                        "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
-                    }
-                )
-
-        return openai_chunks
-
-    async def close(self) -> None:
-        """Close any resources held by the proxy service."""
-        if self.proxy_client:
-            await self.proxy_client.close()
-        if self.credentials_manager:
-            await self.credentials_manager.__aexit__(None, None, None)