ccproxy-api 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccproxy/api/__init__.py +1 -15
- ccproxy/api/app.py +434 -219
- ccproxy/api/bootstrap.py +30 -0
- ccproxy/api/decorators.py +85 -0
- ccproxy/api/dependencies.py +144 -168
- ccproxy/api/format_validation.py +54 -0
- ccproxy/api/middleware/cors.py +6 -3
- ccproxy/api/middleware/errors.py +388 -524
- ccproxy/api/middleware/hooks.py +563 -0
- ccproxy/api/middleware/normalize_headers.py +59 -0
- ccproxy/api/middleware/request_id.py +35 -16
- ccproxy/api/middleware/streaming_hooks.py +292 -0
- ccproxy/api/routes/__init__.py +5 -14
- ccproxy/api/routes/health.py +39 -672
- ccproxy/api/routes/plugins.py +277 -0
- ccproxy/auth/__init__.py +2 -19
- ccproxy/auth/bearer.py +25 -15
- ccproxy/auth/dependencies.py +123 -157
- ccproxy/auth/exceptions.py +0 -12
- ccproxy/auth/manager.py +35 -49
- ccproxy/auth/managers/__init__.py +10 -0
- ccproxy/auth/managers/base.py +523 -0
- ccproxy/auth/managers/base_enhanced.py +63 -0
- ccproxy/auth/managers/token_snapshot.py +77 -0
- ccproxy/auth/models/base.py +65 -0
- ccproxy/auth/models/credentials.py +40 -0
- ccproxy/auth/oauth/__init__.py +4 -18
- ccproxy/auth/oauth/base.py +533 -0
- ccproxy/auth/oauth/cli_errors.py +37 -0
- ccproxy/auth/oauth/flows.py +430 -0
- ccproxy/auth/oauth/protocol.py +366 -0
- ccproxy/auth/oauth/registry.py +408 -0
- ccproxy/auth/oauth/router.py +396 -0
- ccproxy/auth/oauth/routes.py +186 -113
- ccproxy/auth/oauth/session.py +151 -0
- ccproxy/auth/oauth/templates.py +342 -0
- ccproxy/auth/storage/__init__.py +2 -5
- ccproxy/auth/storage/base.py +279 -5
- ccproxy/auth/storage/generic.py +134 -0
- ccproxy/cli/__init__.py +1 -2
- ccproxy/cli/_settings_help.py +351 -0
- ccproxy/cli/commands/auth.py +1519 -793
- ccproxy/cli/commands/config/commands.py +209 -276
- ccproxy/cli/commands/plugins.py +669 -0
- ccproxy/cli/commands/serve.py +75 -810
- ccproxy/cli/commands/status.py +254 -0
- ccproxy/cli/decorators.py +83 -0
- ccproxy/cli/helpers.py +22 -60
- ccproxy/cli/main.py +359 -10
- ccproxy/cli/options/claude_options.py +0 -25
- ccproxy/config/__init__.py +7 -11
- ccproxy/config/core.py +227 -0
- ccproxy/config/env_generator.py +232 -0
- ccproxy/config/runtime.py +67 -0
- ccproxy/config/security.py +36 -3
- ccproxy/config/settings.py +382 -441
- ccproxy/config/toml_generator.py +299 -0
- ccproxy/config/utils.py +452 -0
- ccproxy/core/__init__.py +7 -271
- ccproxy/{_version.py → core/_version.py} +16 -3
- ccproxy/core/async_task_manager.py +516 -0
- ccproxy/core/async_utils.py +47 -14
- ccproxy/core/auth/__init__.py +6 -0
- ccproxy/core/constants.py +16 -50
- ccproxy/core/errors.py +53 -0
- ccproxy/core/id_utils.py +20 -0
- ccproxy/core/interfaces.py +16 -123
- ccproxy/core/logging.py +473 -18
- ccproxy/core/plugins/__init__.py +77 -0
- ccproxy/core/plugins/cli_discovery.py +211 -0
- ccproxy/core/plugins/declaration.py +455 -0
- ccproxy/core/plugins/discovery.py +604 -0
- ccproxy/core/plugins/factories.py +967 -0
- ccproxy/core/plugins/hooks/__init__.py +30 -0
- ccproxy/core/plugins/hooks/base.py +58 -0
- ccproxy/core/plugins/hooks/events.py +46 -0
- ccproxy/core/plugins/hooks/implementations/__init__.py +16 -0
- ccproxy/core/plugins/hooks/implementations/formatters/__init__.py +11 -0
- ccproxy/core/plugins/hooks/implementations/formatters/json.py +552 -0
- ccproxy/core/plugins/hooks/implementations/formatters/raw.py +370 -0
- ccproxy/core/plugins/hooks/implementations/http_tracer.py +431 -0
- ccproxy/core/plugins/hooks/layers.py +44 -0
- ccproxy/core/plugins/hooks/manager.py +186 -0
- ccproxy/core/plugins/hooks/registry.py +139 -0
- ccproxy/core/plugins/hooks/thread_manager.py +203 -0
- ccproxy/core/plugins/hooks/types.py +22 -0
- ccproxy/core/plugins/interfaces.py +416 -0
- ccproxy/core/plugins/loader.py +166 -0
- ccproxy/core/plugins/middleware.py +233 -0
- ccproxy/core/plugins/models.py +59 -0
- ccproxy/core/plugins/protocol.py +180 -0
- ccproxy/core/plugins/runtime.py +519 -0
- ccproxy/{observability/context.py → core/request_context.py} +137 -94
- ccproxy/core/status_report.py +211 -0
- ccproxy/core/transformers.py +13 -8
- ccproxy/data/claude_headers_fallback.json +540 -19
- ccproxy/data/codex_headers_fallback.json +114 -7
- ccproxy/http/__init__.py +30 -0
- ccproxy/http/base.py +95 -0
- ccproxy/http/client.py +323 -0
- ccproxy/http/hooks.py +642 -0
- ccproxy/http/pool.py +279 -0
- ccproxy/llms/formatters/__init__.py +7 -0
- ccproxy/llms/formatters/anthropic_to_openai/__init__.py +55 -0
- ccproxy/llms/formatters/anthropic_to_openai/errors.py +65 -0
- ccproxy/llms/formatters/anthropic_to_openai/requests.py +356 -0
- ccproxy/llms/formatters/anthropic_to_openai/responses.py +153 -0
- ccproxy/llms/formatters/anthropic_to_openai/streams.py +1546 -0
- ccproxy/llms/formatters/base.py +140 -0
- ccproxy/llms/formatters/base_model.py +33 -0
- ccproxy/llms/formatters/common/__init__.py +51 -0
- ccproxy/llms/formatters/common/identifiers.py +48 -0
- ccproxy/llms/formatters/common/streams.py +254 -0
- ccproxy/llms/formatters/common/thinking.py +74 -0
- ccproxy/llms/formatters/common/usage.py +135 -0
- ccproxy/llms/formatters/constants.py +55 -0
- ccproxy/llms/formatters/context.py +116 -0
- ccproxy/llms/formatters/mapping.py +33 -0
- ccproxy/llms/formatters/openai_to_anthropic/__init__.py +55 -0
- ccproxy/llms/formatters/openai_to_anthropic/_helpers.py +141 -0
- ccproxy/llms/formatters/openai_to_anthropic/errors.py +53 -0
- ccproxy/llms/formatters/openai_to_anthropic/requests.py +674 -0
- ccproxy/llms/formatters/openai_to_anthropic/responses.py +285 -0
- ccproxy/llms/formatters/openai_to_anthropic/streams.py +530 -0
- ccproxy/llms/formatters/openai_to_openai/__init__.py +53 -0
- ccproxy/llms/formatters/openai_to_openai/_helpers.py +325 -0
- ccproxy/llms/formatters/openai_to_openai/errors.py +6 -0
- ccproxy/llms/formatters/openai_to_openai/requests.py +388 -0
- ccproxy/llms/formatters/openai_to_openai/responses.py +594 -0
- ccproxy/llms/formatters/openai_to_openai/streams.py +1832 -0
- ccproxy/llms/formatters/utils.py +306 -0
- ccproxy/llms/models/__init__.py +9 -0
- ccproxy/llms/models/anthropic.py +619 -0
- ccproxy/llms/models/openai.py +844 -0
- ccproxy/llms/streaming/__init__.py +26 -0
- ccproxy/llms/streaming/accumulators.py +1074 -0
- ccproxy/llms/streaming/formatters.py +251 -0
- ccproxy/{adapters/openai/streaming.py → llms/streaming/processors.py} +193 -240
- ccproxy/models/__init__.py +8 -159
- ccproxy/models/detection.py +92 -193
- ccproxy/models/provider.py +75 -0
- ccproxy/plugins/access_log/README.md +32 -0
- ccproxy/plugins/access_log/__init__.py +20 -0
- ccproxy/plugins/access_log/config.py +33 -0
- ccproxy/plugins/access_log/formatter.py +126 -0
- ccproxy/plugins/access_log/hook.py +763 -0
- ccproxy/plugins/access_log/logger.py +254 -0
- ccproxy/plugins/access_log/plugin.py +137 -0
- ccproxy/plugins/access_log/writer.py +109 -0
- ccproxy/plugins/analytics/README.md +24 -0
- ccproxy/plugins/analytics/__init__.py +1 -0
- ccproxy/plugins/analytics/config.py +5 -0
- ccproxy/plugins/analytics/ingest.py +85 -0
- ccproxy/plugins/analytics/models.py +97 -0
- ccproxy/plugins/analytics/plugin.py +121 -0
- ccproxy/plugins/analytics/routes.py +163 -0
- ccproxy/plugins/analytics/service.py +284 -0
- ccproxy/plugins/claude_api/README.md +29 -0
- ccproxy/plugins/claude_api/__init__.py +10 -0
- ccproxy/plugins/claude_api/adapter.py +829 -0
- ccproxy/plugins/claude_api/config.py +52 -0
- ccproxy/plugins/claude_api/detection_service.py +461 -0
- ccproxy/plugins/claude_api/health.py +175 -0
- ccproxy/plugins/claude_api/hooks.py +284 -0
- ccproxy/plugins/claude_api/models.py +256 -0
- ccproxy/plugins/claude_api/plugin.py +298 -0
- ccproxy/plugins/claude_api/routes.py +118 -0
- ccproxy/plugins/claude_api/streaming_metrics.py +68 -0
- ccproxy/plugins/claude_api/tasks.py +84 -0
- ccproxy/plugins/claude_sdk/README.md +35 -0
- ccproxy/plugins/claude_sdk/__init__.py +80 -0
- ccproxy/plugins/claude_sdk/adapter.py +749 -0
- ccproxy/plugins/claude_sdk/auth.py +57 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/client.py +63 -39
- ccproxy/plugins/claude_sdk/config.py +210 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/converter.py +6 -6
- ccproxy/plugins/claude_sdk/detection_service.py +163 -0
- ccproxy/{services/claude_sdk_service.py → plugins/claude_sdk/handler.py} +123 -304
- ccproxy/plugins/claude_sdk/health.py +113 -0
- ccproxy/plugins/claude_sdk/hooks.py +115 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/manager.py +42 -32
- ccproxy/{claude_sdk → plugins/claude_sdk}/message_queue.py +8 -8
- ccproxy/{models/claude_sdk.py → plugins/claude_sdk/models.py} +64 -16
- ccproxy/plugins/claude_sdk/options.py +154 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/parser.py +23 -5
- ccproxy/plugins/claude_sdk/plugin.py +269 -0
- ccproxy/plugins/claude_sdk/routes.py +104 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/session_client.py +124 -12
- ccproxy/plugins/claude_sdk/session_pool.py +700 -0
- ccproxy/{claude_sdk → plugins/claude_sdk}/stream_handle.py +48 -43
- ccproxy/{claude_sdk → plugins/claude_sdk}/stream_worker.py +22 -18
- ccproxy/{claude_sdk → plugins/claude_sdk}/streaming.py +50 -16
- ccproxy/plugins/claude_sdk/tasks.py +97 -0
- ccproxy/plugins/claude_shared/README.md +18 -0
- ccproxy/plugins/claude_shared/__init__.py +12 -0
- ccproxy/plugins/claude_shared/model_defaults.py +171 -0
- ccproxy/plugins/codex/README.md +35 -0
- ccproxy/plugins/codex/__init__.py +6 -0
- ccproxy/plugins/codex/adapter.py +635 -0
- ccproxy/{config/codex.py → plugins/codex/config.py} +78 -12
- ccproxy/plugins/codex/detection_service.py +544 -0
- ccproxy/plugins/codex/health.py +162 -0
- ccproxy/plugins/codex/hooks.py +263 -0
- ccproxy/plugins/codex/model_defaults.py +39 -0
- ccproxy/plugins/codex/models.py +263 -0
- ccproxy/plugins/codex/plugin.py +275 -0
- ccproxy/plugins/codex/routes.py +129 -0
- ccproxy/plugins/codex/streaming_metrics.py +324 -0
- ccproxy/plugins/codex/tasks.py +106 -0
- ccproxy/plugins/codex/utils/__init__.py +1 -0
- ccproxy/plugins/codex/utils/sse_parser.py +106 -0
- ccproxy/plugins/command_replay/README.md +34 -0
- ccproxy/plugins/command_replay/__init__.py +17 -0
- ccproxy/plugins/command_replay/config.py +133 -0
- ccproxy/plugins/command_replay/formatter.py +432 -0
- ccproxy/plugins/command_replay/hook.py +294 -0
- ccproxy/plugins/command_replay/plugin.py +161 -0
- ccproxy/plugins/copilot/README.md +39 -0
- ccproxy/plugins/copilot/__init__.py +11 -0
- ccproxy/plugins/copilot/adapter.py +465 -0
- ccproxy/plugins/copilot/config.py +155 -0
- ccproxy/plugins/copilot/data/copilot_fallback.json +41 -0
- ccproxy/plugins/copilot/detection_service.py +255 -0
- ccproxy/plugins/copilot/manager.py +275 -0
- ccproxy/plugins/copilot/model_defaults.py +284 -0
- ccproxy/plugins/copilot/models.py +148 -0
- ccproxy/plugins/copilot/oauth/__init__.py +16 -0
- ccproxy/plugins/copilot/oauth/client.py +494 -0
- ccproxy/plugins/copilot/oauth/models.py +385 -0
- ccproxy/plugins/copilot/oauth/provider.py +602 -0
- ccproxy/plugins/copilot/oauth/storage.py +170 -0
- ccproxy/plugins/copilot/plugin.py +360 -0
- ccproxy/plugins/copilot/routes.py +294 -0
- ccproxy/plugins/credential_balancer/README.md +124 -0
- ccproxy/plugins/credential_balancer/__init__.py +6 -0
- ccproxy/plugins/credential_balancer/config.py +270 -0
- ccproxy/plugins/credential_balancer/factory.py +415 -0
- ccproxy/plugins/credential_balancer/hook.py +51 -0
- ccproxy/plugins/credential_balancer/manager.py +587 -0
- ccproxy/plugins/credential_balancer/plugin.py +146 -0
- ccproxy/plugins/dashboard/README.md +25 -0
- ccproxy/plugins/dashboard/__init__.py +1 -0
- ccproxy/plugins/dashboard/config.py +8 -0
- ccproxy/plugins/dashboard/plugin.py +71 -0
- ccproxy/plugins/dashboard/routes.py +67 -0
- ccproxy/plugins/docker/README.md +32 -0
- ccproxy/{docker → plugins/docker}/__init__.py +3 -0
- ccproxy/{docker → plugins/docker}/adapter.py +108 -10
- ccproxy/plugins/docker/config.py +82 -0
- ccproxy/{docker → plugins/docker}/docker_path.py +4 -3
- ccproxy/{docker → plugins/docker}/middleware.py +2 -2
- ccproxy/plugins/docker/plugin.py +198 -0
- ccproxy/{docker → plugins/docker}/stream_process.py +3 -3
- ccproxy/plugins/duckdb_storage/README.md +26 -0
- ccproxy/plugins/duckdb_storage/__init__.py +1 -0
- ccproxy/plugins/duckdb_storage/config.py +22 -0
- ccproxy/plugins/duckdb_storage/plugin.py +128 -0
- ccproxy/plugins/duckdb_storage/routes.py +51 -0
- ccproxy/plugins/duckdb_storage/storage.py +633 -0
- ccproxy/plugins/max_tokens/README.md +38 -0
- ccproxy/plugins/max_tokens/__init__.py +12 -0
- ccproxy/plugins/max_tokens/adapter.py +235 -0
- ccproxy/plugins/max_tokens/config.py +86 -0
- ccproxy/plugins/max_tokens/models.py +53 -0
- ccproxy/plugins/max_tokens/plugin.py +200 -0
- ccproxy/plugins/max_tokens/service.py +271 -0
- ccproxy/plugins/max_tokens/token_limits.json +54 -0
- ccproxy/plugins/metrics/README.md +35 -0
- ccproxy/plugins/metrics/__init__.py +10 -0
- ccproxy/{observability/metrics.py → plugins/metrics/collector.py} +20 -153
- ccproxy/plugins/metrics/config.py +85 -0
- ccproxy/plugins/metrics/grafana/dashboards/ccproxy-dashboard.json +1720 -0
- ccproxy/plugins/metrics/hook.py +403 -0
- ccproxy/plugins/metrics/plugin.py +268 -0
- ccproxy/{observability → plugins/metrics}/pushgateway.py +57 -59
- ccproxy/plugins/metrics/routes.py +107 -0
- ccproxy/plugins/metrics/tasks.py +117 -0
- ccproxy/plugins/oauth_claude/README.md +35 -0
- ccproxy/plugins/oauth_claude/__init__.py +14 -0
- ccproxy/plugins/oauth_claude/client.py +270 -0
- ccproxy/plugins/oauth_claude/config.py +84 -0
- ccproxy/plugins/oauth_claude/manager.py +482 -0
- ccproxy/plugins/oauth_claude/models.py +266 -0
- ccproxy/plugins/oauth_claude/plugin.py +149 -0
- ccproxy/plugins/oauth_claude/provider.py +571 -0
- ccproxy/plugins/oauth_claude/storage.py +212 -0
- ccproxy/plugins/oauth_codex/README.md +38 -0
- ccproxy/plugins/oauth_codex/__init__.py +14 -0
- ccproxy/plugins/oauth_codex/client.py +224 -0
- ccproxy/plugins/oauth_codex/config.py +95 -0
- ccproxy/plugins/oauth_codex/manager.py +256 -0
- ccproxy/plugins/oauth_codex/models.py +239 -0
- ccproxy/plugins/oauth_codex/plugin.py +146 -0
- ccproxy/plugins/oauth_codex/provider.py +574 -0
- ccproxy/plugins/oauth_codex/storage.py +92 -0
- ccproxy/plugins/permissions/README.md +28 -0
- ccproxy/plugins/permissions/__init__.py +22 -0
- ccproxy/plugins/permissions/config.py +28 -0
- ccproxy/{cli/commands/permission_handler.py → plugins/permissions/handlers/cli.py} +49 -25
- ccproxy/plugins/permissions/handlers/protocol.py +33 -0
- ccproxy/plugins/permissions/handlers/terminal.py +675 -0
- ccproxy/{api/routes → plugins/permissions}/mcp.py +34 -7
- ccproxy/{models/permissions.py → plugins/permissions/models.py} +65 -1
- ccproxy/plugins/permissions/plugin.py +153 -0
- ccproxy/{api/routes/permissions.py → plugins/permissions/routes.py} +20 -16
- ccproxy/{api/services/permission_service.py → plugins/permissions/service.py} +65 -11
- ccproxy/{api → plugins/permissions}/ui/permission_handler_protocol.py +1 -1
- ccproxy/{api → plugins/permissions}/ui/terminal_permission_handler.py +66 -10
- ccproxy/plugins/pricing/README.md +34 -0
- ccproxy/plugins/pricing/__init__.py +6 -0
- ccproxy/{pricing → plugins/pricing}/cache.py +7 -6
- ccproxy/{config/pricing.py → plugins/pricing/config.py} +32 -6
- ccproxy/plugins/pricing/exceptions.py +35 -0
- ccproxy/plugins/pricing/loader.py +440 -0
- ccproxy/{pricing → plugins/pricing}/models.py +13 -23
- ccproxy/plugins/pricing/plugin.py +169 -0
- ccproxy/plugins/pricing/service.py +191 -0
- ccproxy/plugins/pricing/tasks.py +300 -0
- ccproxy/{pricing → plugins/pricing}/updater.py +86 -72
- ccproxy/plugins/pricing/utils.py +99 -0
- ccproxy/plugins/request_tracer/README.md +40 -0
- ccproxy/plugins/request_tracer/__init__.py +7 -0
- ccproxy/plugins/request_tracer/config.py +120 -0
- ccproxy/plugins/request_tracer/hook.py +415 -0
- ccproxy/plugins/request_tracer/plugin.py +255 -0
- ccproxy/scheduler/__init__.py +2 -14
- ccproxy/scheduler/core.py +26 -41
- ccproxy/scheduler/manager.py +61 -105
- ccproxy/scheduler/registry.py +6 -32
- ccproxy/scheduler/tasks.py +268 -276
- ccproxy/services/__init__.py +0 -1
- ccproxy/services/adapters/__init__.py +11 -0
- ccproxy/services/adapters/base.py +123 -0
- ccproxy/services/adapters/chain_composer.py +88 -0
- ccproxy/services/adapters/chain_validation.py +44 -0
- ccproxy/services/adapters/chat_accumulator.py +200 -0
- ccproxy/services/adapters/delta_utils.py +142 -0
- ccproxy/services/adapters/format_adapter.py +136 -0
- ccproxy/services/adapters/format_context.py +11 -0
- ccproxy/services/adapters/format_registry.py +158 -0
- ccproxy/services/adapters/http_adapter.py +1045 -0
- ccproxy/services/adapters/mock_adapter.py +118 -0
- ccproxy/services/adapters/protocols.py +35 -0
- ccproxy/services/adapters/simple_converters.py +571 -0
- ccproxy/services/auth_registry.py +180 -0
- ccproxy/services/cache/__init__.py +6 -0
- ccproxy/services/cache/response_cache.py +261 -0
- ccproxy/services/cli_detection.py +437 -0
- ccproxy/services/config/__init__.py +6 -0
- ccproxy/services/config/proxy_configuration.py +111 -0
- ccproxy/services/container.py +256 -0
- ccproxy/services/factories.py +380 -0
- ccproxy/services/handler_config.py +76 -0
- ccproxy/services/interfaces.py +298 -0
- ccproxy/services/mocking/__init__.py +6 -0
- ccproxy/services/mocking/mock_handler.py +291 -0
- ccproxy/services/tracing/__init__.py +7 -0
- ccproxy/services/tracing/interfaces.py +61 -0
- ccproxy/services/tracing/null_tracer.py +57 -0
- ccproxy/streaming/__init__.py +23 -0
- ccproxy/streaming/buffer.py +1056 -0
- ccproxy/streaming/deferred.py +897 -0
- ccproxy/streaming/handler.py +117 -0
- ccproxy/streaming/interfaces.py +77 -0
- ccproxy/streaming/simple_adapter.py +39 -0
- ccproxy/streaming/sse.py +109 -0
- ccproxy/streaming/sse_parser.py +127 -0
- ccproxy/templates/__init__.py +6 -0
- ccproxy/templates/plugin_scaffold.py +695 -0
- ccproxy/testing/endpoints/__init__.py +33 -0
- ccproxy/testing/endpoints/cli.py +215 -0
- ccproxy/testing/endpoints/config.py +874 -0
- ccproxy/testing/endpoints/console.py +57 -0
- ccproxy/testing/endpoints/models.py +100 -0
- ccproxy/testing/endpoints/runner.py +1903 -0
- ccproxy/testing/endpoints/tools.py +308 -0
- ccproxy/testing/mock_responses.py +70 -1
- ccproxy/testing/response_handlers.py +20 -0
- ccproxy/utils/__init__.py +0 -6
- ccproxy/utils/binary_resolver.py +476 -0
- ccproxy/utils/caching.py +327 -0
- ccproxy/utils/cli_logging.py +101 -0
- ccproxy/utils/command_line.py +251 -0
- ccproxy/utils/headers.py +228 -0
- ccproxy/utils/model_mapper.py +120 -0
- ccproxy/utils/startup_helpers.py +68 -446
- ccproxy/utils/version_checker.py +273 -6
- ccproxy_api-0.2.0.dist-info/METADATA +212 -0
- ccproxy_api-0.2.0.dist-info/RECORD +417 -0
- {ccproxy_api-0.1.7.dist-info → ccproxy_api-0.2.0.dist-info}/WHEEL +1 -1
- ccproxy_api-0.2.0.dist-info/entry_points.txt +24 -0
- ccproxy/__init__.py +0 -4
- ccproxy/adapters/__init__.py +0 -11
- ccproxy/adapters/base.py +0 -80
- ccproxy/adapters/codex/__init__.py +0 -11
- ccproxy/adapters/openai/__init__.py +0 -42
- ccproxy/adapters/openai/adapter.py +0 -953
- ccproxy/adapters/openai/models.py +0 -412
- ccproxy/adapters/openai/response_adapter.py +0 -355
- ccproxy/adapters/openai/response_models.py +0 -178
- ccproxy/api/middleware/headers.py +0 -49
- ccproxy/api/middleware/logging.py +0 -180
- ccproxy/api/middleware/request_content_logging.py +0 -297
- ccproxy/api/middleware/server_header.py +0 -58
- ccproxy/api/responses.py +0 -89
- ccproxy/api/routes/claude.py +0 -371
- ccproxy/api/routes/codex.py +0 -1251
- ccproxy/api/routes/metrics.py +0 -1029
- ccproxy/api/routes/proxy.py +0 -211
- ccproxy/api/services/__init__.py +0 -6
- ccproxy/auth/conditional.py +0 -84
- ccproxy/auth/credentials_adapter.py +0 -93
- ccproxy/auth/models.py +0 -118
- ccproxy/auth/oauth/models.py +0 -48
- ccproxy/auth/openai/__init__.py +0 -13
- ccproxy/auth/openai/credentials.py +0 -166
- ccproxy/auth/openai/oauth_client.py +0 -334
- ccproxy/auth/openai/storage.py +0 -184
- ccproxy/auth/storage/json_file.py +0 -158
- ccproxy/auth/storage/keyring.py +0 -189
- ccproxy/claude_sdk/__init__.py +0 -18
- ccproxy/claude_sdk/options.py +0 -194
- ccproxy/claude_sdk/session_pool.py +0 -550
- ccproxy/cli/docker/__init__.py +0 -34
- ccproxy/cli/docker/adapter_factory.py +0 -157
- ccproxy/cli/docker/params.py +0 -274
- ccproxy/config/auth.py +0 -153
- ccproxy/config/claude.py +0 -348
- ccproxy/config/cors.py +0 -79
- ccproxy/config/discovery.py +0 -95
- ccproxy/config/docker_settings.py +0 -264
- ccproxy/config/observability.py +0 -158
- ccproxy/config/reverse_proxy.py +0 -31
- ccproxy/config/scheduler.py +0 -108
- ccproxy/config/server.py +0 -86
- ccproxy/config/validators.py +0 -231
- ccproxy/core/codex_transformers.py +0 -389
- ccproxy/core/http.py +0 -328
- ccproxy/core/http_transformers.py +0 -812
- ccproxy/core/proxy.py +0 -143
- ccproxy/core/validators.py +0 -288
- ccproxy/models/errors.py +0 -42
- ccproxy/models/messages.py +0 -269
- ccproxy/models/requests.py +0 -107
- ccproxy/models/responses.py +0 -270
- ccproxy/models/types.py +0 -102
- ccproxy/observability/__init__.py +0 -51
- ccproxy/observability/access_logger.py +0 -457
- ccproxy/observability/sse_events.py +0 -303
- ccproxy/observability/stats_printer.py +0 -753
- ccproxy/observability/storage/__init__.py +0 -1
- ccproxy/observability/storage/duckdb_simple.py +0 -677
- ccproxy/observability/storage/models.py +0 -70
- ccproxy/observability/streaming_response.py +0 -107
- ccproxy/pricing/__init__.py +0 -19
- ccproxy/pricing/loader.py +0 -251
- ccproxy/services/claude_detection_service.py +0 -243
- ccproxy/services/codex_detection_service.py +0 -252
- ccproxy/services/credentials/__init__.py +0 -55
- ccproxy/services/credentials/config.py +0 -105
- ccproxy/services/credentials/manager.py +0 -561
- ccproxy/services/credentials/oauth_client.py +0 -481
- ccproxy/services/proxy_service.py +0 -1827
- ccproxy/static/.keep +0 -0
- ccproxy/utils/cost_calculator.py +0 -210
- ccproxy/utils/disconnection_monitor.py +0 -83
- ccproxy/utils/model_mapping.py +0 -199
- ccproxy/utils/models_provider.py +0 -150
- ccproxy/utils/simple_request_logger.py +0 -284
- ccproxy/utils/streaming_metrics.py +0 -199
- ccproxy_api-0.1.7.dist-info/METADATA +0 -615
- ccproxy_api-0.1.7.dist-info/RECORD +0 -191
- ccproxy_api-0.1.7.dist-info/entry_points.txt +0 -4
- /ccproxy/{api/middleware/auth.py → auth/models/__init__.py} +0 -0
- /ccproxy/{claude_sdk → plugins/claude_sdk}/exceptions.py +0 -0
- /ccproxy/{docker → plugins/docker}/models.py +0 -0
- /ccproxy/{docker → plugins/docker}/protocol.py +0 -0
- /ccproxy/{docker → plugins/docker}/validators.py +0 -0
- /ccproxy/{auth/oauth/storage.py → plugins/permissions/handlers/__init__.py} +0 -0
- /ccproxy/{api → plugins/permissions}/ui/__init__.py +0 -0
- {ccproxy_api-0.1.7.dist-info → ccproxy_api-0.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,1056 @@
|
|
|
1
|
+
"""Streaming buffer service for converting streaming requests to non-streaming responses.
|
|
2
|
+
|
|
3
|
+
This service handles the pattern where a non-streaming request needs to be converted
|
|
4
|
+
internally to a streaming request, buffered, and then returned as a non-streaming response.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import contextlib
|
|
8
|
+
import json
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import TYPE_CHECKING, Any
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
import structlog
|
|
14
|
+
from pydantic import ValidationError
|
|
15
|
+
from starlette.responses import Response
|
|
16
|
+
|
|
17
|
+
from ccproxy.core.plugins.hooks import HookEvent, HookManager
|
|
18
|
+
from ccproxy.core.plugins.hooks.base import HookContext
|
|
19
|
+
from ccproxy.llms.models import openai as openai_models
|
|
20
|
+
from ccproxy.llms.streaming.accumulators import ResponsesAccumulator, StreamAccumulator
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from ccproxy.core.request_context import RequestContext
|
|
25
|
+
from ccproxy.http.pool import HTTPPoolManager
|
|
26
|
+
from ccproxy.services.handler_config import HandlerConfig
|
|
27
|
+
from ccproxy.services.interfaces import IRequestTracer
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
logger = structlog.get_logger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
MAX_BODY_LOG_CHARS = 2048
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _stringify_payload(payload: Any) -> tuple[str | None, int, bool]:
|
|
37
|
+
"""Return a safe preview of request or response payloads."""
|
|
38
|
+
|
|
39
|
+
if payload is None:
|
|
40
|
+
return None, 0, False
|
|
41
|
+
|
|
42
|
+
try:
|
|
43
|
+
if isinstance(payload, bytes | bytearray | memoryview):
|
|
44
|
+
text = bytes(payload).decode("utf-8", errors="replace")
|
|
45
|
+
elif isinstance(payload, str):
|
|
46
|
+
text = payload
|
|
47
|
+
else:
|
|
48
|
+
text = json.dumps(payload, ensure_ascii=False)
|
|
49
|
+
except Exception:
|
|
50
|
+
text = str(payload)
|
|
51
|
+
|
|
52
|
+
length = len(text)
|
|
53
|
+
truncated = length > MAX_BODY_LOG_CHARS
|
|
54
|
+
preview = f"{text[:MAX_BODY_LOG_CHARS]}...[truncated]" if truncated else text
|
|
55
|
+
return preview, length, truncated
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class StreamingBufferService:
|
|
59
|
+
"""Service for handling stream-to-buffer conversion.
|
|
60
|
+
|
|
61
|
+
This service orchestrates the conversion of non-streaming requests to streaming
|
|
62
|
+
requests internally, buffers the entire stream response, and converts it back
|
|
63
|
+
to a non-streaming JSON response while maintaining full observability.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
def __init__(
    self,
    http_client: httpx.AsyncClient,
    request_tracer: "IRequestTracer | None" = None,
    hook_manager: HookManager | None = None,
    http_pool_manager: "HTTPPoolManager | None" = None,
) -> None:
    """Create a streaming buffer service.

    Args:
        http_client: HTTP client used for upstream requests.
        request_tracer: Optional tracer for request observability.
        hook_manager: Optional manager used to emit lifecycle hook events.
        http_pool_manager: Optional pool manager; when present, clients are
            fetched from it on demand instead of reusing ``http_client``.
    """
    self._http_pool_manager = http_pool_manager
    self.http_client = http_client
    self.hook_manager = hook_manager
    self.request_tracer = request_tracer
|
|
85
|
+
|
|
86
|
+
async def _get_http_client(self) -> httpx.AsyncClient:
    """Resolve the HTTP client to use for the upstream call.

    Prefers a fresh client from the pool manager when one was supplied at
    construction time; otherwise reuses the injected client.

    Returns:
        HTTP client instance
    """
    pool = self._http_pool_manager
    if pool is None:
        # No pool configured — fall back to the constructor-supplied client.
        return self.http_client
    return await pool.get_client()
|
|
98
|
+
|
|
99
|
+
async def handle_buffered_streaming_request(
    self,
    method: str,
    url: str,
    headers: dict[str, str],
    body: bytes,
    handler_config: "HandlerConfig",
    request_context: "RequestContext",
    provider_name: str = "unknown",
) -> Response:
    """Main orchestration method for stream-to-buffer conversion.

    This method:
    1. Transforms the request to enable streaming
    2. Makes a streaming request to the provider
    3. Collects and buffers the entire stream
    4. Parses the buffered stream using SSE parser if available
    5. Returns a non-streaming response with proper headers and observability

    Args:
        method: HTTP method
        url: Target API URL
        headers: Request headers
        body: Request body
        handler_config: Handler configuration with SSE parser and transformers
        request_context: Request context for observability
        provider_name: Name of the provider for hook events

    Returns:
        Non-streaming Response with JSON content

    Raises:
        HTTPException: If streaming fails or parsing fails
    """
    # Hoisted: the same request id is attached to every log/hook emission.
    request_id = getattr(request_context, "request_id", None)
    try:
        request_preview, request_size, request_truncated = _stringify_payload(body)
        logger.info(
            "streaming_buffer_request_received",
            provider=provider_name,
            method=method,
            url=url,
            request_id=request_id,
            body_preview=request_preview,
            body_size=request_size,
            body_truncated=request_truncated,
            category="streaming",
        )

        # Step 1: force `stream: true` so the provider emits a stream we can buffer.
        streaming_body = await self._transform_to_streaming_request(body)
        transformed_preview, transformed_size, transformed_truncated = (
            _stringify_payload(streaming_body)
        )
        logger.info(
            "streaming_buffer_request_transformed",
            provider=provider_name,
            method=method,
            url=url,
            request_id=request_id,
            body_preview=transformed_preview,
            body_size=transformed_size,
            body_truncated=transformed_truncated,
            body_changed=streaming_body != body,
            category="streaming",
        )

        if handler_config.response_adapter:
            logger.info(
                "streaming_buffer_response_adapter_detected",
                provider=provider_name,
                adapter_type=type(handler_config.response_adapter).__name__,
                request_id=request_id,
                category="format",
            )

        # Step 2: make the streaming request, buffer all chunks, parse them.
        (
            final_data,
            status_code,
            response_headers,
        ) = await self._collect_and_parse_stream(
            method=method,
            url=url,
            headers=headers,
            body=streaming_body,
            handler_config=handler_config,
            request_context=request_context,
            provider_name=provider_name,
        )

        # Step 3: convert the buffered result into a plain JSON response.
        return await self._build_non_streaming_response(
            final_data=final_data,
            status_code=status_code,
            response_headers=response_headers,
            request_context=request_context,
            provider_name=provider_name,
        )

    except Exception as e:
        logger.error(
            "streaming_buffer_service_error",
            method=method,
            url=url,
            error=str(e),
            provider=provider_name,
            request_id=request_id,
            exc_info=e,
        )
        # Best-effort error hook; never masks the original exception.
        await self._emit_provider_error_hook(
            method=method,
            url=url,
            provider_name=provider_name,
            request_id=request_id,
            error=e,
        )
        raise

async def _emit_provider_error_hook(
    self,
    *,
    method: str,
    url: str,
    provider_name: str,
    request_id: Any,
    error: Exception,
) -> None:
    """Emit a PROVIDER_ERROR hook event for a failed buffered request.

    No-op when no hook manager is configured. Hook emission failures are
    logged at debug level and swallowed so they cannot shadow the original
    provider error being propagated to the caller.
    """
    if not self.hook_manager:
        return
    try:
        error_context = HookContext(
            event=HookEvent.PROVIDER_ERROR,
            timestamp=datetime.now(),
            provider=provider_name,
            data={
                "url": url,
                "method": method,
                "error": str(error),
                "phase": "streaming_buffer_service",
            },
            metadata={
                "request_id": request_id,
            },
            error=error,
        )
        await self.hook_manager.emit_with_context(error_context)
    except Exception as hook_error:
        logger.debug(
            "hook_emission_failed",
            event="PROVIDER_ERROR",
            error=str(hook_error),
            category="hooks",
        )
|
|
235
|
+
|
|
236
|
+
async def _transform_to_streaming_request(self, body: bytes) -> bytes:
|
|
237
|
+
"""Transform request body to enable streaming.
|
|
238
|
+
|
|
239
|
+
Adds or modifies the 'stream' flag in the request body to enable streaming.
|
|
240
|
+
|
|
241
|
+
Args:
|
|
242
|
+
body: Original request body
|
|
243
|
+
|
|
244
|
+
Returns:
|
|
245
|
+
Modified request body with stream=true
|
|
246
|
+
"""
|
|
247
|
+
if not body:
|
|
248
|
+
# If no body, create minimal streaming request
|
|
249
|
+
return json.dumps({"stream": True}).encode("utf-8")
|
|
250
|
+
|
|
251
|
+
try:
|
|
252
|
+
# Parse existing body
|
|
253
|
+
data = json.loads(body)
|
|
254
|
+
except json.JSONDecodeError:
|
|
255
|
+
logger.warning(
|
|
256
|
+
"failed_to_parse_request_body_for_streaming_transform",
|
|
257
|
+
body_preview=body[:100].decode("utf-8", errors="ignore"),
|
|
258
|
+
)
|
|
259
|
+
# If we can't parse it, wrap it in a streaming request
|
|
260
|
+
return json.dumps({"stream": True}).encode("utf-8")
|
|
261
|
+
|
|
262
|
+
# Ensure stream flag is set to True
|
|
263
|
+
if isinstance(data, dict):
|
|
264
|
+
data["stream"] = True
|
|
265
|
+
else:
|
|
266
|
+
# If data is not a dict, wrap it
|
|
267
|
+
data = {"stream": True, "original_data": data}
|
|
268
|
+
|
|
269
|
+
return json.dumps(data).encode("utf-8")
|
|
270
|
+
|
|
271
|
+
async def _collect_and_parse_stream(
    self,
    method: str,
    url: str,
    headers: dict[str, str],
    body: bytes,
    handler_config: "HandlerConfig",
    request_context: "RequestContext",
    provider_name: str,
) -> tuple[dict[str, Any] | None, int, dict[str, str]]:
    """Collect streaming response and parse using SSE parser.

    Makes a streaming request, buffers all chunks, and applies the SSE parser
    from handler config to extract the final JSON response.

    Args:
        method: HTTP method
        url: Target URL
        headers: Request headers
        body: Request body with stream=true
        handler_config: Handler configuration with SSE parser
        request_context: Request context for observability
        provider_name: Provider name for hook events

    Returns:
        Tuple of (parsed_data, status_code, response_headers)

    Raises:
        RuntimeError: If the buffered stream yields no parsed payload
            (propagated from ``_parse_collected_stream``).
    """
    request_id = getattr(request_context, "request_id", None)

    # Prepare extensions for request ID tracking (forwarded to the HTTP client).
    extensions = {}
    if request_id:
        extensions["request_id"] = request_id

    body_preview, body_size, body_truncated = _stringify_payload(body)
    logger.info(
        "streaming_buffer_upstream_request",
        provider=provider_name,
        method=method,
        url=url,
        request_id=request_id,
        body_preview=body_preview,
        body_size=body_size,
        body_truncated=body_truncated,
        category="streaming",
    )

    # Emit PROVIDER_STREAM_START hook. Hook failures are logged but never
    # allowed to break the actual upstream request.
    if self.hook_manager:
        try:
            stream_start_context = HookContext(
                event=HookEvent.PROVIDER_STREAM_START,
                timestamp=datetime.now(),
                provider=provider_name,
                data={
                    "url": url,
                    "method": method,
                    "headers": dict(headers),
                    "request_id": request_id,
                    "buffered_mode": True,
                },
                metadata={
                    "request_id": request_id,
                },
            )
            await self.hook_manager.emit_with_context(stream_start_context)
        except Exception as e:
            logger.debug(
                "hook_emission_failed",
                event="PROVIDER_STREAM_START",
                error=str(e),
                category="hooks",
            )

    # Start streaming request and collect all chunks
    chunks: list[bytes] = []
    total_chunks = 0
    total_bytes = 0

    # Get HTTP client from pool manager if available for hook-enabled client
    http_client = await self._get_http_client()

    # Rolling tail of the stream (last 8 KiB) used to spot terminal SSE
    # markers without rescanning the whole buffer on every chunk.
    recent_buffer = bytearray()
    completion_detected = False

    async with http_client.stream(
        method=method,
        url=url,
        headers=headers,
        content=body,
        timeout=httpx.Timeout(300.0),
        extensions=extensions,
    ) as response:
        # Store response info
        status_code = response.status_code
        response_headers = dict(response.headers)

        # If error status, read error body and return it
        if status_code >= 400:
            error_body = await response.aread()
            error_preview, error_size, error_truncated = _stringify_payload(
                error_body
            )
            logger.error(
                "streaming_buffer_upstream_error",
                provider=provider_name,
                method=method,
                url=url,
                status_code=status_code,
                body_preview=error_preview,
                body_size=error_size,
                body_truncated=error_truncated,
                request_id=request_id,
                category="streaming",
            )
            try:
                error_data = json.loads(error_body)
            except json.JSONDecodeError:
                # Non-JSON error body: wrap the raw text.
                error_data = {"error": error_body.decode("utf-8", errors="ignore")}
            return error_data, status_code, response_headers

        # Collect all stream chunks
        async for chunk in response.aiter_bytes():
            chunks.append(chunk)
            total_chunks += 1
            total_bytes += len(chunk)
            recent_buffer.extend(chunk)
            if len(recent_buffer) > 8192:
                # Keep only the trailing 8 KiB for completion detection.
                del recent_buffer[:-8192]

            # Emit PROVIDER_STREAM_CHUNK hook
            if self.hook_manager:
                try:
                    chunk_context = HookContext(
                        event=HookEvent.PROVIDER_STREAM_CHUNK,
                        timestamp=datetime.now(),
                        provider=provider_name,
                        data={
                            "chunk": chunk,
                            "chunk_number": total_chunks,
                            "chunk_size": len(chunk),
                            "request_id": request_id,
                            "buffered_mode": True,
                        },
                        metadata={"request_id": request_id},
                    )
                    await self.hook_manager.emit_with_context(chunk_context)
                except Exception as e:
                    # Per-chunk hook failures are very noisy; trace level only.
                    logger.trace(
                        "hook_emission_failed",
                        event="PROVIDER_STREAM_CHUNK",
                        error=str(e),
                    )

            # Stop reading as soon as a terminal Responses API event appears in
            # the tail buffer; some upstreams keep the connection open after
            # the final event.
            if not completion_detected and (
                b"response.completed" in recent_buffer
                or b"response.failed" in recent_buffer
                or b"response.incomplete" in recent_buffer
            ):
                completion_detected = True
                logger.debug(
                    "streaming_buffer_completion_detected",
                    provider=provider_name,
                    request_id=request_id,
                    total_chunks=total_chunks,
                    total_bytes=total_bytes,
                    category="streaming",
                )
                break

    logger.info(
        "streaming_buffer_upstream_response",
        provider=provider_name,
        method=method,
        url=url,
        request_id=request_id,
        status_code=status_code,
        total_chunks=total_chunks,
        total_bytes=total_bytes,
        category="streaming",
    )

    # Emit PROVIDER_STREAM_END hook
    if self.hook_manager:
        try:
            stream_end_context = HookContext(
                event=HookEvent.PROVIDER_STREAM_END,
                timestamp=datetime.now(),
                provider=provider_name,
                data={
                    "url": url,
                    "method": method,
                    "request_id": request_id,
                    "total_chunks": total_chunks,
                    "total_bytes": total_bytes,
                    "buffered_mode": True,
                    "upstream_stream_text": b"".join(chunks).decode(
                        "utf-8", errors="replace"
                    ),
                },
                metadata={
                    "request_id": request_id,
                },
            )
            await self.hook_manager.emit_with_context(stream_end_context)
        except Exception as e:
            logger.error(
                "hook_emission_failed",
                event="PROVIDER_STREAM_END",
                error=str(e),
                category="hooks",
                exc_info=e,
            )

    # Update metrics if available
    if hasattr(request_context, "metrics"):
        request_context.metrics["stream_chunks"] = total_chunks
        request_context.metrics["stream_bytes"] = total_bytes

    # Parse the collected stream using SSE parser if available
    parsed_data = await self._parse_collected_stream(
        chunks=chunks,
        handler_config=handler_config,
        request_context=request_context,
    )

    if parsed_data is None:
        raise RuntimeError("Parsed streaming response is empty")

    return parsed_data, status_code, response_headers
|
|
501
|
+
|
|
502
|
+
async def _parse_collected_stream(
    self,
    chunks: list[bytes],
    handler_config: "HandlerConfig",
    request_context: "RequestContext",
) -> dict[str, Any] | None:
    """Parse collected stream chunks using the configured SSE parser.

    Strategies are attempted in order:
    1. Rebuild a Responses API object from the SSE events (accumulator).
    2. The handler-configured SSE parser.
    3. Direct JSON decode of the whole buffer.
    4. A generic "last data: line wins" SSE fallback.

    Args:
        chunks: Collected stream chunks
        handler_config: Handler configuration with potential SSE parser
        request_context: Request context for logging

    Returns:
        Parsed final response data or None if parsing fails

    Raises:
        RuntimeError: If no chunks were collected or no strategy succeeds.
    """
    if not chunks:
        logger.error("no_chunks_collected_for_parsing")
        raise RuntimeError("No streaming chunks were collected")

    # Combine all chunks into a single string
    full_content = b"".join(chunks).decode("utf-8", errors="replace")
    content_preview, content_size, content_truncated = _stringify_payload(
        full_content
    )
    logger.debug(
        "streaming_buffer_collected_content",
        request_id=getattr(request_context, "request_id", None),
        content_preview=content_preview,
        content_size=content_size,
        content_truncated=content_truncated,
        category="streaming",
    )

    # Optional per-request accumulator class injected via the request context
    # (used to track tool calls across the stream).
    stream_accumulator: StreamAccumulator | None = None
    accumulator_cls = getattr(request_context, "_tool_accumulator_class", None)
    if callable(accumulator_cls):
        try:
            stream_accumulator = accumulator_cls()
        except Exception as exc:  # pragma: no cover - defensive logging
            logger.debug(
                "stream_accumulator_init_failed",
                error=str(exc),
                request_id=getattr(request_context, "request_id", None),
            )
            stream_accumulator = None

    if stream_accumulator:
        self._accumulate_stream_events(
            full_content, stream_accumulator, request_context
        )

    # Attempt to reconstruct a Responses API payload from the SSE stream
    payloads = self._extract_sse_payloads(full_content)
    base_response: dict[str, Any] | None = None
    reasoning_signature: str | None = None
    for payload in payloads:
        if not isinstance(payload, dict):
            continue
        event_type = payload.get("type")
        if isinstance(event_type, str) and stream_accumulator is not None:
            with contextlib.suppress(Exception):
                stream_accumulator.accumulate(event_type, payload)
        if event_type == "response.reasoning_summary_part.added":
            part = payload.get("part")
            if isinstance(part, dict):
                signature = part.get("text") or part.get("signature")
                if isinstance(signature, str):
                    reasoning_signature = signature
        # The last event carrying a "response" object wins.
        if isinstance(payload.get("response"), dict):
            base_response = payload["response"]

    # NOTE(review): this fallback scans for the same condition as the loop
    # above (first vs. last match), so it can only fire if the first loop
    # found nothing — in which case it finds nothing either. Kept as-is.
    if base_response is None and payloads:
        # Fallback to first response created event
        for payload in payloads:
            resp = payload.get("response") if isinstance(payload, dict) else None
            if isinstance(resp, dict):
                base_response = resp
                break

    if base_response is not None:
        response_obj = dict(base_response)
        # Fill required Responses API fields that may be absent mid-stream.
        response_obj.setdefault("created_at", 0)
        response_obj.setdefault("status", "completed")
        response_obj.setdefault("model", response_obj.get("model") or "")
        response_obj.setdefault("output", response_obj.get("output") or {})
        response_obj.setdefault(
            "parallel_tool_calls", response_obj.get("parallel_tool_calls", False)
        )

        if reasoning_signature and isinstance(response_obj.get("reasoning"), dict):
            response_obj["reasoning"].setdefault("summary", [])

        # Reuse the injected accumulator when it already is a
        # ResponsesAccumulator; otherwise build one and replay every event.
        accumulator_for_rebuild: ResponsesAccumulator | None = None
        if isinstance(stream_accumulator, ResponsesAccumulator):
            accumulator_for_rebuild = stream_accumulator
        else:
            accumulator_for_rebuild = ResponsesAccumulator()
            for payload in payloads:
                if not isinstance(payload, dict):
                    continue
                event_type = payload.get("type")
                if isinstance(event_type, str):
                    with contextlib.suppress(Exception):
                        accumulator_for_rebuild.accumulate(event_type, payload)

        if accumulator_for_rebuild is not None:
            completed_payload = accumulator_for_rebuild.get_completed_response()
            logger.debug(
                "streaming_buffer_accumulator_rebuild_attempt",
                completed=bool(completed_payload),
            )
            if completed_payload is not None:
                # Accumulator produced a fully formed response; use it as-is.
                response_obj = completed_payload
                return response_obj
            try:
                response_obj = accumulator_for_rebuild.rebuild_response_object(
                    response_obj
                )
                logger.info(
                    "streaming_buffer_parser_strategy",
                    strategy="accumulator_rebuild",
                    request_id=getattr(request_context, "request_id", None),
                    category="streaming",
                )
                # Validation is best-effort, used only for a typed debug log.
                with contextlib.suppress(ValidationError):
                    typed_payload = openai_models.ResponseObject.model_validate(
                        response_obj
                    )
                    logger.debug(
                        "streaming_buffer_rebuilt_response",
                        response=typed_payload.model_dump(),
                        category="streaming",
                        request_id=getattr(request_context, "request_id", None),
                    )
            except Exception as exc:  # pragma: no cover - defensive logging
                logger.debug(
                    "response_rebuild_failed",
                    error=str(exc),
                    request_id=getattr(request_context, "request_id", None),
                )

        if not response_obj.get("usage"):
            # Backfill usage from the raw chunks when the rebuilt object
            # lacks it, normalized to the Responses API usage shape.
            usage = self._extract_usage_from_chunks(chunks)
            if usage:
                response_obj["usage"] = {
                    "input_tokens": usage.get("input_tokens", 0),
                    "input_tokens_details": {"cached_tokens": 0},
                    "output_tokens": usage.get("output_tokens", 0),
                    "output_tokens_details": {"reasoning_tokens": 0},
                    "total_tokens": usage.get("total_tokens", 0),
                }

        return response_obj

    # Try using the configured SSE parser first
    logger.debug(
        "parsing_collected_stream",
        content_preview=full_content[:200],
        request_id=getattr(request_context, "request_id", None),
    )

    if handler_config.sse_parser:
        try:
            parsed_data = handler_config.sse_parser(full_content)
            if parsed_data is not None:
                logger.debug(
                    "sse_parser_success",
                    parsed_type=type(parsed_data).__name__,
                    request_id=getattr(request_context, "request_id", None),
                )
                logger.info(
                    "streaming_buffer_parser_strategy",
                    strategy="sse_parser",
                    request_id=getattr(request_context, "request_id", None),
                    category="streaming",
                )

                # Rebuild response with stream accumulator if available
                if stream_accumulator and isinstance(parsed_data, dict):
                    try:
                        parsed_data = stream_accumulator.rebuild_response_object(
                            parsed_data
                        )
                        logger.debug(
                            "response_object_rebuilt",
                            request_id=getattr(request_context, "request_id", None),
                        )
                    except Exception as e:
                        logger.warning(
                            "response_rebuild_failed",
                            error=str(e),
                            request_id=getattr(request_context, "request_id", None),
                            exc_info=e,
                        )

                return parsed_data
            else:
                logger.warning(
                    "sse_parser_returned_none",
                    content_preview=full_content[:200],
                    request_id=getattr(request_context, "request_id", None),
                )
        except Exception as e:
            logger.warning(
                "sse_parser_failed",
                error=str(e),
                content_preview=full_content[:200],
                request_id=getattr(request_context, "request_id", None),
            )

    # Fallback: try to parse as JSON if it's not SSE format
    try:
        parsed_json = json.loads(full_content.strip())
        if isinstance(parsed_json, dict):
            logger.info(
                "streaming_buffer_parser_strategy",
                strategy="direct_json",
                request_id=getattr(request_context, "request_id", None),
                category="streaming",
            )
            return parsed_json
    else:
            pass
    except json.JSONDecodeError:
        pass
|
|
761
|
+
|
|
762
|
+
@staticmethod
|
|
763
|
+
def _extract_sse_payloads(content: str) -> list[dict[str, Any]]:
|
|
764
|
+
"""Extract JSON payloads from a raw SSE buffer."""
|
|
765
|
+
|
|
766
|
+
payloads: list[dict[str, Any]] = []
|
|
767
|
+
current: list[str] = []
|
|
768
|
+
for line in content.splitlines():
|
|
769
|
+
if line.startswith("data: "):
|
|
770
|
+
current.append(line[6:])
|
|
771
|
+
elif line.strip() == "" and current:
|
|
772
|
+
payload = "".join(current)
|
|
773
|
+
if payload and payload != "[DONE]":
|
|
774
|
+
with contextlib.suppress(json.JSONDecodeError):
|
|
775
|
+
payloads.append(json.loads(payload))
|
|
776
|
+
current = []
|
|
777
|
+
if current:
|
|
778
|
+
payload = "".join(current)
|
|
779
|
+
if payload and payload != "[DONE]":
|
|
780
|
+
with contextlib.suppress(json.JSONDecodeError):
|
|
781
|
+
payloads.append(json.loads(payload))
|
|
782
|
+
return payloads
|
|
783
|
+
|
|
784
|
+
def _extract_from_generic_sse(self, content: str) -> dict[str, Any] | None:
|
|
785
|
+
"""Extract final JSON from generic SSE format.
|
|
786
|
+
|
|
787
|
+
This is a fallback parser that tries to extract JSON from common SSE patterns.
|
|
788
|
+
|
|
789
|
+
Args:
|
|
790
|
+
content: Full SSE content
|
|
791
|
+
|
|
792
|
+
Returns:
|
|
793
|
+
Extracted JSON data or None if not found
|
|
794
|
+
"""
|
|
795
|
+
lines = content.strip().split("\n")
|
|
796
|
+
last_json_data = None
|
|
797
|
+
|
|
798
|
+
for line in lines:
|
|
799
|
+
line = line.strip()
|
|
800
|
+
|
|
801
|
+
# Look for data lines
|
|
802
|
+
if line.startswith("data: "):
|
|
803
|
+
data_str = line[6:].strip()
|
|
804
|
+
|
|
805
|
+
# Skip [DONE] markers
|
|
806
|
+
if data_str == "[DONE]":
|
|
807
|
+
continue
|
|
808
|
+
|
|
809
|
+
try:
|
|
810
|
+
json_data = json.loads(data_str)
|
|
811
|
+
# Keep track of the last valid JSON we find
|
|
812
|
+
last_json_data = json_data
|
|
813
|
+
except json.JSONDecodeError:
|
|
814
|
+
continue
|
|
815
|
+
|
|
816
|
+
if isinstance(last_json_data, dict) and "response" in last_json_data:
|
|
817
|
+
response_payload = last_json_data["response"]
|
|
818
|
+
if isinstance(response_payload, dict):
|
|
819
|
+
return response_payload
|
|
820
|
+
|
|
821
|
+
if isinstance(last_json_data, dict):
|
|
822
|
+
return last_json_data
|
|
823
|
+
|
|
824
|
+
return None
|
|
825
|
+
|
|
826
|
+
@staticmethod
def _accumulate_stream_events(
    full_content: str,
    accumulator: StreamAccumulator,
    request_context: "RequestContext",
) -> None:
    """Feed SSE events from the buffered content into the stream accumulator.

    Splits the buffer into SSE events (blank-line delimited), decodes each
    event's JSON data, and forwards (event_name, data) pairs to the
    accumulator. Completed tool calls are then copied into
    ``request_context.metadata``. All accumulator failures are swallowed and
    logged at debug level — this is best-effort enrichment, never fatal.

    Args:
        full_content: The complete decoded SSE stream text.
        accumulator: Project stream accumulator receiving the events.
        request_context: Request context whose metadata is updated in place.
    """

    events = full_content.split("\n\n")
    for event in events:
        event = event.strip()
        if not event:
            continue

        event_name = ""
        data_lines: list[str] = []
        for raw_line in event.split("\n"):
            line = raw_line.strip()
            if line.startswith("event:"):
                event_name = line[6:].strip()
            elif line.startswith("data:"):
                payload = line[5:].lstrip()
                if payload == "[DONE]":
                    # Terminal sentinel: drop this event entirely.
                    data_lines = []
                    break
                data_lines.append(payload)

        if not data_lines:
            continue

        try:
            # Multi-line data fields are rejoined with "\n" per the SSE spec.
            event_data = json.loads("\n".join(data_lines))
        except json.JSONDecodeError:
            continue

        try:
            accumulator.accumulate(event_name, event_data)
        except Exception as exc:  # pragma: no cover - defensive logging
            logger.debug(
                "tool_accumulator_accumulate_failed",
                error=str(exc),
                event_name=event_name,
                request_id=getattr(request_context, "request_id", None),
            )

    try:
        # Store tool calls in request context metadata
        tool_calls = accumulator.get_complete_tool_calls()
        if tool_calls:
            existing = request_context.metadata.get("tool_calls")
            if isinstance(existing, list):
                # Append to tool calls recorded earlier in the request.
                existing.extend(tool_calls)
            else:
                request_context.metadata["tool_calls"] = tool_calls

        # Also store the accumulator itself for potential later use
        request_context.metadata["stream_accumulator"] = accumulator
    except Exception as exc:  # pragma: no cover - defensive logging
        logger.debug(
            "tool_accumulator_finalize_failed",
            error=str(exc),
            request_id=getattr(request_context, "request_id", None),
        )
|
|
889
|
+
|
|
890
|
+
def _extract_usage_from_chunks(self, chunks: list[bytes]) -> dict[str, int] | None:
|
|
891
|
+
"""Extract token usage from SSE chunks and normalize to Response API shape.
|
|
892
|
+
|
|
893
|
+
Tries to find the last JSON object containing a "usage" field and returns a
|
|
894
|
+
dict with keys: input_tokens, output_tokens, total_tokens.
|
|
895
|
+
"""
|
|
896
|
+
last_usage: dict[str, Any] | None = None
|
|
897
|
+
for chunk in chunks:
|
|
898
|
+
try:
|
|
899
|
+
text = chunk.decode("utf-8", errors="ignore")
|
|
900
|
+
except Exception:
|
|
901
|
+
continue
|
|
902
|
+
for part in text.split("\n\n"):
|
|
903
|
+
for line in part.splitlines():
|
|
904
|
+
line = line.strip()
|
|
905
|
+
if not line.startswith("data: "):
|
|
906
|
+
continue
|
|
907
|
+
data_str = line[6:].strip()
|
|
908
|
+
if data_str == "[DONE]":
|
|
909
|
+
continue
|
|
910
|
+
try:
|
|
911
|
+
obj = json.loads(data_str)
|
|
912
|
+
except json.JSONDecodeError:
|
|
913
|
+
continue
|
|
914
|
+
# Accept direct usage at top-level or nested
|
|
915
|
+
usage_obj = None
|
|
916
|
+
if isinstance(obj, dict) and "usage" in obj:
|
|
917
|
+
usage_obj = obj["usage"]
|
|
918
|
+
elif (
|
|
919
|
+
isinstance(obj, dict)
|
|
920
|
+
and "response" in obj
|
|
921
|
+
and isinstance(obj["response"], dict)
|
|
922
|
+
):
|
|
923
|
+
# Some formats nest usage under response
|
|
924
|
+
usage_obj = obj["response"].get("usage")
|
|
925
|
+
if isinstance(usage_obj, dict):
|
|
926
|
+
last_usage = usage_obj
|
|
927
|
+
|
|
928
|
+
if not isinstance(last_usage, dict):
|
|
929
|
+
return None
|
|
930
|
+
|
|
931
|
+
# Normalize keys
|
|
932
|
+
input_tokens = None
|
|
933
|
+
output_tokens = None
|
|
934
|
+
total_tokens = None
|
|
935
|
+
|
|
936
|
+
if "input_tokens" in last_usage or "output_tokens" in last_usage:
|
|
937
|
+
input_tokens = int(last_usage.get("input_tokens", 0) or 0)
|
|
938
|
+
output_tokens = int(last_usage.get("output_tokens", 0) or 0)
|
|
939
|
+
total_tokens = int(
|
|
940
|
+
last_usage.get("total_tokens", input_tokens + output_tokens)
|
|
941
|
+
)
|
|
942
|
+
elif "prompt_tokens" in last_usage or "completion_tokens" in last_usage:
|
|
943
|
+
# Map OpenAI-style to Response API style
|
|
944
|
+
input_tokens = int(last_usage.get("prompt_tokens", 0) or 0)
|
|
945
|
+
output_tokens = int(last_usage.get("completion_tokens", 0) or 0)
|
|
946
|
+
total_tokens = int(
|
|
947
|
+
last_usage.get("total_tokens", input_tokens + output_tokens)
|
|
948
|
+
)
|
|
949
|
+
else:
|
|
950
|
+
return None
|
|
951
|
+
|
|
952
|
+
return {
|
|
953
|
+
"input_tokens": input_tokens or 0,
|
|
954
|
+
"output_tokens": output_tokens or 0,
|
|
955
|
+
"total_tokens": total_tokens
|
|
956
|
+
or ((input_tokens or 0) + (output_tokens or 0)),
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
async def _build_non_streaming_response(
    self,
    final_data: dict[str, Any] | None,
    status_code: int,
    response_headers: dict[str, str],
    request_context: "RequestContext",
    provider_name: str,
) -> Response:
    """Build the final non-streaming response.

    Serializes the parsed payload to JSON and carries over the upstream
    headers, minus streaming-specific ones.

    Args:
        final_data: Parsed response data
        status_code: HTTP status code from streaming response
        response_headers: Headers from streaming response
        request_context: Request context for request ID
        provider_name: Provider name for log events

    Returns:
        Non-streaming Response with JSON content

    Raises:
        RuntimeError: If final_data is None (nothing was extracted upstream).
    """
    req_id = getattr(request_context, "request_id", None)

    if final_data is None:
        logger.error(
            "streaming_buffer_empty_final_data",
            provider=provider_name,
            request_id=req_id,
            category="streaming",
        )
        raise RuntimeError("No data could be extracted from streaming response")

    body = json.dumps(final_data).encode("utf-8")
    body_preview, body_size, body_truncated = _stringify_payload(final_data)

    # Carry over upstream headers, minus streaming-specific ones and
    # content-length (the payload length changed after buffering).
    dropped = {"transfer-encoding", "connection", "cache-control", "content-length"}
    out_headers = {
        name: value
        for name, value in response_headers.items()
        if name.lower() not in dropped
    }

    # Note: Don't set Content-Length as the response may be wrapped by
    # streaming middleware.
    out_headers["Content-Type"] = "application/json"
    if req_id:
        out_headers["X-Request-ID"] = req_id

    logger.debug(
        "non_streaming_response_built",
        status_code=status_code,
        content_length=len(body),
        data_keys=list(final_data.keys()) if isinstance(final_data, dict) else None,
        request_id=req_id,
    )

    logger.info(
        "streaming_buffer_response_ready",
        provider=provider_name,
        status_code=status_code,
        request_id=req_id,
        body_preview=body_preview,
        body_size=body_size,
        body_truncated=body_truncated,
        category="streaming",
    )

    result = Response(
        content=body,
        status_code=status_code,
        headers=out_headers,
        media_type="application/json",
    )

    # Explicitly strip the auto-added content-length header to avoid
    # conflicts when middleware converts this into a streaming response.
    for header_name in ("content-length", "Content-Length"):
        if header_name in result.headers:
            del result.headers[header_name]

    return result
|