ccproxy-api 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. ccproxy/__init__.py +4 -0
  2. ccproxy/__main__.py +7 -0
  3. ccproxy/_version.py +21 -0
  4. ccproxy/adapters/__init__.py +11 -0
  5. ccproxy/adapters/base.py +80 -0
  6. ccproxy/adapters/openai/__init__.py +43 -0
  7. ccproxy/adapters/openai/adapter.py +915 -0
  8. ccproxy/adapters/openai/models.py +412 -0
  9. ccproxy/adapters/openai/streaming.py +449 -0
  10. ccproxy/api/__init__.py +28 -0
  11. ccproxy/api/app.py +225 -0
  12. ccproxy/api/dependencies.py +140 -0
  13. ccproxy/api/middleware/__init__.py +11 -0
  14. ccproxy/api/middleware/auth.py +0 -0
  15. ccproxy/api/middleware/cors.py +55 -0
  16. ccproxy/api/middleware/errors.py +703 -0
  17. ccproxy/api/middleware/headers.py +51 -0
  18. ccproxy/api/middleware/logging.py +175 -0
  19. ccproxy/api/middleware/request_id.py +69 -0
  20. ccproxy/api/middleware/server_header.py +62 -0
  21. ccproxy/api/responses.py +84 -0
  22. ccproxy/api/routes/__init__.py +16 -0
  23. ccproxy/api/routes/claude.py +181 -0
  24. ccproxy/api/routes/health.py +489 -0
  25. ccproxy/api/routes/metrics.py +1033 -0
  26. ccproxy/api/routes/proxy.py +238 -0
  27. ccproxy/auth/__init__.py +75 -0
  28. ccproxy/auth/bearer.py +68 -0
  29. ccproxy/auth/credentials_adapter.py +93 -0
  30. ccproxy/auth/dependencies.py +229 -0
  31. ccproxy/auth/exceptions.py +79 -0
  32. ccproxy/auth/manager.py +102 -0
  33. ccproxy/auth/models.py +118 -0
  34. ccproxy/auth/oauth/__init__.py +26 -0
  35. ccproxy/auth/oauth/models.py +49 -0
  36. ccproxy/auth/oauth/routes.py +396 -0
  37. ccproxy/auth/oauth/storage.py +0 -0
  38. ccproxy/auth/storage/__init__.py +12 -0
  39. ccproxy/auth/storage/base.py +57 -0
  40. ccproxy/auth/storage/json_file.py +159 -0
  41. ccproxy/auth/storage/keyring.py +192 -0
  42. ccproxy/claude_sdk/__init__.py +20 -0
  43. ccproxy/claude_sdk/client.py +169 -0
  44. ccproxy/claude_sdk/converter.py +331 -0
  45. ccproxy/claude_sdk/options.py +120 -0
  46. ccproxy/cli/__init__.py +14 -0
  47. ccproxy/cli/commands/__init__.py +8 -0
  48. ccproxy/cli/commands/auth.py +553 -0
  49. ccproxy/cli/commands/config/__init__.py +14 -0
  50. ccproxy/cli/commands/config/commands.py +766 -0
  51. ccproxy/cli/commands/config/schema_commands.py +119 -0
  52. ccproxy/cli/commands/serve.py +630 -0
  53. ccproxy/cli/docker/__init__.py +34 -0
  54. ccproxy/cli/docker/adapter_factory.py +157 -0
  55. ccproxy/cli/docker/params.py +278 -0
  56. ccproxy/cli/helpers.py +144 -0
  57. ccproxy/cli/main.py +193 -0
  58. ccproxy/cli/options/__init__.py +14 -0
  59. ccproxy/cli/options/claude_options.py +216 -0
  60. ccproxy/cli/options/core_options.py +40 -0
  61. ccproxy/cli/options/security_options.py +48 -0
  62. ccproxy/cli/options/server_options.py +117 -0
  63. ccproxy/config/__init__.py +40 -0
  64. ccproxy/config/auth.py +154 -0
  65. ccproxy/config/claude.py +124 -0
  66. ccproxy/config/cors.py +79 -0
  67. ccproxy/config/discovery.py +87 -0
  68. ccproxy/config/docker_settings.py +265 -0
  69. ccproxy/config/loader.py +108 -0
  70. ccproxy/config/observability.py +158 -0
  71. ccproxy/config/pricing.py +88 -0
  72. ccproxy/config/reverse_proxy.py +31 -0
  73. ccproxy/config/scheduler.py +89 -0
  74. ccproxy/config/security.py +14 -0
  75. ccproxy/config/server.py +81 -0
  76. ccproxy/config/settings.py +534 -0
  77. ccproxy/config/validators.py +231 -0
  78. ccproxy/core/__init__.py +274 -0
  79. ccproxy/core/async_utils.py +675 -0
  80. ccproxy/core/constants.py +97 -0
  81. ccproxy/core/errors.py +256 -0
  82. ccproxy/core/http.py +328 -0
  83. ccproxy/core/http_transformers.py +428 -0
  84. ccproxy/core/interfaces.py +247 -0
  85. ccproxy/core/logging.py +189 -0
  86. ccproxy/core/middleware.py +114 -0
  87. ccproxy/core/proxy.py +143 -0
  88. ccproxy/core/system.py +38 -0
  89. ccproxy/core/transformers.py +259 -0
  90. ccproxy/core/types.py +129 -0
  91. ccproxy/core/validators.py +288 -0
  92. ccproxy/docker/__init__.py +67 -0
  93. ccproxy/docker/adapter.py +588 -0
  94. ccproxy/docker/docker_path.py +207 -0
  95. ccproxy/docker/middleware.py +103 -0
  96. ccproxy/docker/models.py +228 -0
  97. ccproxy/docker/protocol.py +192 -0
  98. ccproxy/docker/stream_process.py +264 -0
  99. ccproxy/docker/validators.py +173 -0
  100. ccproxy/models/__init__.py +123 -0
  101. ccproxy/models/errors.py +42 -0
  102. ccproxy/models/messages.py +243 -0
  103. ccproxy/models/requests.py +85 -0
  104. ccproxy/models/responses.py +227 -0
  105. ccproxy/models/types.py +102 -0
  106. ccproxy/observability/__init__.py +51 -0
  107. ccproxy/observability/access_logger.py +400 -0
  108. ccproxy/observability/context.py +447 -0
  109. ccproxy/observability/metrics.py +539 -0
  110. ccproxy/observability/pushgateway.py +366 -0
  111. ccproxy/observability/sse_events.py +303 -0
  112. ccproxy/observability/stats_printer.py +755 -0
  113. ccproxy/observability/storage/__init__.py +1 -0
  114. ccproxy/observability/storage/duckdb_simple.py +665 -0
  115. ccproxy/observability/storage/models.py +55 -0
  116. ccproxy/pricing/__init__.py +19 -0
  117. ccproxy/pricing/cache.py +212 -0
  118. ccproxy/pricing/loader.py +267 -0
  119. ccproxy/pricing/models.py +106 -0
  120. ccproxy/pricing/updater.py +309 -0
  121. ccproxy/scheduler/__init__.py +39 -0
  122. ccproxy/scheduler/core.py +335 -0
  123. ccproxy/scheduler/exceptions.py +34 -0
  124. ccproxy/scheduler/manager.py +186 -0
  125. ccproxy/scheduler/registry.py +150 -0
  126. ccproxy/scheduler/tasks.py +484 -0
  127. ccproxy/services/__init__.py +10 -0
  128. ccproxy/services/claude_sdk_service.py +614 -0
  129. ccproxy/services/credentials/__init__.py +55 -0
  130. ccproxy/services/credentials/config.py +105 -0
  131. ccproxy/services/credentials/manager.py +562 -0
  132. ccproxy/services/credentials/oauth_client.py +482 -0
  133. ccproxy/services/proxy_service.py +1536 -0
  134. ccproxy/static/.keep +0 -0
  135. ccproxy/testing/__init__.py +34 -0
  136. ccproxy/testing/config.py +148 -0
  137. ccproxy/testing/content_generation.py +197 -0
  138. ccproxy/testing/mock_responses.py +262 -0
  139. ccproxy/testing/response_handlers.py +161 -0
  140. ccproxy/testing/scenarios.py +241 -0
  141. ccproxy/utils/__init__.py +6 -0
  142. ccproxy/utils/cost_calculator.py +210 -0
  143. ccproxy/utils/streaming_metrics.py +199 -0
  144. ccproxy_api-0.1.0.dist-info/METADATA +253 -0
  145. ccproxy_api-0.1.0.dist-info/RECORD +148 -0
  146. ccproxy_api-0.1.0.dist-info/WHEEL +4 -0
  147. ccproxy_api-0.1.0.dist-info/entry_points.txt +2 -0
  148. ccproxy_api-0.1.0.dist-info/licenses/LICENSE +21 -0
ccproxy/services/proxy_service.py
@@ -0,0 +1,1536 @@
1
+ """Proxy service for orchestrating Claude API requests with business logic."""
2
+
3
+ import asyncio
4
+ import json
5
+ import logging
6
+ import os
7
+ import random
8
+ import time
9
+ import urllib.parse
10
+ from collections.abc import AsyncGenerator
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING, Any
13
+
14
+ import httpx
15
+ import structlog
16
+ from fastapi import HTTPException, Request
17
+ from fastapi.responses import StreamingResponse
18
+ from pydantic import BaseModel
19
+ from typing_extensions import TypedDict
20
+
21
+ from ccproxy.config.settings import Settings
22
+ from ccproxy.core.http import BaseProxyClient
23
+ from ccproxy.core.http_transformers import (
24
+ HTTPRequestTransformer,
25
+ HTTPResponseTransformer,
26
+ )
27
+ from ccproxy.observability import (
28
+ PrometheusMetrics,
29
+ get_metrics,
30
+ request_context,
31
+ timed_operation,
32
+ )
33
+ from ccproxy.observability.access_logger import log_request_access
34
+ from ccproxy.services.credentials.manager import CredentialsManager
35
+ from ccproxy.testing import RealisticMockResponseGenerator
36
+
37
+
38
+ if TYPE_CHECKING:
39
+ from ccproxy.observability.context import RequestContext
40
+
41
+
42
+ class RequestData(TypedDict):
43
+ """Typed structure for transformed request data."""
44
+
45
+ method: str
46
+ url: str
47
+ headers: dict[str, str]
48
+ body: bytes | None
49
+
50
+
51
+ class ResponseData(TypedDict):
52
+ """Typed structure for transformed response data."""
53
+
54
+ status_code: int
55
+ headers: dict[str, str]
56
+ body: bytes
57
+
58
+
59
+ logger = structlog.get_logger(__name__)
60
+
61
+
62
+ class ProxyService:
63
+ """Claude-specific proxy orchestration with business logic.
64
+
65
+ This service orchestrates the complete proxy flow including:
66
+ - Authentication management
67
+ - Request/response transformations
68
+ - Metrics collection
69
+ - Error handling and logging
70
+
71
+ Pure HTTP forwarding is delegated to BaseProxyClient.
72
+ """
73
+
74
+ SENSITIVE_HEADERS = {"authorization", "x-api-key", "cookie", "set-cookie"}
75
+
76
+ def __init__(
77
+ self,
78
+ proxy_client: BaseProxyClient,
79
+ credentials_manager: CredentialsManager,
80
+ settings: Settings,
81
+ proxy_mode: str = "full",
82
+ target_base_url: str = "https://api.anthropic.com",
83
+ metrics: PrometheusMetrics | None = None,
84
+ ) -> None:
85
+ """Initialize the proxy service.
86
+
87
+ Args:
88
+ proxy_client: HTTP client for pure forwarding
89
+ credentials_manager: Authentication manager
90
+ settings: Application settings
91
+ proxy_mode: Transformation mode - "minimal" or "full"
92
+ target_base_url: Base URL for the target API
93
+ metrics: Prometheus metrics collector (optional)
94
+ """
95
+ self.proxy_client = proxy_client
96
+ self.credentials_manager = credentials_manager
97
+ self.settings = settings
98
+ self.proxy_mode = proxy_mode
99
+ self.target_base_url = target_base_url.rstrip("/")
100
+ self.metrics = metrics or get_metrics()
101
+
102
+ # Create concrete transformers
103
+ self.request_transformer = HTTPRequestTransformer()
104
+ self.response_transformer = HTTPResponseTransformer()
105
+
106
+ # Create OpenAI adapter for stream transformation
107
+ from ccproxy.adapters.openai.adapter import OpenAIAdapter
108
+
109
+ self.openai_adapter = OpenAIAdapter()
110
+
111
+ # Create mock response generator for bypass mode
112
+ self.mock_generator = RealisticMockResponseGenerator()
113
+
114
+ # Cache environment-based configuration
115
+ self._proxy_url = self._init_proxy_url()
116
+ self._ssl_context = self._init_ssl_context()
117
+ self._verbose_streaming = (
118
+ os.environ.get("CCPROXY_VERBOSE_STREAMING", "false").lower() == "true"
119
+ )
120
+ self._verbose_api = (
121
+ os.environ.get("CCPROXY_VERBOSE_API", "false").lower() == "true"
122
+ )
123
+ self._request_log_dir = os.environ.get("CCPROXY_REQUEST_LOG_DIR")
124
+
125
+ # Create request log directory if specified
126
+ if self._request_log_dir and self._verbose_api:
127
+ Path(self._request_log_dir).mkdir(parents=True, exist_ok=True)
128
+
129
+ # Track current request ID for logging
130
+ self._current_request_id: str | None = None
131
+
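For orientation, a minimal construction sketch of the service above. The zero-argument constructors for BaseProxyClient, CredentialsManager, and Settings are assumptions for illustration only; the real wiring most likely lives in the application's dependency layer (ccproxy/api/dependencies.py in the file list above).

    from ccproxy.config.settings import Settings
    from ccproxy.core.http import BaseProxyClient
    from ccproxy.services.credentials.manager import CredentialsManager
    from ccproxy.services.proxy_service import ProxyService

    # Assumed construction; see the package's dependency wiring for the real factories.
    settings = Settings()
    service = ProxyService(
        proxy_client=BaseProxyClient(),
        credentials_manager=CredentialsManager(),
        settings=settings,
        proxy_mode="full",                            # or "minimal"
        target_base_url="https://api.anthropic.com",  # default upstream
    )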
132
+ def _init_proxy_url(self) -> str | None:
133
+ """Initialize proxy URL from environment variables."""
134
+ # Check for standard proxy environment variables
135
+ # For HTTPS requests, prioritize HTTPS_PROXY
136
+ https_proxy = os.environ.get("HTTPS_PROXY") or os.environ.get("https_proxy")
137
+ all_proxy = os.environ.get("ALL_PROXY")
138
+ http_proxy = os.environ.get("HTTP_PROXY") or os.environ.get("http_proxy")
139
+
140
+ proxy_url = https_proxy or all_proxy or http_proxy
141
+
142
+ if proxy_url:
143
+ logger.debug("proxy_configured", proxy_url=proxy_url)
144
+
145
+ return proxy_url
146
+
147
+ def _init_ssl_context(self) -> str | bool:
148
+ """Initialize SSL context configuration from environment variables."""
149
+ # Check for custom CA bundle
150
+ ca_bundle = os.environ.get("REQUESTS_CA_BUNDLE") or os.environ.get(
151
+ "SSL_CERT_FILE"
152
+ )
153
+
154
+ # Check if SSL verification should be disabled (NOT RECOMMENDED)
155
+ ssl_verify = os.environ.get("SSL_VERIFY", "true").lower()
156
+
157
+ if ca_bundle and Path(ca_bundle).exists():
158
+ logger.info("ca_bundle_configured", ca_bundle=ca_bundle)
159
+ return ca_bundle
160
+ elif ssl_verify in ("false", "0", "no"):
161
+ logger.warning("ssl_verification_disabled")
162
+ return False
163
+ else:
164
+ logger.debug("ssl_verification_default")
165
+ return True
166
+
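The two helpers above read their configuration from the process environment, and the values are cached in __init__, so they must be set before the service is constructed. A sketch of the relevant variables (values are placeholders):

    import os

    # Outbound proxy: HTTPS_PROXY wins, then ALL_PROXY, then HTTP_PROXY.
    os.environ["HTTPS_PROXY"] = "http://corp-proxy.internal:3128"

    # TLS: point at a custom CA bundle, or set SSL_VERIFY=false to disable (not recommended).
    os.environ["REQUESTS_CA_BUNDLE"] = "/etc/ssl/certs/corp-ca.pem"
    # os.environ["SSL_VERIFY"] = "false"

    # Verbose API logging, plus one JSON file per request/response pair.
    os.environ["CCPROXY_VERBOSE_API"] = "true"
    os.environ["CCPROXY_REQUEST_LOG_DIR"] = "/tmp/ccproxy-requests"
    os.environ["CCPROXY_VERBOSE_STREAMING"] = "false"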
167
+ async def handle_request(
168
+ self,
169
+ method: str,
170
+ path: str,
171
+ headers: dict[str, str],
172
+ body: bytes | None = None,
173
+ query_params: dict[str, str | list[str]] | None = None,
174
+ timeout: float = 240.0,
175
+ request: Request | None = None, # Optional FastAPI Request object
176
+ ) -> tuple[int, dict[str, str], bytes] | StreamingResponse:
177
+ """Handle a proxy request with full business logic orchestration.
178
+
179
+ Args:
180
+ method: HTTP method
181
+ path: Request path (without /unclaude prefix)
182
+ headers: Request headers
183
+ body: Request body
184
+ query_params: Query parameters
185
+ timeout: Request timeout in seconds
186
+ request: Optional FastAPI Request object for accessing request context
187
+
188
+ Returns:
189
+ Tuple of (status_code, headers, body) or StreamingResponse for streaming
190
+
191
+ Raises:
192
+ HTTPException: If request fails
193
+ """
194
+ # Extract request metadata
195
+ model, streaming = self._extract_request_metadata(body)
196
+ endpoint = path.split("/")[-1] if path else "unknown"
197
+
198
+ # Handle /v1/models endpoint specially
199
+ if path == "/v1/models":
200
+ return await self.handle_models_request(headers, timeout)
201
+
202
+ # Use existing context from request if available, otherwise create new one
203
+ if request and hasattr(request, "state") and hasattr(request.state, "context"):
204
+ # Use existing context from middleware
205
+ ctx = request.state.context
206
+ # Add service-specific metadata
207
+ ctx.add_metadata(
208
+ endpoint=endpoint,
209
+ model=model,
210
+ streaming=streaming,
211
+ service_type="proxy_service",
212
+ )
213
+ # Create a context manager that preserves the existing context's lifecycle
214
+ # This ensures __aexit__ is called for proper access logging
215
+ from contextlib import asynccontextmanager
216
+
217
+ @asynccontextmanager
218
+ async def existing_context_manager() -> AsyncGenerator[Any, None]:
219
+ try:
220
+ yield ctx
221
+ finally:
222
+ # Let the existing context handle its own lifecycle
223
+ # The middleware or parent context will call __aexit__
224
+ pass
225
+
226
+ context_manager: Any = existing_context_manager()
227
+ else:
228
+ # Create new context for observability
229
+ context_manager = request_context(
230
+ method=method,
231
+ path=path,
232
+ endpoint=endpoint,
233
+ model=model,
234
+ streaming=streaming,
235
+ service_type="proxy_service",
236
+ metrics=self.metrics,
237
+ )
238
+
239
+ async with context_manager as ctx:
240
+ # Store the current request ID for file logging
241
+ self._current_request_id = ctx.request_id
242
+
243
+ try:
244
+ # 1. Authentication - get access token
245
+ async with timed_operation("oauth_token", ctx.request_id):
246
+ logger.debug("oauth_token_retrieval_start")
247
+ access_token = await self._get_access_token()
248
+
249
+ # 2. Request transformation
250
+ async with timed_operation("request_transform", ctx.request_id):
251
+ logger.debug("request_transform_start")
252
+ transformed_request = await self._transform_request(
253
+ method, path, headers, body, query_params, access_token
254
+ )
255
+
256
+ # 3. Check for bypass header to skip upstream forwarding
257
+ bypass_upstream = (
258
+ headers.get("X-CCProxy-Bypass-Upstream", "").lower() == "true"
259
+ )
260
+
261
+ if bypass_upstream:
262
+ logger.debug("bypassing_upstream_forwarding_due_to_header")
263
+ # Determine message type from request body for realistic response generation
264
+ message_type = self._extract_message_type_from_body(body)
265
+
266
+ # Check if this will be a streaming response
267
+ should_stream = streaming or self._should_stream_response(
268
+ transformed_request["headers"]
269
+ )
270
+
271
+ # Determine response format based on original request path
272
+ is_openai_format = self.response_transformer._is_openai_request(
273
+ path
274
+ )
275
+
276
+ if should_stream:
277
+ return await self._generate_bypass_streaming_response(
278
+ model, is_openai_format, ctx, message_type
279
+ )
280
+ else:
281
+ return await self._generate_bypass_standard_response(
282
+ model, is_openai_format, ctx, message_type
283
+ )
284
+
285
+ # 4. Forward request using proxy client
286
+ logger.debug("request_forwarding_start", url=transformed_request["url"])
287
+
288
+ # Check if this will be a streaming response
289
+ should_stream = streaming or self._should_stream_response(
290
+ transformed_request["headers"]
291
+ )
292
+
293
+ if should_stream:
294
+ logger.debug("streaming_response_detected")
295
+ return await self._handle_streaming_request(
296
+ transformed_request, path, timeout, ctx
297
+ )
298
+ else:
299
+ logger.debug("non_streaming_response_detected")
300
+
301
+ # Log the outgoing request if verbose API logging is enabled
302
+ self._log_verbose_api_request(transformed_request)
303
+
304
+ # Handle regular request
305
+ async with timed_operation("api_call", ctx.request_id) as api_op:
306
+ start_time = time.perf_counter()
307
+
308
+ (
309
+ status_code,
310
+ response_headers,
311
+ response_body,
312
+ ) = await self.proxy_client.forward(
313
+ method=transformed_request["method"],
314
+ url=transformed_request["url"],
315
+ headers=transformed_request["headers"],
316
+ body=transformed_request["body"],
317
+ timeout=timeout,
318
+ )
319
+
320
+ end_time = time.perf_counter()
321
+ api_duration = end_time - start_time
322
+ api_op["duration_seconds"] = api_duration
323
+
324
+ # Log the received response if verbose API logging is enabled
325
+ self._log_verbose_api_response(
326
+ status_code, response_headers, response_body
327
+ )
328
+
329
+ # 5. Response transformation
330
+ async with timed_operation("response_transform", ctx.request_id):
331
+ logger.debug("response_transform_start")
332
+ # For error responses, transform to OpenAI format if needed
333
+ transformed_response: ResponseData
334
+ if status_code >= 400:
335
+ logger.info(
336
+ "upstream_error_received",
337
+ status_code=status_code,
338
+ has_body=bool(response_body),
339
+ content_length=len(response_body) if response_body else 0,
340
+ )
341
+
342
+ # Transform error to OpenAI format if this is an OpenAI endpoint
343
+ transformed_error_body = response_body
344
+ if self.response_transformer._is_openai_request(path):
345
+ try:
346
+ error_data = json.loads(response_body.decode("utf-8"))
347
+ openai_error = self.openai_adapter.adapt_error(
348
+ error_data
349
+ )
350
+ transformed_error_body = json.dumps(
351
+ openai_error
352
+ ).encode("utf-8")
353
+ except (json.JSONDecodeError, UnicodeDecodeError):
354
+ # Keep original error if parsing fails
355
+ pass
356
+
357
+ transformed_response = ResponseData(
358
+ status_code=status_code,
359
+ headers=response_headers,
360
+ body=transformed_error_body,
361
+ )
362
+ else:
363
+ transformed_response = await self._transform_response(
364
+ status_code, response_headers, response_body, path
365
+ )
366
+
367
+ # 6. Extract response metrics using direct JSON parsing
368
+ tokens_input = tokens_output = cache_read_tokens = (
369
+ cache_write_tokens
370
+ ) = cost_usd = None
371
+ if transformed_response["body"]:
372
+ try:
373
+ response_data = json.loads(
374
+ transformed_response["body"].decode("utf-8")
375
+ )
376
+ usage = response_data.get("usage", {})
377
+ tokens_input = usage.get("input_tokens")
378
+ tokens_output = usage.get("output_tokens")
379
+ cache_read_tokens = usage.get("cache_read_input_tokens")
380
+ cache_write_tokens = usage.get("cache_creation_input_tokens")
381
+
382
+ # Calculate cost including cache tokens if we have tokens and model
383
+ from ccproxy.utils.cost_calculator import calculate_token_cost
384
+
385
+ cost_usd = calculate_token_cost(
386
+ tokens_input,
387
+ tokens_output,
388
+ model,
389
+ cache_read_tokens,
390
+ cache_write_tokens,
391
+ )
392
+ except (json.JSONDecodeError, UnicodeDecodeError):
393
+ pass # Keep all values as None if parsing fails
394
+
395
+ # 7. Update context with response data
396
+ ctx.add_metadata(
397
+ status_code=status_code,
398
+ tokens_input=tokens_input,
399
+ tokens_output=tokens_output,
400
+ cache_read_tokens=cache_read_tokens,
401
+ cache_write_tokens=cache_write_tokens,
402
+ cost_usd=cost_usd,
403
+ )
404
+
405
+ # 8. Log comprehensive access log (includes Prometheus metrics)
406
+ await log_request_access(
407
+ context=ctx,
408
+ status_code=status_code,
409
+ method=method,
410
+ metrics=self.metrics,
411
+ )
412
+
413
+ return (
414
+ transformed_response["status_code"],
415
+ transformed_response["headers"],
416
+ transformed_response["body"],
417
+ )
418
+
419
+ except Exception as e:
420
+ # Record error metrics via access logger
421
+ error_type = type(e).__name__
422
+
423
+ # Log the error with access logger (includes metrics)
424
+ await log_request_access(
425
+ context=ctx,
426
+ method=method,
427
+ error_message=str(e),
428
+ metrics=self.metrics,
429
+ error_type=error_type,
430
+ )
431
+
432
+ logger.exception(
433
+ "proxy_request_failed",
434
+ method=method,
435
+ path=path,
436
+ error=str(e),
437
+ exc_info=True,
438
+ )
439
+ # Re-raise the exception without transformation
440
+ # Let higher layers handle specific error types
441
+ raise
442
+ finally:
443
+ # Reset current request ID
444
+ self._current_request_id = None
445
+
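handle_request returns either a (status_code, headers, body) tuple or a StreamingResponse, so a caller has to branch on the result. A hedged sketch of how a FastAPI route might delegate to it; the catch-all path, the methods list, and the request.app.state.proxy_service attachment point are assumptions, since the real route lives in ccproxy/api/routes/proxy.py:

    from fastapi import APIRouter, Request, Response
    from fastapi.responses import StreamingResponse

    from ccproxy.services.proxy_service import ProxyService

    router = APIRouter()

    @router.api_route("/{path:path}", methods=["GET", "POST"])
    async def forward(path: str, request: Request) -> Response:
        service: ProxyService = request.app.state.proxy_service  # assumed wiring
        result = await service.handle_request(
            method=request.method,
            path=f"/{path}",
            headers=dict(request.headers),
            body=await request.body(),
            query_params=dict(request.query_params),
            request=request,
        )
        if isinstance(result, StreamingResponse):
            return result  # streaming responses are passed through untouched
        status_code, headers, body = result
        return Response(content=body, status_code=status_code, headers=headers)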
446
+ async def _get_access_token(self) -> str:
447
+ """Get access token for upstream authentication.
448
+
449
+ Tries configured auth_token first, then falls back to OAuth credentials.
450
+
451
+ Returns:
452
+ Valid access token
453
+
454
+ Raises:
455
+ HTTPException: If no valid token is available
456
+ """
457
+ # First try to get configured auth_token from settings
458
+ if self.settings.security.auth_token:
459
+ logger.debug("using_configured_auth_token")
460
+ return self.settings.security.auth_token
461
+
462
+ # Fall back to OAuth credentials
463
+ try:
464
+ access_token = await self.credentials_manager.get_access_token()
465
+ if not access_token:
466
+ logger.error("oauth_token_unavailable")
467
+
468
+ # Try to get more details about credential status
469
+ try:
470
+ validation = await self.credentials_manager.validate()
471
+
472
+ if (
473
+ validation.valid
474
+ and validation.expired
475
+ and validation.credentials
476
+ ):
477
+ logger.debug(
478
+ "oauth_token_expired",
479
+ expired_at=str(
480
+ validation.credentials.claude_ai_oauth.expires_at
481
+ ),
482
+ )
483
+ except Exception as e:
484
+ logger.debug(
485
+ "credential_check_failed",
486
+ error=str(e),
487
+ exc_info=logger.isEnabledFor(logging.DEBUG),
488
+ )
489
+
490
+ raise HTTPException(
491
+ status_code=401,
492
+ detail="No valid OAuth credentials found. Please run 'ccproxy auth login'.",
493
+ )
494
+
495
+ logger.debug("oauth_token_retrieved")
496
+ return access_token
497
+
498
+ except HTTPException:
499
+ raise
500
+ except Exception as e:
501
+ logger.error("oauth_token_retrieval_failed", error=str(e), exc_info=True)
502
+ raise HTTPException(
503
+ status_code=401,
504
+ detail="Authentication failed",
505
+ ) from e
506
+
507
+ async def _transform_request(
508
+ self,
509
+ method: str,
510
+ path: str,
511
+ headers: dict[str, str],
512
+ body: bytes | None,
513
+ query_params: dict[str, str | list[str]] | None,
514
+ access_token: str,
515
+ ) -> RequestData:
516
+ """Transform request using the transformer pipeline.
517
+
518
+ Args:
519
+ method: HTTP method
520
+ path: Request path
521
+ headers: Request headers
522
+ body: Request body
523
+ query_params: Query parameters
524
+ access_token: OAuth access token
525
+
526
+ Returns:
527
+ Transformed request data
528
+ """
529
+ # Transform path
530
+ transformed_path = self.request_transformer.transform_path(
531
+ path, self.proxy_mode
532
+ )
533
+ target_url = f"{self.target_base_url}{transformed_path}"
534
+
535
+ # Add beta=true query parameter for /v1/messages requests if not already present
536
+ if transformed_path == "/v1/messages":
537
+ if query_params is None:
538
+ query_params = {}
539
+ elif "beta" not in query_params:
540
+ query_params = dict(query_params) # Make a copy
541
+
542
+ if "beta" not in query_params:
543
+ query_params["beta"] = "true"
544
+ logger.debug("beta_parameter_added")
545
+
546
+ # Transform body first (as it might change size)
547
+ proxy_body = None
548
+ if body:
549
+ proxy_body = self.request_transformer.transform_request_body(
550
+ body, path, self.proxy_mode
551
+ )
552
+
553
+ # Transform headers (and update Content-Length if body changed)
554
+ proxy_headers = self.request_transformer.create_proxy_headers(
555
+ headers, access_token, self.proxy_mode
556
+ )
557
+
558
+ # Update Content-Length if body was transformed and size changed
559
+ if proxy_body and body and len(proxy_body) != len(body):
560
+ # Remove any existing content-length headers (case-insensitive)
561
+ proxy_headers = {
562
+ k: v for k, v in proxy_headers.items() if k.lower() != "content-length"
563
+ }
564
+ proxy_headers["Content-Length"] = str(len(proxy_body))
565
+ elif proxy_body and not body:
566
+ # New body was created where none existed
567
+ proxy_headers["Content-Length"] = str(len(proxy_body))
568
+
569
+ # Add query parameters to URL if present
570
+ if query_params:
571
+ query_string = urllib.parse.urlencode(query_params)
572
+ target_url = f"{target_url}?{query_string}"
573
+
574
+ return {
575
+ "method": method,
576
+ "url": target_url,
577
+ "headers": proxy_headers,
578
+ "body": proxy_body,
579
+ }
580
+
581
+ async def _transform_response(
582
+ self,
583
+ status_code: int,
584
+ headers: dict[str, str],
585
+ body: bytes,
586
+ original_path: str,
587
+ ) -> ResponseData:
588
+ """Transform response using the transformer pipeline.
589
+
590
+ Args:
591
+ status_code: HTTP status code
592
+ headers: Response headers
593
+ body: Response body
594
+ original_path: Original request path for context
595
+
596
+ Returns:
597
+ Transformed response data
598
+ """
599
+ # For error responses, pass through without transformation
600
+ if status_code >= 400:
601
+ return {
602
+ "status_code": status_code,
603
+ "headers": headers,
604
+ "body": body,
605
+ }
606
+
607
+ transformed_body = self.response_transformer.transform_response_body(
608
+ body, original_path, self.proxy_mode
609
+ )
610
+
611
+ transformed_headers = self.response_transformer.transform_response_headers(
612
+ headers, original_path, len(transformed_body), self.proxy_mode
613
+ )
614
+
615
+ return {
616
+ "status_code": status_code,
617
+ "headers": transformed_headers,
618
+ "body": transformed_body,
619
+ }
620
+
621
+ def _redact_headers(self, headers: dict[str, str]) -> dict[str, str]:
622
+ """Redact sensitive information from headers for safe logging."""
623
+ return {
624
+ k: "[REDACTED]" if k.lower() in self.SENSITIVE_HEADERS else v
625
+ for k, v in headers.items()
626
+ }
627
+
628
+ def _log_verbose_api_request(self, request_data: RequestData) -> None:
629
+ """Log details of an outgoing API request if verbose logging is enabled."""
630
+ if not self._verbose_api:
631
+ return
632
+
633
+ body = request_data.get("body")
634
+ body_preview = ""
635
+ full_body = None
636
+ if body:
637
+ try:
638
+ full_body = body.decode("utf-8", errors="replace")
639
+ # Truncate at 1024 chars for readability
640
+ body_preview = full_body[:1024]
641
+ # Try to parse as JSON for better formatting
642
+ try:
643
+ import json
644
+
645
+ full_body = json.loads(full_body)
646
+ except json.JSONDecodeError:
647
+ pass # Keep as string
648
+ except Exception:
649
+ body_preview = f"<binary data of length {len(body)}>"
650
+
651
+ logger.info(
652
+ "verbose_api_request",
653
+ method=request_data["method"],
654
+ url=request_data["url"],
655
+ headers=self._redact_headers(request_data["headers"]),
656
+ body_size=len(body) if body else 0,
657
+ body_preview=body_preview,
658
+ )
659
+
660
+ # Write to individual file if directory is specified
661
+ # Note: We cannot get request ID here since this is called from multiple places
662
+ # Request ID will be determined within _write_request_to_file method
663
+ self._write_request_to_file(
664
+ "request",
665
+ {
666
+ "method": request_data["method"],
667
+ "url": request_data["url"],
668
+ "headers": dict(request_data["headers"]), # Don't redact in file
669
+ "body": full_body,
670
+ },
671
+ )
672
+
673
+ def _log_verbose_api_response(
674
+ self, status_code: int, headers: dict[str, str], body: bytes
675
+ ) -> None:
676
+ """Log details of a received API response if verbose logging is enabled."""
677
+ if not self._verbose_api:
678
+ return
679
+
680
+ body_preview = ""
681
+ if body:
682
+ try:
683
+ # Truncate at 1024 chars for readability
684
+ body_preview = body.decode("utf-8", errors="replace")[:1024]
685
+ except Exception:
686
+ body_preview = f"<binary data of length {len(body)}>"
687
+
688
+ logger.info(
689
+ "verbose_api_response",
690
+ status_code=status_code,
691
+ headers=self._redact_headers(headers),
692
+ body_size=len(body),
693
+ body_preview=body_preview,
694
+ )
695
+
696
+ # Write to individual file if directory is specified
697
+ full_body = None
698
+ if body:
699
+ try:
700
+ full_body_str = body.decode("utf-8", errors="replace")
701
+ # Try to parse as JSON for better formatting
702
+ try:
703
+ full_body = json.loads(full_body_str)
704
+ except json.JSONDecodeError:
705
+ full_body = full_body_str
706
+ except Exception:
707
+ full_body = f"<binary data of length {len(body)}>"
708
+
709
+ self._write_request_to_file(
710
+ "response",
711
+ {
712
+ "status_code": status_code,
713
+ "headers": dict(headers), # Don't redact in file
714
+ "body": full_body,
715
+ },
716
+ )
717
+
718
+ def _should_stream_response(self, headers: dict[str, str]) -> bool:
719
+ """Check if response should be streamed based on request headers.
720
+
721
+ Args:
722
+ headers: Request headers
723
+
724
+ Returns:
725
+ True if response should be streamed
726
+ """
727
+ # Check if client requested streaming
728
+ accept_header = headers.get("accept", "").lower()
729
+ should_stream = (
730
+ "text/event-stream" in accept_header or "stream" in accept_header
731
+ )
732
+ logger.debug(
733
+ "stream_check_completed",
734
+ accept_header=accept_header,
735
+ should_stream=should_stream,
736
+ )
737
+ return should_stream
738
+
739
+ def _extract_request_metadata(self, body: bytes | None) -> tuple[str | None, bool]:
740
+ """Extract model and streaming flag from request body.
741
+
742
+ Args:
743
+ body: Request body
744
+
745
+ Returns:
746
+ Tuple of (model, streaming)
747
+ """
748
+ if not body:
749
+ return None, False
750
+
751
+ try:
752
+ body_data = json.loads(body.decode("utf-8"))
753
+ model = body_data.get("model")
754
+ streaming = body_data.get("stream", False)
755
+ return model, streaming
756
+ except (json.JSONDecodeError, UnicodeDecodeError):
757
+ return None, False
758
+
759
+ async def _handle_streaming_request(
760
+ self,
761
+ request_data: RequestData,
762
+ original_path: str,
763
+ timeout: float,
764
+ ctx: "RequestContext",
765
+ ) -> StreamingResponse | tuple[int, dict[str, str], bytes]:
766
+ """Handle streaming request with transformation.
767
+
768
+ Args:
769
+ request_data: Transformed request data
770
+ original_path: Original request path for context
771
+ timeout: Request timeout
772
+ ctx: Request context for observability
773
+
774
+ Returns:
775
+ StreamingResponse or error response tuple
776
+ """
777
+ # Log the outgoing request if verbose API logging is enabled
778
+ self._log_verbose_api_request(request_data)
779
+
780
+ # First, make the request and check for errors before streaming
781
+ proxy_url = self._proxy_url
782
+ verify = self._ssl_context
783
+
784
+ async with httpx.AsyncClient(
785
+ timeout=timeout, proxy=proxy_url, verify=verify
786
+ ) as client:
787
+ # Start the request to get headers
788
+ response = await client.send(
789
+ client.build_request(
790
+ method=request_data["method"],
791
+ url=request_data["url"],
792
+ headers=request_data["headers"],
793
+ content=request_data["body"],
794
+ ),
795
+ stream=True,
796
+ )
797
+
798
+ # Check for errors before starting to stream
799
+ if response.status_code >= 400:
800
+ error_content = await response.aread()
801
+
802
+ # Log the full error response body
803
+ self._log_verbose_api_response(
804
+ response.status_code, dict(response.headers), error_content
805
+ )
806
+
807
+ logger.info(
808
+ "streaming_error_received",
809
+ status_code=response.status_code,
810
+ error_detail=error_content.decode("utf-8", errors="replace"),
811
+ )
812
+
813
+ # Transform error to OpenAI format if this is an OpenAI endpoint
814
+ transformed_error_body = error_content
815
+ if self.response_transformer._is_openai_request(original_path):
816
+ try:
817
+ error_data = json.loads(error_content.decode("utf-8"))
818
+ openai_error = self.openai_adapter.adapt_error(error_data)
819
+ transformed_error_body = json.dumps(openai_error).encode(
820
+ "utf-8"
821
+ )
822
+ except (json.JSONDecodeError, UnicodeDecodeError):
823
+ # Keep original error if parsing fails
824
+ pass
825
+
826
+ # Update context with error status
827
+ ctx.add_metadata(status_code=response.status_code)
828
+
829
+ # Log access log for error
830
+ from ccproxy.observability.access_logger import log_request_access
831
+
832
+ await log_request_access(
833
+ context=ctx,
834
+ status_code=response.status_code,
835
+ method=request_data["method"],
836
+ metrics=self.metrics,
837
+ )
838
+
839
+ # Return error as regular response
840
+ return (
841
+ response.status_code,
842
+ dict(response.headers),
843
+ transformed_error_body,
844
+ )
845
+
846
+ # If no error, proceed with streaming
847
+ # Store response headers to preserve for streaming
848
+ response_headers = {}
849
+ response_status = 200
850
+
851
+ # Initialize streaming metrics collector
852
+ from ccproxy.utils.streaming_metrics import StreamingMetricsCollector
853
+
854
+ metrics_collector = StreamingMetricsCollector(request_id=ctx.request_id)
855
+
856
+ async def stream_generator() -> AsyncGenerator[bytes, None]:
857
+ try:
858
+ logger.debug(
859
+ "stream_generator_start",
860
+ method=request_data["method"],
861
+ url=request_data["url"],
862
+ headers=request_data["headers"],
863
+ )
864
+
865
+ # Use httpx directly for streaming since we need the stream context manager
866
+ # Get proxy and SSL settings from cached configuration
867
+ proxy_url = self._proxy_url
868
+ verify = self._ssl_context
869
+
870
+ start_time = time.perf_counter()
871
+ async with (
872
+ httpx.AsyncClient(
873
+ timeout=timeout, proxy=proxy_url, verify=verify
874
+ ) as client,
875
+ client.stream(
876
+ method=request_data["method"],
877
+ url=request_data["url"],
878
+ headers=request_data["headers"],
879
+ content=request_data["body"],
880
+ ) as response,
881
+ ):
882
+ end_time = time.perf_counter()
883
+ proxy_api_call_ms = (end_time - start_time) * 1000
884
+ logger.debug(
885
+ "stream_response_received",
886
+ status_code=response.status_code,
887
+ headers=dict(response.headers),
888
+ )
889
+
890
+ # Log initial stream response headers if verbose
891
+ if self._verbose_api:
892
+ logger.info(
893
+ "verbose_api_stream_response_start",
894
+ status_code=response.status_code,
895
+ headers=self._redact_headers(dict(response.headers)),
896
+ )
897
+
898
+ # Store response status and headers
899
+ nonlocal response_status, response_headers
900
+ response_status = response.status_code
901
+ response_headers = dict(response.headers)
902
+
903
+ # Transform streaming response
904
+ is_openai = self.response_transformer._is_openai_request(
905
+ original_path
906
+ )
907
+ logger.debug(
908
+ "openai_format_check", is_openai=is_openai, path=original_path
909
+ )
910
+
911
+ if is_openai:
912
+ # Transform Anthropic SSE to OpenAI SSE format using adapter
913
+ logger.debug("sse_transform_start", path=original_path)
914
+
915
+ async for (
916
+ transformed_chunk
917
+ ) in self._transform_anthropic_to_openai_stream(
918
+ response, original_path
919
+ ):
920
+ logger.debug(
921
+ "transformed_chunk_yielded",
922
+ chunk_size=len(transformed_chunk),
923
+ )
924
+ yield transformed_chunk
925
+ else:
926
+ # Stream as-is for Anthropic endpoints
927
+ logger.debug("anthropic_streaming_start")
928
+ chunk_count = 0
929
+ content_block_delta_count = 0
930
+
931
+ # Use cached verbose streaming configuration
932
+ verbose_streaming = self._verbose_streaming
933
+
934
+ async for chunk in response.aiter_bytes():
935
+ if chunk:
936
+ chunk_count += 1
937
+
938
+ # Compact logging for content_block_delta events
939
+ chunk_str = chunk.decode("utf-8", errors="replace")
940
+
941
+ # Extract token metrics from streaming events
942
+ is_final = metrics_collector.process_chunk(chunk_str)
943
+
944
+ # If this is the final chunk with complete metrics, update context and record metrics
945
+ if is_final:
946
+ model = ctx.metadata.get("model")
947
+ cost_usd = metrics_collector.calculate_final_cost(
948
+ model
949
+ )
950
+ final_metrics = metrics_collector.get_metrics()
951
+
952
+ # Update context with final metrics
953
+ ctx.add_metadata(
954
+ status_code=response_status,
955
+ tokens_input=final_metrics["tokens_input"],
956
+ tokens_output=final_metrics["tokens_output"],
957
+ cache_read_tokens=final_metrics[
958
+ "cache_read_tokens"
959
+ ],
960
+ cache_write_tokens=final_metrics[
961
+ "cache_write_tokens"
962
+ ],
963
+ cost_usd=cost_usd,
964
+ )
965
+
966
+ # Log comprehensive access log for streaming completion
967
+ from ccproxy.observability.access_logger import (
968
+ log_request_access,
969
+ )
970
+
971
+ await log_request_access(
972
+ context=ctx,
973
+ status_code=response_status,
974
+ metrics=self.metrics,
975
+ # Additional metadata for streaming completion
976
+ event_type="streaming_complete",
977
+ )
978
+
979
+ if (
980
+ "content_block_delta" in chunk_str
981
+ and not verbose_streaming
982
+ ):
983
+ content_block_delta_count += 1
984
+ # Only log every 10th content_block_delta or when we start/end
985
+ if content_block_delta_count == 1:
986
+ logger.debug("content_block_delta_start")
987
+ elif content_block_delta_count % 10 == 0:
988
+ logger.debug(
989
+ "content_block_delta_progress",
990
+ count=content_block_delta_count,
991
+ )
992
+ elif (
993
+ verbose_streaming
994
+ or "content_block_delta" not in chunk_str
995
+ ):
996
+ # Log non-content_block_delta events normally, or everything if verbose mode
997
+ logger.debug(
998
+ "chunk_yielded",
999
+ chunk_number=chunk_count,
1000
+ chunk_size=len(chunk),
1001
+ chunk_preview=chunk[:100].decode(
1002
+ "utf-8", errors="replace"
1003
+ ),
1004
+ )
1005
+
1006
+ yield chunk
1007
+
1008
+ # Final summary for content_block_delta events
1009
+ if content_block_delta_count > 0 and not verbose_streaming:
1010
+ logger.debug(
1011
+ "content_block_delta_completed",
1012
+ total_count=content_block_delta_count,
1013
+ )
1014
+
1015
+ except Exception as e:
1016
+ logger.exception("streaming_error", error=str(e), exc_info=True)
1017
+ error_message = f'data: {{"error": "Streaming error: {str(e)}"}}\n\n'
1018
+ yield error_message.encode("utf-8")
1019
+
1020
+ # Always use upstream headers as base
1021
+ final_headers = response_headers.copy()
1022
+
1023
+ # Ensure critical headers for streaming
1024
+ final_headers["Cache-Control"] = "no-cache"
1025
+ final_headers["Connection"] = "keep-alive"
1026
+
1027
+ # Set content-type if not already set by upstream
1028
+ if "content-type" not in final_headers:
1029
+ final_headers["content-type"] = "text/event-stream"
1030
+
1031
+ return StreamingResponse(
1032
+ stream_generator(),
1033
+ status_code=response_status,
1034
+ headers=final_headers,
1035
+ )
1036
+
1037
+ async def _transform_anthropic_to_openai_stream(
1038
+ self, response: httpx.Response, original_path: str
1039
+ ) -> AsyncGenerator[bytes, None]:
1040
+ """Transform Anthropic SSE stream to OpenAI SSE format using adapter.
1041
+
1042
+ Args:
1043
+ response: Streaming response from Anthropic
1044
+ original_path: Original request path for context
1045
+
1046
+ Yields:
1047
+ Transformed OpenAI SSE format chunks
1048
+ """
1049
+
1050
+ # Parse SSE chunks from response into dict stream
1051
+ async def sse_to_dict_stream() -> AsyncGenerator[dict[str, object], None]:
1052
+ async for line in response.aiter_lines():
1053
+ if line.startswith("data: "):
1054
+ data_str = line[6:].strip()
1055
+ if data_str and data_str != "[DONE]":
1056
+ try:
1057
+ yield json.loads(data_str)
1058
+ except json.JSONDecodeError:
1059
+ logger.warning("sse_parse_failed", data=data_str)
1060
+ continue
1061
+
1062
+ # Transform using OpenAI adapter and format back to SSE
1063
+ async for openai_chunk in self.openai_adapter.adapt_stream(
1064
+ sse_to_dict_stream()
1065
+ ):
1066
+ sse_line = f"data: {json.dumps(openai_chunk)}\n\n"
1067
+ yield sse_line.encode("utf-8")
1068
+
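The SSE handling above is a simple line protocol: each upstream event arrives as a "data: <json>" line, "[DONE]" is a sentinel rather than JSON, and every converted chunk is re-emitted in the same framing. A standalone sketch of that round trip, where the passthrough convert() stands in for the OpenAI adapter's stream conversion:

    import json

    def convert(event: dict) -> dict:
        # Placeholder for OpenAIAdapter.adapt_stream, which maps Anthropic events
        # to OpenAI chat.completion.chunk dicts.
        return event

    def reframe(sse_lines: list[str]) -> list[bytes]:
        out: list[bytes] = []
        for line in sse_lines:
            if not line.startswith("data: "):
                continue                      # ignore event:/id:/blank lines
            payload = line[6:].strip()
            if not payload or payload == "[DONE]":
                continue                      # [DONE] is a sentinel, not JSON
            chunk = convert(json.loads(payload))
            out.append(f"data: {json.dumps(chunk)}\n\n".encode("utf-8"))
        return out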
1069
+ def _write_request_to_file(self, data_type: str, data: dict[str, Any]) -> None:
1070
+ """Write request or response data to individual file if logging directory is configured.
1071
+
1072
+ Args:
1073
+ data_type: Type of data ("request" or "response")
1074
+ data: The data to write
1075
+ """
1076
+ if not self._request_log_dir or not self._verbose_api:
1077
+ return
1078
+
1079
+ # Use the current request ID stored during request handling
1080
+ request_id = self._current_request_id or "unknown"
1081
+
1082
+ # Create filename with request ID and data type
1083
+ filename = f"{request_id}_{data_type}.json"
1084
+ file_path = Path(self._request_log_dir) / filename
1085
+
1086
+ try:
1087
+ # Write JSON data to file
1088
+ with file_path.open("w", encoding="utf-8") as f:
1089
+ json.dump(data, f, indent=2, default=str)
1090
+
1091
+ logger.debug(
1092
+ "request_data_logged_to_file",
1093
+ request_id=request_id,
1094
+ data_type=data_type,
1095
+ file_path=str(file_path),
1096
+ )
1097
+
1098
+ except Exception as e:
1099
+ logger.error(
1100
+ "failed_to_write_request_log_file",
1101
+ request_id=request_id,
1102
+ data_type=data_type,
1103
+ error=str(e),
1104
+ )
1105
+
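With CCPROXY_VERBOSE_API and CCPROXY_REQUEST_LOG_DIR set, each proxied exchange produces a pair of JSON files named after the request ID ("<request_id>_request.json" and "<request_id>_response.json"). A sketch of reading them back; the directory path is a placeholder:

    import json
    from pathlib import Path

    log_dir = Path("/tmp/ccproxy-requests")  # whatever CCPROXY_REQUEST_LOG_DIR points at
    for request_file in sorted(log_dir.glob("*_request.json")):
        request_id = request_file.name.removesuffix("_request.json")
        response_file = log_dir / f"{request_id}_response.json"
        request_data = json.loads(request_file.read_text())
        response_data = (
            json.loads(response_file.read_text()) if response_file.exists() else None
        )
        print(request_id, request_data["url"],
              response_data["status_code"] if response_data else "no response")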
1106
+ def _extract_message_type_from_body(self, body: bytes | None) -> str:
1107
+ """Extract message type from request body for realistic response generation."""
1108
+ if not body:
1109
+ return "short"
1110
+
1111
+ try:
1112
+ body_data = json.loads(body.decode("utf-8"))
1113
+ # Check if tools are present - indicates tool use
1114
+ if body_data.get("tools"):
1115
+ return "tool_use"
1116
+
1117
+ # Check message content length to determine type
1118
+ messages = body_data.get("messages", [])
1119
+ if messages:
1120
+ content = str(messages[-1].get("content", ""))
1121
+ if len(content) > 200:
1122
+ return "long"
1123
+ elif len(content) < 50:
1124
+ return "short"
1125
+ else:
1126
+ return "medium"
1127
+ except (json.JSONDecodeError, UnicodeDecodeError):
1128
+ pass
1129
+
1130
+ return "short"
1131
+
1132
+ async def _generate_bypass_standard_response(
1133
+ self,
1134
+ model: str | None,
1135
+ is_openai_format: bool,
1136
+ ctx: "RequestContext",
1137
+ message_type: str = "short",
1138
+ ) -> tuple[int, dict[str, str], bytes]:
1139
+ """Generate realistic mock standard response."""
1140
+
1141
+ # Check if we should simulate an error
1142
+ if self.mock_generator.should_simulate_error():
1143
+ error_response, status_code = self.mock_generator.generate_error_response(
1144
+ "openai" if is_openai_format else "anthropic"
1145
+ )
1146
+ response_body = json.dumps(error_response).encode()
1147
+ return status_code, {"content-type": "application/json"}, response_body
1148
+
1149
+ # Generate realistic content and token counts
1150
+ content, input_tokens, output_tokens = (
1151
+ self.mock_generator.generate_response_content(
1152
+ message_type, model or "claude-3-5-sonnet-20241022"
1153
+ )
1154
+ )
1155
+ cache_read_tokens, cache_write_tokens = (
1156
+ self.mock_generator.generate_cache_tokens()
1157
+ )
1158
+
1159
+ # Simulate realistic latency
1160
+ latency_ms = random.randint(*self.mock_generator.config.base_latency_ms)
1161
+ await asyncio.sleep(latency_ms / 1000.0)
1162
+
1163
+ # Always start with Anthropic format
1164
+ request_id = f"msg_test_{ctx.request_id}_{random.randint(1000, 9999)}"
1165
+ content_list: list[dict[str, Any]] = [{"type": "text", "text": content}]
1166
+ anthropic_response = {
1167
+ "id": request_id,
1168
+ "type": "message",
1169
+ "role": "assistant",
1170
+ "content": content_list,
1171
+ "model": model or "claude-3-5-sonnet-20241022",
1172
+ "stop_reason": "end_turn",
1173
+ "stop_sequence": None,
1174
+ "usage": {
1175
+ "input_tokens": input_tokens,
1176
+ "output_tokens": output_tokens,
1177
+ "cache_creation_input_tokens": cache_write_tokens,
1178
+ "cache_read_input_tokens": cache_read_tokens,
1179
+ },
1180
+ }
1181
+
1182
+ # Add tool use if appropriate
1183
+ if message_type == "tool_use":
1184
+ content_list.insert(
1185
+ 0,
1186
+ {
1187
+ "type": "tool_use",
1188
+ "id": f"toolu_{random.randint(10000, 99999)}",
1189
+ "name": "calculator",
1190
+ "input": {"expression": "23 * 45"},
1191
+ },
1192
+ )
1193
+
1194
+ if is_openai_format:
1195
+ # Transform to OpenAI format using existing adapter
1196
+ openai_response = self.openai_adapter.adapt_response(anthropic_response)
1197
+ response_body = json.dumps(openai_response).encode()
1198
+ else:
1199
+ response_body = json.dumps(anthropic_response).encode()
1200
+
1201
+ headers = {
1202
+ "content-type": "application/json",
1203
+ "content-length": str(len(response_body)),
1204
+ }
1205
+
1206
+ # Update context with realistic metrics
1207
+ cost_usd = self.mock_generator.calculate_realistic_cost(
1208
+ input_tokens,
1209
+ output_tokens,
1210
+ model or "claude-3-5-sonnet-20241022",
1211
+ cache_read_tokens,
1212
+ cache_write_tokens,
1213
+ )
1214
+
1215
+ ctx.add_metadata(
1216
+ status_code=200,
1217
+ tokens_input=input_tokens,
1218
+ tokens_output=output_tokens,
1219
+ cache_read_tokens=cache_read_tokens,
1220
+ cache_write_tokens=cache_write_tokens,
1221
+ cost_usd=cost_usd,
1222
+ )
1223
+
1224
+ # Log comprehensive access log (includes Prometheus metrics)
1225
+ await log_request_access(
1226
+ context=ctx,
1227
+ status_code=200,
1228
+ method="POST",
1229
+ metrics=self.metrics,
1230
+ )
1231
+
1232
+ return 200, headers, response_body
1233
+
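Bypass mode is triggered purely by the X-CCProxy-Bypass-Upstream header, so a client can exercise the full proxy path without ever reaching the Anthropic API. A sketch of such a request; the host, port, and route prefix are assumptions about how the server is run:

    import httpx

    resp = httpx.post(
        "http://127.0.0.1:8000/v1/messages",
        headers={"X-CCProxy-Bypass-Upstream": "true"},
        json={
            "model": "claude-3-5-sonnet-20241022",
            "max_tokens": 64,
            "messages": [{"role": "user", "content": "ping"}],
        },
    )
    # Mock responses carry realistic usage numbers, so cost/metrics paths are exercised too.
    print(resp.status_code, resp.json().get("usage"))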
1234
+ async def _generate_bypass_streaming_response(
1235
+ self,
1236
+ model: str | None,
1237
+ is_openai_format: bool,
1238
+ ctx: "RequestContext",
1239
+ message_type: str = "short",
1240
+ ) -> StreamingResponse:
1241
+ """Generate realistic mock streaming response."""
1242
+
1243
+ # Generate content and tokens
1244
+ content, input_tokens, output_tokens = (
1245
+ self.mock_generator.generate_response_content(
1246
+ message_type, model or "claude-3-5-sonnet-20241022"
1247
+ )
1248
+ )
1249
+ cache_read_tokens, cache_write_tokens = (
1250
+ self.mock_generator.generate_cache_tokens()
1251
+ )
1252
+
1253
+ async def realistic_mock_stream_generator() -> AsyncGenerator[bytes, None]:
1254
+ request_id = f"msg_test_{ctx.request_id}_{random.randint(1000, 9999)}"
1255
+
1256
+ if is_openai_format:
1257
+ # Generate OpenAI-style streaming
1258
+ chunks = await self._generate_realistic_openai_stream(
1259
+ request_id,
1260
+ model or "claude-3-5-sonnet-20241022",
1261
+ content,
1262
+ input_tokens,
1263
+ output_tokens,
1264
+ )
1265
+ else:
1266
+ # Generate Anthropic-style streaming
1267
+ chunks = self.mock_generator.generate_realistic_anthropic_stream(
1268
+ request_id,
1269
+ model or "claude-3-5-sonnet-20241022",
1270
+ content,
1271
+ input_tokens,
1272
+ output_tokens,
1273
+ cache_read_tokens,
1274
+ cache_write_tokens,
1275
+ )
1276
+
1277
+ # Simulate realistic token generation rate
1278
+ tokens_per_second = self.mock_generator.config.token_generation_rate
1279
+
1280
+ for i, chunk in enumerate(chunks):
1281
+ # Realistic delay based on token generation rate
1282
+ if i > 0: # Don't delay the first chunk
1283
+ # Estimate tokens in this chunk and calculate delay
1284
+ chunk_tokens = len(str(chunk)) // 4 # Rough estimate
1285
+ delay_seconds = chunk_tokens / tokens_per_second
1286
+ # Add some randomness
1287
+ delay_seconds *= random.uniform(0.5, 1.5)
1288
+ await asyncio.sleep(max(0.01, delay_seconds))
1289
+
1290
+ yield f"data: {json.dumps(chunk)}\n\n".encode()
1291
+
1292
+ yield b"data: [DONE]\n\n"
1293
+
1294
+ headers = {
1295
+ "content-type": "text/event-stream",
1296
+ "cache-control": "no-cache",
1297
+ "connection": "keep-alive",
1298
+ }
1299
+
1300
+ # Update context with realistic metrics
1301
+ cost_usd = self.mock_generator.calculate_realistic_cost(
1302
+ input_tokens,
1303
+ output_tokens,
1304
+ model or "claude-3-5-sonnet-20241022",
1305
+ cache_read_tokens,
1306
+ cache_write_tokens,
1307
+ )
1308
+
1309
+ ctx.add_metadata(
1310
+ status_code=200,
1311
+ tokens_input=input_tokens,
1312
+ tokens_output=output_tokens,
1313
+ cache_read_tokens=cache_read_tokens,
1314
+ cache_write_tokens=cache_write_tokens,
1315
+ cost_usd=cost_usd,
1316
+ )
1317
+
1318
+ return StreamingResponse(realistic_mock_stream_generator(), headers=headers)
1319
+
1320
+ async def _generate_realistic_openai_stream(
1321
+ self,
1322
+ request_id: str,
1323
+ model: str,
1324
+ content: str,
1325
+ input_tokens: int,
1326
+ output_tokens: int,
1327
+ ) -> list[dict[str, Any]]:
1328
+ """Generate realistic OpenAI streaming chunks by converting Anthropic format."""
1329
+
1330
+ # Generate Anthropic chunks first
1331
+ anthropic_chunks = self.mock_generator.generate_realistic_anthropic_stream(
1332
+ request_id, model, content, input_tokens, output_tokens, 0, 0
1333
+ )
1334
+
1335
+ # Convert to OpenAI format using the adapter
1336
+ openai_chunks = []
1337
+ for chunk in anthropic_chunks:
1338
+ # Use the OpenAI adapter to convert each chunk
1339
+ # This is a simplified conversion - in practice, you'd need a full streaming adapter
1340
+ if chunk.get("type") == "message_start":
1341
+ openai_chunks.append(
1342
+ {
1343
+ "id": f"chatcmpl-{request_id}",
1344
+ "object": "chat.completion.chunk",
1345
+ "created": int(time.time()),
1346
+ "model": model,
1347
+ "choices": [
1348
+ {
1349
+ "index": 0,
1350
+ "delta": {"role": "assistant", "content": ""},
1351
+ "finish_reason": None,
1352
+ }
1353
+ ],
1354
+ }
1355
+ )
1356
+ elif chunk.get("type") == "content_block_delta":
1357
+ delta_text = chunk.get("delta", {}).get("text", "")
1358
+ openai_chunks.append(
1359
+ {
1360
+ "id": f"chatcmpl-{request_id}",
1361
+ "object": "chat.completion.chunk",
1362
+ "created": int(time.time()),
1363
+ "model": model,
1364
+ "choices": [
1365
+ {
1366
+ "index": 0,
1367
+ "delta": {"content": delta_text},
1368
+ "finish_reason": None,
1369
+ }
1370
+ ],
1371
+ }
1372
+ )
1373
+ elif chunk.get("type") == "message_stop":
1374
+ openai_chunks.append(
1375
+ {
1376
+ "id": f"chatcmpl-{request_id}",
1377
+ "object": "chat.completion.chunk",
1378
+ "created": int(time.time()),
1379
+ "model": model,
1380
+ "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
1381
+ }
1382
+ )
1383
+
1384
+ return openai_chunks
1385
+
1386
+ async def handle_models_request(
1387
+ self,
1388
+ headers: dict[str, str],
1389
+ timeout: float = 240.0,
1390
+ ) -> tuple[int, dict[str, str], bytes]:
1391
+ """Handle a /v1/models request to list available models.
1392
+
1393
+ Since the Anthropic API doesn't support the /v1/models endpoint,
1394
+ returns a hardcoded list of Anthropic models and recent OpenAI models.
1395
+
1396
+ Args:
1397
+ headers: Request headers
1398
+ timeout: Request timeout in seconds
1399
+
1400
+ Returns:
1401
+ Tuple of (status_code, headers, body)
1402
+ """
1403
+ # Define hardcoded Anthropic models
1404
+ anthropic_models = [
1405
+ {
1406
+ "type": "model",
1407
+ "id": "claude-opus-4-20250514",
1408
+ "display_name": "Claude Opus 4",
1409
+ "created_at": 1747526400, # 2025-05-22
1410
+ },
1411
+ {
1412
+ "type": "model",
1413
+ "id": "claude-sonnet-4-20250514",
1414
+ "display_name": "Claude Sonnet 4",
1415
+ "created_at": 1747526400, # 2025-05-22
1416
+ },
1417
+ {
1418
+ "type": "model",
1419
+ "id": "claude-3-7-sonnet-20250219",
1420
+ "display_name": "Claude Sonnet 3.7",
1421
+ "created_at": 1740268800, # 2025-02-24
1422
+ },
1423
+ {
1424
+ "type": "model",
1425
+ "id": "claude-3-5-sonnet-20241022",
1426
+ "display_name": "Claude Sonnet 3.5 (New)",
1427
+ "created_at": 1729555200, # 2024-10-22
1428
+ },
1429
+ {
1430
+ "type": "model",
1431
+ "id": "claude-3-5-haiku-20241022",
1432
+ "display_name": "Claude Haiku 3.5",
1433
+ "created_at": 1729555200, # 2024-10-22
1434
+ },
1435
+ {
1436
+ "type": "model",
1437
+ "id": "claude-3-5-sonnet-20240620",
1438
+ "display_name": "Claude Sonnet 3.5 (Old)",
1439
+ "created_at": 1718841600, # 2024-06-20
1440
+ },
1441
+ {
1442
+ "type": "model",
1443
+ "id": "claude-3-haiku-20240307",
1444
+ "display_name": "Claude Haiku 3",
1445
+ "created_at": 1709769600, # 2024-03-07
1446
+ },
1447
+ {
1448
+ "type": "model",
1449
+ "id": "claude-3-opus-20240229",
1450
+ "display_name": "Claude Opus 3",
1451
+ "created_at": 1709164800, # 2024-02-29
1452
+ },
1453
+ ]
1454
+
1455
+ # Define recent OpenAI models to include (GPT-4 variants and O1 models)
1456
+ openai_models = [
1457
+ {
1458
+ "id": "gpt-4o",
1459
+ "object": "model",
1460
+ "created": 1715367049,
1461
+ "owned_by": "openai",
1462
+ },
1463
+ {
1464
+ "id": "gpt-4o-mini",
1465
+ "object": "model",
1466
+ "created": 1721172741,
1467
+ "owned_by": "openai",
1468
+ },
1469
+ {
1470
+ "id": "gpt-4-turbo",
1471
+ "object": "model",
1472
+ "created": 1712361441,
1473
+ "owned_by": "openai",
1474
+ },
1475
+ {
1476
+ "id": "gpt-4-turbo-preview",
1477
+ "object": "model",
1478
+ "created": 1706037777,
1479
+ "owned_by": "openai",
1480
+ },
1481
+ {
1482
+ "id": "o1",
1483
+ "object": "model",
1484
+ "created": 1734375816,
1485
+ "owned_by": "openai",
1486
+ },
1487
+ {
1488
+ "id": "o1-mini",
1489
+ "object": "model",
1490
+ "created": 1725649008,
1491
+ "owned_by": "openai",
1492
+ },
1493
+ {
1494
+ "id": "o1-preview",
1495
+ "object": "model",
1496
+ "created": 1725648897,
1497
+ "owned_by": "openai",
1498
+ },
1499
+ {
1500
+ "id": "o3",
1501
+ "object": "model",
1502
+ "created": 1744225308,
1503
+ "owned_by": "openai",
1504
+ },
1505
+ {
1506
+ "id": "o3-mini",
1507
+ "object": "model",
1508
+ "created": 1737146383,
1509
+ "owned_by": "openai",
1510
+ },
1511
+ ]
1512
+
1513
+ # Combine models - mixed format with both Anthropic and OpenAI fields
1514
+ combined_response = {
1515
+ "data": anthropic_models + openai_models,
1516
+ "has_more": False,
1517
+ "object": "list", # Add OpenAI-style field
1518
+ }
1519
+
1520
+ # Serialize response
1521
+ response_body = json.dumps(combined_response).encode("utf-8")
1522
+
1523
+ # Create response headers
1524
+ response_headers = {
1525
+ "content-type": "application/json",
1526
+ "content-length": str(len(response_body)),
1527
+ }
1528
+
1529
+ return 200, response_headers, response_body
1530
+
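Because the returned list mixes Anthropic-style entries (display_name/created_at) with OpenAI-style entries (owned_by/created), clients should only rely on the common "id" field. A small sketch of listing the models; host and port are assumptions about how the server is run:

    import httpx

    models = httpx.get("http://127.0.0.1:8000/v1/models").json()
    for entry in models["data"]:
        # Every entry has an "id"; the remaining fields depend on its style.
        print(entry["id"])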
1531
+ async def close(self) -> None:
1532
+ """Close any resources held by the proxy service."""
1533
+ if self.proxy_client:
1534
+ await self.proxy_client.close()
1535
+ if self.credentials_manager:
1536
+ await self.credentials_manager.__aexit__(None, None, None)