ccproxy-api 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccproxy/_version.py +2 -2
- ccproxy/adapters/codex/__init__.py +11 -0
- ccproxy/adapters/openai/adapter.py +1 -1
- ccproxy/adapters/openai/models.py +1 -1
- ccproxy/adapters/openai/response_adapter.py +355 -0
- ccproxy/adapters/openai/response_models.py +178 -0
- ccproxy/adapters/openai/streaming.py +1 -0
- ccproxy/api/app.py +150 -224
- ccproxy/api/dependencies.py +22 -2
- ccproxy/api/middleware/errors.py +27 -3
- ccproxy/api/middleware/logging.py +4 -0
- ccproxy/api/responses.py +6 -1
- ccproxy/api/routes/claude.py +222 -17
- ccproxy/api/routes/codex.py +1231 -0
- ccproxy/api/routes/health.py +228 -3
- ccproxy/api/routes/proxy.py +25 -6
- ccproxy/api/services/permission_service.py +2 -2
- ccproxy/auth/openai/__init__.py +13 -0
- ccproxy/auth/openai/credentials.py +166 -0
- ccproxy/auth/openai/oauth_client.py +334 -0
- ccproxy/auth/openai/storage.py +184 -0
- ccproxy/claude_sdk/__init__.py +4 -8
- ccproxy/claude_sdk/client.py +661 -131
- ccproxy/claude_sdk/exceptions.py +16 -0
- ccproxy/claude_sdk/manager.py +219 -0
- ccproxy/claude_sdk/message_queue.py +342 -0
- ccproxy/claude_sdk/options.py +6 -1
- ccproxy/claude_sdk/session_client.py +546 -0
- ccproxy/claude_sdk/session_pool.py +550 -0
- ccproxy/claude_sdk/stream_handle.py +538 -0
- ccproxy/claude_sdk/stream_worker.py +392 -0
- ccproxy/claude_sdk/streaming.py +53 -11
- ccproxy/cli/commands/auth.py +398 -1
- ccproxy/cli/commands/serve.py +99 -1
- ccproxy/cli/options/claude_options.py +47 -0
- ccproxy/config/__init__.py +0 -3
- ccproxy/config/claude.py +171 -23
- ccproxy/config/codex.py +100 -0
- ccproxy/config/discovery.py +10 -1
- ccproxy/config/scheduler.py +2 -2
- ccproxy/config/settings.py +38 -1
- ccproxy/core/codex_transformers.py +389 -0
- ccproxy/core/http_transformers.py +458 -75
- ccproxy/core/logging.py +108 -12
- ccproxy/core/transformers.py +5 -0
- ccproxy/models/claude_sdk.py +57 -0
- ccproxy/models/detection.py +208 -0
- ccproxy/models/requests.py +22 -0
- ccproxy/models/responses.py +16 -0
- ccproxy/observability/access_logger.py +72 -14
- ccproxy/observability/metrics.py +151 -0
- ccproxy/observability/storage/duckdb_simple.py +12 -0
- ccproxy/observability/storage/models.py +16 -0
- ccproxy/observability/streaming_response.py +107 -0
- ccproxy/scheduler/manager.py +31 -6
- ccproxy/scheduler/tasks.py +122 -0
- ccproxy/services/claude_detection_service.py +269 -0
- ccproxy/services/claude_sdk_service.py +333 -130
- ccproxy/services/codex_detection_service.py +263 -0
- ccproxy/services/proxy_service.py +618 -197
- ccproxy/utils/__init__.py +9 -1
- ccproxy/utils/disconnection_monitor.py +83 -0
- ccproxy/utils/id_generator.py +12 -0
- ccproxy/utils/model_mapping.py +7 -5
- ccproxy/utils/startup_helpers.py +470 -0
- ccproxy_api-0.1.6.dist-info/METADATA +615 -0
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.6.dist-info}/RECORD +70 -47
- ccproxy/config/loader.py +0 -105
- ccproxy_api-0.1.4.dist-info/METADATA +0 -369
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.6.dist-info}/WHEEL +0 -0
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.6.dist-info}/entry_points.txt +0 -0
- {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,7 +5,6 @@ import json
|
|
|
5
5
|
import os
|
|
6
6
|
import random
|
|
7
7
|
import time
|
|
8
|
-
import urllib.parse
|
|
9
8
|
from collections.abc import AsyncGenerator
|
|
10
9
|
from pathlib import Path
|
|
11
10
|
from typing import TYPE_CHECKING, Any
|
|
@@ -14,9 +13,11 @@ import httpx
|
|
|
14
13
|
import structlog
|
|
15
14
|
from fastapi import HTTPException, Request
|
|
16
15
|
from fastapi.responses import StreamingResponse
|
|
16
|
+
from starlette.responses import Response
|
|
17
17
|
from typing_extensions import TypedDict
|
|
18
18
|
|
|
19
19
|
from ccproxy.config.settings import Settings
|
|
20
|
+
from ccproxy.core.codex_transformers import CodexRequestTransformer
|
|
20
21
|
from ccproxy.core.http import BaseProxyClient
|
|
21
22
|
from ccproxy.core.http_transformers import (
|
|
22
23
|
HTTPRequestTransformer,
|
|
@@ -29,6 +30,7 @@ from ccproxy.observability import (
|
|
|
29
30
|
timed_operation,
|
|
30
31
|
)
|
|
31
32
|
from ccproxy.observability.access_logger import log_request_access
|
|
33
|
+
from ccproxy.observability.streaming_response import StreamingResponseWithLogging
|
|
32
34
|
from ccproxy.services.credentials.manager import CredentialsManager
|
|
33
35
|
from ccproxy.testing import RealisticMockResponseGenerator
|
|
34
36
|
from ccproxy.utils.simple_request_logger import (
|
|
@@ -83,6 +85,7 @@ class ProxyService:
|
|
|
83
85
|
proxy_mode: str = "full",
|
|
84
86
|
target_base_url: str = "https://api.anthropic.com",
|
|
85
87
|
metrics: PrometheusMetrics | None = None,
|
|
88
|
+
app_state: Any = None,
|
|
86
89
|
) -> None:
|
|
87
90
|
"""Initialize the proxy service.
|
|
88
91
|
|
|
@@ -93,6 +96,7 @@ class ProxyService:
|
|
|
93
96
|
proxy_mode: Transformation mode - "minimal" or "full"
|
|
94
97
|
target_base_url: Base URL for the target API
|
|
95
98
|
metrics: Prometheus metrics collector (optional)
|
|
99
|
+
app_state: FastAPI app state for accessing detection data
|
|
96
100
|
"""
|
|
97
101
|
self.proxy_client = proxy_client
|
|
98
102
|
self.credentials_manager = credentials_manager
|
|
@@ -100,10 +104,12 @@ class ProxyService:
|
|
|
100
104
|
self.proxy_mode = proxy_mode
|
|
101
105
|
self.target_base_url = target_base_url.rstrip("/")
|
|
102
106
|
self.metrics = metrics or get_metrics()
|
|
107
|
+
self.app_state = app_state
|
|
103
108
|
|
|
104
109
|
# Create concrete transformers
|
|
105
110
|
self.request_transformer = HTTPRequestTransformer()
|
|
106
111
|
self.response_transformer = HTTPResponseTransformer()
|
|
112
|
+
self.codex_transformer = CodexRequestTransformer()
|
|
107
113
|
|
|
108
114
|
# Create OpenAI adapter for stream transformation
|
|
109
115
|
from ccproxy.adapters.openai.adapter import OpenAIAdapter
|
|
@@ -122,10 +128,6 @@ class ProxyService:
|
|
|
122
128
|
self._verbose_api = (
|
|
123
129
|
os.environ.get("CCPROXY_VERBOSE_API", "false").lower() == "true"
|
|
124
130
|
)
|
|
125
|
-
# Note: Request logging is now handled by simple_request_logger utility
|
|
126
|
-
# which checks CCPROXY_LOG_REQUESTS and CCPROXY_REQUEST_LOG_DIR independently
|
|
127
|
-
|
|
128
|
-
# Request context is now passed as parameters to methods
|
|
129
131
|
|
|
130
132
|
def _init_proxy_url(self) -> str | None:
|
|
131
133
|
"""Initialize proxy URL from environment variables."""
|
|
@@ -239,9 +241,25 @@ class ProxyService:
|
|
|
239
241
|
|
|
240
242
|
# 2. Request transformation
|
|
241
243
|
async with timed_operation("request_transform", ctx.request_id):
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
244
|
+
injection_mode = (
|
|
245
|
+
self.settings.claude.system_prompt_injection_mode.value
|
|
246
|
+
)
|
|
247
|
+
logger.debug(
|
|
248
|
+
"request_transform_start",
|
|
249
|
+
system_prompt_injection_mode=injection_mode,
|
|
250
|
+
)
|
|
251
|
+
transformed_request = (
|
|
252
|
+
await self.request_transformer.transform_proxy_request(
|
|
253
|
+
method,
|
|
254
|
+
path,
|
|
255
|
+
headers,
|
|
256
|
+
body,
|
|
257
|
+
query_params,
|
|
258
|
+
access_token,
|
|
259
|
+
self.target_base_url,
|
|
260
|
+
self.app_state,
|
|
261
|
+
injection_mode,
|
|
262
|
+
)
|
|
245
263
|
)
|
|
246
264
|
|
|
247
265
|
# 3. Check for bypass header to skip upstream forwarding
|
|
@@ -330,29 +348,25 @@ class ProxyService:
|
|
|
330
348
|
content_length=len(response_body) if response_body else 0,
|
|
331
349
|
)
|
|
332
350
|
|
|
333
|
-
#
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
openai_error
|
|
343
|
-
).encode("utf-8")
|
|
344
|
-
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
345
|
-
# Keep original error if parsing fails
|
|
346
|
-
pass
|
|
347
|
-
|
|
348
|
-
transformed_response = ResponseData(
|
|
349
|
-
status_code=status_code,
|
|
350
|
-
headers=response_headers,
|
|
351
|
-
body=transformed_error_body,
|
|
351
|
+
# Use transformer to handle error transformation (including OpenAI format)
|
|
352
|
+
transformed_response = (
|
|
353
|
+
await self.response_transformer.transform_proxy_response(
|
|
354
|
+
status_code,
|
|
355
|
+
response_headers,
|
|
356
|
+
response_body,
|
|
357
|
+
path,
|
|
358
|
+
self.proxy_mode,
|
|
359
|
+
)
|
|
352
360
|
)
|
|
353
361
|
else:
|
|
354
|
-
transformed_response =
|
|
355
|
-
|
|
362
|
+
transformed_response = (
|
|
363
|
+
await self.response_transformer.transform_proxy_response(
|
|
364
|
+
status_code,
|
|
365
|
+
response_headers,
|
|
366
|
+
response_body,
|
|
367
|
+
path,
|
|
368
|
+
self.proxy_mode,
|
|
369
|
+
)
|
|
356
370
|
)
|
|
357
371
|
|
|
358
372
|
# 5. Extract response metrics using direct JSON parsing
|
|
@@ -393,14 +407,6 @@ class ProxyService:
|
|
|
393
407
|
cost_usd=cost_usd,
|
|
394
408
|
)
|
|
395
409
|
|
|
396
|
-
# 7. Log comprehensive access log (includes Prometheus metrics)
|
|
397
|
-
await log_request_access(
|
|
398
|
-
context=ctx,
|
|
399
|
-
status_code=status_code,
|
|
400
|
-
method=method,
|
|
401
|
-
metrics=self.metrics,
|
|
402
|
-
)
|
|
403
|
-
|
|
404
410
|
return (
|
|
405
411
|
transformed_response["status_code"],
|
|
406
412
|
transformed_response["headers"],
|
|
@@ -408,28 +414,386 @@ class ProxyService:
|
|
|
408
414
|
)
|
|
409
415
|
|
|
410
416
|
except Exception as e:
|
|
411
|
-
|
|
412
|
-
|
|
417
|
+
ctx.add_metadata(error=e)
|
|
418
|
+
raise
|
|
413
419
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
420
|
+
async def handle_codex_request(
|
|
421
|
+
self,
|
|
422
|
+
method: str,
|
|
423
|
+
path: str,
|
|
424
|
+
session_id: str,
|
|
425
|
+
access_token: str,
|
|
426
|
+
request: Request,
|
|
427
|
+
settings: Settings,
|
|
428
|
+
) -> StreamingResponse | Response:
|
|
429
|
+
"""Handle OpenAI Codex proxy request with request/response capture.
|
|
430
|
+
|
|
431
|
+
Args:
|
|
432
|
+
method: HTTP method
|
|
433
|
+
path: Request path (e.g., "/responses" or "/{session_id}/responses")
|
|
434
|
+
session_id: Resolved session ID
|
|
435
|
+
access_token: OpenAI access token
|
|
436
|
+
request: FastAPI request object
|
|
437
|
+
settings: Application settings
|
|
422
438
|
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
439
|
+
Returns:
|
|
440
|
+
StreamingResponse or regular Response
|
|
441
|
+
"""
|
|
442
|
+
try:
|
|
443
|
+
# Read request body - check if already stored by middleware
|
|
444
|
+
if hasattr(request.state, "body"):
|
|
445
|
+
body = request.state.body
|
|
446
|
+
else:
|
|
447
|
+
body = await request.body()
|
|
448
|
+
|
|
449
|
+
# Parse request data to capture the instructions field and other metadata
|
|
450
|
+
request_data = None
|
|
451
|
+
try:
|
|
452
|
+
request_data = json.loads(body.decode("utf-8")) if body else {}
|
|
453
|
+
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
|
454
|
+
request_data = {}
|
|
455
|
+
logger.warning(
|
|
456
|
+
"codex_json_decode_failed",
|
|
427
457
|
error=str(e),
|
|
428
|
-
|
|
458
|
+
body_preview=body[:100].decode("utf-8", errors="replace")
|
|
459
|
+
if body
|
|
460
|
+
else None,
|
|
461
|
+
body_length=len(body) if body else 0,
|
|
429
462
|
)
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
463
|
+
|
|
464
|
+
# Parse request to extract account_id from token if available
|
|
465
|
+
import jwt
|
|
466
|
+
|
|
467
|
+
account_id = "unknown"
|
|
468
|
+
try:
|
|
469
|
+
decoded = jwt.decode(access_token, options={"verify_signature": False})
|
|
470
|
+
account_id = decoded.get(
|
|
471
|
+
"org_id", decoded.get("sub", decoded.get("account_id", "unknown"))
|
|
472
|
+
)
|
|
473
|
+
except Exception:
|
|
474
|
+
pass
|
|
475
|
+
|
|
476
|
+
# Get Codex detection data from app state
|
|
477
|
+
codex_detection_data = None
|
|
478
|
+
if self.app_state and hasattr(self.app_state, "codex_detection_data"):
|
|
479
|
+
codex_detection_data = self.app_state.codex_detection_data
|
|
480
|
+
|
|
481
|
+
# Use CodexRequestTransformer to build request
|
|
482
|
+
original_headers = dict(request.headers)
|
|
483
|
+
transformed_request = await self.codex_transformer.transform_codex_request(
|
|
484
|
+
method=method,
|
|
485
|
+
path=path,
|
|
486
|
+
headers=original_headers,
|
|
487
|
+
body=body,
|
|
488
|
+
access_token=access_token,
|
|
489
|
+
session_id=session_id,
|
|
490
|
+
account_id=account_id,
|
|
491
|
+
codex_detection_data=codex_detection_data,
|
|
492
|
+
target_base_url=settings.codex.base_url,
|
|
493
|
+
)
|
|
494
|
+
|
|
495
|
+
target_url = transformed_request["url"]
|
|
496
|
+
headers = transformed_request["headers"]
|
|
497
|
+
transformed_body = transformed_request["body"] or body
|
|
498
|
+
|
|
499
|
+
# Parse transformed body for logging
|
|
500
|
+
transformed_request_data = request_data
|
|
501
|
+
if transformed_body and transformed_body != body:
|
|
502
|
+
try:
|
|
503
|
+
transformed_request_data = json.loads(
|
|
504
|
+
transformed_body.decode("utf-8")
|
|
505
|
+
)
|
|
506
|
+
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
507
|
+
transformed_request_data = request_data
|
|
508
|
+
|
|
509
|
+
# Generate request ID for logging
|
|
510
|
+
from uuid import uuid4
|
|
511
|
+
|
|
512
|
+
request_id = f"codex_{uuid4().hex[:8]}"
|
|
513
|
+
|
|
514
|
+
# Log Codex request (including instructions field and headers)
|
|
515
|
+
await self._log_codex_request(
|
|
516
|
+
request_id=request_id,
|
|
517
|
+
method=method,
|
|
518
|
+
url=target_url,
|
|
519
|
+
headers=headers,
|
|
520
|
+
body_data=transformed_request_data,
|
|
521
|
+
session_id=session_id,
|
|
522
|
+
)
|
|
523
|
+
|
|
524
|
+
# Check if user explicitly requested streaming (from original request)
|
|
525
|
+
user_requested_streaming = self.codex_transformer._is_streaming_request(
|
|
526
|
+
body
|
|
527
|
+
)
|
|
528
|
+
|
|
529
|
+
# Forward request to ChatGPT backend
|
|
530
|
+
if user_requested_streaming:
|
|
531
|
+
# Handle streaming request with proper context management
|
|
532
|
+
# First, collect the response to check for errors
|
|
533
|
+
collected_chunks = []
|
|
534
|
+
chunk_count = 0
|
|
535
|
+
total_bytes = 0
|
|
536
|
+
response_status_code = 200
|
|
537
|
+
response_headers = {}
|
|
538
|
+
|
|
539
|
+
async def stream_codex_response() -> AsyncGenerator[bytes, None]:
|
|
540
|
+
nonlocal \
|
|
541
|
+
collected_chunks, \
|
|
542
|
+
chunk_count, \
|
|
543
|
+
total_bytes, \
|
|
544
|
+
response_status_code, \
|
|
545
|
+
response_headers
|
|
546
|
+
|
|
547
|
+
logger.debug(
|
|
548
|
+
"proxy_service_streaming_started",
|
|
549
|
+
request_id=request_id,
|
|
550
|
+
session_id=session_id,
|
|
551
|
+
)
|
|
552
|
+
|
|
553
|
+
async with (
|
|
554
|
+
httpx.AsyncClient(timeout=240.0) as client,
|
|
555
|
+
client.stream(
|
|
556
|
+
method=method,
|
|
557
|
+
url=target_url,
|
|
558
|
+
headers=headers,
|
|
559
|
+
content=transformed_body,
|
|
560
|
+
) as response,
|
|
561
|
+
):
|
|
562
|
+
# Capture response info for error checking
|
|
563
|
+
response_status_code = response.status_code
|
|
564
|
+
response_headers = dict(response.headers)
|
|
565
|
+
|
|
566
|
+
# Log response headers for streaming
|
|
567
|
+
await self._log_codex_response_headers(
|
|
568
|
+
request_id=request_id,
|
|
569
|
+
status_code=response.status_code,
|
|
570
|
+
headers=dict(response.headers),
|
|
571
|
+
stream_type="codex_sse",
|
|
572
|
+
)
|
|
573
|
+
|
|
574
|
+
# Check if upstream actually returned streaming
|
|
575
|
+
content_type = response.headers.get("content-type", "")
|
|
576
|
+
is_streaming = "text/event-stream" in content_type
|
|
577
|
+
|
|
578
|
+
if not is_streaming:
|
|
579
|
+
logger.warning(
|
|
580
|
+
"codex_expected_streaming_but_got_regular",
|
|
581
|
+
content_type=content_type,
|
|
582
|
+
status_code=response.status_code,
|
|
583
|
+
)
|
|
584
|
+
|
|
585
|
+
async for chunk in response.aiter_bytes():
|
|
586
|
+
chunk_count += 1
|
|
587
|
+
chunk_size = len(chunk)
|
|
588
|
+
total_bytes += chunk_size
|
|
589
|
+
collected_chunks.append(chunk)
|
|
590
|
+
|
|
591
|
+
logger.debug(
|
|
592
|
+
"proxy_service_streaming_chunk",
|
|
593
|
+
request_id=request_id,
|
|
594
|
+
chunk_number=chunk_count,
|
|
595
|
+
chunk_size=chunk_size,
|
|
596
|
+
total_bytes=total_bytes,
|
|
597
|
+
)
|
|
598
|
+
|
|
599
|
+
yield chunk
|
|
600
|
+
|
|
601
|
+
logger.debug(
|
|
602
|
+
"proxy_service_streaming_complete",
|
|
603
|
+
request_id=request_id,
|
|
604
|
+
total_chunks=chunk_count,
|
|
605
|
+
total_bytes=total_bytes,
|
|
606
|
+
)
|
|
607
|
+
|
|
608
|
+
# Log the complete stream data after streaming finishes
|
|
609
|
+
await self._log_codex_streaming_complete(
|
|
610
|
+
request_id=request_id,
|
|
611
|
+
chunks=collected_chunks,
|
|
612
|
+
)
|
|
613
|
+
|
|
614
|
+
# Execute the stream generator to collect the response
|
|
615
|
+
generator_chunks = []
|
|
616
|
+
async for chunk in stream_codex_response():
|
|
617
|
+
generator_chunks.append(chunk)
|
|
618
|
+
|
|
619
|
+
# Now check if this should be an error response
|
|
620
|
+
content_type = response_headers.get("content-type", "")
|
|
621
|
+
if (
|
|
622
|
+
response_status_code >= 400
|
|
623
|
+
and "text/event-stream" not in content_type
|
|
624
|
+
):
|
|
625
|
+
# Return error as regular Response with proper status code
|
|
626
|
+
error_content = b"".join(collected_chunks)
|
|
627
|
+
logger.warning(
|
|
628
|
+
"codex_returning_error_as_regular_response",
|
|
629
|
+
status_code=response_status_code,
|
|
630
|
+
content_type=content_type,
|
|
631
|
+
content_preview=error_content[:200].decode(
|
|
632
|
+
"utf-8", errors="replace"
|
|
633
|
+
),
|
|
634
|
+
)
|
|
635
|
+
return Response(
|
|
636
|
+
content=error_content,
|
|
637
|
+
status_code=response_status_code,
|
|
638
|
+
headers=response_headers,
|
|
639
|
+
)
|
|
640
|
+
|
|
641
|
+
# Return normal streaming response
|
|
642
|
+
async def replay_stream() -> AsyncGenerator[bytes, None]:
|
|
643
|
+
for chunk in generator_chunks:
|
|
644
|
+
yield chunk
|
|
645
|
+
|
|
646
|
+
# Forward upstream headers but filter out incompatible ones for streaming
|
|
647
|
+
streaming_headers = dict(response_headers)
|
|
648
|
+
# Remove headers that conflict with streaming responses
|
|
649
|
+
streaming_headers.pop("content-length", None)
|
|
650
|
+
streaming_headers.pop("content-encoding", None)
|
|
651
|
+
streaming_headers.pop("date", None)
|
|
652
|
+
# Set streaming-specific headers
|
|
653
|
+
streaming_headers.update(
|
|
654
|
+
{
|
|
655
|
+
"content-type": "text/event-stream",
|
|
656
|
+
"cache-control": "no-cache",
|
|
657
|
+
"connection": "keep-alive",
|
|
658
|
+
}
|
|
659
|
+
)
|
|
660
|
+
|
|
661
|
+
return StreamingResponse(
|
|
662
|
+
replay_stream(),
|
|
663
|
+
media_type="text/event-stream",
|
|
664
|
+
headers=streaming_headers,
|
|
665
|
+
)
|
|
666
|
+
else:
|
|
667
|
+
# Handle non-streaming request
|
|
668
|
+
async with httpx.AsyncClient(timeout=240.0) as client:
|
|
669
|
+
response = await client.request(
|
|
670
|
+
method=method,
|
|
671
|
+
url=target_url,
|
|
672
|
+
headers=headers,
|
|
673
|
+
content=transformed_body,
|
|
674
|
+
)
|
|
675
|
+
|
|
676
|
+
# Check if upstream response is streaming (shouldn't happen)
|
|
677
|
+
content_type = response.headers.get("content-type", "")
|
|
678
|
+
transfer_encoding = response.headers.get("transfer-encoding", "")
|
|
679
|
+
upstream_is_streaming = "text/event-stream" in content_type or (
|
|
680
|
+
transfer_encoding == "chunked" and content_type == ""
|
|
681
|
+
)
|
|
682
|
+
|
|
683
|
+
logger.debug(
|
|
684
|
+
"codex_response_non_streaming",
|
|
685
|
+
content_type=content_type,
|
|
686
|
+
user_requested_streaming=user_requested_streaming,
|
|
687
|
+
upstream_is_streaming=upstream_is_streaming,
|
|
688
|
+
transfer_encoding=transfer_encoding,
|
|
689
|
+
)
|
|
690
|
+
|
|
691
|
+
if upstream_is_streaming:
|
|
692
|
+
# Upstream is streaming but user didn't request streaming
|
|
693
|
+
# Collect all streaming data and return as JSON
|
|
694
|
+
logger.debug(
|
|
695
|
+
"converting_upstream_stream_to_json", request_id=request_id
|
|
696
|
+
)
|
|
697
|
+
|
|
698
|
+
collected_chunks = []
|
|
699
|
+
async for chunk in response.aiter_bytes():
|
|
700
|
+
collected_chunks.append(chunk)
|
|
701
|
+
|
|
702
|
+
# Combine all chunks
|
|
703
|
+
full_content = b"".join(collected_chunks)
|
|
704
|
+
|
|
705
|
+
# Try to parse the streaming data and extract the final response
|
|
706
|
+
try:
|
|
707
|
+
# Parse SSE data to extract JSON response
|
|
708
|
+
content_str = full_content.decode("utf-8")
|
|
709
|
+
lines = content_str.strip().split("\n")
|
|
710
|
+
|
|
711
|
+
# Look for the last data line with JSON content
|
|
712
|
+
final_json = None
|
|
713
|
+
for line in reversed(lines):
|
|
714
|
+
if line.startswith("data: ") and not line.endswith(
|
|
715
|
+
"[DONE]"
|
|
716
|
+
):
|
|
717
|
+
try:
|
|
718
|
+
json_str = line[6:] # Remove "data: " prefix
|
|
719
|
+
final_json = json.loads(json_str)
|
|
720
|
+
break
|
|
721
|
+
except json.JSONDecodeError:
|
|
722
|
+
continue
|
|
723
|
+
|
|
724
|
+
if final_json:
|
|
725
|
+
response_content = json.dumps(final_json).encode(
|
|
726
|
+
"utf-8"
|
|
727
|
+
)
|
|
728
|
+
else:
|
|
729
|
+
# Fallback: return the raw content
|
|
730
|
+
response_content = full_content
|
|
731
|
+
|
|
732
|
+
except (UnicodeDecodeError, json.JSONDecodeError):
|
|
733
|
+
# Fallback: return raw content
|
|
734
|
+
response_content = full_content
|
|
735
|
+
|
|
736
|
+
# Log the complete response
|
|
737
|
+
try:
|
|
738
|
+
response_data = json.loads(response_content.decode("utf-8"))
|
|
739
|
+
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
740
|
+
response_data = {
|
|
741
|
+
"raw_content": response_content.decode(
|
|
742
|
+
"utf-8", errors="replace"
|
|
743
|
+
)
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
await self._log_codex_response(
|
|
747
|
+
request_id=request_id,
|
|
748
|
+
status_code=response.status_code,
|
|
749
|
+
headers=dict(response.headers),
|
|
750
|
+
body_data=response_data,
|
|
751
|
+
)
|
|
752
|
+
|
|
753
|
+
# Return as JSON response
|
|
754
|
+
return Response(
|
|
755
|
+
content=response_content,
|
|
756
|
+
status_code=response.status_code,
|
|
757
|
+
headers={
|
|
758
|
+
"content-type": "application/json",
|
|
759
|
+
"content-length": str(len(response_content)),
|
|
760
|
+
},
|
|
761
|
+
media_type="application/json",
|
|
762
|
+
)
|
|
763
|
+
else:
|
|
764
|
+
# For regular non-streaming responses
|
|
765
|
+
response_data = None
|
|
766
|
+
try:
|
|
767
|
+
response_data = (
|
|
768
|
+
json.loads(response.content.decode("utf-8"))
|
|
769
|
+
if response.content
|
|
770
|
+
else {}
|
|
771
|
+
)
|
|
772
|
+
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
773
|
+
response_data = {
|
|
774
|
+
"raw_content": response.content.decode(
|
|
775
|
+
"utf-8", errors="replace"
|
|
776
|
+
)
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
await self._log_codex_response(
|
|
780
|
+
request_id=request_id,
|
|
781
|
+
status_code=response.status_code,
|
|
782
|
+
headers=dict(response.headers),
|
|
783
|
+
body_data=response_data,
|
|
784
|
+
)
|
|
785
|
+
|
|
786
|
+
# Return regular response
|
|
787
|
+
return Response(
|
|
788
|
+
content=response.content,
|
|
789
|
+
status_code=response.status_code,
|
|
790
|
+
headers=dict(response.headers),
|
|
791
|
+
media_type=response.headers.get("content-type"),
|
|
792
|
+
)
|
|
793
|
+
|
|
794
|
+
except Exception as e:
|
|
795
|
+
logger.error("Codex request failed", error=str(e), session_id=session_id)
|
|
796
|
+
raise
|
|
433
797
|
|
|
434
798
|
async def _get_access_token(self) -> str:
|
|
435
799
|
"""Get access token for upstream authentication.
|
|
@@ -491,120 +855,6 @@ class ProxyService:
|
|
|
491
855
|
detail="Authentication failed",
|
|
492
856
|
) from e
|
|
493
857
|
|
|
494
|
-
async def _transform_request(
|
|
495
|
-
self,
|
|
496
|
-
method: str,
|
|
497
|
-
path: str,
|
|
498
|
-
headers: dict[str, str],
|
|
499
|
-
body: bytes | None,
|
|
500
|
-
query_params: dict[str, str | list[str]] | None,
|
|
501
|
-
access_token: str,
|
|
502
|
-
) -> RequestData:
|
|
503
|
-
"""Transform request using the transformer pipeline.
|
|
504
|
-
|
|
505
|
-
Args:
|
|
506
|
-
method: HTTP method
|
|
507
|
-
path: Request path
|
|
508
|
-
headers: Request headers
|
|
509
|
-
body: Request body
|
|
510
|
-
query_params: Query parameters
|
|
511
|
-
access_token: OAuth access token
|
|
512
|
-
|
|
513
|
-
Returns:
|
|
514
|
-
Transformed request data
|
|
515
|
-
"""
|
|
516
|
-
# Transform path
|
|
517
|
-
transformed_path = self.request_transformer.transform_path(
|
|
518
|
-
path, self.proxy_mode
|
|
519
|
-
)
|
|
520
|
-
target_url = f"{self.target_base_url}{transformed_path}"
|
|
521
|
-
|
|
522
|
-
# Add beta=true query parameter for /v1/messages requests if not already present
|
|
523
|
-
if transformed_path == "/v1/messages":
|
|
524
|
-
if query_params is None:
|
|
525
|
-
query_params = {}
|
|
526
|
-
elif "beta" not in query_params:
|
|
527
|
-
query_params = dict(query_params) # Make a copy
|
|
528
|
-
|
|
529
|
-
if "beta" not in query_params:
|
|
530
|
-
query_params["beta"] = "true"
|
|
531
|
-
logger.debug("beta_parameter_added")
|
|
532
|
-
|
|
533
|
-
# Transform body first (as it might change size)
|
|
534
|
-
proxy_body = None
|
|
535
|
-
if body:
|
|
536
|
-
proxy_body = self.request_transformer.transform_request_body(
|
|
537
|
-
body, path, self.proxy_mode
|
|
538
|
-
)
|
|
539
|
-
|
|
540
|
-
# Transform headers (and update Content-Length if body changed)
|
|
541
|
-
proxy_headers = self.request_transformer.create_proxy_headers(
|
|
542
|
-
headers, access_token, self.proxy_mode
|
|
543
|
-
)
|
|
544
|
-
|
|
545
|
-
# Update Content-Length if body was transformed and size changed
|
|
546
|
-
if proxy_body and body and len(proxy_body) != len(body):
|
|
547
|
-
# Remove any existing content-length headers (case-insensitive)
|
|
548
|
-
proxy_headers = {
|
|
549
|
-
k: v for k, v in proxy_headers.items() if k.lower() != "content-length"
|
|
550
|
-
}
|
|
551
|
-
proxy_headers["Content-Length"] = str(len(proxy_body))
|
|
552
|
-
elif proxy_body and not body:
|
|
553
|
-
# New body was created where none existed
|
|
554
|
-
proxy_headers["Content-Length"] = str(len(proxy_body))
|
|
555
|
-
|
|
556
|
-
# Add query parameters to URL if present
|
|
557
|
-
if query_params:
|
|
558
|
-
query_string = urllib.parse.urlencode(query_params)
|
|
559
|
-
target_url = f"{target_url}?{query_string}"
|
|
560
|
-
|
|
561
|
-
return {
|
|
562
|
-
"method": method,
|
|
563
|
-
"url": target_url,
|
|
564
|
-
"headers": proxy_headers,
|
|
565
|
-
"body": proxy_body,
|
|
566
|
-
}
|
|
567
|
-
|
|
568
|
-
async def _transform_response(
|
|
569
|
-
self,
|
|
570
|
-
status_code: int,
|
|
571
|
-
headers: dict[str, str],
|
|
572
|
-
body: bytes,
|
|
573
|
-
original_path: str,
|
|
574
|
-
) -> ResponseData:
|
|
575
|
-
"""Transform response using the transformer pipeline.
|
|
576
|
-
|
|
577
|
-
Args:
|
|
578
|
-
status_code: HTTP status code
|
|
579
|
-
headers: Response headers
|
|
580
|
-
body: Response body
|
|
581
|
-
original_path: Original request path for context
|
|
582
|
-
|
|
583
|
-
Returns:
|
|
584
|
-
Transformed response data
|
|
585
|
-
"""
|
|
586
|
-
# For error responses, pass through without transformation
|
|
587
|
-
if status_code >= 400:
|
|
588
|
-
return {
|
|
589
|
-
"status_code": status_code,
|
|
590
|
-
"headers": headers,
|
|
591
|
-
"body": body,
|
|
592
|
-
}
|
|
593
|
-
|
|
594
|
-
transformed_body = self.response_transformer.transform_response_body(
|
|
595
|
-
body, original_path, self.proxy_mode
|
|
596
|
-
)
|
|
597
|
-
|
|
598
|
-
transformed_headers = self.response_transformer.transform_response_headers(
|
|
599
|
-
headers, original_path, len(transformed_body), self.proxy_mode
|
|
600
|
-
)
|
|
601
|
-
|
|
602
|
-
return {
|
|
603
|
-
"status_code": status_code,
|
|
604
|
-
"headers": transformed_headers,
|
|
605
|
-
"body": transformed_body,
|
|
606
|
-
}
|
|
607
|
-
|
|
608
858
|
def _redact_headers(self, headers: dict[str, str]) -> dict[str, str]:
|
|
609
859
|
"""Redact sensitive information from headers for safe logging."""
|
|
610
860
|
return {
|
|
@@ -715,6 +965,155 @@ class ProxyService:
|
|
|
715
965
|
timestamp=timestamp,
|
|
716
966
|
)
|
|
717
967
|
|
|
968
|
+
async def _log_codex_request(
|
|
969
|
+
self,
|
|
970
|
+
request_id: str,
|
|
971
|
+
method: str,
|
|
972
|
+
url: str,
|
|
973
|
+
headers: dict[str, str],
|
|
974
|
+
body_data: dict[str, Any] | None,
|
|
975
|
+
session_id: str,
|
|
976
|
+
) -> None:
|
|
977
|
+
"""Log outgoing Codex request preserving instructions field exactly."""
|
|
978
|
+
if not self._verbose_api:
|
|
979
|
+
return
|
|
980
|
+
|
|
981
|
+
# Log to console with redacted headers
|
|
982
|
+
logger.info(
|
|
983
|
+
"verbose_codex_request",
|
|
984
|
+
request_id=request_id,
|
|
985
|
+
method=method,
|
|
986
|
+
url=url,
|
|
987
|
+
headers=self._redact_headers(headers),
|
|
988
|
+
session_id=session_id,
|
|
989
|
+
instructions_preview=(
|
|
990
|
+
body_data.get("instructions", "")[:100] + "..."
|
|
991
|
+
if body_data and body_data.get("instructions")
|
|
992
|
+
else None
|
|
993
|
+
),
|
|
994
|
+
)
|
|
995
|
+
|
|
996
|
+
# Save complete request to file (without redaction)
|
|
997
|
+
timestamp = time.strftime("%Y%m%d_%H%M%S")
|
|
998
|
+
await write_request_log(
|
|
999
|
+
request_id=request_id,
|
|
1000
|
+
log_type="codex_request",
|
|
1001
|
+
data={
|
|
1002
|
+
"method": method,
|
|
1003
|
+
"url": url,
|
|
1004
|
+
"headers": dict(headers),
|
|
1005
|
+
"body": body_data,
|
|
1006
|
+
"session_id": session_id,
|
|
1007
|
+
},
|
|
1008
|
+
timestamp=timestamp,
|
|
1009
|
+
)
|
|
1010
|
+
|
|
1011
|
+
async def _log_codex_response(
|
|
1012
|
+
self,
|
|
1013
|
+
request_id: str,
|
|
1014
|
+
status_code: int,
|
|
1015
|
+
headers: dict[str, str],
|
|
1016
|
+
body_data: dict[str, Any] | None,
|
|
1017
|
+
) -> None:
|
|
1018
|
+
"""Log complete non-streaming Codex response."""
|
|
1019
|
+
if not self._verbose_api:
|
|
1020
|
+
return
|
|
1021
|
+
|
|
1022
|
+
# Log to console with redacted headers
|
|
1023
|
+
logger.info(
|
|
1024
|
+
"verbose_codex_response",
|
|
1025
|
+
request_id=request_id,
|
|
1026
|
+
status_code=status_code,
|
|
1027
|
+
headers=self._redact_headers(headers),
|
|
1028
|
+
response_type="non_streaming",
|
|
1029
|
+
)
|
|
1030
|
+
|
|
1031
|
+
# Save complete response to file
|
|
1032
|
+
timestamp = time.strftime("%Y%m%d_%H%M%S")
|
|
1033
|
+
await write_request_log(
|
|
1034
|
+
request_id=request_id,
|
|
1035
|
+
log_type="codex_response",
|
|
1036
|
+
data={
|
|
1037
|
+
"status_code": status_code,
|
|
1038
|
+
"headers": dict(headers),
|
|
1039
|
+
"body": body_data,
|
|
1040
|
+
},
|
|
1041
|
+
timestamp=timestamp,
|
|
1042
|
+
)
|
|
1043
|
+
|
|
1044
|
+
async def _log_codex_response_headers(
|
|
1045
|
+
self,
|
|
1046
|
+
request_id: str,
|
|
1047
|
+
status_code: int,
|
|
1048
|
+
headers: dict[str, str],
|
|
1049
|
+
stream_type: str,
|
|
1050
|
+
) -> None:
|
|
1051
|
+
"""Log streaming Codex response headers."""
|
|
1052
|
+
if not self._verbose_api:
|
|
1053
|
+
return
|
|
1054
|
+
|
|
1055
|
+
# Log to console with redacted headers
|
|
1056
|
+
logger.info(
|
|
1057
|
+
"verbose_codex_response_headers",
|
|
1058
|
+
request_id=request_id,
|
|
1059
|
+
status_code=status_code,
|
|
1060
|
+
headers=self._redact_headers(headers),
|
|
1061
|
+
stream_type=stream_type,
|
|
1062
|
+
)
|
|
1063
|
+
|
|
1064
|
+
# Save response headers to file
|
|
1065
|
+
timestamp = time.strftime("%Y%m%d_%H%M%S")
|
|
1066
|
+
await write_request_log(
|
|
1067
|
+
request_id=request_id,
|
|
1068
|
+
log_type="codex_response_headers",
|
|
1069
|
+
data={
|
|
1070
|
+
"status_code": status_code,
|
|
1071
|
+
"headers": dict(headers),
|
|
1072
|
+
"stream_type": stream_type,
|
|
1073
|
+
},
|
|
1074
|
+
timestamp=timestamp,
|
|
1075
|
+
)
|
|
1076
|
+
|
|
1077
|
+
async def _log_codex_streaming_complete(
    self,
    request_id: str,
    chunks: list[bytes],
) -> None:
    """Log the full body of a Codex stream once it has finished.

    Returns immediately unless ``self._verbose_api`` is truthy. Otherwise the
    chunks are concatenated, decoded as UTF-8, summarised in a
    ``verbose_codex_streaming_complete`` log event (byte count, chunk count
    and a preview of at most 200 characters) and then passed in full to
    ``write_request_log``.

    Args:
        request_id: Identifier attached to both the log event and the
            written log entry.
        chunks: Raw byte chunks collected from the stream, in order.
    """
    if not self._verbose_api:
        return

    complete_data = b"".join(chunks)
    # errors="replace" substitutes U+FFFD for undecodable bytes instead of
    # raising, so no exception fallback is needed around this call.
    decoded_data = complete_data.decode("utf-8", errors="replace")

    # Keep the console event short; the complete text goes to the log entry.
    if len(decoded_data) > 200:
        data_preview = decoded_data[:200] + "..."
    else:
        data_preview = decoded_data

    logger.info(
        "verbose_codex_streaming_complete",
        request_id=request_id,
        total_bytes=len(complete_data),
        chunk_count=len(chunks),
        data_preview=data_preview,
    )

    # Save complete streaming data via the request-log writer.
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    await write_request_log(
        request_id=request_id,
        log_type="codex_streaming_complete",
        data={
            "total_bytes": len(complete_data),
            "chunk_count": len(chunks),
            "complete_data": decoded_data,
        },
        timestamp=timestamp,
    )
|
|
1116
|
+
|
|
718
1117
|
def _should_stream_response(self, headers: dict[str, str]) -> bool:
|
|
719
1118
|
"""Check if response should be streamed based on request headers.
|
|
720
1119
|
|
|
@@ -810,18 +1209,17 @@ class ProxyService:
|
|
|
810
1209
|
error_detail=error_content.decode("utf-8", errors="replace"),
|
|
811
1210
|
)
|
|
812
1211
|
|
|
813
|
-
#
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
pass
|
|
1212
|
+
# Use transformer to handle error transformation (including OpenAI format)
|
|
1213
|
+
transformed_error_response = (
|
|
1214
|
+
await self.response_transformer.transform_proxy_response(
|
|
1215
|
+
response.status_code,
|
|
1216
|
+
dict(response.headers),
|
|
1217
|
+
error_content,
|
|
1218
|
+
original_path,
|
|
1219
|
+
self.proxy_mode,
|
|
1220
|
+
)
|
|
1221
|
+
)
|
|
1222
|
+
transformed_error_body = transformed_error_response["body"]
|
|
825
1223
|
|
|
826
1224
|
# Update context with error status
|
|
827
1225
|
ctx.add_metadata(status_code=response.status_code)
|
|
@@ -844,10 +1242,32 @@ class ProxyService:
|
|
|
844
1242
|
)
|
|
845
1243
|
|
|
846
1244
|
# If no error, proceed with streaming
|
|
847
|
-
#
|
|
1245
|
+
# Make initial request to get headers
|
|
1246
|
+
proxy_url = self._proxy_url
|
|
1247
|
+
verify = self._ssl_context
|
|
1248
|
+
|
|
848
1249
|
response_headers = {}
|
|
849
1250
|
response_status = 200
|
|
850
1251
|
|
|
1252
|
+
async with httpx.AsyncClient(
|
|
1253
|
+
timeout=timeout, proxy=proxy_url, verify=verify
|
|
1254
|
+
) as client:
|
|
1255
|
+
# Make initial request to capture headers
|
|
1256
|
+
initial_response = await client.send(
|
|
1257
|
+
client.build_request(
|
|
1258
|
+
method=request_data["method"],
|
|
1259
|
+
url=request_data["url"],
|
|
1260
|
+
headers=request_data["headers"],
|
|
1261
|
+
content=request_data["body"],
|
|
1262
|
+
),
|
|
1263
|
+
stream=True,
|
|
1264
|
+
)
|
|
1265
|
+
response_status = initial_response.status_code
|
|
1266
|
+
response_headers = dict(initial_response.headers)
|
|
1267
|
+
|
|
1268
|
+
# Close the initial response since we'll make a new one in the generator
|
|
1269
|
+
await initial_response.aclose()
|
|
1270
|
+
|
|
851
1271
|
# Initialize streaming metrics collector
|
|
852
1272
|
from ccproxy.utils.streaming_metrics import StreamingMetricsCollector
|
|
853
1273
|
|
|
@@ -1006,18 +1426,7 @@ class ProxyService:
|
|
|
1006
1426
|
cost_usd=cost_usd,
|
|
1007
1427
|
)
|
|
1008
1428
|
|
|
1009
|
-
#
|
|
1010
|
-
from ccproxy.observability.access_logger import (
|
|
1011
|
-
log_request_access,
|
|
1012
|
-
)
|
|
1013
|
-
|
|
1014
|
-
await log_request_access(
|
|
1015
|
-
context=ctx,
|
|
1016
|
-
status_code=response_status,
|
|
1017
|
-
metrics=self.metrics,
|
|
1018
|
-
# Additional metadata for streaming completion
|
|
1019
|
-
event_type="streaming_complete",
|
|
1020
|
-
)
|
|
1429
|
+
# Access logging is now handled by StreamingResponseWithLogging
|
|
1021
1430
|
|
|
1022
1431
|
if (
|
|
1023
1432
|
"content_block_delta" in chunk_str
|
|
@@ -1063,6 +1472,11 @@ class ProxyService:
|
|
|
1063
1472
|
# Always use upstream headers as base
|
|
1064
1473
|
final_headers = response_headers.copy()
|
|
1065
1474
|
|
|
1475
|
+
# Remove headers that can cause conflicts
|
|
1476
|
+
final_headers.pop(
|
|
1477
|
+
"date", None
|
|
1478
|
+
) # Remove upstream date header to avoid conflicts
|
|
1479
|
+
|
|
1066
1480
|
# Ensure critical headers for streaming
|
|
1067
1481
|
final_headers["Cache-Control"] = "no-cache"
|
|
1068
1482
|
final_headers["Connection"] = "keep-alive"
|
|
@@ -1071,8 +1485,10 @@ class ProxyService:
|
|
|
1071
1485
|
if "content-type" not in final_headers:
|
|
1072
1486
|
final_headers["content-type"] = "text/event-stream"
|
|
1073
1487
|
|
|
1074
|
-
return
|
|
1075
|
-
stream_generator(),
|
|
1488
|
+
return StreamingResponseWithLogging(
|
|
1489
|
+
content=stream_generator(),
|
|
1490
|
+
request_context=ctx,
|
|
1491
|
+
metrics=self.metrics,
|
|
1076
1492
|
status_code=response_status,
|
|
1077
1493
|
headers=final_headers,
|
|
1078
1494
|
)
|
|
@@ -1330,7 +1746,12 @@ class ProxyService:
|
|
|
1330
1746
|
cost_usd=cost_usd,
|
|
1331
1747
|
)
|
|
1332
1748
|
|
|
1333
|
-
return
|
|
1749
|
+
return StreamingResponseWithLogging(
|
|
1750
|
+
content=realistic_mock_stream_generator(),
|
|
1751
|
+
request_context=ctx,
|
|
1752
|
+
metrics=self.metrics,
|
|
1753
|
+
headers=headers,
|
|
1754
|
+
)
|
|
1334
1755
|
|
|
1335
1756
|
async def _generate_realistic_openai_stream(
|
|
1336
1757
|
self,
|