ccproxy-api 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- ccproxy/_version.py +2 -2
- ccproxy/adapters/codex/__init__.py +11 -0
- ccproxy/adapters/openai/models.py +1 -1
- ccproxy/adapters/openai/response_adapter.py +355 -0
- ccproxy/adapters/openai/response_models.py +178 -0
- ccproxy/api/app.py +31 -3
- ccproxy/api/dependencies.py +1 -8
- ccproxy/api/middleware/errors.py +15 -7
- ccproxy/api/routes/codex.py +1251 -0
- ccproxy/api/routes/health.py +228 -3
- ccproxy/auth/openai/__init__.py +13 -0
- ccproxy/auth/openai/credentials.py +166 -0
- ccproxy/auth/openai/oauth_client.py +334 -0
- ccproxy/auth/openai/storage.py +184 -0
- ccproxy/claude_sdk/options.py +1 -1
- ccproxy/cli/commands/auth.py +398 -1
- ccproxy/cli/commands/serve.py +3 -1
- ccproxy/config/claude.py +1 -1
- ccproxy/config/codex.py +100 -0
- ccproxy/config/scheduler.py +8 -8
- ccproxy/config/settings.py +19 -0
- ccproxy/core/codex_transformers.py +389 -0
- ccproxy/core/http_transformers.py +153 -2
- ccproxy/data/claude_headers_fallback.json +37 -0
- ccproxy/data/codex_headers_fallback.json +14 -0
- ccproxy/models/detection.py +82 -0
- ccproxy/models/requests.py +22 -0
- ccproxy/models/responses.py +16 -0
- ccproxy/scheduler/manager.py +2 -2
- ccproxy/scheduler/tasks.py +105 -65
- ccproxy/services/claude_detection_service.py +7 -33
- ccproxy/services/codex_detection_service.py +252 -0
- ccproxy/services/proxy_service.py +530 -0
- ccproxy/utils/model_mapping.py +7 -5
- ccproxy/utils/startup_helpers.py +205 -12
- ccproxy/utils/version_checker.py +6 -0
- ccproxy_api-0.1.7.dist-info/METADATA +615 -0
- {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.7.dist-info}/RECORD +41 -28
- ccproxy_api-0.1.5.dist-info/METADATA +0 -396
- {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.7.dist-info}/WHEEL +0 -0
- {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.7.dist-info}/entry_points.txt +0 -0
- {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.7.dist-info}/licenses/LICENSE +0 -0
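The headline change in this release is a new OpenAI Codex provider: OAuth credential handling under ccproxy/auth/openai/, a /codex router (ccproxy/api/routes/codex.py, shown below), and adapters that translate OpenAI Chat Completions requests into the ChatGPT backend's Response API. A minimal client sketch of how the new /codex/chat/completions route might be exercised is given here; the base URL, port, and payload values are illustrative assumptions, not values taken from the package.

# Hypothetical usage sketch for the /codex/chat/completions route added in 0.1.7.
# The base URL and model name are assumptions; point it at wherever ccproxy-api is running.
import httpx

BASE_URL = "http://localhost:8000"  # assumed local ccproxy-api instance

def stream_codex_chat(prompt: str) -> None:
    payload = {
        "model": "gpt-5",
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    # The route also honors an optional session_id header; omitted, one is generated per request.
    with httpx.stream(
        "POST", f"{BASE_URL}/codex/chat/completions", json=payload, timeout=240.0
    ) as resp:
        for line in resp.iter_lines():
            if line.startswith("data:") and "[DONE]" not in line:
                print(line[5:].strip())  # each data line carries a chat.completion.chunk JSON payload

if __name__ == "__main__":
    stream_codex_chat("Hello from the 0.1.7 Codex route")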
ccproxy/api/routes/codex.py (new file)
@@ -0,0 +1,1251 @@
"""OpenAI Codex API routes."""

import json
import time
import uuid
from collections.abc import AsyncIterator

import httpx
import structlog
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import StreamingResponse
from starlette.responses import Response

from ccproxy.adapters.openai.models import (
    OpenAIChatCompletionRequest,
    OpenAIChatCompletionResponse,
)
from ccproxy.adapters.openai.response_adapter import ResponseAdapter
from ccproxy.api.dependencies import ProxyServiceDep
from ccproxy.auth.openai import OpenAITokenManager
from ccproxy.config.settings import Settings, get_settings
from ccproxy.core.errors import AuthenticationError, ProxyError
from ccproxy.observability.streaming_response import StreamingResponseWithLogging


logger = structlog.get_logger(__name__)

# Create router
router = APIRouter(prefix="/codex", tags=["codex"])


def get_token_manager() -> OpenAITokenManager:
    """Get OpenAI token manager dependency."""
    return OpenAITokenManager()


def resolve_session_id(
    path_session: str | None = None,
    header_session: str | None = None,
) -> str:
    """Resolve session ID with priority: path > header > generated."""
    return path_session or header_session or str(uuid.uuid4())


async def check_codex_enabled(settings: Settings = Depends(get_settings)) -> None:
    """Check if Codex is enabled."""
    if not settings.codex.enabled:
        raise HTTPException(
            status_code=503, detail="OpenAI Codex provider is not enabled"
        )


@router.post("/responses", response_model=None)
async def codex_responses(
    request: Request,
    proxy_service: ProxyServiceDep,
    settings: Settings = Depends(get_settings),
    token_manager: OpenAITokenManager = Depends(get_token_manager),
    _: None = Depends(check_codex_enabled),
) -> StreamingResponse | Response:
    """Create completion with auto-generated session_id.

    This endpoint creates a new completion request with an automatically
    generated session_id. Each request gets a unique session.
    """
    # Get session_id from header if provided
    header_session_id = request.headers.get("session_id")
    session_id = resolve_session_id(header_session=header_session_id)

    # Get and validate access token
    try:
        access_token = await token_manager.get_valid_token()
        if not access_token:
            raise HTTPException(
                status_code=401,
                detail="No valid OpenAI credentials found. Please authenticate first.",
            )
    except HTTPException:
        # Re-raise HTTPExceptions without chaining to avoid stack traces
        raise
    except Exception as e:
        logger.debug(
            "Failed to get OpenAI access token",
            error=str(e),
            error_type=type(e).__name__,
        )
        raise HTTPException(
            status_code=401, detail="Failed to retrieve valid credentials"
        ) from None

    try:
        # Handle the Codex request
        response = await proxy_service.handle_codex_request(
            method="POST",
            path="/responses",
            session_id=session_id,
            access_token=access_token,
            request=request,
            settings=settings,
        )
        return response
    except AuthenticationError as e:
        raise HTTPException(status_code=401, detail=str(e)) from None
    except ProxyError as e:
        raise HTTPException(status_code=502, detail=str(e)) from None
    except Exception as e:
        logger.error("Unexpected error in codex_responses", error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error") from None


@router.post("/{session_id}/responses", response_model=None)
async def codex_responses_with_session(
    session_id: str,
    request: Request,
    proxy_service: ProxyServiceDep,
    settings: Settings = Depends(get_settings),
    token_manager: OpenAITokenManager = Depends(get_token_manager),
    _: None = Depends(check_codex_enabled),
) -> StreamingResponse | Response:
    """Create completion with specific session_id.

    This endpoint creates a completion request using the provided session_id
    from the URL path. This allows for session-specific conversations.
    """
    # Get and validate access token
    try:
        access_token = await token_manager.get_valid_token()
        if not access_token:
            raise HTTPException(
                status_code=401,
                detail="No valid OpenAI credentials found. Please authenticate first.",
            )
    except HTTPException:
        # Re-raise HTTPExceptions without chaining to avoid stack traces
        raise
    except Exception as e:
        logger.debug(
            "Failed to get OpenAI access token",
            error=str(e),
            error_type=type(e).__name__,
        )
        raise HTTPException(
            status_code=401, detail="Failed to retrieve valid credentials"
        ) from None

    try:
        # Handle the Codex request with specific session_id
        response = await proxy_service.handle_codex_request(
            method="POST",
            path=f"/{session_id}/responses",
            session_id=session_id,
            access_token=access_token,
            request=request,
            settings=settings,
        )
        return response
    except AuthenticationError as e:
        raise HTTPException(status_code=401, detail=str(e)) from None
    except ProxyError as e:
        raise HTTPException(status_code=502, detail=str(e)) from None
    except Exception as e:
        logger.error("Unexpected error in codex_responses_with_session", error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error") from None

@router.post("/chat/completions", response_model=None)
|
|
167
|
+
async def codex_chat_completions(
|
|
168
|
+
openai_request: OpenAIChatCompletionRequest,
|
|
169
|
+
request: Request,
|
|
170
|
+
proxy_service: ProxyServiceDep,
|
|
171
|
+
settings: Settings = Depends(get_settings),
|
|
172
|
+
token_manager: OpenAITokenManager = Depends(get_token_manager),
|
|
173
|
+
_: None = Depends(check_codex_enabled),
|
|
174
|
+
) -> StreamingResponse | OpenAIChatCompletionResponse:
|
|
175
|
+
"""OpenAI-compatible chat completions endpoint for Codex.
|
|
176
|
+
|
|
177
|
+
This endpoint accepts OpenAI chat/completions format and converts it
|
|
178
|
+
to OpenAI Response API format before forwarding to the ChatGPT backend.
|
|
179
|
+
"""
|
|
180
|
+
# Get session_id from header if provided, otherwise generate
|
|
181
|
+
header_session_id = request.headers.get("session_id")
|
|
182
|
+
session_id = resolve_session_id(header_session=header_session_id)
|
|
183
|
+
|
|
184
|
+
# Get and validate access token
|
|
185
|
+
try:
|
|
186
|
+
access_token = await token_manager.get_valid_token()
|
|
187
|
+
if not access_token:
|
|
188
|
+
raise HTTPException(
|
|
189
|
+
status_code=401,
|
|
190
|
+
detail="No valid OpenAI credentials found. Please authenticate first.",
|
|
191
|
+
)
|
|
192
|
+
except HTTPException:
|
|
193
|
+
# Re-raise HTTPExceptions without chaining to avoid stack traces
|
|
194
|
+
raise
|
|
195
|
+
except Exception as e:
|
|
196
|
+
logger.debug(
|
|
197
|
+
"Failed to get OpenAI access token",
|
|
198
|
+
error=str(e),
|
|
199
|
+
error_type=type(e).__name__,
|
|
200
|
+
)
|
|
201
|
+
raise HTTPException(
|
|
202
|
+
status_code=401, detail="Failed to retrieve valid credentials"
|
|
203
|
+
) from None
|
|
204
|
+
|
|
205
|
+
try:
|
|
206
|
+
# Create adapter for format conversion
|
|
207
|
+
adapter = ResponseAdapter()
|
|
208
|
+
|
|
209
|
+
# Convert OpenAI Chat Completions format to Response API format
|
|
210
|
+
response_request = adapter.chat_to_response_request(openai_request)
|
|
211
|
+
|
|
212
|
+
# Convert the transformed request to bytes
|
|
213
|
+
codex_body = response_request.model_dump_json().encode("utf-8")
|
|
214
|
+
|
|
215
|
+
# Get request context from middleware
|
|
216
|
+
request_context = getattr(request.state, "context", None)
|
|
217
|
+
|
|
218
|
+
# Create a mock request object with the converted body
|
|
219
|
+
class MockRequest:
|
|
220
|
+
def __init__(self, original_request: Request, new_body: bytes) -> None:
|
|
221
|
+
self.method = original_request.method
|
|
222
|
+
self.url = original_request.url
|
|
223
|
+
self.headers = dict(original_request.headers)
|
|
224
|
+
self.headers["content-length"] = str(len(new_body))
|
|
225
|
+
self.state = original_request.state
|
|
226
|
+
self._body = new_body
|
|
227
|
+
|
|
228
|
+
async def body(self) -> bytes:
|
|
229
|
+
return self._body
|
|
230
|
+
|
|
231
|
+
mock_request = MockRequest(request, codex_body)
|
|
232
|
+
|
|
233
|
+
# For streaming requests, handle the transformation directly
|
|
234
|
+
if openai_request.stream:
|
|
235
|
+
# Make the request directly to get the raw streaming response
|
|
236
|
+
from ccproxy.core.codex_transformers import CodexRequestTransformer
|
|
237
|
+
|
|
238
|
+
# Transform the request
|
|
239
|
+
transformer = CodexRequestTransformer()
|
|
240
|
+
transformed_request = await transformer.transform_codex_request(
|
|
241
|
+
method="POST",
|
|
242
|
+
path="/responses",
|
|
243
|
+
headers=dict(request.headers),
|
|
244
|
+
body=codex_body,
|
|
245
|
+
access_token=access_token,
|
|
246
|
+
session_id=session_id,
|
|
247
|
+
account_id="unknown", # Will be extracted from token if needed
|
|
248
|
+
codex_detection_data=getattr(
|
|
249
|
+
proxy_service.app_state, "codex_detection_data", None
|
|
250
|
+
)
|
|
251
|
+
if proxy_service.app_state
|
|
252
|
+
else None,
|
|
253
|
+
target_base_url=settings.codex.base_url,
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
# Convert Response API SSE stream to Chat Completions format
|
|
257
|
+
response_headers = {}
|
|
258
|
+
# Generate stream_id and timestamp outside the nested function to avoid closure issues
|
|
259
|
+
stream_id = f"chatcmpl_{uuid.uuid4().hex[:29]}"
|
|
260
|
+
created = int(time.time())
|
|
261
|
+
|
|
262
|
+
            async def stream_codex_response() -> AsyncIterator[bytes]:
                """Stream and convert Response API to Chat Completions format."""
                async with (
                    httpx.AsyncClient(timeout=240.0) as client,
                    client.stream(
                        method="POST",
                        url=transformed_request["url"],
                        headers=transformed_request["headers"],
                        content=transformed_request["body"],
                    ) as response,
                ):
                    # Check if we got a streaming response
                    content_type = response.headers.get("content-type", "")
                    transfer_encoding = response.headers.get("transfer-encoding", "")

                    # Capture response headers for forwarding
                    nonlocal response_headers
                    response_headers = dict(response.headers)

                    logger.debug(
                        "codex_chat_response_headers",
                        status_code=response.status_code,
                        content_type=content_type,
                        transfer_encoding=transfer_encoding,
                        headers=response_headers,
                        url=str(response.url),
                    )

                    # Check for error response first
                    if response.status_code >= 400:
                        # Handle error response - collect the response body
                        error_body = b""
                        async for chunk in response.aiter_bytes():
                            error_body += chunk

                        # Try to parse error message
                        error_message = "Request failed"
                        if error_body:
                            try:
                                error_data = json.loads(error_body.decode("utf-8"))
                                if "detail" in error_data:
                                    error_message = error_data["detail"]
                                elif "error" in error_data and isinstance(
                                    error_data["error"], dict
                                ):
                                    error_message = error_data["error"].get(
                                        "message", "Request failed"
                                    )
                            except json.JSONDecodeError:
                                pass

                        logger.warning(
                            "codex_chat_error_response",
                            status_code=response.status_code,
                            error_message=error_message,
                        )

                        # Return error in streaming format
                        error_response = {
                            "error": {
                                "message": error_message,
                                "type": "invalid_request_error",
                                "code": response.status_code,
                            }
                        }
                        yield f"data: {json.dumps(error_response)}\n\n".encode()
                        return

                    # Check if this is a streaming response
                    # The backend may return chunked transfer encoding without content-type
                    is_streaming = "text/event-stream" in content_type or (
                        transfer_encoding == "chunked" and not content_type
                    )

                    if is_streaming:
                        logger.debug(
                            "codex_stream_conversion_started",
                            session_id=session_id,
                            request_id=getattr(request.state, "request_id", "unknown"),
                        )

                        chunk_count = 0
                        total_bytes = 0

                        # Process SSE events directly without buffering
                        line_count = 0
                        first_chunk_sent = False
                        thinking_block_active = False
                        try:
                            async for line in response.aiter_lines():
                                line_count += 1
                                logger.debug(
                                    "codex_stream_line",
                                    line_number=line_count,
                                    line_preview=line[:100] if line else "(empty)",
                                )

                                # Skip empty lines
                                if not line or line.strip() == "":
                                    continue

                                if line.startswith("data:"):
                                    data_str = line[5:].strip()
                                    if data_str == "[DONE]":
                                        continue

                                    try:
                                        event_data = json.loads(data_str)
                                        event_type = event_data.get("type")

                                        # Send initial role message if this is the first chunk
                                        if not first_chunk_sent:
                                            # Send an initial chunk to indicate streaming has started
                                            initial_chunk = {
                                                "id": stream_id,
                                                "object": "chat.completion.chunk",
                                                "created": created,
                                                "model": "gpt-5",
                                                "choices": [
                                                    {"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}
                                                ],
                                            }
                                            yield f"data: {json.dumps(initial_chunk)}\n\n".encode()
                                            first_chunk_sent = True
                                            chunk_count += 1

                                            logger.debug(
                                                "codex_stream_initial_chunk_sent",
                                                event_type=event_type,
                                            )

                                        # Handle reasoning blocks based on official OpenAI Response API
                                        if event_type == "response.output_item.added":
                                            # Check if this is a reasoning block
                                            item = event_data.get("item", {})
                                            item_type = item.get("type")

                                            if item_type == "reasoning" and not thinking_block_active:
                                                # Only send opening tag if not already in a thinking block
                                                thinking_block_active = True

                                                logger.debug(
                                                    "codex_reasoning_block_started",
                                                    item_type=item_type,
                                                    event_type=event_type,
                                                )

                                                # Send opening reasoning tag (no signature in official API)
                                                openai_chunk = {
                                                    "id": stream_id,
                                                    "object": "chat.completion.chunk",
                                                    "created": created,
                                                    "model": "gpt-5",
                                                    "choices": [
                                                        {"index": 0, "delta": {"content": "<reasoning>"}, "finish_reason": None}
                                                    ],
                                                }
                                                yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
                                                chunk_count += 1

                                        # Handle content part deltas - various content types from API
                                        elif event_type == "response.content_part.delta":
                                            delta = event_data.get("delta", {})
                                            delta_type = delta.get("type")

                                            if delta_type == "text" and not thinking_block_active:
                                                # Regular text content
                                                text_content = delta.get("text", "")
                                                if text_content:
                                                    openai_chunk = {
                                                        "id": stream_id,
                                                        "object": "chat.completion.chunk",
                                                        "created": created,
                                                        "model": "gpt-5",
                                                        "choices": [
                                                            {"index": 0, "delta": {"content": text_content}, "finish_reason": None}
                                                        ],
                                                    }
                                                    yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
                                                    chunk_count += 1

                                            elif delta_type == "reasoning" and thinking_block_active:
                                                # Reasoning content within reasoning block
                                                reasoning_content = delta.get("reasoning", "")
                                                if reasoning_content:
                                                    openai_chunk = {
                                                        "id": stream_id,
                                                        "object": "chat.completion.chunk",
                                                        "created": created,
                                                        "model": "gpt-5",
                                                        "choices": [
                                                            {"index": 0, "delta": {"content": reasoning_content}, "finish_reason": None}
                                                        ],
                                                    }
                                                    yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
                                                    chunk_count += 1

                                        # Handle reasoning summary text - the actual reasoning content
                                        elif (
                                            event_type == "response.reasoning_summary_text.delta"
                                            and thinking_block_active
                                        ):
                                            # Extract reasoning text content from delta field
                                            reasoning_text = event_data.get("delta", "")

                                            if reasoning_text:
                                                chunk_count += 1
                                                openai_chunk = {
                                                    "id": stream_id,
                                                    "object": "chat.completion.chunk",
                                                    "created": created,
                                                    "model": "gpt-5",
                                                    "choices": [
                                                        {"index": 0, "delta": {"content": reasoning_text}, "finish_reason": None}
                                                    ],
                                                }
                                                yield f"data: {json.dumps(openai_chunk)}\n\n".encode()

                                        # Handle reasoning block completion - official API
                                        elif (
                                            event_type == "response.output_item.done"
                                            and thinking_block_active
                                        ):
                                            # Check if this is the end of a reasoning block
                                            item = event_data.get("item", {})
                                            item_type = item.get("type")

                                            if item_type == "reasoning":
                                                thinking_block_active = False

                                                # Send closing reasoning tag
                                                openai_chunk = {
                                                    "id": stream_id,
                                                    "object": "chat.completion.chunk",
                                                    "created": created,
                                                    "model": "gpt-5",
                                                    "choices": [
                                                        {"index": 0, "delta": {"content": "</reasoning>\n"}, "finish_reason": None}
                                                    ],
                                                }
                                                yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
                                                chunk_count += 1

                                                logger.debug(
                                                    "codex_reasoning_block_ended",
                                                    item_type=item_type,
                                                    event_type=event_type,
                                                )

                                        # Convert Response API events to OpenAI format
elif event_type == "response.output_text.delta":
|
|
561
|
+
# Direct text delta event (only if not in thinking block)
|
|
562
|
+
if not thinking_block_active:
|
|
563
|
+
delta_content = event_data.get(
|
|
564
|
+
"delta", ""
|
|
565
|
+
)
|
|
566
|
+
if delta_content:
|
|
567
|
+
chunk_count += 1
|
|
568
|
+
openai_chunk = {
|
|
569
|
+
"id": stream_id,
|
|
570
|
+
"object": "chat.completion.chunk",
|
|
571
|
+
"created": created,
|
|
572
|
+
"model": event_data.get(
|
|
573
|
+
"model", "gpt-5"
|
|
574
|
+
),
|
|
575
|
+
"choices": [
|
|
576
|
+
{
|
|
577
|
+
"index": 0,
|
|
578
|
+
"delta": {
|
|
579
|
+
"content": delta_content
|
|
580
|
+
},
|
|
581
|
+
"finish_reason": None,
|
|
582
|
+
}
|
|
583
|
+
],
|
|
584
|
+
}
|
|
585
|
+
chunk_data = f"data: {json.dumps(openai_chunk)}\n\n".encode()
|
|
586
|
+
total_bytes += len(chunk_data)
|
|
587
|
+
|
|
588
|
+
logger.debug(
|
|
589
|
+
"codex_stream_chunk_converted",
|
|
590
|
+
chunk_number=chunk_count,
|
|
591
|
+
chunk_size=len(chunk_data),
|
|
592
|
+
event_type=event_type,
|
|
593
|
+
content_length=len(
|
|
594
|
+
delta_content
|
|
595
|
+
),
|
|
596
|
+
)
|
|
597
|
+
|
|
598
|
+
yield chunk_data
|
|
599
|
+
|
|
600
|
+
elif event_type == "response.output.delta":
|
|
601
|
+
# Standard output delta with nested structure
|
|
602
|
+
output = event_data.get("output", [])
|
|
603
|
+
for output_item in output:
|
|
604
|
+
if output_item.get("type") == "message":
|
|
605
|
+
content_blocks = output_item.get(
|
|
606
|
+
"content", []
|
|
607
|
+
)
|
|
608
|
+
for block in content_blocks:
|
|
609
|
+
# Check if this is thinking content
|
|
610
|
+
if (
|
|
611
|
+
block.get("type")
|
|
612
|
+
in [
|
|
613
|
+
"thinking",
|
|
614
|
+
"reasoning",
|
|
615
|
+
"internal_monologue",
|
|
616
|
+
]
|
|
617
|
+
and thinking_block_active
|
|
618
|
+
):
|
|
619
|
+
thinking_content = (
|
|
620
|
+
block.get("text", "")
|
|
621
|
+
)
|
|
622
|
+
if thinking_content:
|
|
623
|
+
chunk_count += 1
|
|
624
|
+
openai_chunk = {
|
|
625
|
+
"id": stream_id,
|
|
626
|
+
"object": "chat.completion.chunk",
|
|
627
|
+
"created": created,
|
|
628
|
+
"model": "gpt-5",
|
|
629
|
+
"choices": [
|
|
630
|
+
{
|
|
631
|
+
"index": 0,
|
|
632
|
+
"delta": {
|
|
633
|
+
"content": thinking_content
|
|
634
|
+
},
|
|
635
|
+
"finish_reason": None,
|
|
636
|
+
}
|
|
637
|
+
],
|
|
638
|
+
}
|
|
639
|
+
yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
|
|
640
|
+
elif (
|
|
641
|
+
block.get("type")
|
|
642
|
+
in [
|
|
643
|
+
"output_text",
|
|
644
|
+
"text",
|
|
645
|
+
]
|
|
646
|
+
and not thinking_block_active
|
|
647
|
+
):
|
|
648
|
+
delta_content = block.get(
|
|
649
|
+
"text", ""
|
|
650
|
+
)
|
|
651
|
+
if delta_content:
|
|
652
|
+
chunk_count += 1
|
|
653
|
+
openai_chunk = {
|
|
654
|
+
"id": stream_id,
|
|
655
|
+
"object": "chat.completion.chunk",
|
|
656
|
+
"created": created,
|
|
657
|
+
"model": event_data.get(
|
|
658
|
+
"model", "gpt-5"
|
|
659
|
+
),
|
|
660
|
+
"choices": [
|
|
661
|
+
{
|
|
662
|
+
"index": 0,
|
|
663
|
+
"delta": {
|
|
664
|
+
"content": delta_content
|
|
665
|
+
},
|
|
666
|
+
"finish_reason": None,
|
|
667
|
+
}
|
|
668
|
+
],
|
|
669
|
+
}
|
|
670
|
+
chunk_data = f"data: {json.dumps(openai_chunk)}\n\n".encode()
|
|
671
|
+
total_bytes += len(
|
|
672
|
+
chunk_data
|
|
673
|
+
)
|
|
674
|
+
|
|
675
|
+
logger.debug(
|
|
676
|
+
"codex_stream_chunk_converted",
|
|
677
|
+
chunk_number=chunk_count,
|
|
678
|
+
chunk_size=len(
|
|
679
|
+
chunk_data
|
|
680
|
+
),
|
|
681
|
+
event_type=event_type,
|
|
682
|
+
content_length=len(
|
|
683
|
+
delta_content
|
|
684
|
+
),
|
|
685
|
+
)
|
|
686
|
+
|
|
687
|
+
yield chunk_data
|
|
688
|
+
|
|
689
|
+
# Handle additional official API event types
|
|
690
|
+
elif (
|
|
691
|
+
event_type
|
|
692
|
+
== "response.function_call_arguments.delta"
|
|
693
|
+
):
|
|
694
|
+
# Function call arguments streaming - official API
|
|
695
|
+
if not thinking_block_active:
|
|
696
|
+
arguments = event_data.get(
|
|
697
|
+
"arguments", ""
|
|
698
|
+
)
|
|
699
|
+
if arguments:
|
|
700
|
+
chunk_count += 1
|
|
701
|
+
openai_chunk = {
|
|
702
|
+
"id": stream_id,
|
|
703
|
+
"object": "chat.completion.chunk",
|
|
704
|
+
"created": created,
|
|
705
|
+
"model": "gpt-5",
|
|
706
|
+
"choices": [
|
|
707
|
+
{
|
|
708
|
+
"index": 0,
|
|
709
|
+
"delta": {
|
|
710
|
+
"content": arguments
|
|
711
|
+
},
|
|
712
|
+
"finish_reason": None,
|
|
713
|
+
}
|
|
714
|
+
],
|
|
715
|
+
}
|
|
716
|
+
yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
|
|
717
|
+
|
|
718
|
+
elif (
|
|
719
|
+
event_type
|
|
720
|
+
== "response.audio_transcript.delta"
|
|
721
|
+
):
|
|
722
|
+
# Audio transcript streaming - official API
|
|
723
|
+
if not thinking_block_active:
|
|
724
|
+
transcript = event_data.get(
|
|
725
|
+
"transcript", ""
|
|
726
|
+
)
|
|
727
|
+
if transcript:
|
|
728
|
+
chunk_count += 1
|
|
729
|
+
openai_chunk = {
|
|
730
|
+
"id": stream_id,
|
|
731
|
+
"object": "chat.completion.chunk",
|
|
732
|
+
"created": created,
|
|
733
|
+
"model": "gpt-5",
|
|
734
|
+
"choices": [
|
|
735
|
+
{
|
|
736
|
+
"index": 0,
|
|
737
|
+
"delta": {
|
|
738
|
+
"content": f"[Audio: {transcript}]"
|
|
739
|
+
},
|
|
740
|
+
"finish_reason": None,
|
|
741
|
+
}
|
|
742
|
+
],
|
|
743
|
+
}
|
|
744
|
+
yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
|
|
745
|
+
|
|
746
|
+
elif (
|
|
747
|
+
event_type
|
|
748
|
+
== "response.tool_calls.function.name"
|
|
749
|
+
):
|
|
750
|
+
# Tool function name - official API
|
|
751
|
+
if not thinking_block_active:
|
|
752
|
+
function_name = event_data.get(
|
|
753
|
+
"name", ""
|
|
754
|
+
)
|
|
755
|
+
if function_name:
|
|
756
|
+
chunk_count += 1
|
|
757
|
+
openai_chunk = {
|
|
758
|
+
"id": stream_id,
|
|
759
|
+
"object": "chat.completion.chunk",
|
|
760
|
+
"created": created,
|
|
761
|
+
"model": "gpt-5",
|
|
762
|
+
"choices": [
|
|
763
|
+
{
|
|
764
|
+
"index": 0,
|
|
765
|
+
"delta": {
|
|
766
|
+
"content": f"[Function: {function_name}]"
|
|
767
|
+
},
|
|
768
|
+
"finish_reason": None,
|
|
769
|
+
}
|
|
770
|
+
],
|
|
771
|
+
}
|
|
772
|
+
yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
|
|
773
|
+
|
|
774
|
+
elif event_type == "response.completed":
|
|
775
|
+
# Final chunk with usage info
|
|
776
|
+
response_obj = event_data.get(
|
|
777
|
+
"response", {}
|
|
778
|
+
)
|
|
779
|
+
usage = response_obj.get("usage")
|
|
780
|
+
|
|
781
|
+
openai_chunk = {
|
|
782
|
+
"id": stream_id,
|
|
783
|
+
"object": "chat.completion.chunk",
|
|
784
|
+
"created": created,
|
|
785
|
+
"model": response_obj.get(
|
|
786
|
+
"model", "gpt-5"
|
|
787
|
+
),
|
|
788
|
+
"choices": [
|
|
789
|
+
{
|
|
790
|
+
"index": 0,
|
|
791
|
+
"delta": {},
|
|
792
|
+
"finish_reason": "stop",
|
|
793
|
+
}
|
|
794
|
+
],
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
if usage:
|
|
798
|
+
openai_chunk["usage"] = {
|
|
799
|
+
"prompt_tokens": usage.get(
|
|
800
|
+
"input_tokens", 0
|
|
801
|
+
),
|
|
802
|
+
"completion_tokens": usage.get(
|
|
803
|
+
"output_tokens", 0
|
|
804
|
+
),
|
|
805
|
+
"total_tokens": usage.get(
|
|
806
|
+
"total_tokens", 0
|
|
807
|
+
),
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
chunk_data = f"data: {json.dumps(openai_chunk)}\n\n".encode()
|
|
811
|
+
yield chunk_data
|
|
812
|
+
|
|
813
|
+
logger.debug(
|
|
814
|
+
"codex_stream_completed",
|
|
815
|
+
total_chunks=chunk_count,
|
|
816
|
+
total_bytes=total_bytes,
|
|
817
|
+
)
|
|
818
|
+
|
|
819
|
+
except json.JSONDecodeError as e:
|
|
820
|
+
logger.debug(
|
|
821
|
+
"codex_sse_parse_failed",
|
|
822
|
+
data_preview=data_str[:100],
|
|
823
|
+
error=str(e),
|
|
824
|
+
)
|
|
825
|
+
continue
|
|
826
|
+
|
|
827
|
+
except Exception as e:
|
|
828
|
+
logger.error(
|
|
829
|
+
"codex_stream_error",
|
|
830
|
+
error=str(e),
|
|
831
|
+
line_count=line_count,
|
|
832
|
+
)
|
|
833
|
+
raise
|
|
834
|
+
|
|
835
|
+
# Send final [DONE] message
|
|
836
|
+
logger.debug(
|
|
837
|
+
"codex_stream_sending_done",
|
|
838
|
+
total_chunks=chunk_count,
|
|
839
|
+
total_bytes=total_bytes,
|
|
840
|
+
)
|
|
841
|
+
yield b"data: [DONE]\n\n"
|
|
842
|
+
else:
|
|
843
|
+
# Backend didn't return streaming or returned unexpected format
|
|
844
|
+
# When using client.stream(), we need to collect the response differently
|
|
845
|
+
chunks = []
|
|
846
|
+
async for chunk in response.aiter_bytes():
|
|
847
|
+
chunks.append(chunk)
|
|
848
|
+
|
|
849
|
+
response_body = b"".join(chunks)
|
|
850
|
+
|
|
851
|
+
logger.debug(
|
|
852
|
+
"codex_chat_non_streaming_response",
|
|
853
|
+
body_length=len(response_body),
|
|
854
|
+
body_preview=response_body[:200].decode(
|
|
855
|
+
"utf-8", errors="replace"
|
|
856
|
+
)
|
|
857
|
+
if response_body
|
|
858
|
+
else "empty",
|
|
859
|
+
)
|
|
860
|
+
|
|
861
|
+
if response_body:
|
|
862
|
+
# Check if it's actually SSE data that we missed
|
|
863
|
+
body_str = response_body.decode("utf-8")
|
|
864
|
+
if body_str.startswith("event:") or body_str.startswith(
|
|
865
|
+
"data:"
|
|
866
|
+
):
|
|
867
|
+
# It's SSE data, try to extract the final JSON
|
|
868
|
+
logger.warning(
|
|
869
|
+
"Backend returned SSE data but content-type was not text/event-stream"
|
|
870
|
+
)
|
|
871
|
+
lines = body_str.strip().split("\n")
|
|
872
|
+
for line in reversed(lines):
|
|
873
|
+
if line.startswith("data:") and not line.endswith(
|
|
874
|
+
"[DONE]"
|
|
875
|
+
):
|
|
876
|
+
try:
|
|
877
|
+
json_str = line[5:].strip()
|
|
878
|
+
response_data = json.loads(json_str)
|
|
879
|
+
if "response" in response_data:
|
|
880
|
+
response_data = response_data[
|
|
881
|
+
"response"
|
|
882
|
+
]
|
|
883
|
+
# Convert to OpenAI format and yield as a single chunk
|
|
884
|
+
openai_response = (
|
|
885
|
+
adapter.response_to_chat_completion(
|
|
886
|
+
response_data
|
|
887
|
+
)
|
|
888
|
+
)
|
|
889
|
+
yield f"data: {openai_response.model_dump_json()}\n\n".encode()
|
|
890
|
+
yield b"data: [DONE]\n\n"
|
|
891
|
+
return
|
|
892
|
+
except json.JSONDecodeError:
|
|
893
|
+
continue
|
|
894
|
+
# Couldn't parse SSE data - yield error as SSE event
|
|
895
|
+
error_response = {
|
|
896
|
+
"error": {
|
|
897
|
+
"message": "Failed to parse SSE response data",
|
|
898
|
+
"type": "invalid_response_error",
|
|
899
|
+
"code": 502,
|
|
900
|
+
}
|
|
901
|
+
}
|
|
902
|
+
yield f"data: {json.dumps(error_response)}\n\n".encode()
|
|
903
|
+
yield b"data: [DONE]\n\n"
|
|
904
|
+
return
|
|
905
|
+
else:
|
|
906
|
+
# Try to parse as regular JSON
|
|
907
|
+
try:
|
|
908
|
+
response_data = json.loads(body_str)
|
|
909
|
+
# Convert to Chat Completions format and yield as single chunk
|
|
910
|
+
openai_response = (
|
|
911
|
+
adapter.response_to_chat_completion(
|
|
912
|
+
response_data
|
|
913
|
+
)
|
|
914
|
+
)
|
|
915
|
+
yield f"data: {openai_response.model_dump_json()}\n\n".encode()
|
|
916
|
+
yield b"data: [DONE]\n\n"
|
|
917
|
+
return
|
|
918
|
+
except json.JSONDecodeError as e:
|
|
919
|
+
logger.error(
|
|
920
|
+
"Failed to parse non-streaming response",
|
|
921
|
+
error=str(e),
|
|
922
|
+
body_preview=body_str[:500],
|
|
923
|
+
)
|
|
924
|
+
error_response = {
|
|
925
|
+
"error": {
|
|
926
|
+
"message": "Invalid JSON response from backend",
|
|
927
|
+
"type": "invalid_response_error",
|
|
928
|
+
"code": 502,
|
|
929
|
+
}
|
|
930
|
+
}
|
|
931
|
+
yield f"data: {json.dumps(error_response)}\n\n".encode()
|
|
932
|
+
yield b"data: [DONE]\n\n"
|
|
933
|
+
return
|
|
934
|
+
else:
|
|
935
|
+
# Empty response - yield error
|
|
936
|
+
error_response = {
|
|
937
|
+
"error": {
|
|
938
|
+
"message": "Backend returned empty response",
|
|
939
|
+
"type": "empty_response_error",
|
|
940
|
+
"code": 502,
|
|
941
|
+
}
|
|
942
|
+
}
|
|
943
|
+
yield f"data: {json.dumps(error_response)}\n\n".encode()
|
|
944
|
+
yield b"data: [DONE]\n\n"
|
|
945
|
+
return
|
|
946
|
+
|
|
947
|
+
            # Execute the generator first to capture headers
            generator_chunks = []
            async for chunk in stream_codex_response():
                generator_chunks.append(chunk)

            # Forward upstream headers but filter out incompatible ones for streaming
            streaming_headers = dict(response_headers)
            # Remove headers that conflict with streaming responses
            streaming_headers.pop("content-length", None)
            streaming_headers.pop("content-encoding", None)
            streaming_headers.pop("date", None)
            # Set streaming-specific headers
            streaming_headers.update(
                {
                    "content-type": "text/event-stream",
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                }
            )

            # Replay the collected chunks
            async def replay_stream() -> AsyncIterator[bytes]:
                for chunk in generator_chunks:
                    yield chunk

            # Return streaming response with proper headers - handle missing request_context
            from ccproxy.observability.context import RequestContext

            # Create a minimal request context if none exists
            if request_context is None:
                request_context = RequestContext(
                    request_id=str(uuid.uuid4()),
                    start_time=time.perf_counter(),
                    logger=logger,
                )

            return StreamingResponseWithLogging(
                content=replay_stream(),
                request_context=request_context,
                metrics=getattr(proxy_service, "metrics", None),
                status_code=200,
                media_type="text/event-stream",
                headers=streaming_headers,
            )
        else:
            # Handle non-streaming request using the proxy service
            # Cast MockRequest to Request to satisfy type checker
            mock_request_typed: Request = mock_request  # type: ignore[assignment]
            response = await proxy_service.handle_codex_request(
                method="POST",
                path="/responses",
                session_id=session_id,
                access_token=access_token,
                request=mock_request_typed,
                settings=settings,
            )

            # Check if this is a streaming response (shouldn't happen for non-streaming requests)
            is_streaming_response = isinstance(response, StreamingResponse)

            if is_streaming_response and not openai_request.stream:
                # User requested non-streaming but backend returned streaming
                # Consume the stream and convert to non-streaming response
                accumulated_content = ""
                final_response = None

                error_response = None
                accumulated_chunks = ""

                async for chunk in response.body_iterator:  # type: ignore
                    chunk_str = chunk.decode("utf-8")
                    accumulated_chunks += chunk_str

                    # The Response API sends SSE events, but errors might be plain JSON
                    lines = chunk_str.strip().split("\n")
                    for line in lines:
                        if line.startswith("data:") and "[DONE]" not in line:
                            data_str = line[5:].strip()
                            try:
                                event_data = json.loads(data_str)
                                # Look for the completed response
                                if event_data.get("type") == "response.completed":
                                    final_response = event_data
                                # Also check if this is a direct error response (not SSE format)
                                elif "detail" in event_data and "type" not in event_data:
                                    error_response = event_data
                            except json.JSONDecodeError:
                                continue

                # If we didn't find SSE events, try parsing the entire accumulated content as JSON
                if not final_response and not error_response and accumulated_chunks.strip():
                    try:
                        # Try to parse the entire content as JSON (for non-SSE error responses)
                        json_response = json.loads(accumulated_chunks.strip())
                        if (
                            "detail" in json_response
                            or "error" in json_response
                            or "message" in json_response
                        ):
                            error_response = json_response
                        else:
                            # Might be a valid response without SSE formatting
                            final_response = {"response": json_response}
                    except json.JSONDecodeError:
                        # Not valid JSON either
                        pass

                if final_response:
                    # Convert to Chat Completions format
                    return adapter.response_to_chat_completion(final_response)
                elif error_response:
                    # Handle error response
                    error_message = "Request failed"
                    if "detail" in error_response:
                        error_message = error_response["detail"]
                    elif "error" in error_response:
                        if isinstance(error_response["error"], dict):
                            error_message = error_response["error"].get(
                                "message", "Request failed"
                            )
                        else:
                            error_message = str(error_response["error"])
                    elif "message" in error_response:
                        error_message = error_response["message"]

                    # Log the error for debugging
                    logger.error(
                        "codex_streaming_error_response",
                        error_data=error_response,
                        error_message=error_message,
                    )

                    raise HTTPException(status_code=400, detail=error_message)
                else:
                    raise HTTPException(
                        status_code=502, detail="Failed to parse streaming response"
                    )
            else:
                # Non-streaming response - parse and convert
                if isinstance(response, Response):
                    # Check if this is an error response
                    if response.status_code >= 400:
                        # Return the error response as-is
                        error_body = response.body
                        if error_body:
                            try:
                                # Handle bytes/memoryview union
                                error_body_bytes = (
                                    bytes(error_body)
                                    if isinstance(error_body, memoryview)
                                    else error_body
                                )
                                error_data = json.loads(error_body_bytes.decode("utf-8"))
                                # Log the actual error from backend
                                logger.error(
                                    "codex_backend_error",
                                    status_code=response.status_code,
                                    error_data=error_data,
                                )
                                # Pass through the error from backend
                                # Handle different error formats from backend
                                error_message = "Request failed"
                                if "detail" in error_data:
                                    error_message = error_data["detail"]
                                elif "error" in error_data:
                                    if isinstance(error_data["error"], dict):
                                        error_message = error_data["error"].get(
                                            "message", "Request failed"
                                        )
                                    else:
                                        error_message = str(error_data["error"])
                                elif "message" in error_data:
                                    error_message = error_data["message"]

                                raise HTTPException(
                                    status_code=response.status_code,
                                    detail=error_message,
                                )
                            except (json.JSONDecodeError, UnicodeDecodeError):
                                # Handle bytes/memoryview union for logging
                                error_body_bytes = (
                                    bytes(error_body)
                                    if isinstance(error_body, memoryview)
                                    else error_body
                                )
                                logger.error(
                                    "codex_backend_error_parse_failed",
                                    status_code=response.status_code,
                                    body=error_body_bytes[:500].decode(
                                        "utf-8", errors="replace"
                                    ),
                                )
                                pass
                        raise HTTPException(
                            status_code=response.status_code, detail="Request failed"
                        )

                    # Read the response body for successful responses
                    response_body = response.body
                    if response_body:
                        try:
                            # Handle bytes/memoryview union
                            response_body_bytes = (
                                bytes(response_body)
                                if isinstance(response_body, memoryview)
                                else response_body
                            )
                            response_data = json.loads(response_body_bytes.decode("utf-8"))
                            # Convert Response API format to Chat Completions format
                            return adapter.response_to_chat_completion(response_data)
                        except (json.JSONDecodeError, UnicodeDecodeError) as e:
                            logger.error("Failed to parse Codex response", error=str(e))
                            raise HTTPException(
                                status_code=502,
                                detail="Invalid response from Codex API",
                            ) from e

                # If we can't convert, return error
                raise HTTPException(
                    status_code=502, detail="Unable to process Codex response"
                )

    except HTTPException:
        raise
    except AuthenticationError as e:
        raise HTTPException(status_code=401, detail=str(e)) from None
    except ProxyError as e:
        raise HTTPException(status_code=502, detail=str(e)) from None
    except Exception as e:
        logger.error("Unexpected error in codex_chat_completions", error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error") from None


# NOTE: Test endpoint commented out after exploration
# Testing revealed that ChatGPT backend API only supports /responses endpoint
# and does NOT support OpenAI-style /chat/completions or other endpoints.
# See codex_endpoint_test_results.md for full findings.
#
# @router.api_route("/test/{path:path}", methods=["GET", "POST", "PUT", "DELETE"], response_model=None, include_in_schema=False)
# async def codex_test_probe(
#     path: str,
#     request: Request,
#     proxy_service: ProxyServiceDep,
#     settings: Settings = Depends(get_settings),
#     token_manager: OpenAITokenManager = Depends(get_token_manager),
#     _: None = Depends(check_codex_enabled),
# ) -> Response:
#     """Test endpoint to probe upstream ChatGPT backend API paths.
#
#     WARNING: This is a test endpoint for exploration only.
#     It forwards requests to any path on the ChatGPT backend API.
#     Should be removed or protected after testing.
#     """
#     # Get and validate access token
#     try:
#         access_token = await token_manager.get_valid_token()
#         if not access_token:
#             raise HTTPException(
#                 status_code=401,
#                 detail="No valid OpenAI credentials found. Please authenticate first.",
#             )
#     except Exception as e:
#         logger.error("Failed to get OpenAI access token", error=str(e))
#         raise HTTPException(
#             status_code=401, detail="Failed to retrieve valid credentials"
#         ) from e
#
#     # Log the test request
#     logger.info(f"Testing upstream path: /{path}", method=request.method)
#
#     try:
#         # Use a simple session_id for testing
#         session_id = "test-probe"
#
#         # Handle the test request - forward to the specified path
#         response = await proxy_service.handle_codex_request(
#             method=request.method,
#             path=f"/{path}",
#             session_id=session_id,
#             access_token=access_token,
#             request=request,
#             settings=settings,
#         )
#
#         logger.info(f"Test probe response for /{path}", status_code=getattr(response, "status_code", 200))
#         return response
#     except AuthenticationError as e:
#         logger.warning(f"Auth error for path /{path}: {str(e)}")
#         raise HTTPException(status_code=401, detail=str(e)) from e
#     except ProxyError as e:
#         logger.warning(f"Proxy error for path /{path}: {str(e)}")
#         raise HTTPException(status_code=502, detail=str(e)) from e
#     except Exception as e:
#         logger.error(f"Unexpected error testing path /{path}", error=str(e))
#         raise HTTPException(status_code=500, detail=f"Error testing path: {str(e)}") from e