ccproxy-api 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (34)
  1. ccproxy/_version.py +2 -2
  2. ccproxy/adapters/codex/__init__.py +11 -0
  3. ccproxy/adapters/openai/models.py +1 -1
  4. ccproxy/adapters/openai/response_adapter.py +355 -0
  5. ccproxy/adapters/openai/response_models.py +178 -0
  6. ccproxy/api/app.py +16 -0
  7. ccproxy/api/routes/codex.py +1231 -0
  8. ccproxy/api/routes/health.py +228 -3
  9. ccproxy/auth/openai/__init__.py +13 -0
  10. ccproxy/auth/openai/credentials.py +166 -0
  11. ccproxy/auth/openai/oauth_client.py +334 -0
  12. ccproxy/auth/openai/storage.py +184 -0
  13. ccproxy/claude_sdk/options.py +1 -1
  14. ccproxy/cli/commands/auth.py +398 -1
  15. ccproxy/cli/commands/serve.py +3 -1
  16. ccproxy/config/claude.py +1 -1
  17. ccproxy/config/codex.py +100 -0
  18. ccproxy/config/scheduler.py +4 -4
  19. ccproxy/config/settings.py +19 -0
  20. ccproxy/core/codex_transformers.py +389 -0
  21. ccproxy/core/http_transformers.py +153 -2
  22. ccproxy/models/detection.py +82 -0
  23. ccproxy/models/requests.py +22 -0
  24. ccproxy/models/responses.py +16 -0
  25. ccproxy/services/codex_detection_service.py +263 -0
  26. ccproxy/services/proxy_service.py +530 -0
  27. ccproxy/utils/model_mapping.py +7 -5
  28. ccproxy/utils/startup_helpers.py +62 -0
  29. ccproxy_api-0.1.6.dist-info/METADATA +615 -0
  30. {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.6.dist-info}/RECORD +33 -22
  31. ccproxy_api-0.1.5.dist-info/METADATA +0 -396
  32. {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.6.dist-info}/WHEEL +0 -0
  33. {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.6.dist-info}/entry_points.txt +0 -0
  34. {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.6.dist-info}/licenses/LICENSE +0 -0
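The bulk of this release is new OpenAI Codex support: OAuth credential handling under ccproxy/auth/openai/, Response API adapters under ccproxy/adapters/openai/, and the new route module ccproxy/api/routes/codex.py, whose full diff follows. As a rough sketch of how a client might call the new OpenAI-compatible endpoint once the proxy is running (the base URL, port, and payload here are illustrative assumptions, not taken from this diff):

import httpx

# Hypothetical local proxy address; adjust to your deployment.
BASE_URL = "http://localhost:8000"

resp = httpx.post(
    f"{BASE_URL}/codex/chat/completions",
    # Standard OpenAI chat/completions payload; the route converts it to the
    # Response API format before forwarding it to the ChatGPT backend.
    json={
        "model": "gpt-5",
        "stream": False,
        "messages": [{"role": "user", "content": "Say hello"}],
    },
    timeout=60.0,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])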
ccproxy/api/routes/codex.py
@@ -0,0 +1,1231 @@
1
+ """OpenAI Codex API routes."""
2
+
3
+ import json
4
+ import time
+ import uuid
5
+ from collections.abc import AsyncIterator
6
+
7
+ import httpx
8
+ import structlog
9
+ from fastapi import APIRouter, Depends, HTTPException, Request
10
+ from fastapi.responses import StreamingResponse
11
+ from starlette.responses import Response
12
+
13
+ from ccproxy.adapters.openai.models import (
14
+ OpenAIChatCompletionRequest,
15
+ OpenAIChatCompletionResponse,
16
+ )
17
+ from ccproxy.adapters.openai.response_adapter import ResponseAdapter
18
+ from ccproxy.api.dependencies import ProxyServiceDep
19
+ from ccproxy.auth.openai import OpenAITokenManager
20
+ from ccproxy.config.settings import Settings, get_settings
21
+ from ccproxy.core.errors import AuthenticationError, ProxyError
22
+ from ccproxy.observability.streaming_response import StreamingResponseWithLogging
23
+
24
+
25
+ logger = structlog.get_logger(__name__)
26
+
27
+ # Create router
28
+ router = APIRouter(prefix="/codex", tags=["codex"])
29
+
30
+
31
+ def get_token_manager() -> OpenAITokenManager:
32
+ """Get OpenAI token manager dependency."""
33
+ return OpenAITokenManager()
34
+
35
+
36
+ def resolve_session_id(
37
+ path_session: str | None = None,
38
+ header_session: str | None = None,
39
+ ) -> str:
40
+ """Resolve session ID with priority: path > header > generated."""
41
+ return path_session or header_session or str(uuid.uuid4())
42
+
43
+
44
+ async def check_codex_enabled(settings: Settings = Depends(get_settings)) -> None:
45
+ """Check if Codex is enabled."""
46
+ if not settings.codex.enabled:
47
+ raise HTTPException(
48
+ status_code=503, detail="OpenAI Codex provider is not enabled"
49
+ )
50
+
51
+
52
+ @router.post("/responses", response_model=None)
53
+ async def codex_responses(
54
+ request: Request,
55
+ proxy_service: ProxyServiceDep,
56
+ settings: Settings = Depends(get_settings),
57
+ token_manager: OpenAITokenManager = Depends(get_token_manager),
58
+ _: None = Depends(check_codex_enabled),
59
+ ) -> StreamingResponse | Response:
60
+ """Create completion with auto-generated session_id.
61
+
62
+ This endpoint creates a new completion request with an automatically
63
+ generated session_id. Each request gets a unique session.
64
+ """
65
+ # Get session_id from header if provided
66
+ header_session_id = request.headers.get("session_id")
67
+ session_id = resolve_session_id(header_session=header_session_id)
68
+
69
+ # Get and validate access token
70
+ try:
71
+ access_token = await token_manager.get_valid_token()
72
+ if not access_token:
73
+ raise HTTPException(
74
+ status_code=401,
75
+ detail="No valid OpenAI credentials found. Please authenticate first.",
76
+ )
77
+ except Exception as e:
78
+ logger.error("Failed to get OpenAI access token", error=str(e))
79
+ raise HTTPException(
80
+ status_code=401, detail="Failed to retrieve valid credentials"
81
+ ) from e
82
+
83
+ try:
84
+ # Handle the Codex request
85
+ response = await proxy_service.handle_codex_request(
86
+ method="POST",
87
+ path="/responses",
88
+ session_id=session_id,
89
+ access_token=access_token,
90
+ request=request,
91
+ settings=settings,
92
+ )
93
+ return response
94
+ except AuthenticationError as e:
95
+ raise HTTPException(status_code=401, detail=str(e)) from e
96
+ except ProxyError as e:
97
+ raise HTTPException(status_code=502, detail=str(e)) from e
98
+ except Exception as e:
99
+ logger.error("Unexpected error in codex_responses", error=str(e))
100
+ raise HTTPException(status_code=500, detail="Internal server error") from e
101
+
102
+
103
+ @router.post("/{session_id}/responses", response_model=None)
104
+ async def codex_responses_with_session(
105
+ session_id: str,
106
+ request: Request,
107
+ proxy_service: ProxyServiceDep,
108
+ settings: Settings = Depends(get_settings),
109
+ token_manager: OpenAITokenManager = Depends(get_token_manager),
110
+ _: None = Depends(check_codex_enabled),
111
+ ) -> StreamingResponse | Response:
112
+ """Create completion with specific session_id.
113
+
114
+ This endpoint creates a completion request using the provided session_id
115
+ from the URL path. This allows for session-specific conversations.
116
+ """
117
+ # Get and validate access token
118
+ try:
119
+ access_token = await token_manager.get_valid_token()
120
+ if not access_token:
121
+ raise HTTPException(
122
+ status_code=401,
123
+ detail="No valid OpenAI credentials found. Please authenticate first.",
124
+ )
125
+ except Exception as e:
126
+ logger.error("Failed to get OpenAI access token", error=str(e))
127
+ raise HTTPException(
128
+ status_code=401, detail="Failed to retrieve valid credentials"
129
+ ) from e
130
+
131
+ try:
132
+ # Handle the Codex request with specific session_id
133
+ response = await proxy_service.handle_codex_request(
134
+ method="POST",
135
+ path=f"/{session_id}/responses",
136
+ session_id=session_id,
137
+ access_token=access_token,
138
+ request=request,
139
+ settings=settings,
140
+ )
141
+ return response
142
+ except AuthenticationError as e:
143
+ raise HTTPException(status_code=401, detail=str(e)) from e
144
+ except ProxyError as e:
145
+ raise HTTPException(status_code=502, detail=str(e)) from e
146
+ except Exception as e:
147
+ logger.error("Unexpected error in codex_responses_with_session", error=str(e))
148
+ raise HTTPException(status_code=500, detail="Internal server error") from e
149
+
150
+
151
+ @router.post("/chat/completions", response_model=None)
152
+ async def codex_chat_completions(
153
+ openai_request: OpenAIChatCompletionRequest,
154
+ request: Request,
155
+ proxy_service: ProxyServiceDep,
156
+ settings: Settings = Depends(get_settings),
157
+ token_manager: OpenAITokenManager = Depends(get_token_manager),
158
+ _: None = Depends(check_codex_enabled),
159
+ ) -> StreamingResponse | OpenAIChatCompletionResponse:
160
+ """OpenAI-compatible chat completions endpoint for Codex.
161
+
162
+ This endpoint accepts OpenAI chat/completions format and converts it
163
+ to OpenAI Response API format before forwarding to the ChatGPT backend.
164
+ """
165
+ # Get session_id from header if provided, otherwise generate
166
+ header_session_id = request.headers.get("session_id")
167
+ session_id = resolve_session_id(header_session=header_session_id)
168
+
169
+ # Get and validate access token
170
+ try:
171
+ access_token = await token_manager.get_valid_token()
172
+ if not access_token:
173
+ raise HTTPException(
174
+ status_code=401,
175
+ detail="No valid OpenAI credentials found. Please authenticate first.",
176
+ )
177
+ except Exception as e:
178
+ logger.error("Failed to get OpenAI access token", error=str(e))
179
+ raise HTTPException(
180
+ status_code=401, detail="Failed to retrieve valid credentials"
181
+ ) from e
182
+
183
+ try:
184
+ # Create adapter for format conversion
185
+ adapter = ResponseAdapter()
186
+
187
+ # Convert OpenAI Chat Completions format to Response API format
188
+ response_request = adapter.chat_to_response_request(openai_request)
189
+
190
+ # Convert the transformed request to bytes
191
+ codex_body = response_request.model_dump_json().encode("utf-8")
192
+
193
+ # Get request context from middleware
194
+ request_context = getattr(request.state, "context", None)
195
+
196
+ # Create a mock request object with the converted body
197
+ class MockRequest:
198
+ def __init__(self, original_request: Request, new_body: bytes) -> None:
199
+ self.method = original_request.method
200
+ self.url = original_request.url
201
+ self.headers = dict(original_request.headers)
202
+ self.headers["content-length"] = str(len(new_body))
203
+ self.state = original_request.state
204
+ self._body = new_body
205
+
206
+ async def body(self) -> bytes:
207
+ return self._body
208
+
209
+ mock_request = MockRequest(request, codex_body)
210
+
211
+ # For streaming requests, handle the transformation directly
212
+ if openai_request.stream:
213
+ # Make the request directly to get the raw streaming response
214
+ from ccproxy.core.codex_transformers import CodexRequestTransformer
215
+
216
+ # Transform the request
217
+ transformer = CodexRequestTransformer()
218
+ transformed_request = await transformer.transform_codex_request(
219
+ method="POST",
220
+ path="/responses",
221
+ headers=dict(request.headers),
222
+ body=codex_body,
223
+ access_token=access_token,
224
+ session_id=session_id,
225
+ account_id="unknown", # Will be extracted from token if needed
226
+ codex_detection_data=getattr(
227
+ proxy_service.app_state, "codex_detection_data", None
228
+ )
229
+ if proxy_service.app_state
230
+ else None,
231
+ target_base_url=settings.codex.base_url,
232
+ )
233
+
234
+ # Convert Response API SSE stream to Chat Completions format
235
+ response_headers = {}
236
+
237
+ async def stream_codex_response() -> AsyncIterator[bytes]:
238
+ """Stream and convert Response API to Chat Completions format."""
239
+ async with (
240
+ httpx.AsyncClient(timeout=240.0) as client,
241
+ client.stream(
242
+ method="POST",
243
+ url=transformed_request["url"],
244
+ headers=transformed_request["headers"],
245
+ content=transformed_request["body"],
246
+ ) as response,
247
+ ):
248
+ # Check if we got a streaming response
249
+ content_type = response.headers.get("content-type", "")
250
+ transfer_encoding = response.headers.get("transfer-encoding", "")
251
+
252
+ # Capture response headers for forwarding
253
+ nonlocal response_headers
254
+ response_headers = dict(response.headers)
255
+
256
+ logger.debug(
257
+ "codex_chat_response_headers",
258
+ status_code=response.status_code,
259
+ content_type=content_type,
260
+ transfer_encoding=transfer_encoding,
261
+ headers=response_headers,
262
+ url=str(response.url),
263
+ )
264
+
265
+ # Check for error response first
266
+ if response.status_code >= 400:
267
+ # Handle error response - collect the response body
268
+ error_body = b""
269
+ async for chunk in response.aiter_bytes():
270
+ error_body += chunk
271
+
272
+ # Try to parse error message
273
+ error_message = "Request failed"
274
+ if error_body:
275
+ try:
276
+ error_data = json.loads(error_body.decode("utf-8"))
277
+ if "detail" in error_data:
278
+ error_message = error_data["detail"]
279
+ elif "error" in error_data and isinstance(
280
+ error_data["error"], dict
281
+ ):
282
+ error_message = error_data["error"].get(
283
+ "message", "Request failed"
284
+ )
285
+ except json.JSONDecodeError:
286
+ pass
287
+
288
+ logger.warning(
289
+ "codex_chat_error_response",
290
+ status_code=response.status_code,
291
+ error_message=error_message,
292
+ )
293
+
294
+ # Return error in streaming format
295
+ error_response = {
296
+ "error": {
297
+ "message": error_message,
298
+ "type": "invalid_request_error",
299
+ "code": response.status_code,
300
+ }
301
+ }
302
+ yield f"data: {json.dumps(error_response)}\n\n".encode()
303
+ return
304
+
305
+ # Check if this is a streaming response
306
+ # The backend may return chunked transfer encoding without content-type
307
+ is_streaming = "text/event-stream" in content_type or (
308
+ transfer_encoding == "chunked" and not content_type
309
+ )
310
+
311
+ if is_streaming:
312
+ logger.debug(
313
+ "codex_stream_conversion_started",
314
+ session_id=session_id,
315
+ request_id=getattr(request.state, "request_id", "unknown"),
316
+ )
317
+
318
+ chunk_count = 0
319
+ total_bytes = 0
320
+ stream_id = f"chatcmpl_{uuid.uuid4().hex[:29]}"
321
+ created = int(time.time())
322
+
323
+ # Process SSE events directly without buffering
324
+ line_count = 0
325
+ first_chunk_sent = False
326
+ thinking_block_active = False
327
+ try:
328
+ async for line in response.aiter_lines():
329
+ line_count += 1
330
+ logger.debug(
331
+ "codex_stream_line",
332
+ line_number=line_count,
333
+ line_preview=line[:100] if line else "(empty)",
334
+ )
335
+
336
+ # Skip empty lines
337
+ if not line or line.strip() == "":
338
+ continue
339
+
340
+ if line.startswith("data:"):
341
+ data_str = line[5:].strip()
342
+ if data_str == "[DONE]":
343
+ continue
344
+
345
+ try:
346
+ event_data = json.loads(data_str)
347
+ event_type = event_data.get("type")
348
+
349
+ # Send initial role message if this is the first chunk
350
+ if not first_chunk_sent:
351
+ # Send an initial chunk to indicate streaming has started
352
+ initial_chunk = {
353
+ "id": stream_id,
354
+ "object": "chat.completion.chunk",
355
+ "created": created,
356
+ "model": "gpt-5",
357
+ "choices": [
358
+ {
359
+ "index": 0,
360
+ "delta": {"role": "assistant"},
361
+ "finish_reason": None,
362
+ }
363
+ ],
364
+ }
365
+ yield f"data: {json.dumps(initial_chunk)}\n\n".encode()
366
+ first_chunk_sent = True
367
+ chunk_count += 1
368
+
369
+ logger.debug(
370
+ "codex_stream_initial_chunk_sent",
371
+ event_type=event_type,
372
+ )
373
+
374
+ # Handle reasoning blocks based on official OpenAI Response API
375
+ if event_type == "response.output_item.added":
376
+ # Check if this is a reasoning block
377
+ item = event_data.get("item", {})
378
+ item_type = item.get("type")
379
+
380
+ if (
381
+ item_type == "reasoning"
382
+ and not thinking_block_active
383
+ ):
384
+ # Only send opening tag if not already in a thinking block
385
+ thinking_block_active = True
386
+
387
+ logger.debug(
388
+ "codex_reasoning_block_started",
389
+ item_type=item_type,
390
+ event_type=event_type,
391
+ )
392
+
393
+ # Send opening reasoning tag (no signature in official API)
394
+ openai_chunk = {
395
+ "id": stream_id,
396
+ "object": "chat.completion.chunk",
397
+ "created": created,
398
+ "model": "gpt-5",
399
+ "choices": [
400
+ {
401
+ "index": 0,
402
+ "delta": {
403
+ "content": "<reasoning>"
404
+ },
405
+ "finish_reason": None,
406
+ }
407
+ ],
408
+ }
409
+ yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
410
+ chunk_count += 1
411
+
412
+ # Handle content part deltas - various content types from API
413
+ elif (
414
+ event_type == "response.content_part.delta"
415
+ ):
416
+ delta = event_data.get("delta", {})
417
+ delta_type = delta.get("type")
418
+
419
+ if (
420
+ delta_type == "text"
421
+ and not thinking_block_active
422
+ ):
423
+ # Regular text content
424
+ text_content = delta.get("text", "")
425
+ if text_content:
426
+ openai_chunk = {
427
+ "id": stream_id,
428
+ "object": "chat.completion.chunk",
429
+ "created": created,
430
+ "model": "gpt-5",
431
+ "choices": [
432
+ {
433
+ "index": 0,
434
+ "delta": {
435
+ "content": text_content
436
+ },
437
+ "finish_reason": None,
438
+ }
439
+ ],
440
+ }
441
+ yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
442
+ chunk_count += 1
443
+
444
+ elif (
445
+ delta_type == "reasoning"
446
+ and thinking_block_active
447
+ ):
448
+ # Reasoning content within reasoning block
449
+ reasoning_content = delta.get(
450
+ "reasoning", ""
451
+ )
452
+ if reasoning_content:
453
+ openai_chunk = {
454
+ "id": stream_id,
455
+ "object": "chat.completion.chunk",
456
+ "created": created,
457
+ "model": "gpt-5",
458
+ "choices": [
459
+ {
460
+ "index": 0,
461
+ "delta": {
462
+ "content": reasoning_content
463
+ },
464
+ "finish_reason": None,
465
+ }
466
+ ],
467
+ }
468
+ yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
469
+ chunk_count += 1
470
+
471
+ # Handle reasoning summary text - the actual reasoning content
472
+ elif (
473
+ event_type
474
+ == "response.reasoning_summary_text.delta"
475
+ and thinking_block_active
476
+ ):
477
+ # Extract reasoning text content from delta field
478
+ reasoning_text = event_data.get("delta", "")
479
+
480
+ if reasoning_text:
481
+ chunk_count += 1
482
+ openai_chunk = {
483
+ "id": stream_id,
484
+ "object": "chat.completion.chunk",
485
+ "created": created,
486
+ "model": "gpt-5",
487
+ "choices": [
488
+ {
489
+ "index": 0,
490
+ "delta": {
491
+ "content": reasoning_text
492
+ },
493
+ "finish_reason": None,
494
+ }
495
+ ],
496
+ }
497
+ yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
498
+
499
+ # Handle reasoning block completion - official API
500
+ elif (
501
+ event_type == "response.output_item.done"
502
+ and thinking_block_active
503
+ ):
504
+ # Check if this is the end of a reasoning block
505
+ item = event_data.get("item", {})
506
+ item_type = item.get("type")
507
+
508
+ if item_type == "reasoning":
509
+ thinking_block_active = False
510
+
511
+ # Send closing reasoning tag
512
+ openai_chunk = {
513
+ "id": stream_id,
514
+ "object": "chat.completion.chunk",
515
+ "created": created,
516
+ "model": "gpt-5",
517
+ "choices": [
518
+ {
519
+ "index": 0,
520
+ "delta": {
521
+ "content": "</reasoning>\n"
522
+ },
523
+ "finish_reason": None,
524
+ }
525
+ ],
526
+ }
527
+ yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
528
+ chunk_count += 1
529
+
530
+ logger.debug(
531
+ "codex_reasoning_block_ended",
532
+ item_type=item_type,
533
+ event_type=event_type,
534
+ )
535
+
536
+ # Convert Response API events to OpenAI format
537
+ elif event_type == "response.output_text.delta":
538
+ # Direct text delta event (only if not in thinking block)
539
+ if not thinking_block_active:
540
+ delta_content = event_data.get(
541
+ "delta", ""
542
+ )
543
+ if delta_content:
544
+ chunk_count += 1
545
+ openai_chunk = {
546
+ "id": stream_id,
547
+ "object": "chat.completion.chunk",
548
+ "created": created,
549
+ "model": event_data.get(
550
+ "model", "gpt-5"
551
+ ),
552
+ "choices": [
553
+ {
554
+ "index": 0,
555
+ "delta": {
556
+ "content": delta_content
557
+ },
558
+ "finish_reason": None,
559
+ }
560
+ ],
561
+ }
562
+ chunk_data = f"data: {json.dumps(openai_chunk)}\n\n".encode()
563
+ total_bytes += len(chunk_data)
564
+
565
+ logger.debug(
566
+ "codex_stream_chunk_converted",
567
+ chunk_number=chunk_count,
568
+ chunk_size=len(chunk_data),
569
+ event_type=event_type,
570
+ content_length=len(
571
+ delta_content
572
+ ),
573
+ )
574
+
575
+ yield chunk_data
576
+
577
+ elif event_type == "response.output.delta":
578
+ # Standard output delta with nested structure
579
+ output = event_data.get("output", [])
580
+ for output_item in output:
581
+ if output_item.get("type") == "message":
582
+ content_blocks = output_item.get(
583
+ "content", []
584
+ )
585
+ for block in content_blocks:
586
+ # Check if this is thinking content
587
+ if (
588
+ block.get("type")
589
+ in [
590
+ "thinking",
591
+ "reasoning",
592
+ "internal_monologue",
593
+ ]
594
+ and thinking_block_active
595
+ ):
596
+ thinking_content = (
597
+ block.get("text", "")
598
+ )
599
+ if thinking_content:
600
+ chunk_count += 1
601
+ openai_chunk = {
602
+ "id": stream_id,
603
+ "object": "chat.completion.chunk",
604
+ "created": created,
605
+ "model": "gpt-5",
606
+ "choices": [
607
+ {
608
+ "index": 0,
609
+ "delta": {
610
+ "content": thinking_content
611
+ },
612
+ "finish_reason": None,
613
+ }
614
+ ],
615
+ }
616
+ yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
617
+ elif (
618
+ block.get("type")
619
+ in [
620
+ "output_text",
621
+ "text",
622
+ ]
623
+ and not thinking_block_active
624
+ ):
625
+ delta_content = block.get(
626
+ "text", ""
627
+ )
628
+ if delta_content:
629
+ chunk_count += 1
630
+ openai_chunk = {
631
+ "id": stream_id,
632
+ "object": "chat.completion.chunk",
633
+ "created": created,
634
+ "model": event_data.get(
635
+ "model", "gpt-5"
636
+ ),
637
+ "choices": [
638
+ {
639
+ "index": 0,
640
+ "delta": {
641
+ "content": delta_content
642
+ },
643
+ "finish_reason": None,
644
+ }
645
+ ],
646
+ }
647
+ chunk_data = f"data: {json.dumps(openai_chunk)}\n\n".encode()
648
+ total_bytes += len(
649
+ chunk_data
650
+ )
651
+
652
+ logger.debug(
653
+ "codex_stream_chunk_converted",
654
+ chunk_number=chunk_count,
655
+ chunk_size=len(
656
+ chunk_data
657
+ ),
658
+ event_type=event_type,
659
+ content_length=len(
660
+ delta_content
661
+ ),
662
+ )
663
+
664
+ yield chunk_data
665
+
666
+ # Handle additional official API event types
667
+ elif (
668
+ event_type
669
+ == "response.function_call_arguments.delta"
670
+ ):
671
+ # Function call arguments streaming - official API
672
+ if not thinking_block_active:
673
+ arguments = event_data.get(
674
+ "arguments", ""
675
+ )
676
+ if arguments:
677
+ chunk_count += 1
678
+ openai_chunk = {
679
+ "id": stream_id,
680
+ "object": "chat.completion.chunk",
681
+ "created": created,
682
+ "model": "gpt-5",
683
+ "choices": [
684
+ {
685
+ "index": 0,
686
+ "delta": {
687
+ "content": arguments
688
+ },
689
+ "finish_reason": None,
690
+ }
691
+ ],
692
+ }
693
+ yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
694
+
695
+ elif (
696
+ event_type
697
+ == "response.audio_transcript.delta"
698
+ ):
699
+ # Audio transcript streaming - official API
700
+ if not thinking_block_active:
701
+ transcript = event_data.get(
702
+ "transcript", ""
703
+ )
704
+ if transcript:
705
+ chunk_count += 1
706
+ openai_chunk = {
707
+ "id": stream_id,
708
+ "object": "chat.completion.chunk",
709
+ "created": created,
710
+ "model": "gpt-5",
711
+ "choices": [
712
+ {
713
+ "index": 0,
714
+ "delta": {
715
+ "content": f"[Audio: {transcript}]"
716
+ },
717
+ "finish_reason": None,
718
+ }
719
+ ],
720
+ }
721
+ yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
722
+
723
+ elif (
724
+ event_type
725
+ == "response.tool_calls.function.name"
726
+ ):
727
+ # Tool function name - official API
728
+ if not thinking_block_active:
729
+ function_name = event_data.get(
730
+ "name", ""
731
+ )
732
+ if function_name:
733
+ chunk_count += 1
734
+ openai_chunk = {
735
+ "id": stream_id,
736
+ "object": "chat.completion.chunk",
737
+ "created": created,
738
+ "model": "gpt-5",
739
+ "choices": [
740
+ {
741
+ "index": 0,
742
+ "delta": {
743
+ "content": f"[Function: {function_name}]"
744
+ },
745
+ "finish_reason": None,
746
+ }
747
+ ],
748
+ }
749
+ yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
750
+
751
+ elif event_type == "response.completed":
752
+ # Final chunk with usage info
753
+ response_obj = event_data.get(
754
+ "response", {}
755
+ )
756
+ usage = response_obj.get("usage")
757
+
758
+ openai_chunk = {
759
+ "id": stream_id,
760
+ "object": "chat.completion.chunk",
761
+ "created": created,
762
+ "model": response_obj.get(
763
+ "model", "gpt-5"
764
+ ),
765
+ "choices": [
766
+ {
767
+ "index": 0,
768
+ "delta": {},
769
+ "finish_reason": "stop",
770
+ }
771
+ ],
772
+ }
773
+
774
+ if usage:
775
+ openai_chunk["usage"] = {
776
+ "prompt_tokens": usage.get(
777
+ "input_tokens", 0
778
+ ),
779
+ "completion_tokens": usage.get(
780
+ "output_tokens", 0
781
+ ),
782
+ "total_tokens": usage.get(
783
+ "total_tokens", 0
784
+ ),
785
+ }
786
+
787
+ chunk_data = f"data: {json.dumps(openai_chunk)}\n\n".encode()
788
+ yield chunk_data
789
+
790
+ logger.debug(
791
+ "codex_stream_completed",
792
+ total_chunks=chunk_count,
793
+ total_bytes=total_bytes,
794
+ )
795
+
796
+ except json.JSONDecodeError as e:
797
+ logger.debug(
798
+ "codex_sse_parse_failed",
799
+ data_preview=data_str[:100],
800
+ error=str(e),
801
+ )
802
+ continue
803
+
804
+ except Exception as e:
805
+ logger.error(
806
+ "codex_stream_error",
807
+ error=str(e),
808
+ line_count=line_count,
809
+ )
810
+ raise
811
+
812
+ # Send final [DONE] message
813
+ logger.debug(
814
+ "codex_stream_sending_done",
815
+ total_chunks=chunk_count,
816
+ total_bytes=total_bytes,
817
+ )
818
+ yield b"data: [DONE]\n\n"
819
+ else:
820
+ # Backend didn't return streaming or returned unexpected format
821
+ # When using client.stream(), we need to collect the response differently
822
+ chunks = []
823
+ async for chunk in response.aiter_bytes():
824
+ chunks.append(chunk)
825
+
826
+ response_body = b"".join(chunks)
827
+
828
+ logger.debug(
829
+ "codex_chat_non_streaming_response",
830
+ body_length=len(response_body),
831
+ body_preview=response_body[:200].decode(
832
+ "utf-8", errors="replace"
833
+ )
834
+ if response_body
835
+ else "empty",
836
+ )
837
+
838
+ if response_body:
839
+ # Check if it's actually SSE data that we missed
840
+ body_str = response_body.decode("utf-8")
841
+ if body_str.startswith("event:") or body_str.startswith(
842
+ "data:"
843
+ ):
844
+ # It's SSE data, try to extract the final JSON
845
+ logger.warning(
846
+ "Backend returned SSE data but content-type was not text/event-stream"
847
+ )
848
+ lines = body_str.strip().split("\n")
849
+ for line in reversed(lines):
850
+ if line.startswith("data:") and not line.endswith(
851
+ "[DONE]"
852
+ ):
853
+ try:
854
+ json_str = line[5:].strip()
855
+ response_data = json.loads(json_str)
856
+ if "response" in response_data:
857
+ response_data = response_data[
858
+ "response"
859
+ ]
860
+ # Convert to OpenAI format and yield as a single chunk
861
+ openai_response = (
862
+ adapter.response_to_chat_completion(
863
+ response_data
864
+ )
865
+ )
866
+ yield f"data: {openai_response.model_dump_json()}\n\n".encode()
867
+ yield b"data: [DONE]\n\n"
868
+ return
869
+ except json.JSONDecodeError:
870
+ continue
871
+ # Couldn't parse SSE data - yield error as SSE event
872
+ error_response = {
873
+ "error": {
874
+ "message": "Failed to parse SSE response data",
875
+ "type": "invalid_response_error",
876
+ "code": 502,
877
+ }
878
+ }
879
+ yield f"data: {json.dumps(error_response)}\n\n".encode()
880
+ yield b"data: [DONE]\n\n"
881
+ return
882
+ else:
883
+ # Try to parse as regular JSON
884
+ try:
885
+ response_data = json.loads(body_str)
886
+ # Convert to Chat Completions format and yield as single chunk
887
+ openai_response = (
888
+ adapter.response_to_chat_completion(
889
+ response_data
890
+ )
891
+ )
892
+ yield f"data: {openai_response.model_dump_json()}\n\n".encode()
893
+ yield b"data: [DONE]\n\n"
894
+ return
895
+ except json.JSONDecodeError as e:
896
+ logger.error(
897
+ "Failed to parse non-streaming response",
898
+ error=str(e),
899
+ body_preview=body_str[:500],
900
+ )
901
+ error_response = {
902
+ "error": {
903
+ "message": "Invalid JSON response from backend",
904
+ "type": "invalid_response_error",
905
+ "code": 502,
906
+ }
907
+ }
908
+ yield f"data: {json.dumps(error_response)}\n\n".encode()
909
+ yield b"data: [DONE]\n\n"
910
+ return
911
+ else:
912
+ # Empty response - yield error
913
+ error_response = {
914
+ "error": {
915
+ "message": "Backend returned empty response",
916
+ "type": "empty_response_error",
917
+ "code": 502,
918
+ }
919
+ }
920
+ yield f"data: {json.dumps(error_response)}\n\n".encode()
921
+ yield b"data: [DONE]\n\n"
922
+ return
923
+
924
+ # Execute the generator first to capture headers
925
+ generator_chunks = []
926
+ async for chunk in stream_codex_response():
927
+ generator_chunks.append(chunk)
928
+
929
+ # Forward upstream headers but filter out incompatible ones for streaming
930
+ streaming_headers = dict(response_headers)
931
+ # Remove headers that conflict with streaming responses
932
+ streaming_headers.pop("content-length", None)
933
+ streaming_headers.pop("content-encoding", None)
934
+ streaming_headers.pop("date", None)
935
+ # Set streaming-specific headers
936
+ streaming_headers.update(
937
+ {
938
+ "content-type": "text/event-stream",
939
+ "Cache-Control": "no-cache",
940
+ "Connection": "keep-alive",
941
+ }
942
+ )
943
+
944
+ # Replay the collected chunks
945
+ async def replay_stream() -> AsyncIterator[bytes]:
946
+ for chunk in generator_chunks:
947
+ yield chunk
948
+
949
+ # Return streaming response with proper headers - handle missing request_context
950
+ from ccproxy.observability.context import RequestContext
951
+
952
+ # Create a minimal request context if none exists
953
+ if request_context is None:
954
+ import time
955
+ import uuid
956
+
957
+ request_context = RequestContext(
958
+ request_id=str(uuid.uuid4()),
959
+ start_time=time.perf_counter(),
960
+ logger=logger,
961
+ )
962
+
963
+ return StreamingResponseWithLogging(
964
+ content=replay_stream(),
965
+ request_context=request_context,
966
+ metrics=getattr(proxy_service, "metrics", None),
967
+ status_code=200,
968
+ media_type="text/event-stream",
969
+ headers=streaming_headers,
970
+ )
971
+ else:
972
+ # Handle non-streaming request using the proxy service
973
+ # Cast MockRequest to Request to satisfy type checker
974
+ mock_request_typed: Request = mock_request # type: ignore[assignment]
975
+ response = await proxy_service.handle_codex_request(
976
+ method="POST",
977
+ path="/responses",
978
+ session_id=session_id,
979
+ access_token=access_token,
980
+ request=mock_request_typed,
981
+ settings=settings,
982
+ )
983
+
984
+ # Check if this is a streaming response (shouldn't happen for non-streaming requests)
985
+ is_streaming_response = isinstance(response, StreamingResponse)
986
+
987
+ if is_streaming_response and not openai_request.stream:
988
+ # User requested non-streaming but backend returned streaming
989
+ # Consume the stream and convert to non-streaming response
990
+ accumulated_content = ""
991
+ final_response = None
992
+
993
+ error_response = None
994
+ accumulated_chunks = ""
995
+
996
+ async for chunk in response.body_iterator: # type: ignore
997
+ chunk_str = chunk.decode("utf-8")
998
+ accumulated_chunks += chunk_str
999
+
1000
+ # The Response API sends SSE events, but errors might be plain JSON
1001
+ lines = chunk_str.strip().split("\n")
1002
+ for line in lines:
1003
+ if line.startswith("data:") and "[DONE]" not in line:
1004
+ data_str = line[5:].strip()
1005
+ try:
1006
+ event_data = json.loads(data_str)
1007
+ # Look for the completed response
1008
+ if event_data.get("type") == "response.completed":
1009
+ final_response = event_data
1010
+ # Also check if this is a direct error response (not SSE format)
1011
+ elif (
1012
+ "detail" in event_data and "type" not in event_data
1013
+ ):
1014
+ error_response = event_data
1015
+ except json.JSONDecodeError:
1016
+ continue
1017
+
1018
+ # If we didn't find SSE events, try parsing the entire accumulated content as JSON
1019
+ if (
1020
+ not final_response
1021
+ and not error_response
1022
+ and accumulated_chunks.strip()
1023
+ ):
1024
+ try:
1025
+ # Try to parse the entire content as JSON (for non-SSE error responses)
1026
+ json_response = json.loads(accumulated_chunks.strip())
1027
+ if (
1028
+ "detail" in json_response
1029
+ or "error" in json_response
1030
+ or "message" in json_response
1031
+ ):
1032
+ error_response = json_response
1033
+ else:
1034
+ # Might be a valid response without SSE formatting
1035
+ final_response = {"response": json_response}
1036
+ except json.JSONDecodeError:
1037
+ # Not valid JSON either
1038
+ pass
1039
+
1040
+ if final_response:
1041
+ # Convert to Chat Completions format
1042
+ return adapter.response_to_chat_completion(final_response)
1043
+ elif error_response:
1044
+ # Handle error response
1045
+ error_message = "Request failed"
1046
+ if "detail" in error_response:
1047
+ error_message = error_response["detail"]
1048
+ elif "error" in error_response:
1049
+ if isinstance(error_response["error"], dict):
1050
+ error_message = error_response["error"].get(
1051
+ "message", "Request failed"
1052
+ )
1053
+ else:
1054
+ error_message = str(error_response["error"])
1055
+ elif "message" in error_response:
1056
+ error_message = error_response["message"]
1057
+
1058
+ # Log the error for debugging
1059
+ logger.error(
1060
+ "codex_streaming_error_response",
1061
+ error_data=error_response,
1062
+ error_message=error_message,
1063
+ )
1064
+
1065
+ raise HTTPException(status_code=400, detail=error_message)
1066
+ else:
1067
+ raise HTTPException(
1068
+ status_code=502, detail="Failed to parse streaming response"
1069
+ )
1070
+ else:
1071
+ # Non-streaming response - parse and convert
1072
+ if isinstance(response, Response):
1073
+ # Check if this is an error response
1074
+ if response.status_code >= 400:
1075
+ # Return the error response as-is
1076
+ error_body = response.body
1077
+ if error_body:
1078
+ try:
1079
+ # Handle bytes/memoryview union
1080
+ error_body_bytes = (
1081
+ bytes(error_body)
1082
+ if isinstance(error_body, memoryview)
1083
+ else error_body
1084
+ )
1085
+ error_data = json.loads(
1086
+ error_body_bytes.decode("utf-8")
1087
+ )
1088
+ # Log the actual error from backend
1089
+ logger.error(
1090
+ "codex_backend_error",
1091
+ status_code=response.status_code,
1092
+ error_data=error_data,
1093
+ )
1094
+ # Pass through the error from backend
1095
+ # Handle different error formats from backend
1096
+ error_message = "Request failed"
1097
+ if "detail" in error_data:
1098
+ error_message = error_data["detail"]
1099
+ elif "error" in error_data:
1100
+ if isinstance(error_data["error"], dict):
1101
+ error_message = error_data["error"].get(
1102
+ "message", "Request failed"
1103
+ )
1104
+ else:
1105
+ error_message = str(error_data["error"])
1106
+ elif "message" in error_data:
1107
+ error_message = error_data["message"]
1108
+
1109
+ raise HTTPException(
1110
+ status_code=response.status_code,
1111
+ detail=error_message,
1112
+ )
1113
+ except (json.JSONDecodeError, UnicodeDecodeError):
1114
+ # Handle bytes/memoryview union for logging
1115
+ error_body_bytes = (
1116
+ bytes(error_body)
1117
+ if isinstance(error_body, memoryview)
1118
+ else error_body
1119
+ )
1120
+ logger.error(
1121
+ "codex_backend_error_parse_failed",
1122
+ status_code=response.status_code,
1123
+ body=error_body_bytes[:500].decode(
1124
+ "utf-8", errors="replace"
1125
+ ),
1126
+ )
1127
+ pass
1128
+ raise HTTPException(
1129
+ status_code=response.status_code, detail="Request failed"
1130
+ )
1131
+
1132
+ # Read the response body for successful responses
1133
+ response_body = response.body
1134
+ if response_body:
1135
+ try:
1136
+ # Handle bytes/memoryview union
1137
+ response_body_bytes = (
1138
+ bytes(response_body)
1139
+ if isinstance(response_body, memoryview)
1140
+ else response_body
1141
+ )
1142
+ response_data = json.loads(
1143
+ response_body_bytes.decode("utf-8")
1144
+ )
1145
+ # Convert Response API format to Chat Completions format
1146
+ return adapter.response_to_chat_completion(response_data)
1147
+ except (json.JSONDecodeError, UnicodeDecodeError) as e:
1148
+ logger.error("Failed to parse Codex response", error=str(e))
1149
+ raise HTTPException(
1150
+ status_code=502,
1151
+ detail="Invalid response from Codex API",
1152
+ ) from e
1153
+
1154
+ # If we can't convert, return error
1155
+ raise HTTPException(
1156
+ status_code=502, detail="Unable to process Codex response"
1157
+ )
1158
+
1159
+ except HTTPException:
1160
+ raise
1161
+ except AuthenticationError as e:
1162
+ raise HTTPException(status_code=401, detail=str(e)) from e
1163
+ except ProxyError as e:
1164
+ raise HTTPException(status_code=502, detail=str(e)) from e
1165
+ except Exception as e:
1166
+ logger.error("Unexpected error in codex_chat_completions", error=str(e))
1167
+ raise HTTPException(status_code=500, detail="Internal server error") from e
1168
+
1169
+
1170
+ # NOTE: Test endpoint commented out after exploration
1171
+ # Testing revealed that ChatGPT backend API only supports /responses endpoint
1172
+ # and does NOT support OpenAI-style /chat/completions or other endpoints.
1173
+ # See codex_endpoint_test_results.md for full findings.
1174
+ #
1175
+ # @router.api_route("/test/{path:path}", methods=["GET", "POST", "PUT", "DELETE"], response_model=None, include_in_schema=False)
1176
+ # async def codex_test_probe(
1177
+ # path: str,
1178
+ # request: Request,
1179
+ # proxy_service: ProxyServiceDep,
1180
+ # settings: Settings = Depends(get_settings),
1181
+ # token_manager: OpenAITokenManager = Depends(get_token_manager),
1182
+ # _: None = Depends(check_codex_enabled),
1183
+ # ) -> Response:
1184
+ # """Test endpoint to probe upstream ChatGPT backend API paths.
1185
+ #
1186
+ # WARNING: This is a test endpoint for exploration only.
1187
+ # It forwards requests to any path on the ChatGPT backend API.
1188
+ # Should be removed or protected after testing.
1189
+ # """
1190
+ # # Get and validate access token
1191
+ # try:
1192
+ # access_token = await token_manager.get_valid_token()
1193
+ # if not access_token:
1194
+ # raise HTTPException(
1195
+ # status_code=401,
1196
+ # detail="No valid OpenAI credentials found. Please authenticate first.",
1197
+ # )
1198
+ # except Exception as e:
1199
+ # logger.error("Failed to get OpenAI access token", error=str(e))
1200
+ # raise HTTPException(
1201
+ # status_code=401, detail="Failed to retrieve valid credentials"
1202
+ # ) from e
1203
+ #
1204
+ # # Log the test request
1205
+ # logger.info(f"Testing upstream path: /{path}", method=request.method)
1206
+ #
1207
+ # try:
1208
+ # # Use a simple session_id for testing
1209
+ # session_id = "test-probe"
1210
+ #
1211
+ # # Handle the test request - forward to the specified path
1212
+ # response = await proxy_service.handle_codex_request(
1213
+ # method=request.method,
1214
+ # path=f"/{path}",
1215
+ # session_id=session_id,
1216
+ # access_token=access_token,
1217
+ # request=request,
1218
+ # settings=settings,
1219
+ # )
1220
+ #
1221
+ # logger.info(f"Test probe response for /{path}", status_code=getattr(response, "status_code", 200))
1222
+ # return response
1223
+ # except AuthenticationError as e:
1224
+ # logger.warning(f"Auth error for path /{path}: {str(e)}")
1225
+ # raise HTTPException(status_code=401, detail=str(e)) from e
1226
+ # except ProxyError as e:
1227
+ # logger.warning(f"Proxy error for path /{path}: {str(e)}")
1228
+ # raise HTTPException(status_code=502, detail=str(e)) from e
1229
+ # except Exception as e:
1230
+ # logger.error(f"Unexpected error testing path /{path}", error=str(e))
1231
+ # raise HTTPException(status_code=500, detail=f"Error testing path: {str(e)}") from e
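For reference, the heart of stream_codex_response above is a per-event translation from Response API SSE events into Chat Completions chunks. A condensed sketch of that mapping for the plain text-delta case, mirroring the chunk shape used in the route (a simplification for illustration, not the full converter):

import json
import time
import uuid


def output_text_delta_to_chunk(event: dict, stream_id: str, created: int) -> bytes | None:
    """Convert a 'response.output_text.delta' SSE event into a chat.completion.chunk line."""
    if event.get("type") != "response.output_text.delta":
        return None
    delta_content = event.get("delta", "")
    if not delta_content:
        return None
    chunk = {
        "id": stream_id,
        "object": "chat.completion.chunk",
        "created": created,
        "model": event.get("model", "gpt-5"),
        "choices": [
            {"index": 0, "delta": {"content": delta_content}, "finish_reason": None}
        ],
    }
    return f"data: {json.dumps(chunk)}\n\n".encode()


# Example with placeholder identifiers, matching the route's id and timestamp format:
stream_id = f"chatcmpl_{uuid.uuid4().hex[:29]}"
created = int(time.time())
print(output_text_delta_to_chunk({"type": "response.output_text.delta", "delta": "Hi"}, stream_id, created))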