codex-lb 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. app/core/clients/proxy.py +33 -3
  2. app/core/config/settings.py +9 -8
  3. app/core/handlers/__init__.py +3 -0
  4. app/core/handlers/exceptions.py +39 -0
  5. app/core/middleware/__init__.py +9 -0
  6. app/core/middleware/api_errors.py +33 -0
  7. app/core/middleware/request_decompression.py +101 -0
  8. app/core/middleware/request_id.py +27 -0
  9. app/core/openai/chat_requests.py +172 -0
  10. app/core/openai/chat_responses.py +534 -0
  11. app/core/openai/message_coercion.py +60 -0
  12. app/core/openai/models_catalog.py +72 -0
  13. app/core/openai/requests.py +23 -5
  14. app/core/openai/v1_requests.py +92 -0
  15. app/db/models.py +3 -3
  16. app/db/session.py +25 -8
  17. app/dependencies.py +43 -16
  18. app/main.py +13 -67
  19. app/modules/accounts/repository.py +25 -10
  20. app/modules/proxy/api.py +94 -0
  21. app/modules/proxy/load_balancer.py +75 -58
  22. app/modules/proxy/repo_bundle.py +23 -0
  23. app/modules/proxy/service.py +127 -102
  24. app/modules/request_logs/api.py +61 -7
  25. app/modules/request_logs/repository.py +131 -16
  26. app/modules/request_logs/schemas.py +11 -2
  27. app/modules/request_logs/service.py +97 -20
  28. app/modules/usage/service.py +65 -4
  29. app/modules/usage/updater.py +58 -26
  30. app/static/index.css +378 -1
  31. app/static/index.html +183 -8
  32. app/static/index.js +308 -13
  33. {codex_lb-0.3.1.dist-info → codex_lb-0.5.0.dist-info}/METADATA +42 -3
  34. {codex_lb-0.3.1.dist-info → codex_lb-0.5.0.dist-info}/RECORD +37 -25
  35. {codex_lb-0.3.1.dist-info → codex_lb-0.5.0.dist-info}/WHEEL +0 -0
  36. {codex_lb-0.3.1.dist-info → codex_lb-0.5.0.dist-info}/entry_points.txt +0 -0
  37. {codex_lb-0.3.1.dist-info → codex_lb-0.5.0.dist-info}/licenses/LICENSE +0 -0
app/core/openai/requests.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from pydantic import BaseModel, ConfigDict, Field
3
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
4
4
 
5
5
  from app.core.types import JsonObject, JsonValue
6
6
 
@@ -35,17 +35,25 @@ class ResponsesRequest(BaseModel):
35
35
  instructions: str
36
36
  input: list[JsonValue]
37
37
  tools: list[JsonValue] = Field(default_factory=list)
38
- tool_choice: str | None = None
38
+ tool_choice: str | dict[str, JsonValue] | None = None
39
39
  parallel_tool_calls: bool | None = None
40
40
  reasoning: ResponsesReasoning | None = None
41
- store: bool | None = None
41
+ store: bool = False
42
42
  stream: bool | None = None
43
43
  include: list[str] = Field(default_factory=list)
44
44
  prompt_cache_key: str | None = None
45
45
  text: ResponsesTextControls | None = None
46
46
 
47
+ @field_validator("store")
48
+ @classmethod
49
+ def _ensure_store_false(cls, value: bool | None) -> bool:
50
+ if value is True:
51
+ raise ValueError("store must be false")
52
+ return False if value is None else value
53
+
47
54
  def to_payload(self) -> JsonObject:
48
- return self.model_dump(mode="json", exclude_none=True)
55
+ payload = self.model_dump(mode="json", exclude_none=True)
56
+ return _strip_unsupported_fields(payload)
49
57
 
50
58
 
51
59
  class ResponsesCompactRequest(BaseModel):
@@ -56,4 +64,14 @@ class ResponsesCompactRequest(BaseModel):
56
64
  input: list[JsonValue]
57
65
 
58
66
  def to_payload(self) -> JsonObject:
59
- return self.model_dump(mode="json", exclude_none=True)
67
+ payload = self.model_dump(mode="json", exclude_none=True)
68
+ return _strip_unsupported_fields(payload)
69
+
70
+
71
+ _UNSUPPORTED_UPSTREAM_FIELDS = {"max_output_tokens"}
72
+
73
+
74
+ def _strip_unsupported_fields(payload: dict[str, JsonValue]) -> dict[str, JsonValue]:
75
+ for key in _UNSUPPORTED_UPSTREAM_FIELDS:
76
+ payload.pop(key, None)
77
+ return payload
app/core/openai/v1_requests.py ADDED
@@ -0,0 +1,92 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
4
+
5
+ from app.core.openai.message_coercion import coerce_messages
6
+ from app.core.openai.requests import (
7
+ ResponsesCompactRequest,
8
+ ResponsesReasoning,
9
+ ResponsesRequest,
10
+ ResponsesTextControls,
11
+ )
12
+ from app.core.types import JsonValue
13
+
14
+
15
+ class V1ResponsesRequest(BaseModel):
16
+ model_config = ConfigDict(extra="allow")
17
+
18
+ model: str = Field(min_length=1)
19
+ messages: list[JsonValue] | None = None
20
+ input: list[JsonValue] | None = None
21
+ instructions: str | None = None
22
+ tools: list[JsonValue] = Field(default_factory=list)
23
+ tool_choice: str | dict[str, JsonValue] | None = None
24
+ parallel_tool_calls: bool | None = None
25
+ reasoning: ResponsesReasoning | None = None
26
+ store: bool | None = None
27
+ stream: bool | None = None
28
+ include: list[str] = Field(default_factory=list)
29
+ prompt_cache_key: str | None = None
30
+ text: ResponsesTextControls | None = None
31
+
32
+ @field_validator("store")
33
+ @classmethod
34
+ def _ensure_store_false(cls, value: bool | None) -> bool | None:
35
+ if value is True:
36
+ raise ValueError("store must be false")
37
+ return value
38
+
39
+ @model_validator(mode="after")
40
+ def _validate_input(self) -> "V1ResponsesRequest":
41
+ if self.messages is None and self.input is None:
42
+ raise ValueError("Provide either 'input' or 'messages'.")
43
+ if self.messages is not None and self.input not in (None, []):
44
+ raise ValueError("Provide either 'input' or 'messages', not both.")
45
+ return self
46
+
47
+ def to_responses_request(self) -> ResponsesRequest:
48
+ data = self.model_dump(mode="json", exclude_none=True)
49
+ messages = data.pop("messages", None)
50
+ instructions = data.get("instructions")
51
+ instruction_text = instructions if isinstance(instructions, str) else ""
52
+ input_value = data.get("input")
53
+ input_items: list[JsonValue] = input_value if isinstance(input_value, list) else []
54
+
55
+ if messages is not None:
56
+ instruction_text, input_items = coerce_messages(instruction_text, messages)
57
+
58
+ data["instructions"] = instruction_text
59
+ data["input"] = input_items
60
+ return ResponsesRequest.model_validate(data)
61
+
62
+
63
+ class V1ResponsesCompactRequest(BaseModel):
64
+ model_config = ConfigDict(extra="allow")
65
+
66
+ model: str = Field(min_length=1)
67
+ messages: list[JsonValue] | None = None
68
+ input: list[JsonValue] | None = None
69
+ instructions: str | None = None
70
+
71
+ @model_validator(mode="after")
72
+ def _validate_input(self) -> "V1ResponsesCompactRequest":
73
+ if self.messages is None and self.input is None:
74
+ raise ValueError("Provide either 'input' or 'messages'.")
75
+ if self.messages is not None and self.input not in (None, []):
76
+ raise ValueError("Provide either 'input' or 'messages', not both.")
77
+ return self
78
+
79
+ def to_compact_request(self) -> ResponsesCompactRequest:
80
+ data = self.model_dump(mode="json", exclude_none=True)
81
+ messages = data.pop("messages", None)
82
+ instructions = data.get("instructions")
83
+ instruction_text = instructions if isinstance(instructions, str) else ""
84
+ input_value = data.get("input")
85
+ input_items: list[JsonValue] = input_value if isinstance(input_value, list) else []
86
+
87
+ if messages is not None:
88
+ instruction_text, input_items = coerce_messages(instruction_text, messages)
89
+
90
+ data["instructions"] = instruction_text
91
+ data["input"] = input_items
92
+ return ResponsesCompactRequest.model_validate(data)
app/db/models.py CHANGED
@@ -48,7 +48,7 @@ class UsageHistory(Base):
48
48
  __tablename__ = "usage_history"
49
49
 
50
50
  id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
51
- account_id: Mapped[str] = mapped_column(String, ForeignKey("accounts.id"), nullable=False)
51
+ account_id: Mapped[str] = mapped_column(String, ForeignKey("accounts.id", ondelete="CASCADE"), nullable=False)
52
52
  recorded_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), nullable=False)
53
53
  window: Mapped[str | None] = mapped_column(String, nullable=True)
54
54
  used_percent: Mapped[float] = mapped_column(Float, nullable=False)
@@ -65,7 +65,7 @@ class RequestLog(Base):
65
65
  __tablename__ = "request_logs"
66
66
 
67
67
  id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
68
- account_id: Mapped[str] = mapped_column(String, ForeignKey("accounts.id"), nullable=False)
68
+ account_id: Mapped[str] = mapped_column(String, ForeignKey("accounts.id", ondelete="CASCADE"), nullable=False)
69
69
  request_id: Mapped[str] = mapped_column(String, nullable=False)
70
70
  requested_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), nullable=False)
71
71
  model: Mapped[str] = mapped_column(String, nullable=False)
@@ -84,7 +84,7 @@ class StickySession(Base):
84
84
  __tablename__ = "sticky_sessions"
85
85
 
86
86
  key: Mapped[str] = mapped_column(String, primary_key=True)
87
- account_id: Mapped[str] = mapped_column(String, ForeignKey("accounts.id"), nullable=False)
87
+ account_id: Mapped[str] = mapped_column(String, ForeignKey("accounts.id", ondelete="CASCADE"), nullable=False)
88
88
  created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), nullable=False)
89
89
  updated_at: Mapped[datetime] = mapped_column(
90
90
  DateTime,
app/db/session.py CHANGED
@@ -13,7 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_asyn
13
13
  from app.core.config.settings import get_settings
14
14
  from app.db.migrations import run_migrations
15
15
 
16
- DATABASE_URL = get_settings().database_url
16
+ _settings = get_settings()
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
@@ -43,15 +43,32 @@ def _configure_sqlite_engine(engine: Engine, *, enable_wal: bool) -> None:
43
43
  cursor.close()
44
44
 
45
45
 
46
- if _is_sqlite_url(DATABASE_URL):
46
+ if _is_sqlite_url(_settings.database_url):
47
+ is_sqlite_memory = _is_sqlite_memory_url(_settings.database_url)
48
+ if is_sqlite_memory:
49
+ engine = create_async_engine(
50
+ _settings.database_url,
51
+ echo=False,
52
+ connect_args={"timeout": _SQLITE_BUSY_TIMEOUT_SECONDS},
53
+ )
54
+ else:
55
+ engine = create_async_engine(
56
+ _settings.database_url,
57
+ echo=False,
58
+ pool_size=_settings.database_pool_size,
59
+ max_overflow=_settings.database_max_overflow,
60
+ pool_timeout=_settings.database_pool_timeout_seconds,
61
+ connect_args={"timeout": _SQLITE_BUSY_TIMEOUT_SECONDS},
62
+ )
63
+ _configure_sqlite_engine(engine.sync_engine, enable_wal=not is_sqlite_memory)
64
+ else:
47
65
  engine = create_async_engine(
48
- DATABASE_URL,
66
+ _settings.database_url,
49
67
  echo=False,
50
- connect_args={"timeout": _SQLITE_BUSY_TIMEOUT_SECONDS},
68
+ pool_size=_settings.database_pool_size,
69
+ max_overflow=_settings.database_max_overflow,
70
+ pool_timeout=_settings.database_pool_timeout_seconds,
51
71
  )
52
- _configure_sqlite_engine(engine.sync_engine, enable_wal=not _is_sqlite_memory_url(DATABASE_URL))
53
- else:
54
- engine = create_async_engine(DATABASE_URL, echo=False)
55
72
 
56
73
  SessionLocal = async_sessionmaker(engine, expire_on_commit=False, class_=AsyncSession)
57
74
 
@@ -116,7 +133,7 @@ async def get_session() -> AsyncIterator[AsyncSession]:
116
133
  async def init_db() -> None:
117
134
  from app.db.models import Base
118
135
 
119
- _ensure_sqlite_dir(DATABASE_URL)
136
+ _ensure_sqlite_dir(_settings.database_url)
120
137
 
121
138
  async with engine.begin() as conn:
122
139
  await conn.run_sync(Base.metadata.create_all)
app/dependencies.py CHANGED
@@ -11,6 +11,7 @@ from app.db.session import SessionLocal, _safe_close, _safe_rollback, get_sessio
11
11
  from app.modules.accounts.repository import AccountsRepository
12
12
  from app.modules.accounts.service import AccountsService
13
13
  from app.modules.oauth.service import OauthService
14
+ from app.modules.proxy.repo_bundle import ProxyRepositories
14
15
  from app.modules.proxy.service import ProxyService
15
16
  from app.modules.proxy.sticky_repository import StickySessionsRepository
16
17
  from app.modules.request_logs.repository import RequestLogsRepository
@@ -79,7 +80,12 @@ def get_usage_context(
79
80
  usage_repository = UsageRepository(session)
80
81
  request_logs_repository = RequestLogsRepository(session)
81
82
  accounts_repository = AccountsRepository(session)
82
- service = UsageService(usage_repository, request_logs_repository, accounts_repository)
83
+ service = UsageService(
84
+ usage_repository,
85
+ request_logs_repository,
86
+ accounts_repository,
87
+ refresh_repo_factory=_usage_refresh_context,
88
+ )
83
89
  return UsageContext(
84
90
  session=session,
85
91
  usage_repository=usage_repository,
@@ -101,6 +107,40 @@ async def _accounts_repo_context() -> AsyncIterator[AccountsRepository]:
101
107
  await _safe_close(session)
102
108
 
103
109
 
110
+ @asynccontextmanager
111
+ async def _usage_refresh_context() -> AsyncIterator[tuple[UsageRepository, AccountsRepository]]:
112
+ session = SessionLocal()
113
+ try:
114
+ yield UsageRepository(session), AccountsRepository(session)
115
+ except BaseException:
116
+ await _safe_rollback(session)
117
+ raise
118
+ finally:
119
+ if session.in_transaction():
120
+ await _safe_rollback(session)
121
+ await _safe_close(session)
122
+
123
+
124
+ @asynccontextmanager
125
+ async def _proxy_repo_context() -> AsyncIterator[ProxyRepositories]:
126
+ session = SessionLocal()
127
+ try:
128
+ yield ProxyRepositories(
129
+ accounts=AccountsRepository(session),
130
+ usage=UsageRepository(session),
131
+ request_logs=RequestLogsRepository(session),
132
+ sticky_sessions=StickySessionsRepository(session),
133
+ settings=SettingsRepository(session),
134
+ )
135
+ except BaseException:
136
+ await _safe_rollback(session)
137
+ raise
138
+ finally:
139
+ if session.in_transaction():
140
+ await _safe_rollback(session)
141
+ await _safe_close(session)
142
+
143
+
104
144
  def get_oauth_context(
105
145
  session: AsyncSession = Depends(get_session),
106
146
  ) -> OauthContext:
@@ -108,21 +148,8 @@ def get_oauth_context(
108
148
  return OauthContext(service=OauthService(accounts_repository, repo_factory=_accounts_repo_context))
109
149
 
110
150
 
111
- def get_proxy_context(
112
- session: AsyncSession = Depends(get_session),
113
- ) -> ProxyContext:
114
- accounts_repository = AccountsRepository(session)
115
- usage_repository = UsageRepository(session)
116
- request_logs_repository = RequestLogsRepository(session)
117
- sticky_repository = StickySessionsRepository(session)
118
- settings_repository = SettingsRepository(session)
119
- service = ProxyService(
120
- accounts_repository,
121
- usage_repository,
122
- request_logs_repository,
123
- sticky_repository,
124
- settings_repository,
125
- )
151
+ def get_proxy_context() -> ProxyContext:
152
+ service = ProxyService(repo_factory=_proxy_repo_context)
126
153
  return ProxyContext(service=service)
127
154
 
128
155
 
app/main.py CHANGED
@@ -1,23 +1,19 @@
1
1
  from __future__ import annotations
2
2
 
3
- import logging
4
3
  from contextlib import asynccontextmanager
5
4
  from pathlib import Path
6
- from uuid import uuid4
7
5
 
8
- from fastapi import FastAPI, Request
9
- from fastapi.exception_handlers import (
10
- http_exception_handler,
11
- request_validation_exception_handler,
12
- )
13
- from fastapi.exceptions import RequestValidationError
14
- from fastapi.responses import FileResponse, JSONResponse, RedirectResponse, Response
6
+ from fastapi import FastAPI
7
+ from fastapi.responses import FileResponse, RedirectResponse
15
8
  from fastapi.staticfiles import StaticFiles
16
- from starlette.exceptions import HTTPException as StarletteHTTPException
17
9
 
18
10
  from app.core.clients.http import close_http_client, init_http_client
19
- from app.core.errors import dashboard_error
20
- from app.core.utils.request_id import get_request_id, reset_request_id, set_request_id
11
+ from app.core.handlers import add_exception_handlers
12
+ from app.core.middleware import (
13
+ add_api_unhandled_error_middleware,
14
+ add_request_decompression_middleware,
15
+ add_request_id_middleware,
16
+ )
21
17
  from app.db.session import close_db, init_db
22
18
  from app.modules.accounts import api as accounts_api
23
19
  from app.modules.health import api as health_api
@@ -27,8 +23,6 @@ from app.modules.request_logs import api as request_logs_api
27
23
  from app.modules.settings import api as settings_api
28
24
  from app.modules.usage import api as usage_api
29
25
 
30
- logger = logging.getLogger(__name__)
31
-
32
26
 
33
27
  @asynccontextmanager
34
28
  async def lifespan(_: FastAPI):
@@ -47,61 +41,13 @@ async def lifespan(_: FastAPI):
47
41
  def create_app() -> FastAPI:
48
42
  app = FastAPI(title="codex-lb", version="0.1.0", lifespan=lifespan)
49
43
 
50
- @app.middleware("http")
51
- async def request_id_middleware(request: Request, call_next) -> JSONResponse:
52
- inbound_request_id = request.headers.get("x-request-id") or request.headers.get("request-id")
53
- request_id = inbound_request_id or str(uuid4())
54
- token = set_request_id(request_id)
55
- try:
56
- response = await call_next(request)
57
- except Exception:
58
- reset_request_id(token)
59
- raise
60
- response.headers.setdefault("x-request-id", request_id)
61
- return response
62
-
63
- @app.middleware("http")
64
- async def api_unhandled_error_middleware(request: Request, call_next) -> Response:
65
- try:
66
- return await call_next(request)
67
- except Exception:
68
- if request.url.path.startswith("/api/"):
69
- logger.exception(
70
- "Unhandled API error request_id=%s",
71
- get_request_id(),
72
- )
73
- return JSONResponse(
74
- status_code=500,
75
- content=dashboard_error("internal_error", "Unexpected error"),
76
- )
77
- raise
78
-
79
- @app.exception_handler(RequestValidationError)
80
- async def _validation_error_handler(
81
- request: Request,
82
- exc: RequestValidationError,
83
- ) -> Response:
84
- if request.url.path.startswith("/api/"):
85
- return JSONResponse(
86
- status_code=422,
87
- content=dashboard_error("validation_error", "Invalid request payload"),
88
- )
89
- return await request_validation_exception_handler(request, exc)
90
-
91
- @app.exception_handler(StarletteHTTPException)
92
- async def _http_error_handler(
93
- request: Request,
94
- exc: StarletteHTTPException,
95
- ) -> Response:
96
- if request.url.path.startswith("/api/"):
97
- detail = exc.detail if isinstance(exc.detail, str) else "Request failed"
98
- return JSONResponse(
99
- status_code=exc.status_code,
100
- content=dashboard_error(f"http_{exc.status_code}", detail),
101
- )
102
- return await http_exception_handler(request, exc)
44
+ add_request_decompression_middleware(app)
45
+ add_request_id_middleware(app)
46
+ add_api_unhandled_error_middleware(app)
47
+ add_exception_handlers(app)
103
48
 
104
49
  app.include_router(proxy_api.router)
50
+ app.include_router(proxy_api.v1_router)
105
51
  app.include_router(proxy_api.usage_router)
106
52
  app.include_router(accounts_api.router)
107
53
  app.include_router(usage_api.router)
app/modules/accounts/repository.py CHANGED
@@ -5,7 +5,7 @@ from datetime import datetime
5
5
  from sqlalchemy import delete, select, update
6
6
  from sqlalchemy.ext.asyncio import AsyncSession
7
7
 
8
- from app.db.models import Account, AccountStatus
8
+ from app.db.models import Account, AccountStatus, RequestLog, StickySession, UsageHistory
9
9
 
10
10
 
11
11
  class AccountsRepository:
@@ -19,19 +19,19 @@ class AccountsRepository:
19
19
  async def upsert(self, account: Account) -> Account:
20
20
  existing = await self._session.get(Account, account.id)
21
21
  if existing:
22
- existing.chatgpt_account_id = account.chatgpt_account_id
23
- existing.email = account.email
24
- existing.plan_type = account.plan_type
25
- existing.access_token_encrypted = account.access_token_encrypted
26
- existing.refresh_token_encrypted = account.refresh_token_encrypted
27
- existing.id_token_encrypted = account.id_token_encrypted
28
- existing.last_refresh = account.last_refresh
29
- existing.status = account.status
30
- existing.deactivation_reason = account.deactivation_reason
22
+ _apply_account_updates(existing, account)
31
23
  await self._session.commit()
32
24
  await self._session.refresh(existing)
33
25
  return existing
34
26
 
27
+ result = await self._session.execute(select(Account).where(Account.email == account.email))
28
+ existing_by_email = result.scalar_one_or_none()
29
+ if existing_by_email:
30
+ _apply_account_updates(existing_by_email, account)
31
+ await self._session.commit()
32
+ await self._session.refresh(existing_by_email)
33
+ return existing_by_email
34
+
35
35
  self._session.add(account)
36
36
  await self._session.commit()
37
37
  await self._session.refresh(account)
@@ -54,6 +54,9 @@ class AccountsRepository:
54
54
  return result.scalar_one_or_none() is not None
55
55
 
56
56
  async def delete(self, account_id: str) -> bool:
57
+ await self._session.execute(delete(UsageHistory).where(UsageHistory.account_id == account_id))
58
+ await self._session.execute(delete(RequestLog).where(RequestLog.account_id == account_id))
59
+ await self._session.execute(delete(StickySession).where(StickySession.account_id == account_id))
57
60
  result = await self._session.execute(delete(Account).where(Account.id == account_id).returning(Account.id))
58
61
  await self._session.commit()
59
62
  return result.scalar_one_or_none() is not None
@@ -86,3 +89,15 @@ class AccountsRepository:
86
89
  )
87
90
  await self._session.commit()
88
91
  return result.scalar_one_or_none() is not None
92
+
93
+
94
+ def _apply_account_updates(target: Account, source: Account) -> None:
95
+ target.chatgpt_account_id = source.chatgpt_account_id
96
+ target.email = source.email
97
+ target.plan_type = source.plan_type
98
+ target.access_token_encrypted = source.access_token_encrypted
99
+ target.refresh_token_encrypted = source.refresh_token_encrypted
100
+ target.id_token_encrypted = source.id_token_encrypted
101
+ target.last_refresh = source.last_refresh
102
+ target.status = source.status
103
+ target.deactivation_reason = source.deactivation_reason
app/modules/proxy/api.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import time
3
4
  from collections.abc import AsyncIterator
4
5
 
5
6
  from fastapi import APIRouter, Body, Depends, Request, Response
@@ -7,11 +8,16 @@ from fastapi.responses import JSONResponse, StreamingResponse
7
8
 
8
9
  from app.core.clients.proxy import ProxyResponseError
9
10
  from app.core.errors import openai_error
11
+ from app.core.openai.chat_requests import ChatCompletionsRequest
12
+ from app.core.openai.chat_responses import collect_chat_completion, stream_chat_chunks
13
+ from app.core.openai.models_catalog import MODEL_CATALOG
10
14
  from app.core.openai.requests import ResponsesCompactRequest, ResponsesRequest
15
+ from app.core.openai.v1_requests import V1ResponsesCompactRequest, V1ResponsesRequest
11
16
  from app.dependencies import ProxyContext, get_proxy_context
12
17
  from app.modules.proxy.schemas import RateLimitStatusPayload
13
18
 
14
19
  router = APIRouter(prefix="/backend-api/codex", tags=["proxy"])
20
+ v1_router = APIRouter(prefix="/v1", tags=["proxy"])
15
21
  usage_router = APIRouter(tags=["proxy"])
16
22
 
17
23
 
@@ -20,6 +26,77 @@ async def responses(
20
26
  request: Request,
21
27
  payload: ResponsesRequest = Body(...),
22
28
  context: ProxyContext = Depends(get_proxy_context),
29
+ ) -> Response:
30
+ return await _stream_responses(request, payload, context)
31
+
32
+
33
+ @v1_router.post("/responses")
34
+ async def v1_responses(
35
+ request: Request,
36
+ payload: V1ResponsesRequest = Body(...),
37
+ context: ProxyContext = Depends(get_proxy_context),
38
+ ) -> Response:
39
+ return await _stream_responses(request, payload.to_responses_request(), context)
40
+
41
+
42
+ @v1_router.get("/models")
43
+ async def v1_models() -> JSONResponse:
44
+ created = int(time.time())
45
+ items = [
46
+ {
47
+ "id": model_id,
48
+ "object": "model",
49
+ "created": created,
50
+ "owned_by": "codex-lb",
51
+ "metadata": entry.model_dump(mode="json"),
52
+ }
53
+ for model_id, entry in MODEL_CATALOG.items()
54
+ ]
55
+ return JSONResponse({"object": "list", "data": items})
56
+
57
+
58
+ @v1_router.post("/chat/completions")
59
+ async def v1_chat_completions(
60
+ request: Request,
61
+ payload: ChatCompletionsRequest = Body(...),
62
+ context: ProxyContext = Depends(get_proxy_context),
63
+ ) -> Response:
64
+ rate_limit_headers = await context.service.rate_limit_headers()
65
+ responses_payload = payload.to_responses_request()
66
+ responses_payload.stream = True
67
+ stream = context.service.stream_responses(
68
+ responses_payload,
69
+ request.headers,
70
+ propagate_http_errors=True,
71
+ )
72
+ try:
73
+ first = await stream.__anext__()
74
+ except StopAsyncIteration:
75
+ first = None
76
+ except ProxyResponseError as exc:
77
+ return JSONResponse(status_code=exc.status_code, content=exc.payload, headers=rate_limit_headers)
78
+
79
+ stream_with_first = _prepend_first(first, stream)
80
+ if payload.stream:
81
+ return StreamingResponse(
82
+ stream_chat_chunks(stream_with_first, model=payload.model),
83
+ media_type="text/event-stream",
84
+ headers={"Cache-Control": "no-cache", **rate_limit_headers},
85
+ )
86
+
87
+ result = await collect_chat_completion(stream_with_first, model=payload.model)
88
+ status_code = 200
89
+ if isinstance(result, dict) and "error" in result:
90
+ error = result.get("error")
91
+ code = error.get("code") if isinstance(error, dict) else None
92
+ status_code = 503 if code == "no_accounts" else 502
93
+ return JSONResponse(content=result, status_code=status_code, headers=rate_limit_headers)
94
+
95
+
96
+ async def _stream_responses(
97
+ request: Request,
98
+ payload: ResponsesRequest,
99
+ context: ProxyContext,
23
100
  ) -> Response:
24
101
  rate_limit_headers = await context.service.rate_limit_headers()
25
102
  stream = context.service.stream_responses(
@@ -49,6 +126,23 @@ async def responses_compact(
49
126
  request: Request,
50
127
  payload: ResponsesCompactRequest = Body(...),
51
128
  context: ProxyContext = Depends(get_proxy_context),
129
+ ) -> JSONResponse:
130
+ return await _compact_responses(request, payload, context)
131
+
132
+
133
+ @v1_router.post("/responses/compact")
134
+ async def v1_responses_compact(
135
+ request: Request,
136
+ payload: V1ResponsesCompactRequest = Body(...),
137
+ context: ProxyContext = Depends(get_proxy_context),
138
+ ) -> JSONResponse:
139
+ return await _compact_responses(request, payload.to_compact_request(), context)
140
+
141
+
142
+ async def _compact_responses(
143
+ request: Request,
144
+ payload: ResponsesCompactRequest,
145
+ context: ProxyContext,
52
146
  ) -> JSONResponse:
53
147
  rate_limit_headers = await context.service.rate_limit_headers()
54
148
  try: