codex-lb 0.4.0-py3-none-any.whl → 0.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/core/config/settings.py +8 -8
- app/core/handlers/__init__.py +3 -0
- app/core/handlers/exceptions.py +39 -0
- app/core/middleware/__init__.py +9 -0
- app/core/middleware/api_errors.py +33 -0
- app/core/middleware/request_decompression.py +101 -0
- app/core/middleware/request_id.py +27 -0
- app/core/openai/chat_requests.py +172 -0
- app/core/openai/chat_responses.py +534 -0
- app/core/openai/message_coercion.py +60 -0
- app/core/openai/models_catalog.py +72 -0
- app/core/openai/requests.py +4 -4
- app/core/openai/v1_requests.py +4 -60
- app/db/session.py +25 -8
- app/dependencies.py +43 -16
- app/main.py +12 -67
- app/modules/accounts/repository.py +21 -9
- app/modules/proxy/api.py +58 -0
- app/modules/proxy/load_balancer.py +75 -58
- app/modules/proxy/repo_bundle.py +23 -0
- app/modules/proxy/service.py +98 -102
- app/modules/request_logs/repository.py +3 -0
- app/modules/usage/service.py +65 -4
- {codex_lb-0.4.0.dist-info → codex_lb-0.5.0.dist-info}/METADATA +3 -2
- {codex_lb-0.4.0.dist-info → codex_lb-0.5.0.dist-info}/RECORD +28 -17
- {codex_lb-0.4.0.dist-info → codex_lb-0.5.0.dist-info}/WHEEL +0 -0
- {codex_lb-0.4.0.dist-info → codex_lb-0.5.0.dist-info}/entry_points.txt +0 -0
- {codex_lb-0.4.0.dist-info → codex_lb-0.5.0.dist-info}/licenses/LICENSE +0 -0
app/core/openai/chat_responses.py
NEW
@@ -0,0 +1,534 @@
+from __future__ import annotations
+
+import json
+import time
+from collections.abc import AsyncIterator, Iterable, Mapping
+from dataclasses import dataclass, field
+from typing import cast
+
+from pydantic import BaseModel, ConfigDict
+
+from app.core.errors import openai_error
+from app.core.types import JsonValue
+
+
+class ChatToolCallFunction(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    name: str | None = None
+    arguments: str | None = None
+
+
+class ChatToolCallDelta(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    index: int
+    id: str | None = None
+    type: str = "function"
+    function: ChatToolCallFunction | None = None
+
+
+class ChatChunkDelta(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    role: str | None = None
+    content: str | None = None
+    tool_calls: list[ChatToolCallDelta] | None = None
+
+
+class ChatChunkChoice(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    index: int
+    delta: ChatChunkDelta
+    finish_reason: str | None = None
+
+
+class ChatCompletionChunk(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    id: str
+    object: str = "chat.completion.chunk"
+    created: int
+    model: str
+    choices: list[ChatChunkChoice]
+
+
+class ChatMessageToolCall(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    id: str | None = None
+    type: str = "function"
+    function: ChatToolCallFunction | None = None
+
+
+class ChatCompletionMessage(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    role: str
+    content: str | None = None
+    tool_calls: list[ChatMessageToolCall] | None = None
+
+
+class ChatCompletionChoice(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    index: int
+    message: ChatCompletionMessage
+    finish_reason: str | None = None
+
+
+class ChatCompletionUsage(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
+    total_tokens: int | None = None
+
+
+class ChatCompletion(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    id: str
+    object: str = "chat.completion"
+    created: int
+    model: str
+    choices: list[ChatCompletionChoice]
+    usage: ChatCompletionUsage | None = None
+
+
+@dataclass
+class ToolCallIndex:
+    indexes: dict[str, int] = field(default_factory=dict)
+    next_index: int = 0
+
+    def index_for(self, call_id: str | None, name: str | None) -> int:
+        key = _tool_call_key(call_id, name)
+        if key is None:
+            return 0
+        if key not in self.indexes:
+            self.indexes[key] = self.next_index
+            self.next_index += 1
+        return self.indexes[key]
+
+
+@dataclass
+class _ChatChunkState:
+    tool_index: ToolCallIndex = field(default_factory=ToolCallIndex)
+    saw_tool_call: bool = False
+    sent_role: bool = False
+
+
+@dataclass
+class ToolCallDelta:
+    index: int
+    call_id: str | None
+    name: str | None
+    arguments: str | None
+    tool_type: str | None
+
+    def to_chunk_call(self) -> ChatToolCallDelta:
+        function = _build_tool_call_function(self.name, self.arguments)
+        return ChatToolCallDelta(
+            index=self.index,
+            id=self.call_id,
+            type=self.tool_type or "function",
+            function=function,
+        )
+
+
+@dataclass
+class ToolCallState:
+    index: int
+    call_id: str | None = None
+    name: str | None = None
+    arguments: str = ""
+    tool_type: str = "function"
+
+    def apply_delta(self, delta: ToolCallDelta) -> None:
+        if delta.call_id:
+            self.call_id = delta.call_id
+        if delta.name:
+            self.name = delta.name
+        if delta.arguments:
+            self.arguments += delta.arguments
+        if delta.tool_type:
+            self.tool_type = delta.tool_type
+
+    def to_message_tool_call(self) -> ChatMessageToolCall | None:
+        function = _build_tool_call_function(self.name, self.arguments or None)
+        if self.call_id is None and function is None:
+            return None
+        return ChatMessageToolCall(
+            id=self.call_id,
+            type=self.tool_type or "function",
+            function=function,
+        )
+
+
+def _build_tool_call_function(name: str | None, arguments: str | None) -> ChatToolCallFunction | None:
+    if name is None and arguments is None:
+        return None
+    return ChatToolCallFunction(name=name, arguments=arguments)
+
+
+def _parse_data(line: str) -> dict[str, JsonValue] | None:
+    if line.startswith("data:"):
+        data = line[5:].strip()
+        if not data or data == "[DONE]":
+            return None
+        try:
+            payload = json.loads(data)
+        except json.JSONDecodeError:
+            return None
+        if isinstance(payload, dict):
+            return cast(dict[str, JsonValue], payload)
+    return None
+
+
+def iter_chat_chunks(
+    lines: Iterable[str],
+    model: str,
+    *,
+    created: int | None = None,
+    state: _ChatChunkState | None = None,
+) -> Iterable[str]:
+    created = created or int(time.time())
+    state = state or _ChatChunkState()
+    for line in lines:
+        payload = _parse_data(line)
+        if not payload:
+            continue
+        event_type = payload.get("type")
+        if event_type == "response.output_text.delta":
+            delta = payload.get("delta")
+            role = None
+            if not state.sent_role:
+                role = "assistant"
+            chunk = ChatCompletionChunk(
+                id="chatcmpl_temp",
+                created=created,
+                model=model,
+                choices=[
+                    ChatChunkChoice(
+                        index=0,
+                        delta=ChatChunkDelta(
+                            role=role,
+                            content=delta if isinstance(delta, str) else None,
+                        ),
+                        finish_reason=None,
+                    )
+                ],
+            )
+            yield _dump_chunk(chunk)
+            if role is not None:
+                state.sent_role = True
+        tool_delta = _tool_call_delta_from_payload(payload, state.tool_index)
+        if tool_delta is not None:
+            state.saw_tool_call = True
+            role = None
+            if not state.sent_role:
+                role = "assistant"
+            chunk = ChatCompletionChunk(
+                id="chatcmpl_temp",
+                created=created,
+                model=model,
+                choices=[
+                    ChatChunkChoice(
+                        index=0,
+                        delta=ChatChunkDelta(
+                            role=role,
+                            tool_calls=[tool_delta.to_chunk_call()],
+                        ),
+                        finish_reason=None,
+                    )
+                ],
+            )
+            yield _dump_chunk(chunk)
+            if role is not None:
+                state.sent_role = True
+        if event_type in ("response.failed", "error"):
+            error = None
+            if event_type == "response.failed":
+                response = payload.get("response")
+                if isinstance(response, dict):
+                    maybe_error = response.get("error")
+                    if isinstance(maybe_error, dict):
+                        error = maybe_error
+            else:
+                maybe_error = payload.get("error")
+                if isinstance(maybe_error, dict):
+                    error = maybe_error
+            if error is not None:
+                error_payload = {"error": error}
+                yield _dump_sse(error_payload)
+            yield "data: [DONE]\n\n"
+            return
+        if event_type == "response.completed":
+            finish_reason = "tool_calls" if state.saw_tool_call else "stop"
+            done = ChatCompletionChunk(
+                id="chatcmpl_temp",
+                created=created,
+                model=model,
+                choices=[
+                    ChatChunkChoice(
+                        index=0,
+                        delta=ChatChunkDelta(),
+                        finish_reason=finish_reason,
+                    )
+                ],
+            )
+            yield _dump_chunk(done)
+            yield "data: [DONE]\n\n"
+            return
+
+
+async def stream_chat_chunks(stream: AsyncIterator[str], model: str) -> AsyncIterator[str]:
+    created = int(time.time())
+    state = _ChatChunkState()
+    async for line in stream:
+        for chunk in iter_chat_chunks([line], model=model, created=created, state=state):
+            yield chunk
+            if chunk.strip() == "data: [DONE]":
+                return
+
+
+async def collect_chat_completion(stream: AsyncIterator[str], model: str) -> dict[str, JsonValue]:
+    created = int(time.time())
+    content_parts: list[str] = []
+    response_id: str | None = None
+    usage: dict[str, JsonValue] | None = None
+    tool_index = ToolCallIndex()
+    tool_calls: list[ToolCallState] = []
+
+    async for line in stream:
+        payload = _parse_data(line)
+        if not payload:
+            continue
+        event_type = payload.get("type")
+        if event_type == "response.output_text.delta":
+            delta = payload.get("delta")
+            if isinstance(delta, str):
+                content_parts.append(delta)
+        tool_delta = _tool_call_delta_from_payload(payload, tool_index)
+        if tool_delta is not None:
+            _merge_tool_call_delta(tool_calls, tool_delta)
+        if event_type in ("response.failed", "error"):
+            error = None
+            if event_type == "response.failed":
+                response = payload.get("response")
+                if isinstance(response, dict):
+                    maybe_error = response.get("error")
+                    if isinstance(maybe_error, dict):
+                        error = maybe_error
+            else:
+                maybe_error = payload.get("error")
+                if isinstance(maybe_error, dict):
+                    error = maybe_error
+            if error is not None:
+                return {"error": error}
+            return cast(dict[str, JsonValue], openai_error("upstream_error", "Upstream error"))
+        if event_type == "response.completed":
+            response = payload.get("response")
+            if isinstance(response, dict):
+                response_id_value = response.get("id")
+                if isinstance(response_id_value, str):
+                    response_id = response_id_value
+                usage_value = response.get("usage")
+                if isinstance(usage_value, dict):
+                    usage = usage_value
+
+    message_content = "".join(content_parts)
+    message_tool_calls = _compact_tool_calls(tool_calls)
+    has_tool_calls = bool(message_tool_calls)
+    message = ChatCompletionMessage(
+        role="assistant",
+        content=message_content if message_content or not has_tool_calls else None,
+        tool_calls=message_tool_calls or None,
+    )
+    choice = ChatCompletionChoice(
+        index=0,
+        message=message,
+        finish_reason="tool_calls" if has_tool_calls else "stop",
+    )
+    completion = ChatCompletion(
+        id=response_id or "chatcmpl_temp",
+        created=created,
+        model=model,
+        choices=[choice],
+        usage=_map_usage(usage),
+    )
+    return _dump_completion(completion)
+
+
+def _map_usage(usage: dict[str, JsonValue] | None) -> ChatCompletionUsage | None:
+    if not usage:
+        return None
+    prompt_tokens = usage.get("input_tokens")
+    completion_tokens = usage.get("output_tokens")
+    total_tokens = usage.get("total_tokens")
+    if not isinstance(prompt_tokens, int):
+        prompt_tokens = None
+    if not isinstance(completion_tokens, int):
+        completion_tokens = None
+    if not isinstance(total_tokens, int):
+        total_tokens = None
+    if prompt_tokens is None and completion_tokens is None and total_tokens is None:
+        return None
+    return ChatCompletionUsage(
+        prompt_tokens=prompt_tokens,
+        completion_tokens=completion_tokens,
+        total_tokens=total_tokens,
+    )
+
+
+def _dump_chunk(chunk: ChatCompletionChunk) -> str:
+    payload = chunk.model_dump(mode="json", exclude_none=True)
+    return _dump_sse(payload)
+
+
+def _dump_completion(completion: ChatCompletion) -> dict[str, JsonValue]:
+    payload = completion.model_dump(mode="json", exclude_none=True)
+    return cast(dict[str, JsonValue], payload)
+
+
+def _dump_sse(payload: dict[str, JsonValue]) -> str:
+    return f"data: {json.dumps(payload)}\n\n"
+
+
+def _tool_call_delta_from_payload(payload: Mapping[str, JsonValue], indexer: ToolCallIndex) -> ToolCallDelta | None:
+    if not _is_tool_call_event(payload):
+        return None
+    fields = _extract_tool_call_fields(payload)
+    if fields is None:
+        return None
+    call_id, name, arguments, tool_type = fields
+    index = indexer.index_for(call_id, name)
+    return ToolCallDelta(
+        index=index,
+        call_id=call_id,
+        name=name,
+        arguments=arguments,
+        tool_type=tool_type,
+    )
+
+
+def _is_tool_call_event(payload: Mapping[str, JsonValue]) -> bool:
+    event_type = payload.get("type")
+    if isinstance(event_type, str) and ("tool_call" in event_type or "function_call" in event_type):
+        return True
+    item = _as_mapping(payload.get("item"))
+    if item is not None:
+        item_type = item.get("type")
+        if isinstance(item_type, str) and ("tool" in item_type or "function" in item_type):
+            return True
+        if any(key in item for key in ("call_id", "tool_call_id", "arguments", "function", "name")):
+            return True
+    if any(key in payload for key in ("call_id", "tool_call_id")):
+        return True
+    if "arguments" in payload and ("name" in payload or "function" in payload):
+        return True
+    return False
+
+
+def _extract_tool_call_fields(
+    payload: Mapping[str, JsonValue],
+) -> tuple[str | None, str | None, str | None, str | None] | None:
+    candidate = _select_tool_call_candidate(payload)
+    delta = candidate.get("delta")
+    delta_map = _as_mapping(delta)
+    delta_text = delta if isinstance(delta, str) else None
+
+    call_id = _first_str(
+        candidate.get("call_id"),
+        candidate.get("tool_call_id"),
+        candidate.get("id"),
+    )
+    if call_id is None and delta_map is not None:
+        call_id = _first_str(
+            delta_map.get("id"),
+            delta_map.get("call_id"),
+            delta_map.get("tool_call_id"),
+        )
+
+    name = _first_str(candidate.get("name"), candidate.get("tool_name"))
+    if name is None and delta_map is not None:
+        name = _first_str(delta_map.get("name"))
+    if name is None:
+        function = _as_mapping(candidate.get("function"))
+        if function is not None:
+            name = _first_str(function.get("name"))
+    if name is None and delta_map is not None:
+        function = _as_mapping(delta_map.get("function"))
+        if function is not None:
+            name = _first_str(function.get("name"))
+
+    arguments = None
+    if isinstance(candidate.get("arguments"), str):
+        arguments = cast(str, candidate.get("arguments"))
+    if arguments is None and isinstance(delta_text, str):
+        arguments = delta_text
+    if arguments is None and delta_map is not None:
+        if isinstance(delta_map.get("arguments"), str):
+            arguments = cast(str, delta_map.get("arguments"))
+        else:
+            function = _as_mapping(delta_map.get("function"))
+            if function is not None and isinstance(function.get("arguments"), str):
+                arguments = cast(str, function.get("arguments"))
+
+    tool_type = _first_str(candidate.get("tool_type"), candidate.get("type"))
+    if tool_type and tool_type.startswith("response."):
+        tool_type = None
+    if tool_type in ("tool_call", "function_call"):
+        tool_type = "function"
+
+    if call_id is None and name is None and arguments is None:
+        return None
+    return call_id, name, arguments, tool_type
+
+
+def _select_tool_call_candidate(payload: Mapping[str, JsonValue]) -> Mapping[str, JsonValue]:
+    item = _as_mapping(payload.get("item"))
+    if item is not None:
+        item_type = item.get("type")
+        if isinstance(item_type, str) and ("tool" in item_type or "function" in item_type):
+            return item
+        if any(key in item for key in ("call_id", "tool_call_id", "arguments", "function", "name")):
+            return item
+    return payload
+
+
+def _tool_call_key(call_id: str | None, name: str | None) -> str | None:
+    if call_id:
+        return f"id:{call_id}"
+    if name:
+        return f"name:{name}"
+    return None
+
+
+def _as_mapping(value: JsonValue) -> Mapping[str, JsonValue] | None:
+    if isinstance(value, Mapping):
+        return cast(Mapping[str, JsonValue], value)
+    return None
+
+
+def _first_str(*values: object) -> str | None:
+    for value in values:
+        if isinstance(value, str) and value:
+            return value
+    return None
+
+
+def _merge_tool_call_delta(tool_calls: list[ToolCallState], delta: ToolCallDelta) -> None:
+    while len(tool_calls) <= delta.index:
+        tool_calls.append(ToolCallState(index=len(tool_calls)))
+    tool_calls[delta.index].apply_delta(delta)
+
+
+def _compact_tool_calls(tool_calls: list[ToolCallState]) -> list[ChatMessageToolCall]:
+    cleaned: list[ChatMessageToolCall] = []
+    for call in tool_calls:
+        tool_call = call.to_message_tool_call()
+        if tool_call is not None:
+            cleaned.append(tool_call)
+    return cleaned
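
The module above translates the upstream Responses SSE protocol into Chat Completions shapes: iter_chat_chunks and stream_chat_chunks re-emit text and tool-call deltas as chat.completion.chunk SSE frames, and collect_chat_completion folds a whole stream into one chat.completion object. A minimal sketch of driving the streaming path; the upstream events here are hand-written stand-ins, not captured traffic:

import asyncio

from app.core.openai.chat_responses import stream_chat_chunks


async def upstream():
    # Hypothetical Responses SSE lines: two text deltas, then completion.
    yield 'data: {"type": "response.output_text.delta", "delta": "Hel"}'
    yield 'data: {"type": "response.output_text.delta", "delta": "lo"}'
    yield 'data: {"type": "response.completed", "response": {"id": "resp_1"}}'


async def main() -> None:
    # Prints chat.completion.chunk frames, ending with "data: [DONE]".
    async for frame in stream_chat_chunks(upstream(), model="gpt-5.2"):
        print(frame, end="")


asyncio.run(main())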
app/core/openai/message_coercion.py
NEW
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+from typing import cast
+
+from app.core.types import JsonValue
+
+
+def coerce_messages(existing_instructions: str, messages: list[JsonValue]) -> tuple[str, list[JsonValue]]:
+    instruction_parts: list[str] = []
+    input_messages: list[JsonValue] = []
+    for message in messages:
+        if not isinstance(message, dict):
+            raise ValueError("Each message must be an object.")
+        message_dict = cast(dict[str, JsonValue], message)
+        role_value = message_dict.get("role")
+        role = role_value if isinstance(role_value, str) else None
+        if role in ("system", "developer"):
+            content_text = _content_to_text(message_dict.get("content"))
+            if content_text:
+                instruction_parts.append(content_text)
+            continue
+        input_messages.append(cast(JsonValue, message_dict))
+    merged = _merge_instructions(existing_instructions, instruction_parts)
+    return merged, input_messages
+
+
+def _merge_instructions(existing: str, extra_parts: list[str]) -> str:
+    if not extra_parts:
+        return existing
+    extra = "\n".join([part for part in extra_parts if part])
+    if not extra:
+        return existing
+    if existing:
+        return f"{existing}\n{extra}"
+    return extra
+
+
+def _content_to_text(content: object) -> str | None:
+    if content is None:
+        return None
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for part in content:
+            if isinstance(part, str):
+                parts.append(part)
+            elif isinstance(part, dict):
+                part_dict = cast(dict[str, JsonValue], part)
+                text = part_dict.get("text")
+                if isinstance(text, str):
+                    parts.append(text)
+        return "\n".join([part for part in parts if part])
+    if isinstance(content, dict):
+        content_dict = cast(dict[str, JsonValue], content)
+        text = content_dict.get("text")
+        if isinstance(text, str):
+            return text
+        return None
+    return None
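
coerce_messages exists because Chat-style clients send system/developer turns inline in the messages array, while the Responses payload carries them as a single instructions string. A quick illustration with made-up values:

from app.core.openai.message_coercion import coerce_messages

instructions, input_messages = coerce_messages(
    "Base instructions.",
    [
        {"role": "system", "content": "Always answer in French."},
        {"role": "user", "content": [{"type": "text", "text": "Bonjour!"}]},
    ],
)
# instructions == "Base instructions.\nAlways answer in French."
# input_messages keeps only the user turn.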
app/core/openai/models_catalog.py
NEW
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+from pydantic import BaseModel, ConfigDict
+
+
+class ModelLimits(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    context: int
+    output: int
+
+
+class ModelModalities(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    input: list[str]
+    output: list[str]
+
+
+class ModelVariant(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    reasoningEffort: str
+    reasoningSummary: str
+    textVerbosity: str
+
+
+class ModelEntry(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    name: str
+    limit: ModelLimits
+    modalities: ModelModalities
+    variants: dict[str, ModelVariant]
+
+
+MODEL_CATALOG: dict[str, ModelEntry] = {
+    "gpt-5.2": ModelEntry(
+        name="GPT 5.2",
+        limit=ModelLimits(context=272000, output=128000),
+        modalities=ModelModalities(input=["text", "image"], output=["text"]),
+        variants={
+            "none": ModelVariant(reasoningEffort="none", reasoningSummary="auto", textVerbosity="medium"),
+            "low": ModelVariant(reasoningEffort="low", reasoningSummary="auto", textVerbosity="medium"),
+            "medium": ModelVariant(reasoningEffort="medium", reasoningSummary="auto", textVerbosity="medium"),
+            "high": ModelVariant(reasoningEffort="high", reasoningSummary="detailed", textVerbosity="medium"),
+            "xhigh": ModelVariant(reasoningEffort="xhigh", reasoningSummary="detailed", textVerbosity="medium"),
+        },
+    ),
+    "gpt-5.2-codex": ModelEntry(
+        name="GPT 5.2 Codex",
+        limit=ModelLimits(context=272000, output=128000),
+        modalities=ModelModalities(input=["text", "image"], output=["text"]),
+        variants={
+            "low": ModelVariant(reasoningEffort="low", reasoningSummary="auto", textVerbosity="medium"),
+            "medium": ModelVariant(reasoningEffort="medium", reasoningSummary="auto", textVerbosity="medium"),
+            "high": ModelVariant(reasoningEffort="high", reasoningSummary="detailed", textVerbosity="medium"),
+            "xhigh": ModelVariant(reasoningEffort="xhigh", reasoningSummary="detailed", textVerbosity="medium"),
+        },
+    ),
+    "gpt-5.1-codex-max": ModelEntry(
+        name="GPT 5.1 Codex Max",
+        limit=ModelLimits(context=272000, output=128000),
+        modalities=ModelModalities(input=["text", "image"], output=["text"]),
+        variants={
+            "low": ModelVariant(reasoningEffort="low", reasoningSummary="detailed", textVerbosity="medium"),
+            "medium": ModelVariant(reasoningEffort="medium", reasoningSummary="detailed", textVerbosity="medium"),
+            "high": ModelVariant(reasoningEffort="high", reasoningSummary="detailed", textVerbosity="medium"),
+            "xhigh": ModelVariant(reasoningEffort="xhigh", reasoningSummary="detailed", textVerbosity="medium"),
+        },
+    ),
+}
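
MODEL_CATALOG is static Pydantic data, so consumers can enumerate it directly; a sketch of walking the catalog (the printed format is illustrative only, not taken from this diff):

from app.core.openai.models_catalog import MODEL_CATALOG

for model_id, entry in MODEL_CATALOG.items():
    # e.g. "gpt-5.2-codex: 272000 ctx / 128000 out"
    print(f"{model_id}: {entry.limit.context} ctx / {entry.limit.output} out")
    for variant_key, variant in entry.variants.items():
        print(f"  {variant_key}: effort={variant.reasoningEffort}, summary={variant.reasoningSummary}")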
app/core/openai/requests.py
CHANGED
@@ -35,10 +35,10 @@ class ResponsesRequest(BaseModel):
     instructions: str
     input: list[JsonValue]
     tools: list[JsonValue] = Field(default_factory=list)
-    tool_choice: str | None = None
+    tool_choice: str | dict[str, JsonValue] | None = None
     parallel_tool_calls: bool | None = None
     reasoning: ResponsesReasoning | None = None
-    store: bool
+    store: bool = False
     stream: bool | None = None
     include: list[str] = Field(default_factory=list)
     prompt_cache_key: str | None = None
@@ -46,10 +46,10 @@ class ResponsesRequest(BaseModel):
 
     @field_validator("store")
     @classmethod
-    def _ensure_store_false(cls, value: bool) -> bool:
+    def _ensure_store_false(cls, value: bool | None) -> bool:
         if value is True:
             raise ValueError("store must be false")
-        return value
+        return False if value is None else value
 
     def to_payload(self) -> JsonObject:
         payload = self.model_dump(mode="json", exclude_none=True)