PyPI - langchain-codex-plus - Versions diffs - 0.0.2__tar.gz → 0.0.4__tar.gz - Mend

langchain-codex-plus 0.0.2 (tar.gz) → 0.0.4 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

{langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langchain-codex-plus
-Version: 0.0.2
+Version: 0.0.4
 Summary: LangChain ChatModel for OpenAI Codex Plus / Pro (ChatGPT-account subscription protocol, not api.openai.com).
 Project-URL: Homepage, https://github.com/jasoncarreira/langchain-codex-plus
 Project-URL: Issues, https://github.com/jasoncarreira/langchain-codex-plus/issues

{langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/langchain_codex_plus/__init__.py RENAMED Viewed

@@ -28,6 +28,7 @@ from langchain_codex_plus.codex_protocol import (
     CodexToolCall,
     SseEvent,
     ToolChoice,
+    aparse_sse_stream,
     build_request_body,
     consume_events,
     parse_error_body,
@@ -66,6 +67,7 @@ __all__ = [
     "build_request_body",
     "consume_events",
     "parse_error_body",
+    "aparse_sse_stream",
     "parse_sse_stream",
     # rate_limits
     "CodexCredits",

{langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/langchain_codex_plus/codex_chat_model.py RENAMED Viewed

@@ -99,6 +99,7 @@ from langchain_codex_plus.codex_protocol import (
     CodexResponseError,
     SseEvent,
     ToolChoice,
+    aparse_sse_stream,
     build_request_body,
     consume_events,
     first_stop_match,
@@ -552,13 +553,21 @@ class ChatCodexPlus(BaseChatModel):
             return
         body_bytes = response.read()
         err = parse_error_body(body_bytes)
-        # Re-raise with status code prepended so callers can pattern-
-        # match on it.
+        # Surface the rate-limit headers on errors too (esp. 429): fire
+        # the callback so the usage snapshot updates even on a refusal,
+        # and attach status_code + headers + parsed limits to the
+        # exception. Previously these were discarded — this method ran
+        # BEFORE _fire_rate_limit_callback on the success path, so a 429
+        # gave callers no reset timestamp to pause on.
+        rate_limits = self._fire_rate_limit_callback(response.headers)
         raise CodexResponseError(
             message=f"HTTP {response.status_code}: {err.message}",
             code=err.code,
             type=err.type,
             raw=err.raw,
+            status_code=response.status_code,
+            headers=dict(response.headers),
+            rate_limits=rate_limits,
         )
     def _consume_sync(
@@ -686,11 +695,17 @@ class ChatCodexPlus(BaseChatModel):
             return
         body_bytes = await response.aread()
         err = parse_error_body(body_bytes)
+        # See _raise_for_http_error: surface rate-limit headers on errors
+        # so a 429 carries its reset timestamp instead of being opaque.
+        rate_limits = self._fire_rate_limit_callback(response.headers)
         raise CodexResponseError(
             message=f"HTTP {response.status_code}: {err.message}",
             code=err.code,
             type=err.type,
             raw=err.raw,
+            status_code=response.status_code,
+            headers=dict(response.headers),
+            rate_limits=rate_limits,
         )
     async def _consume_async(
@@ -772,10 +787,11 @@ class ChatCodexPlus(BaseChatModel):
             try:
                 await self._araise_for_http_error(response)
                 self._fire_rate_limit_callback(response.headers)
-                lines: list[str] = []
-                async for line in response.aiter_lines():
-                    lines.append(line)
-                events = parse_sse_stream(lines)
+                # Stream events as lines arrive (aparse_sse_stream consumes the
+                # async line iterator incrementally) so token-level deltas reach
+                # callers in real time. Previously this buffered the whole
+                # response into a list before parsing, collapsing the stream.
+                events = aparse_sse_stream(response.aiter_lines())
                 # Mirror of ``_yield_chunks_sync`` for the async path —
                 # kept inline so callers can ``await
                 # run_manager.on_llm_new_token`` on each text delta.
@@ -795,7 +811,7 @@ class ChatCodexPlus(BaseChatModel):
                     max((len(s) for s in stop), default=0) if stop else 0
                 )
                 stopped_early = False
-                for ev in events:
+                async for ev in events:
                     if ev.event == "response.created":
                         resp = ev.data.get("response") or {}
                         if isinstance(resp, dict):

{langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/langchain_codex_plus/codex_protocol.py RENAMED Viewed

@@ -34,7 +34,7 @@ body when the HTTP envelope is non-200; both shapes carry
 from __future__ import annotations
 import json
-from collections.abc import Iterable, Iterator
+from collections.abc import AsyncIterator, Iterable, Iterator
 from dataclasses import dataclass, field
 from typing import Any
@@ -398,30 +398,46 @@ class SseEvent:
     data: dict[str, Any] = field(default_factory=dict)
-def parse_sse_stream(lines: Iterable[str]) -> Iterator[SseEvent]:
-    """Parse Codex's SSE byte stream into :class:`SseEvent` objects.
+class _SseLineParser:
+    """Incremental SSE line parser shared by the sync + async stream parsers.
-    Input: iterable of decoded lines (one per ``\\n`` boundary). The
-    chat model decodes the response stream and passes the lines here.
+    Feed lines one at a time: ``feed`` returns a completed :class:`SseEvent`
+    on each event boundary (blank line); ``close`` flushes a trailing event
+    with no final blank line. Splitting the per-line logic out lets the async
+    parser stream events AS LINES ARRIVE instead of buffering the whole
+    response first — the previous ``_astream`` did the latter, which collapsed
+    Codex's token-level SSE deltas into one post-completion burst.
+    """
-    Robust to:
+    def __init__(self) -> None:
+        self._event: str = ""
+        self._data: list[str] = []
-    * Blank lines (event separators) — used as boundary markers.
-    * Multi-line ``data:`` values — accumulated until the blank line.
-    * Missing ``event:`` — yields an event with empty ``event`` string
-      so callers can detect malformed input.
-    * Garbage ``data:`` JSON — yielded with ``data={}`` and the raw
-      text dropped (we err on the side of "keep streaming" over
-      "crash mid-response").
-    """
-    current_event: str = ""
-    data_buffer: list[str] = []
+    def feed(self, line: str) -> SseEvent | None:
+        # SSE lines are LF-separated; strip a trailing ``\r\n`` if present.
+        line = line.rstrip("\r\n")
+        if not line:
+            # Empty line = event boundary.
+            return self._flush()
+        if line.startswith(":"):
+            # SSE comment line — heartbeat / keepalive. Ignore.
+            return None
+        if line.startswith("event:"):
+            self._event = line[len("event:"):].strip()
+        elif line.startswith("data:"):
+            self._data.append(line[len("data:"):].lstrip())
+        # Ignore unknown SSE fields (``id:``, ``retry:``) — Codex
+        # doesn't use them today.
+        return None
-    def flush() -> SseEvent | None:
-        nonlocal current_event, data_buffer
-        if not current_event and not data_buffer:
+    def close(self) -> SseEvent | None:
+        # Trailing event without a final blank line (rare; defensive).
+        return self._flush()
+    def _flush(self) -> SseEvent | None:
+        if not self._event and not self._data:
             return None
-        raw_data = "\n".join(data_buffer)
+        raw_data = "\n".join(self._data)
         parsed: dict[str, Any]
         if not raw_data:
             parsed = {}
@@ -432,33 +448,53 @@ def parse_sse_stream(lines: Iterable[str]) -> Iterator[SseEvent]:
                     parsed = {"_raw": parsed}
             except json.JSONDecodeError:
                 parsed = {}
-        evt = SseEvent(event=current_event, data=parsed)
-        current_event = ""
-        data_buffer = []
+        evt = SseEvent(event=self._event, data=parsed)
+        self._event = ""
+        self._data = []
         return evt
+def parse_sse_stream(lines: Iterable[str]) -> Iterator[SseEvent]:
+    """Parse Codex's SSE byte stream into :class:`SseEvent` objects.
+    Input: iterable of decoded lines (one per ``\\n`` boundary). The
+    chat model decodes the response stream and passes the lines here.
+    Robust to:
+    * Blank lines (event separators) — used as boundary markers.
+    * Multi-line ``data:`` values — accumulated until the blank line.
+    * Missing ``event:`` — yields an event with empty ``event`` string
+      so callers can detect malformed input.
+    * Garbage ``data:`` JSON — yielded with ``data={}`` and the raw
+      text dropped (we err on the side of "keep streaming" over
+      "crash mid-response").
+    """
+    parser = _SseLineParser()
     for line in lines:
-        # SSE lines are LF-separated; if iterable yields with trailing
-        # ``\r\n`` strip it.
-        line = line.rstrip("\r\n")
-        if not line:
-            # Empty line = event boundary.
-            evt = flush()
-            if evt is not None:
-                yield evt
-            continue
-        if line.startswith(":"):
-            # SSE comment line — heartbeat / keepalive. Ignore.
-            continue
-        if line.startswith("event:"):
-            current_event = line[len("event:"):].strip()
-        elif line.startswith("data:"):
-            data_buffer.append(line[len("data:"):].lstrip())
-        # Ignore unknown SSE fields (``id:``, ``retry:``) — Codex
-        # doesn't use them today.
+        evt = parser.feed(line)
+        if evt is not None:
+            yield evt
+    final = parser.close()
+    if final is not None:
+        yield final
-    # Trailing event without a final blank line (rare; defensive).
-    final = flush()
+async def aparse_sse_stream(lines: AsyncIterator[str]) -> AsyncIterator[SseEvent]:
+    """Async counterpart to :func:`parse_sse_stream`.
+    Consumes an async line iterator (e.g. ``httpx.Response.aiter_lines()``)
+    and yields :class:`SseEvent` objects AS LINES ARRIVE — so the chat model's
+    ``_astream`` streams Codex's token-level deltas in real time instead of
+    buffering the whole response first. Identical parsing semantics to the
+    sync version (both share :class:`_SseLineParser`).
+    """
+    parser = _SseLineParser()
+    async for line in lines:
+        evt = parser.feed(line)
+        if evt is not None:
+            yield evt
+    final = parser.close()
     if final is not None:
         yield final
@@ -746,15 +782,31 @@ class CodexResponseError(RuntimeError):
         code: str | None = None,
         type: str | None = None,
         raw: Any = None,
+        status_code: int | None = None,
+        headers: dict[str, str] | None = None,
+        rate_limits: Any = None,
     ) -> None:
         super().__init__(message)
         self.message = message
         self.code = code
         self.type = type
         self.raw = raw
+        # HTTP envelope context — populated for non-2xx responses so
+        # callers can pattern-match on the status and, crucially, read
+        # the rate-limit headers on a 429 (the reset timestamp lives in
+        # ``x-codex-primary-reset-at`` / ``-reset-after-seconds``). These
+        # used to be discarded; a caller hitting a 429 had no way to know
+        # when the window would roll over. ``rate_limits`` is the parsed
+        # :class:`~langchain_codex_plus.rate_limits.CodexRateLimits` (or
+        # ``None`` when the response carried no ``x-codex-*`` headers).
+        self.status_code = status_code
+        self.headers = headers
+        self.rate_limits = rate_limits
     def __repr__(self) -> str:
         bits = [f"message={self.message!r}"]
+        if self.status_code is not None:
+            bits.append(f"status_code={self.status_code!r}")
         if self.code:
             bits.append(f"code={self.code!r}")
         if self.type:

{langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "langchain-codex-plus"
-version = "0.0.2"
+version = "0.0.4"
 description = "LangChain ChatModel for OpenAI Codex Plus / Pro (ChatGPT-account subscription protocol, not api.openai.com)."
 readme = "README.md"
 requires-python = ">=3.11"

{langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/test_codex_chat_model.py RENAMED Viewed

@@ -290,6 +290,50 @@ def test_generate_raises_on_oai_error_shape(auth_file):
     assert exc.value.type == "invalid_request_error"
+def test_generate_429_surfaces_rate_limit_headers(auth_file):
+    """A 429 must surface status_code + headers + parsed rate_limits on
+    the exception (so callers can pause until the real window reset) AND
+    fire the rate-limit callback even on the refusal. Previously the
+    headers were discarded — _raise_for_http_error ran before the
+    callback on the success path, so a 429 was opaque."""
+    transport = _CaptureTransport(
+        status_code=429,
+        body=b'{"detail":"Rate limit exceeded"}',
+        headers=_real_rl_headers(),
+    )
+    seen: list[CodexRateLimits] = []
+    llm = _make_llm(
+        auth_file, transport=transport, rate_limit_callback=seen.append
+    )
+    with pytest.raises(CodexResponseError) as exc:
+        llm.invoke([HumanMessage("hi")])
+    err = exc.value
+    assert err.status_code == 429
+    assert err.headers is not None
+    assert err.headers.get("x-codex-primary-reset-at") == "1779343790"
+    assert err.rate_limits is not None
+    assert err.rate_limits.primary is not None
+    assert err.rate_limits.primary.reset_at == 1779343790
+    # The callback fired on the 429 (usage snapshot updates on refusals).
+    assert len(seen) == 1
+    assert seen[0].primary.reset_at == 1779343790
+def test_generate_429_without_codex_headers_still_sets_status(auth_file):
+    """A 429 with no ``x-codex-*`` headers still carries status_code (and
+    rate_limits is None) — callers fall back to a backoff."""
+    transport = _CaptureTransport(
+        status_code=429,
+        body=b'{"detail":"Rate limit exceeded"}',
+        headers={"Content-Type": "application/json"},
+    )
+    llm = _make_llm(auth_file, transport=transport)
+    with pytest.raises(CodexResponseError) as exc:
+        llm.invoke([HumanMessage("hi")])
+    assert exc.value.status_code == 429
+    assert exc.value.rate_limits is None
 def test_generate_stop_argument_is_ignored_silently(auth_file, caplog):
     """Codex Responses API doesn't expose stop sequences. We log at
     DEBUG and proceed — silent drop in production logs."""

{langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/test_codex_protocol.py RENAMED Viewed

@@ -362,3 +362,63 @@ def test_parse_error_body_handles_garbage():
 def test_parse_error_body_empty():
     err = parse_error_body(b"")
     assert err.message == "<empty response body>"
+# ─── async incremental SSE parsing (0.0.4) ─────────────────────────────
+async def test_aparse_sse_stream_matches_sync_parser():
+    """The async parser yields identical events to the sync one."""
+    import asyncio  # noqa: F401 — parity check only
+    from langchain_codex_plus.codex_protocol import aparse_sse_stream
+    raw = [
+        "event: response.created",
+        'data: {"response": {"id": "r1"}}',
+        "",
+        "event: response.output_text.delta",
+        'data: {"delta": "hi"}',
+        "",
+        "event: response.completed",
+        'data: {"response": {"id": "r1"}}',
+        "",
+    ]
+    async def alines():
+        for line in raw:
+            yield line
+    got = [(e.event, e.data) async for e in aparse_sse_stream(alines())]
+    expected = [(e.event, e.data) for e in parse_sse_stream(raw)]
+    assert got == expected
+async def test_aparse_sse_stream_yields_before_stream_completes():
+    """The async parser yields each event AS its lines arrive, without
+    draining the rest of the stream — the property that lets ``_astream``
+    surface Codex token deltas in real time instead of post-completion."""
+    import asyncio
+    from langchain_codex_plus.codex_protocol import aparse_sse_stream
+    gate = asyncio.Event()
+    async def alines():
+        yield "event: response.output_text.delta"
+        yield 'data: {"delta": "hel"}'
+        yield ""  # boundary → first event flushes here
+        # Block until the consumer has the first event; if the parser had
+        # to drain the whole stream before yielding, this would deadlock.
+        await gate.wait()
+        yield "event: response.output_text.delta"
+        yield 'data: {"delta": "lo"}'
+        yield ""
+    agen = aparse_sse_stream(alines())
+    first = await agen.__anext__()
+    assert first.data["delta"] == "hel"  # arrived while producer is blocked
+    gate.set()
+    second = await agen.__anext__()
+    assert second.data["delta"] == "lo"
+    await agen.aclose()

{langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/uv.lock RENAMED Viewed

@@ -253,7 +253,7 @@ wheels = [
 [[package]]
 name = "langchain-codex-plus"
-version = "0.0.1"
+version = "0.0.4"
 source = { editable = "." }
 dependencies = [
     { name = "httpx" },