langchain-codex-plus 0.0.2__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/PKG-INFO +1 -1
  2. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/langchain_codex_plus/__init__.py +2 -0
  3. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/langchain_codex_plus/codex_chat_model.py +23 -7
  4. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/langchain_codex_plus/codex_protocol.py +95 -43
  5. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/pyproject.toml +1 -1
  6. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/test_codex_chat_model.py +44 -0
  7. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/test_codex_protocol.py +60 -0
  8. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/uv.lock +1 -1
  9. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/.github/workflows/publish.yml +0 -0
  10. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/.gitignore +0 -0
  11. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/LICENSE +0 -0
  12. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/README.md +0 -0
  13. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/langchain_codex_plus/codex_auth.py +0 -0
  14. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/langchain_codex_plus/py.typed +0 -0
  15. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/langchain_codex_plus/rate_limits.py +0 -0
  16. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/__init__.py +0 -0
  17. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/conftest.py +0 -0
  18. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/test_codex_auth.py +0 -0
  19. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/test_multimodal.py +0 -0
  20. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/test_oauth_refresh.py +0 -0
  21. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/test_rate_limits.py +0 -0
  22. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/test_stop_sequences.py +0 -0
  23. {langchain_codex_plus-0.0.2 → langchain_codex_plus-0.0.4}/tests/test_tool_calling.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langchain-codex-plus
3
- Version: 0.0.2
3
+ Version: 0.0.4
4
4
  Summary: LangChain ChatModel for OpenAI Codex Plus / Pro (ChatGPT-account subscription protocol, not api.openai.com).
5
5
  Project-URL: Homepage, https://github.com/jasoncarreira/langchain-codex-plus
6
6
  Project-URL: Issues, https://github.com/jasoncarreira/langchain-codex-plus/issues
@@ -28,6 +28,7 @@ from langchain_codex_plus.codex_protocol import (
28
28
  CodexToolCall,
29
29
  SseEvent,
30
30
  ToolChoice,
31
+ aparse_sse_stream,
31
32
  build_request_body,
32
33
  consume_events,
33
34
  parse_error_body,
@@ -66,6 +67,7 @@ __all__ = [
66
67
  "build_request_body",
67
68
  "consume_events",
68
69
  "parse_error_body",
70
+ "aparse_sse_stream",
69
71
  "parse_sse_stream",
70
72
  # rate_limits
71
73
  "CodexCredits",
@@ -99,6 +99,7 @@ from langchain_codex_plus.codex_protocol import (
99
99
  CodexResponseError,
100
100
  SseEvent,
101
101
  ToolChoice,
102
+ aparse_sse_stream,
102
103
  build_request_body,
103
104
  consume_events,
104
105
  first_stop_match,
@@ -552,13 +553,21 @@ class ChatCodexPlus(BaseChatModel):
552
553
  return
553
554
  body_bytes = response.read()
554
555
  err = parse_error_body(body_bytes)
555
- # Re-raise with status code prepended so callers can pattern-
556
- # match on it.
556
+ # Surface the rate-limit headers on errors too (esp. 429): fire
557
+ # the callback so the usage snapshot updates even on a refusal,
558
+ # and attach status_code + headers + parsed limits to the
559
+ # exception. Previously these were discarded — this method ran
560
+ # BEFORE _fire_rate_limit_callback on the success path, so a 429
561
+ # gave callers no reset timestamp to pause on.
562
+ rate_limits = self._fire_rate_limit_callback(response.headers)
557
563
  raise CodexResponseError(
558
564
  message=f"HTTP {response.status_code}: {err.message}",
559
565
  code=err.code,
560
566
  type=err.type,
561
567
  raw=err.raw,
568
+ status_code=response.status_code,
569
+ headers=dict(response.headers),
570
+ rate_limits=rate_limits,
562
571
  )
563
572
 
564
573
  def _consume_sync(
@@ -686,11 +695,17 @@ class ChatCodexPlus(BaseChatModel):
686
695
  return
687
696
  body_bytes = await response.aread()
688
697
  err = parse_error_body(body_bytes)
698
+ # See _raise_for_http_error: surface rate-limit headers on errors
699
+ # so a 429 carries its reset timestamp instead of being opaque.
700
+ rate_limits = self._fire_rate_limit_callback(response.headers)
689
701
  raise CodexResponseError(
690
702
  message=f"HTTP {response.status_code}: {err.message}",
691
703
  code=err.code,
692
704
  type=err.type,
693
705
  raw=err.raw,
706
+ status_code=response.status_code,
707
+ headers=dict(response.headers),
708
+ rate_limits=rate_limits,
694
709
  )
695
710
 
696
711
  async def _consume_async(
@@ -772,10 +787,11 @@ class ChatCodexPlus(BaseChatModel):
772
787
  try:
773
788
  await self._araise_for_http_error(response)
774
789
  self._fire_rate_limit_callback(response.headers)
775
- lines: list[str] = []
776
- async for line in response.aiter_lines():
777
- lines.append(line)
778
- events = parse_sse_stream(lines)
790
+ # Stream events as lines arrive (aparse_sse_stream consumes the
791
+ # async line iterator incrementally) so token-level deltas reach
792
+ # callers in real time. Previously this buffered the whole
793
+ # response into a list before parsing, collapsing the stream.
794
+ events = aparse_sse_stream(response.aiter_lines())
779
795
  # Mirror of ``_yield_chunks_sync`` for the async path —
780
796
  # kept inline so callers can ``await
781
797
  # run_manager.on_llm_new_token`` on each text delta.
@@ -795,7 +811,7 @@ class ChatCodexPlus(BaseChatModel):
795
811
  max((len(s) for s in stop), default=0) if stop else 0
796
812
  )
797
813
  stopped_early = False
798
- for ev in events:
814
+ async for ev in events:
799
815
  if ev.event == "response.created":
800
816
  resp = ev.data.get("response") or {}
801
817
  if isinstance(resp, dict):
@@ -34,7 +34,7 @@ body when the HTTP envelope is non-200; both shapes carry
34
34
  from __future__ import annotations
35
35
 
36
36
  import json
37
- from collections.abc import Iterable, Iterator
37
+ from collections.abc import AsyncIterator, Iterable, Iterator
38
38
  from dataclasses import dataclass, field
39
39
  from typing import Any
40
40
 
@@ -398,30 +398,46 @@ class SseEvent:
398
398
  data: dict[str, Any] = field(default_factory=dict)
399
399
 
400
400
 
401
- def parse_sse_stream(lines: Iterable[str]) -> Iterator[SseEvent]:
402
- """Parse Codex's SSE byte stream into :class:`SseEvent` objects.
401
+ class _SseLineParser:
402
+ """Incremental SSE line parser shared by the sync + async stream parsers.
403
403
 
404
- Input: iterable of decoded lines (one per ``\\n`` boundary). The
405
- chat model decodes the response stream and passes the lines here.
404
+ Feed lines one at a time: ``feed`` returns a completed :class:`SseEvent`
405
+ on each event boundary (blank line); ``close`` flushes a trailing event
406
+ with no final blank line. Splitting the per-line logic out lets the async
407
+ parser stream events AS LINES ARRIVE instead of buffering the whole
408
+ response first — the previous ``_astream`` did the latter, which collapsed
409
+ Codex's token-level SSE deltas into one post-completion burst.
410
+ """
406
411
 
407
- Robust to:
412
+ def __init__(self) -> None:
413
+ self._event: str = ""
414
+ self._data: list[str] = []
408
415
 
409
- * Blank lines (event separators) used as boundary markers.
410
- * Multi-line ``data:`` values accumulated until the blank line.
411
- * Missing ``event:`` — yields an event with empty ``event`` string
412
- so callers can detect malformed input.
413
- * Garbage ``data:`` JSON — yielded with ``data={}`` and the raw
414
- text dropped (we err on the side of "keep streaming" over
415
- "crash mid-response").
416
- """
417
- current_event: str = ""
418
- data_buffer: list[str] = []
416
+ def feed(self, line: str) -> SseEvent | None:
417
+ # SSE lines are LF-separated; strip a trailing ``\r\n`` if present.
418
+ line = line.rstrip("\r\n")
419
+ if not line:
420
+ # Empty line = event boundary.
421
+ return self._flush()
422
+ if line.startswith(":"):
423
+ # SSE comment line — heartbeat / keepalive. Ignore.
424
+ return None
425
+ if line.startswith("event:"):
426
+ self._event = line[len("event:"):].strip()
427
+ elif line.startswith("data:"):
428
+ self._data.append(line[len("data:"):].lstrip())
429
+ # Ignore unknown SSE fields (``id:``, ``retry:``) — Codex
430
+ # doesn't use them today.
431
+ return None
419
432
 
420
- def flush() -> SseEvent | None:
421
- nonlocal current_event, data_buffer
422
- if not current_event and not data_buffer:
433
+ def close(self) -> SseEvent | None:
434
+ # Trailing event without a final blank line (rare; defensive).
435
+ return self._flush()
436
+
437
+ def _flush(self) -> SseEvent | None:
438
+ if not self._event and not self._data:
423
439
  return None
424
- raw_data = "\n".join(data_buffer)
440
+ raw_data = "\n".join(self._data)
425
441
  parsed: dict[str, Any]
426
442
  if not raw_data:
427
443
  parsed = {}
@@ -432,33 +448,53 @@ def parse_sse_stream(lines: Iterable[str]) -> Iterator[SseEvent]:
432
448
  parsed = {"_raw": parsed}
433
449
  except json.JSONDecodeError:
434
450
  parsed = {}
435
- evt = SseEvent(event=current_event, data=parsed)
436
- current_event = ""
437
- data_buffer = []
451
+ evt = SseEvent(event=self._event, data=parsed)
452
+ self._event = ""
453
+ self._data = []
438
454
  return evt
439
455
 
456
+
457
+ def parse_sse_stream(lines: Iterable[str]) -> Iterator[SseEvent]:
458
+ """Parse Codex's SSE byte stream into :class:`SseEvent` objects.
459
+
460
+ Input: iterable of decoded lines (one per ``\\n`` boundary). The
461
+ chat model decodes the response stream and passes the lines here.
462
+
463
+ Robust to:
464
+
465
+ * Blank lines (event separators) — used as boundary markers.
466
+ * Multi-line ``data:`` values — accumulated until the blank line.
467
+ * Missing ``event:`` — yields an event with empty ``event`` string
468
+ so callers can detect malformed input.
469
+ * Garbage ``data:`` JSON — yielded with ``data={}`` and the raw
470
+ text dropped (we err on the side of "keep streaming" over
471
+ "crash mid-response").
472
+ """
473
+ parser = _SseLineParser()
440
474
  for line in lines:
441
- # SSE lines are LF-separated; if iterable yields with trailing
442
- # ``\r\n`` strip it.
443
- line = line.rstrip("\r\n")
444
- if not line:
445
- # Empty line = event boundary.
446
- evt = flush()
447
- if evt is not None:
448
- yield evt
449
- continue
450
- if line.startswith(":"):
451
- # SSE comment line — heartbeat / keepalive. Ignore.
452
- continue
453
- if line.startswith("event:"):
454
- current_event = line[len("event:"):].strip()
455
- elif line.startswith("data:"):
456
- data_buffer.append(line[len("data:"):].lstrip())
457
- # Ignore unknown SSE fields (``id:``, ``retry:``) — Codex
458
- # doesn't use them today.
475
+ evt = parser.feed(line)
476
+ if evt is not None:
477
+ yield evt
478
+ final = parser.close()
479
+ if final is not None:
480
+ yield final
459
481
 
460
- # Trailing event without a final blank line (rare; defensive).
461
- final = flush()
482
+
483
+ async def aparse_sse_stream(lines: AsyncIterator[str]) -> AsyncIterator[SseEvent]:
484
+ """Async counterpart to :func:`parse_sse_stream`.
485
+
486
+ Consumes an async line iterator (e.g. ``httpx.Response.aiter_lines()``)
487
+ and yields :class:`SseEvent` objects AS LINES ARRIVE — so the chat model's
488
+ ``_astream`` streams Codex's token-level deltas in real time instead of
489
+ buffering the whole response first. Identical parsing semantics to the
490
+ sync version (both share :class:`_SseLineParser`).
491
+ """
492
+ parser = _SseLineParser()
493
+ async for line in lines:
494
+ evt = parser.feed(line)
495
+ if evt is not None:
496
+ yield evt
497
+ final = parser.close()
462
498
  if final is not None:
463
499
  yield final
464
500
 
@@ -746,15 +782,31 @@ class CodexResponseError(RuntimeError):
746
782
  code: str | None = None,
747
783
  type: str | None = None,
748
784
  raw: Any = None,
785
+ status_code: int | None = None,
786
+ headers: dict[str, str] | None = None,
787
+ rate_limits: Any = None,
749
788
  ) -> None:
750
789
  super().__init__(message)
751
790
  self.message = message
752
791
  self.code = code
753
792
  self.type = type
754
793
  self.raw = raw
794
+ # HTTP envelope context — populated for non-2xx responses so
795
+ # callers can pattern-match on the status and, crucially, read
796
+ # the rate-limit headers on a 429 (the reset timestamp lives in
797
+ # ``x-codex-primary-reset-at`` / ``-reset-after-seconds``). These
798
+ # used to be discarded; a caller hitting a 429 had no way to know
799
+ # when the window would roll over. ``rate_limits`` is the parsed
800
+ # :class:`~langchain_codex_plus.rate_limits.CodexRateLimits` (or
801
+ # ``None`` when the response carried no ``x-codex-*`` headers).
802
+ self.status_code = status_code
803
+ self.headers = headers
804
+ self.rate_limits = rate_limits
755
805
 
756
806
  def __repr__(self) -> str:
757
807
  bits = [f"message={self.message!r}"]
808
+ if self.status_code is not None:
809
+ bits.append(f"status_code={self.status_code!r}")
758
810
  if self.code:
759
811
  bits.append(f"code={self.code!r}")
760
812
  if self.type:
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "langchain-codex-plus"
7
- version = "0.0.2"
7
+ version = "0.0.4"
8
8
  description = "LangChain ChatModel for OpenAI Codex Plus / Pro (ChatGPT-account subscription protocol, not api.openai.com)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -290,6 +290,50 @@ def test_generate_raises_on_oai_error_shape(auth_file):
290
290
  assert exc.value.type == "invalid_request_error"
291
291
 
292
292
 
293
+ def test_generate_429_surfaces_rate_limit_headers(auth_file):
294
+ """A 429 must surface status_code + headers + parsed rate_limits on
295
+ the exception (so callers can pause until the real window reset) AND
296
+ fire the rate-limit callback even on the refusal. Previously the
297
+ headers were discarded — _raise_for_http_error ran before the
298
+ callback on the success path, so a 429 was opaque."""
299
+ transport = _CaptureTransport(
300
+ status_code=429,
301
+ body=b'{"detail":"Rate limit exceeded"}',
302
+ headers=_real_rl_headers(),
303
+ )
304
+ seen: list[CodexRateLimits] = []
305
+ llm = _make_llm(
306
+ auth_file, transport=transport, rate_limit_callback=seen.append
307
+ )
308
+ with pytest.raises(CodexResponseError) as exc:
309
+ llm.invoke([HumanMessage("hi")])
310
+ err = exc.value
311
+ assert err.status_code == 429
312
+ assert err.headers is not None
313
+ assert err.headers.get("x-codex-primary-reset-at") == "1779343790"
314
+ assert err.rate_limits is not None
315
+ assert err.rate_limits.primary is not None
316
+ assert err.rate_limits.primary.reset_at == 1779343790
317
+ # The callback fired on the 429 (usage snapshot updates on refusals).
318
+ assert len(seen) == 1
319
+ assert seen[0].primary.reset_at == 1779343790
320
+
321
+
322
+ def test_generate_429_without_codex_headers_still_sets_status(auth_file):
323
+ """A 429 with no ``x-codex-*`` headers still carries status_code (and
324
+ rate_limits is None) — callers fall back to a backoff."""
325
+ transport = _CaptureTransport(
326
+ status_code=429,
327
+ body=b'{"detail":"Rate limit exceeded"}',
328
+ headers={"Content-Type": "application/json"},
329
+ )
330
+ llm = _make_llm(auth_file, transport=transport)
331
+ with pytest.raises(CodexResponseError) as exc:
332
+ llm.invoke([HumanMessage("hi")])
333
+ assert exc.value.status_code == 429
334
+ assert exc.value.rate_limits is None
335
+
336
+
293
337
  def test_generate_stop_argument_is_ignored_silently(auth_file, caplog):
294
338
  """Codex Responses API doesn't expose stop sequences. We log at
295
339
  DEBUG and proceed — silent drop in production logs."""
@@ -362,3 +362,63 @@ def test_parse_error_body_handles_garbage():
362
362
  def test_parse_error_body_empty():
363
363
  err = parse_error_body(b"")
364
364
  assert err.message == "<empty response body>"
365
+
366
+
367
+ # ─── async incremental SSE parsing (0.0.4) ─────────────────────────────
368
+
369
+
370
+ async def test_aparse_sse_stream_matches_sync_parser():
371
+ """The async parser yields identical events to the sync one."""
372
+ import asyncio # noqa: F401 — parity check only
373
+
374
+ from langchain_codex_plus.codex_protocol import aparse_sse_stream
375
+
376
+ raw = [
377
+ "event: response.created",
378
+ 'data: {"response": {"id": "r1"}}',
379
+ "",
380
+ "event: response.output_text.delta",
381
+ 'data: {"delta": "hi"}',
382
+ "",
383
+ "event: response.completed",
384
+ 'data: {"response": {"id": "r1"}}',
385
+ "",
386
+ ]
387
+
388
+ async def alines():
389
+ for line in raw:
390
+ yield line
391
+
392
+ got = [(e.event, e.data) async for e in aparse_sse_stream(alines())]
393
+ expected = [(e.event, e.data) for e in parse_sse_stream(raw)]
394
+ assert got == expected
395
+
396
+
397
+ async def test_aparse_sse_stream_yields_before_stream_completes():
398
+ """The async parser yields each event AS its lines arrive, without
399
+ draining the rest of the stream — the property that lets ``_astream``
400
+ surface Codex token deltas in real time instead of post-completion."""
401
+ import asyncio
402
+
403
+ from langchain_codex_plus.codex_protocol import aparse_sse_stream
404
+
405
+ gate = asyncio.Event()
406
+
407
+ async def alines():
408
+ yield "event: response.output_text.delta"
409
+ yield 'data: {"delta": "hel"}'
410
+ yield "" # boundary → first event flushes here
411
+ # Block until the consumer has the first event; if the parser had
412
+ # to drain the whole stream before yielding, this would deadlock.
413
+ await gate.wait()
414
+ yield "event: response.output_text.delta"
415
+ yield 'data: {"delta": "lo"}'
416
+ yield ""
417
+
418
+ agen = aparse_sse_stream(alines())
419
+ first = await agen.__anext__()
420
+ assert first.data["delta"] == "hel" # arrived while producer is blocked
421
+ gate.set()
422
+ second = await agen.__anext__()
423
+ assert second.data["delta"] == "lo"
424
+ await agen.aclose()
@@ -253,7 +253,7 @@ wheels = [
253
253
 
254
254
  [[package]]
255
255
  name = "langchain-codex-plus"
256
- version = "0.0.1"
256
+ version = "0.0.4"
257
257
  source = { editable = "." }
258
258
  dependencies = [
259
259
  { name = "httpx" },