PyPI - python-codex - Versions diffs - 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl - Mend

python-codex 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

pycodex/agent.py +198 -17
pycodex/cli.py +1 -0
pycodex/context.py +16 -0
pycodex/model.py +14 -1
pycodex/utils/compactor.py +77 -13
pycodex/utils/visualize.py +53 -0
{python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/METADATA +15 -2
{python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/RECORD +13 -13
responses_server/app.py +7 -3
responses_server/stream_router.py +39 -1
{python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/WHEEL +0 -0
{python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/entry_points.txt +0 -0
{python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/licenses/LICENSE +0 -0

pycodex/agent.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import asyncio
 import json
+import re
 from typing import Callable
 from .context import ContextManager
@@ -26,6 +27,18 @@ if typing.TYPE_CHECKING:
 EventHandler = Callable[[AgentEvent], None]
 NOOP_EVENT_HANDLER: 'EventHandler' = lambda _event: None
+_REQUESTED_TOKENS_RE = re.compile(
+    r"requested\s+([0-9,]+)\s+tokens",
+    re.IGNORECASE,
+)
+_REQUESTED_TOKEN_SPLIT_RE = re.compile(
+    r"\(([0-9,]+)\s+in\s+the\s+messages,\s+([0-9,]+)\s+in\s+the\s+completion\)",
+    re.IGNORECASE,
+)
+_MAX_CONTEXT_TOKENS_RE = re.compile(
+    r"maximum\s+context\s+length\s+is\s+([0-9,]+)\s+tokens",
+    re.IGNORECASE,
+)
 class TurnInterrupted(RuntimeError):
@@ -58,6 +71,10 @@ class AgentLoop:
         self._event_handler = event_handler
         self._history: 'typing.List[ConversationItem]' = list(initial_history)
         self._rollout_recorder = rollout_recorder
+        self._auto_compact_token_limit = (
+            self._context_manager.resolve_auto_compact_token_limit()
+        )
+        self._last_total_usage_tokens: 'typing.Union[int, None]' = None
         self.interrupt_asap = False
     @property
@@ -101,8 +118,6 @@ class AgentLoop:
         turn_id = turn_id or uuid7_string()
         self.interrupt_asap = False
         new_user_messages = [UserMessage(text=text) for text in texts]
-        self._history.extend(new_user_messages)
-        self._persist_history_items(new_user_messages)
         self._emit(
             "turn_started",
@@ -110,6 +125,9 @@ class AgentLoop:
             user_text="\n".join(texts),
             user_texts=list(texts),
         )
+        await self._maybe_auto_compact(turn_id, phase="pre_turn")
+        self._history.extend(new_user_messages)
+        self._persist_history_items(new_user_messages)
         last_assistant_message: 'typing.Union[str, None]' = None
         final_response_items: 'typing.Tuple[\n    typing.Union[typing.Union[AssistantMessage, ToolCall], ReasoningItem], ...\n]' = ()
@@ -122,23 +140,11 @@ class AgentLoop:
                     iteration,
                     output_text=last_assistant_message,
                 )
+                await self._maybe_auto_compact(turn_id, phase="mid_turn")
                 iteration += 1
-                prompt = self._context_manager.build_prompt(
-                    self._history,
-                    self._tool_registry.model_visible_specs(),
-                    self._parallel_tool_calls,
-                    turn_id=turn_id,
-                )
-                self._emit(
-                    "model_called",
+                response = await self._complete_model_request(
                     turn_id,
-                    iteration=iteration,
-                    history_size=len(prompt.input),
-                    tool_count=len(prompt.tools),
-                )
-                response = await self._model_client.complete(
-                    prompt,
-                    lambda event: self._handle_model_stream_event(turn_id, event),
+                    iteration,
                 )
                 final_response_items = tuple(response.items)
                 self._emit(
@@ -193,6 +199,10 @@ class AgentLoop:
         except TurnInterrupted:
             raise
         except Exception as exc:
+            context_usage = _usage_from_context_length_error(str(exc))
+            if context_usage is not None:
+                self._remember_token_usage(context_usage)
+                self._emit("token_count", turn_id, usage=context_usage)
             self._emit(
                 "turn_failed",
                 turn_id,
@@ -287,6 +297,8 @@ class AgentLoop:
             return
     def _handle_model_stream_event(self, turn_id: 'str', event: 'ModelStreamEvent') -> 'None':
+        if event.kind == "token_count":
+            self._remember_token_usage(event.payload.get("usage"))
         if event.kind == "assistant_delta":
             self._emit("assistant_delta", turn_id, **event.payload)
         elif event.kind == "tool_call":
@@ -296,6 +308,140 @@ class AgentLoop:
         elif event.kind == "stream_error":
             self._emit("stream_error", turn_id, **event.payload)
+    def _remember_token_usage(self, usage: 'object') -> 'None':
+        if not isinstance(usage, dict):
+            return
+        try:
+            self._last_total_usage_tokens = int(usage["total_tokens"])
+        except (KeyError, TypeError, ValueError):
+            return
+    async def _complete_model_request(
+        self,
+        turn_id: 'str',
+        iteration: 'int',
+    ) -> 'typing.Any':
+        attempted_context_compact = False
+        while True:
+            prompt = self._context_manager.build_prompt(
+                self._history,
+                self._tool_registry.model_visible_specs(),
+                self._parallel_tool_calls,
+                turn_id=turn_id,
+            )
+            self._emit(
+                "model_called",
+                turn_id,
+                iteration=iteration,
+                history_size=len(prompt.input),
+                tool_count=len(prompt.tools),
+            )
+            try:
+                return await self._model_client.complete(
+                    prompt,
+                    lambda event: self._handle_model_stream_event(turn_id, event),
+                )
+            except Exception as exc:
+                context_usage = _usage_from_context_length_error(str(exc))
+                if context_usage is None or attempted_context_compact:
+                    raise
+                attempted_context_compact = True
+                self._remember_token_usage(context_usage)
+                self._emit("token_count", turn_id, usage=context_usage)
+                await self._run_auto_compact(
+                    turn_id,
+                    phase="context_length_exceeded",
+                    total_tokens=context_usage.get("total_tokens"),
+                    token_limit=_context_length_error_token_limit(str(exc)),
+                    prune_tool_results_on_context_error=True,
+                )
+                self._raise_if_interrupt_requested(turn_id, iteration)
+    async def _maybe_auto_compact(
+        self,
+        turn_id: 'str',
+        phase: 'str',
+    ) -> 'None':
+        limit = self._auto_compact_token_limit
+        total_tokens = self._last_total_usage_tokens
+        if limit is None or total_tokens is None:
+            return
+        if total_tokens < limit or not self._history:
+            return
+        await self._run_auto_compact(
+            turn_id,
+            phase=phase,
+            total_tokens=total_tokens,
+            token_limit=limit,
+            prune_tool_results_on_context_error=True,
+        )
+    async def _run_auto_compact(
+        self,
+        turn_id: 'str',
+        phase: 'str',
+        total_tokens: 'typing.Union[int, None]' = None,
+        token_limit: 'typing.Union[int, None]' = None,
+        prune_tool_results_on_context_error: 'bool' = False,
+    ) -> 'None':
+        from .utils.compactor import compact_agent_loop
+        payload: 'typing.Dict[str, object]' = {"phase": phase}
+        if total_tokens is not None:
+            payload["total_tokens"] = total_tokens
+        if token_limit is not None:
+            payload["token_limit"] = token_limit
+        self._emit(
+            "auto_compact_started",
+            turn_id,
+            **payload,
+        )
+        def handle_compact_stream_event(event: 'ModelStreamEvent') -> 'None':
+            if event.kind == "stream_error":
+                self._emit("stream_error", turn_id, **event.payload)
+        try:
+            compact_result = await compact_agent_loop(
+                self,
+                handle_compact_stream_event,
+                prune_tool_results_on_context_error,
+            )
+        except Exception as exc:
+            failed_payload = dict(payload)
+            failed_payload.update(
+                {
+                    "error": str(exc),
+                    "error_type": type(exc).__name__,
+                }
+            )
+            self._emit(
+                "auto_compact_failed",
+                turn_id,
+                **failed_payload,
+            )
+            raise
+        self._last_total_usage_tokens = None
+        if compact_result is None:
+            return
+        completed_payload = dict(payload)
+        completed_payload.update(
+            {
+                "original_item_count": compact_result.original_item_count,
+                "retained_item_count": compact_result.retained_item_count,
+                "summary": compact_result.display_text(),
+            }
+        )
+        if compact_result.pruned_tool_results:
+            completed_payload["pruned_tool_results"] = compact_result.pruned_tool_results
+        self._emit(
+            "auto_compact_completed",
+            turn_id,
+            **completed_payload,
+        )
     def _build_follow_up_messages(
         self,
         tool_results: 'typing.List[ToolResult]',
@@ -326,3 +472,38 @@ class AgentLoop:
                             )
                         )
         return follow_ups
+def _usage_from_context_length_error(
+    message: 'str',
+) -> 'typing.Union[typing.Dict[str, int], None]':
+    lower = message.lower()
+    if (
+        "context_length_exceeded" not in lower
+        and "maximum context length" not in lower
+    ):
+        return None
+    requested_match = _REQUESTED_TOKENS_RE.search(message)
+    if requested_match is None:
+        return None
+    usage = {"total_tokens": _parse_token_count(requested_match.group(1))}
+    split_match = _REQUESTED_TOKEN_SPLIT_RE.search(message)
+    if split_match is not None:
+        usage["input_tokens"] = _parse_token_count(split_match.group(1))
+        usage["output_tokens"] = _parse_token_count(split_match.group(2))
+    else:
+        usage["input_tokens"] = usage["total_tokens"]
+    return usage
+def _context_length_error_token_limit(message: 'str') -> 'typing.Union[int, None]':
+    limit_match = _MAX_CONTEXT_TOKENS_RE.search(message)
+    if limit_match is None:
+        return None
+    return _parse_token_count(limit_match.group(1))
+def _parse_token_count(value: 'str') -> 'int':
+    return int(value.replace(",", ""))

pycodex/cli.py CHANGED Viewed

@@ -594,6 +594,7 @@ async def run_interactive_session(
             compact_result = await compact_agent_loop(
                 agent_loop,
                 handle_compact_stream_event,
+                True,
             )
             if compact_result is None:
                 view.write_line("Nothing to compact.")

pycodex/context.py CHANGED Viewed

@@ -78,6 +78,7 @@ class ContextConfig:
     project_doc_max_bytes: 'typing.Union[int, None]' = None
     model: 'typing.Union[str, None]' = None
     model_context_window: 'typing.Union[int, None]' = None
+    model_auto_compact_token_limit: 'typing.Union[int, None]' = None
     personality: 'typing.Union[str, None]' = None
     approval_policy: 'typing.Union[str, None]' = None
     sandbox_mode: 'typing.Union[str, None]' = None
@@ -120,6 +121,9 @@ class ContextConfig:
             project_doc_max_bytes=_normalize_int(selected.get("project_doc_max_bytes")),
             model=_normalize_text(selected.get("model")),
             model_context_window=_normalize_int(selected.get("model_context_window")),
+            model_auto_compact_token_limit=_normalize_int(
+                selected.get("model_auto_compact_token_limit")
+            ),
             personality=_normalize_text(selected.get("personality")),
             approval_policy=_normalize_text(selected.get("approval_policy")),
             sandbox_mode=_normalize_text(selected.get("sandbox_mode")),
@@ -268,6 +272,18 @@ class ContextManager:
             effective_percent = DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT
         return context_window * max(effective_percent, 0) // 100
+    def resolve_auto_compact_token_limit(self) -> 'typing.Union[int, None]':
+        if self._config.model_auto_compact_token_limit is not None:
+            return self._config.model_auto_compact_token_limit
+        model_slug = self._config.model
+        if model_slug is None:
+            return None
+        model_metadata = _load_models_by_slug().get(model_slug)
+        if model_metadata is None:
+            return None
+        return _normalize_int(model_metadata.get("auto_compact_token_limit"))
     def _resolve_model_instructions(self) -> 'typing.Union[str, None]':
         model_slug = self._config.model
         if model_slug is None:

pycodex/model.py CHANGED Viewed

@@ -287,6 +287,8 @@ class ResponsesModelClient:
                     event_handler,
                 )
             except ResponsesRetryableError as exc:
+                if _is_context_length_error_message(str(exc)):
+                    raise ResponsesApiError(str(exc)) from exc
                 if retries >= max_retries:
                     raise
                 retries += 1
@@ -780,11 +782,14 @@ class ResponsesModelClient:
             )
         message = str(error.get("message") or "responses stream failed")
-        code = str(error.get("code") or "").strip()
+        code = str(error.get("code") or error.get("type") or "").strip()
+        if _is_context_length_error_message(message):
+            raise ResponsesApiError(self._format_response_failed_error(message))
         if code in {
             "context_length_exceeded",
             "insufficient_quota",
             "invalid_prompt",
+            "model_output_invalid",
             "usage_not_included",
         }:
             raise ResponsesApiError(self._format_response_failed_error(message))
@@ -888,6 +893,14 @@ def _optional_int(value: 'object') -> 'typing.Union[int, None]':
     return int(value)
+def _is_context_length_error_message(message: 'str') -> 'bool':
+    lower = message.lower()
+    return (
+        "context_length_exceeded" in lower
+        or "maximum context length" in lower
+    )
 def _requests_verify_setting() -> 'typing.Union[typing.Union[str, bool], None]':
     for env_name in ("REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE", "SSL_CERT_FILE"):
         value = os.environ.get(env_name, "").strip()

pycodex/utils/compactor.py CHANGED Viewed

@@ -1,6 +1,13 @@
 from dataclasses import dataclass
-from ..protocol import AssistantMessage, ConversationItem, ModelStreamEvent, UserMessage
+from ..protocol import (
+    AssistantMessage,
+    ConversationItem,
+    ModelStreamEvent,
+    ToolCall,
+    ToolResult,
+    UserMessage,
+)
 from .random_ids import uuid7_string
 import typing
@@ -35,6 +42,7 @@ _SUBAGENT_NOTIFICATION_PREFIX = "<subagent_notification>\n"
 class CompactResult:
     history: 'typing.Tuple[ConversationItem, ...]'
     original_item_count: 'int'
+    pruned_tool_results: 'int' = 0
     @property
     def retained_item_count(self) -> 'int':
@@ -43,10 +51,14 @@ class CompactResult:
     def display_text(self) -> 'str':
         retained_label = _pluralize("item", self.retained_item_count)
         original_label = _pluralize("item", self.original_item_count)
-        return (
+        text = (
             f"compact({self.original_item_count} {original_label}) -> "
             f"{self.retained_item_count} {retained_label} + [summary]"
         )
+        if self.pruned_tool_results:
+            tool_label = _pluralize("tool response", self.pruned_tool_results)
+            text += f" (dropped {self.pruned_tool_results} old {tool_label})"
+        return text
 def compact(
@@ -60,24 +72,42 @@ def compact(
 async def compact_agent_loop(
     agent_loop: 'AgentLoop',
     stream_event_handler: 'typing.Union[typing.Callable[[ModelStreamEvent], None], None]' = None,
+    prune_tool_results_on_context_error: 'bool' = False,
 ) -> 'typing.Union[CompactResult, None]':
     history = agent_loop.history
     if not history:
         return None
     original_item_count = len(history)
+    pruned_tool_results = 0
-    compact_prompt = UserMessage(text=DEFAULT_COMPACT_PROMPT)
-    prompt = agent_loop._context_manager.build_prompt(
-        list(history) + [compact_prompt],
-        [],
-        False,
-        turn_id=uuid7_string(),
-    )
     noop_stream_event_handler = lambda _event: None
-    response = await agent_loop._model_client.complete(
-        prompt,
-        stream_event_handler or noop_stream_event_handler,
-    )
+    while True:
+        compact_prompt = UserMessage(text=DEFAULT_COMPACT_PROMPT)
+        prompt = agent_loop._context_manager.build_prompt(
+            list(history) + [compact_prompt],
+            [],
+            False,
+            turn_id=uuid7_string(),
+        )
+        try:
+            response = await agent_loop._model_client.complete(
+                prompt,
+                stream_event_handler or noop_stream_event_handler,
+            )
+            break
+        except Exception as exc:
+            if (
+                not prune_tool_results_on_context_error
+                or not _is_context_length_error(str(exc))
+            ):
+                raise
+            pruned_history = prune_oldest_tool_response(history)
+            if pruned_history is None:
+                raise
+            history = pruned_history
+            pruned_tool_results += 1
+            agent_loop.replace_history(history)
     compacted_history = compact(
         list(history) + [compact_prompt] + list(response.items)
     )
@@ -88,6 +118,32 @@ async def compact_agent_loop(
     return CompactResult(
         history=compacted_history,
         original_item_count=original_item_count,
+        pruned_tool_results=pruned_tool_results,
+    )
+def prune_oldest_tool_response(
+    history: 'typing.Sequence[ConversationItem]',
+) -> 'typing.Union[typing.Tuple[ConversationItem, ...], None]':
+    items = list(history)
+    tool_result_index = None
+    call_id = None
+    for index, item in enumerate(items):
+        if isinstance(item, ToolResult):
+            tool_result_index = index
+            call_id = item.call_id
+            break
+    if tool_result_index is None:
+        return None
+    indexes_to_remove = {tool_result_index}
+    for index, item in enumerate(items[:tool_result_index]):
+        if isinstance(item, ToolCall) and item.call_id == call_id:
+            indexes_to_remove.add(index)
+            break
+    return tuple(
+        item for index, item in enumerate(items) if index not in indexes_to_remove
     )
@@ -187,3 +243,11 @@ def _pluralize(noun: 'str', count: 'int') -> 'str':
 def _is_synthetic_user_message(text: 'str') -> 'bool':
     return text.startswith(_SUBAGENT_NOTIFICATION_PREFIX)
+def _is_context_length_error(message: 'str') -> 'bool':
+    lower = message.lower()
+    return (
+        "context_length_exceeded" in lower
+        or "maximum context length" in lower
+    )

pycodex/utils/visualize.py CHANGED Viewed

@@ -705,6 +705,59 @@ class CliSessionView:
                 self._spinner.set_label("reconnecting")
             return
+        if event.kind == "auto_compact_started":
+            self._finish_stream()
+            total_tokens = event.payload.get("total_tokens")
+            token_limit = event.payload.get("token_limit")
+            if total_tokens is not None and token_limit is not None:
+                message = f"[status] auto-compact: {total_tokens}/{token_limit} tokens"
+            else:
+                message = "[status] auto-compact"
+            self._print_line(
+                colorize_cli_message(message, "status", self._color_enabled)
+            )
+            if self._input_active:
+                self._spinner.pause()
+            else:
+                self._spinner.resume()
+                self._spinner.set_label("compacting context")
+                self._spinner.render_now()
+            return
+        if event.kind == "auto_compact_completed":
+            self._finish_stream()
+            summary = str(event.payload.get("summary", "")).strip()
+            message = f"[status] {summary}" if summary else "[status] context compacted"
+            self._print_line(
+                colorize_cli_message(message, "status", self._color_enabled)
+            )
+            if self._input_active:
+                self._spinner.pause()
+            else:
+                self._spinner.resume()
+                self._spinner.set_label("thinking")
+                self._spinner.render_now()
+            return
+        if event.kind == "auto_compact_failed":
+            self._finish_stream()
+            error = str(event.payload.get("error", "")).strip()
+            message = (
+                f"[error] auto-compact failed: {error}"
+                if error
+                else "[error] auto-compact failed"
+            )
+            self._print_line(
+                colorize_cli_message(message, "error", self._color_enabled)
+            )
+            if self._input_active:
+                self._spinner.pause()
+            else:
+                self._spinner.resume()
+                self._spinner.set_label("thinking")
+                self._spinner.render_now()
+            return
         if event.kind == "assistant_delta":
             delta = str(event.payload.get("delta", ""))
             if not delta:

{python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: python-codex
-Version: 0.1.11
+Version: 0.1.12
 Summary: A minimal Python extraction of Codex's main agent loop
 License-File: LICENSE
 Requires-Python: >=3.6.2
@@ -185,6 +185,14 @@ Current behavior:
 - `/compact` synthesizes a local handoff summary, replaces the in-memory
   conversation history with the compacted view, and appends a compacted-history
   entry to the rollout so later `/resume` sees the same state
+- `model_auto_compact_token_limit = <tokens>` in `config.toml` enables the same
+  compaction path automatically when the latest reported usage reaches that
+  threshold before a follow-up sampling request or the next user turn
+- if a model request fails with `context_length_exceeded`, pycodex now treats
+  the provider-reported requested token count as a failed-request usage sample,
+  triggers the same compact path immediately, and retries the request once; if
+  the compact request is also over the limit, it repeatedly drops the oldest
+  tool response plus its matching tool call before retrying compact
 - new sessions are now recorded under `CODEX_HOME/sessions/.../rollout-*.jsonl`
   with a stable session/thread id and per-item append+flush semantics so
   `/resume` reads back the same rollout format
@@ -211,7 +219,12 @@ Current behavior:
   `reasoning_content` are translated back into Responses `reasoning` items, and
   historical `reasoning` items are replayed into downstream assistant messages
   via the `reasoning` field. Streaming token usage is also requested from vLLM
-  and forwarded to the final `response.completed.response.usage`
+  and forwarded to the final `response.completed.response.usage`. If a
+  downstream chat stream terminates after emitting only reasoning, with no
+  assistant content and no tool call, the compat layer discards that partial
+  reasoning, retries the same downstream request once, and only then emits
+  `response.failed` with `type = "model_output_invalid"` if the retry is still
+  reasoning-only
 - standalone `responses_server` now also supports downstream `/v1/messages`
   backends via `--outcomming-api messages`, while keeping the internal
   canonical request/route logic in chat-completions shape

{python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,11 @@
 pycodex/__init__.py,sha256=jCnC_Bgotlxa4GwO3Re2sChKGY49TRM-uVZEQ9uBpfw,3106
-pycodex/agent.py,sha256=s0FrF_XG2pHKryooS461Jr_acmQ_TKTp2JLGQNiny6w,11888
-pycodex/cli.py,sha256=5xQuVhjXh3gGKUawFdmevlqykDjz1va_0kvwfv_rksA,32059
+pycodex/agent.py,sha256=593HxMrQSo_-hZMUL299Rhdg70NBcx1fjKD7x8ryQ1c,18173
+pycodex/cli.py,sha256=wvPoBYX3GPHHsOY6DtMLotV2vnfDL1JvsMVu6ZznqrM,32081
 pycodex/collaboration.py,sha256=yQ6pBD-R3ZWR4_FAYQFoS7KF0m4LLD42otXIbPqw2ys,641
 pycodex/compat.py,sha256=IO0X7AgcYhlHnYnpvBZ6leCh_UjoQzg5HLT5wYBNNIw,3155
-pycodex/context.py,sha256=NuyiDJoUbhj4Xp6xExqs4cATGCKfYQ-YUx2BC7JXGeA,25306
+pycodex/context.py,sha256=IeqSzVZktL5-kTCY9ZlZr9J-NEe31qEFIvoacQl3jwk,26018
 pycodex/doctor.py,sha256=De3M4hRBJq8ZeqsUJgHz0vitqrH18YugrEnz7oHhTdQ,10572
-pycodex/model.py,sha256=ApPW9ecs5Z3oo-nUL9VHg3LzB6nwI0q1iE7dGClgheQ,33662
+pycodex/model.py,sha256=dJKsCfG_xytPkJuE7xPOXrVquoTqtfudSNj33uhWDlE,34181
 pycodex/portable.py,sha256=gxl2E2h5uZJbasMEPPs-nyALFPIvX79T2ZYsu6vXZrg,15656
 pycodex/portable_server.py,sha256=6I3pQkWj3e_SFlDXY2mGdCPns1w_3PSxByBV9wv5epI,7331
 pycodex/protocol.py,sha256=LYDzJefu1tugqQzee4NuZzxhGAv3hXrNcnlw04CudAY,11106
@@ -52,28 +52,28 @@ pycodex/tools/wait_tool.py,sha256=EJcW2Ev9jUD9eZ7cFDNOLDzlywS2BD3ll6pArXyxfrI,23
 pycodex/tools/web_search_tool.py,sha256=_7r2ltWhnBM0ZCgweA5a0GbEi0qSFAHOyi1RHrl6tfQ,957
 pycodex/tools/write_stdin_tool.py,sha256=nCuProkbeewfQ_yS8CgBajo--K3EmkXzJYh1D2QtAM4,2549
 pycodex/utils/__init__.py,sha256=jE63cZR1IBzs4ED86lwdYRLqV5FmPhNNDzIgC90mr6A,1216
-pycodex/utils/compactor.py,sha256=ZCzGc02xHmXq1rIjnG2gATKcFtt6r-OGsCIK0ypjnyI,6467
+pycodex/utils/compactor.py,sha256=bJbtsyT-KCPasGMH403njrrSDDbPyKbnrVaFSt5O4Vg,8440
 pycodex/utils/debug.py,sha256=JeEB5JfzYfbdG0fXlrWFmXyR1ts86fKsI_97IqgF6R0,296
 pycodex/utils/dotenv.py,sha256=rGKmurHjm7GdP4giyjHBPpSPv2Oi45qBqDB6HG3CnfA,1866
 pycodex/utils/get_env.py,sha256=5fNhcNhujOakWV6AS66rGW3jEA68WGpuE4YVXJZFE6U,7427
 pycodex/utils/random_ids.py,sha256=zBphjVGc7OXk9ZNExAbxRi_bk7ipyLG491qTv7hi8jM,380
 pycodex/utils/session_persist.py,sha256=Ntu0jcb2cEZbXpKDe0LXD-OuxfjK0SzBV0lRi90-NAM,16496
-pycodex/utils/visualize.py,sha256=JURzq2AbV046bblE5fojcAe885Juda0LDxt_gqT2PUc,41006
+pycodex/utils/visualize.py,sha256=oWgApf9pXqd-3ijjelhQn0PLPEW3abR_R_PZN2nWhwQ,43099
 responses_server/__init__.py,sha256=3yPv_zeGT7P11tTnmj5kXktISLNsNW-02MUnnbiZcb0,394
 responses_server/__main__.py,sha256=9SRp-Yw7ShGxc6DhSIXcDLKgGEdAVm3oBZ59rBOPjT0,62
-responses_server/app.py,sha256=53SEwI2pUj_YL8zWvvYhQHiT1EUVVPzdRzOqXDvFMJ0,7770
+responses_server/app.py,sha256=ack2a0otiBwq_DpsFURqLMlQzcf9oJPwo8o6iJ1fuig,7885
 responses_server/config.py,sha256=leb3_uPrCyYdUIkyRyVPX4luGF88dQ62OkhRLPe7uxw,2718
 responses_server/messages_api.py,sha256=WgO6J1jz2pOJkI79rLXp-pS1yxtLARcwX8T6JX5Vkcc,16971
 responses_server/payload_processors.py,sha256=cbXGW8Xi-mliaWRg0_Af41X0vXV2W6R9VBzTE6DXfe4,3483
 responses_server/server.py,sha256=Ko-Cqz_kW-uve091itucMklsPhEei77v-YcTjtjEdqU,2286
 responses_server/session_store.py,sha256=ZD3cH2aEOkWaQsu5qTzcal2mThTSFQPAhAhPUN9srgI,1115
-responses_server/stream_router.py,sha256=PuW8_fo8c_R9kd_Gy_Z7nyij7-xW_lPcfFcnYI2PvQA,35434
+responses_server/stream_router.py,sha256=UiP-T4IKgJubD1L0AY93N3DqUh4K41fNcdONmC3Z-0A,37161
 responses_server/trajectory_dump.py,sha256=XCwYaZZmlAxSsSXOfhk3zRvyfDpOHX5R8KzspScNFUM,3435
 responses_server/tools/__init__.py,sha256=ivsBSEy0SBUhY-Uea5v1XMLXShkwHdCVl0id-1FwdZg,150
 responses_server/tools/custom_adapter.py,sha256=LxO7ldydvR-GWachDz8GKC0Q8KGGFoFPbZxM0QvxuZ0,8350
 responses_server/tools/web_search.py,sha256=pm4ZUiHUfxc0bGY1kEvt-BCzDrZIyP24xzPUcga2ul0,8908
-python_codex-0.1.11.dist-info/METADATA,sha256=eRfTDhadn89LMl9z1BITds05EbtinXymdUgT40km-IE,15720
-python_codex-0.1.11.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
-python_codex-0.1.11.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
-python_codex-0.1.11.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
-python_codex-0.1.11.dist-info/RECORD,,
+python_codex-0.1.12.dist-info/METADATA,sha256=T85L7qOCT8P-MNzXHQsNPttvcGQ9ZTetWJbuy6QC6Pw,16657
+python_codex-0.1.12.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
+python_codex-0.1.12.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
+python_codex-0.1.12.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
+python_codex-0.1.12.dist-info/RECORD,,

responses_server/app.py CHANGED Viewed

@@ -37,14 +37,18 @@ def _stream_events(response_server: 'ResponseServer', request_body: 'typing.Dict
         import traceback
         exc_info = traceback.format_exception(type(exc), exc, exc.__traceback__)
+        error = {
+            "message": '\n'.join(exc_info),
+        }
+        error_type = getattr(exc, "error_type", None)
+        if error_type:
+            error["type"] = error_type
         yield _format_sse_event(
             "response.failed",
             {
                 "type": "response.failed",
                 "response": {
-                    "error": {
-                        "message": '\n'.join(exc_info),
-                    }
+                    "error": error,
                 },
             },
         )

responses_server/stream_router.py CHANGED Viewed

@@ -36,7 +36,13 @@ class UnsupportedIncommingFeature(ValueError):
 class OutcommingChatError(RuntimeError):
-    pass
+    def __init__(
+        self,
+        message: 'str',
+        error_type: 'typing.Union[str, None]' = None,
+    ) -> 'None':
+        super().__init__(message)
+        self.error_type = error_type
 class StreamRouter:
@@ -312,10 +318,14 @@ class StreamRouter:
             current_request,
             trajectory_dump,
         )
+        retried_reasoning_only_output = False
         while True:
             tool_calls: 'typing.Dict[int, typing.Dict[str, object]]' = {}
+            finish_reasons: 'typing.List[str]' = []
             current_usage: 'typing.Dict[str, object]' = {}
+            reasoning_start = len(reasoning_parts)
+            text_start = len(text_parts)
             for chunk in current_stream:
                 for event_name, payload in self._consume_chat_chunk(
                     chunk,
@@ -323,6 +333,7 @@ class StreamRouter:
                     text_parts,
                     tool_calls,
                     current_usage,
+                    finish_reasons,
                 ):
                     yield event_name, payload
             if current_usage:
@@ -362,6 +373,29 @@ class StreamRouter:
                 )
                 continue
+            if (
+                len(reasoning_parts) > reasoning_start
+                and len(text_parts) == text_start
+                and not ordinary_tool_calls
+            ):
+                if not retried_reasoning_only_output:
+                    retried_reasoning_only_output = True
+                    del reasoning_parts[reasoning_start:]
+                    del text_parts[text_start:]
+                    current_request = json.loads(json.dumps(current_request))
+                    current_stream = self._open_tracked_outcomming_stream(
+                        current_request,
+                        trajectory_dump,
+                    )
+                    continue
+                finish_reason = finish_reasons[-1] if finish_reasons else "<unknown>"
+                raise OutcommingChatError(
+                    "outcomming chat completion ended without assistant content "
+                    "or tool calls after emitting only reasoning "
+                    f"(finish_reason={finish_reason!r})",
+                    error_type="model_output_invalid",
+                )
             for item in self._build_output_items(
                 reasoning_parts,
                 text_parts,
@@ -676,6 +710,7 @@ class StreamRouter:
         text_parts: 'typing.List[str]',
         tool_calls: 'typing.Dict[int, typing.Dict[str, object]]',
         current_usage: 'typing.Dict[str, object]',
+        finish_reasons: 'typing.List[str]',
     ) -> 'typing.List[typing.Tuple[str, typing.Dict[str, object]]]':
         events: 'typing.List[typing.Tuple[str, typing.Dict[str, object]]]' = []
         usage = payload.get("usage")
@@ -689,6 +724,9 @@ class StreamRouter:
         for choice in choices:
             if not isinstance(choice, dict):
                 continue
+            finish_reason = choice.get("finish_reason")
+            if isinstance(finish_reason, str) and finish_reason:
+                finish_reasons.append(finish_reason)
             delta = choice.get("delta") or {}
             if not isinstance(delta, dict):
                 continue

{python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/WHEEL RENAMED Viewed

File without changes

{python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

python-codex 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl

python-codex 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl