python-codex 0.1.10__tar.gz → 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. {python_codex-0.1.10 → python_codex-0.1.12}/AGENTS.md +5 -1
  2. {python_codex-0.1.10 → python_codex-0.1.12}/PKG-INFO +15 -2
  3. {python_codex-0.1.10 → python_codex-0.1.12}/README.md +14 -1
  4. {python_codex-0.1.10 → python_codex-0.1.12}/docs/responses_server/README.md +7 -0
  5. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/agent.py +198 -17
  6. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/cli.py +5 -2
  7. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/context.py +16 -0
  8. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/model.py +38 -1
  9. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/models.json +71 -0
  10. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/utils/compactor.py +77 -13
  11. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/utils/visualize.py +53 -0
  12. {python_codex-0.1.10 → python_codex-0.1.12}/pyproject.toml +1 -1
  13. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/app.py +7 -3
  14. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/stream_router.py +39 -1
  15. {python_codex-0.1.10 → python_codex-0.1.12}/tests/responses_server/test_server.py +167 -0
  16. python_codex-0.1.12/tests/test_agent.py +685 -0
  17. {python_codex-0.1.10 → python_codex-0.1.12}/tests/test_cli.py +112 -0
  18. {python_codex-0.1.10 → python_codex-0.1.12}/tests/test_compactor.py +27 -1
  19. {python_codex-0.1.10 → python_codex-0.1.12}/tests/test_context.py +36 -4
  20. {python_codex-0.1.10 → python_codex-0.1.12}/tests/test_model.py +112 -0
  21. python_codex-0.1.10/tests/test_agent.py +0 -349
  22. {python_codex-0.1.10 → python_codex-0.1.12}/.github/workflows/publish.yml +0 -0
  23. {python_codex-0.1.10 → python_codex-0.1.12}/.github/workflows/test.yml +0 -0
  24. {python_codex-0.1.10 → python_codex-0.1.12}/.gitignore +0 -0
  25. {python_codex-0.1.10 → python_codex-0.1.12}/LICENSE +0 -0
  26. {python_codex-0.1.10 → python_codex-0.1.12}/README_ZH.md +0 -0
  27. {python_codex-0.1.10 → python_codex-0.1.12}/docs/ALIGNMENT.md +0 -0
  28. {python_codex-0.1.10 → python_codex-0.1.12}/docs/CONTEXT.md +0 -0
  29. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/__init__.py +0 -0
  30. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/collaboration.py +0 -0
  31. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/compat.py +0 -0
  32. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/doctor.py +0 -0
  33. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/portable.py +0 -0
  34. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/portable_server.py +0 -0
  35. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/collaboration_default.md +0 -0
  36. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/collaboration_plan.md +0 -0
  37. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/default_base_instructions.md +0 -0
  38. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/exec_tools.json +0 -0
  39. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/permissions/approval_policy/never.md +0 -0
  40. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/permissions/approval_policy/on_failure.md +0 -0
  41. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/permissions/approval_policy/on_request.md +0 -0
  42. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/permissions/approval_policy/on_request_rule_request_permission.md +0 -0
  43. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/permissions/approval_policy/unless_trusted.md +0 -0
  44. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/permissions/sandbox_mode/danger_full_access.md +0 -0
  45. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/permissions/sandbox_mode/read_only.md +0 -0
  46. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/permissions/sandbox_mode/workspace_write.md +0 -0
  47. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/prompts/subagent_tools.json +0 -0
  48. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/protocol.py +0 -0
  49. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/runtime.py +0 -0
  50. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/runtime_services.py +0 -0
  51. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/__init__.py +0 -0
  52. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/agent_tool_schemas.py +0 -0
  53. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/apply_patch_tool.py +0 -0
  54. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/base_tool.py +0 -0
  55. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/close_agent_tool.py +0 -0
  56. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/code_mode_manager.py +0 -0
  57. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/exec_command_tool.py +0 -0
  58. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/exec_runtime.js +0 -0
  59. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/exec_tool.py +0 -0
  60. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/grep_files_tool.py +0 -0
  61. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/list_dir_tool.py +0 -0
  62. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/read_file_tool.py +0 -0
  63. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/request_permissions_tool.py +0 -0
  64. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/request_user_input_tool.py +0 -0
  65. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/resume_agent_tool.py +0 -0
  66. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/send_input_tool.py +0 -0
  67. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/shell_command_tool.py +0 -0
  68. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/shell_tool.py +0 -0
  69. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/spawn_agent_tool.py +0 -0
  70. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/unified_exec_manager.py +0 -0
  71. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/update_plan_tool.py +0 -0
  72. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/view_image_tool.py +0 -0
  73. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/wait_agent_tool.py +0 -0
  74. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/wait_tool.py +0 -0
  75. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/web_search_tool.py +0 -0
  76. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/tools/write_stdin_tool.py +0 -0
  77. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/utils/__init__.py +0 -0
  78. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/utils/debug.py +0 -0
  79. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/utils/dotenv.py +0 -0
  80. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/utils/get_env.py +0 -0
  81. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/utils/random_ids.py +0 -0
  82. {python_codex-0.1.10 → python_codex-0.1.12}/pycodex/utils/session_persist.py +0 -0
  83. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/__init__.py +0 -0
  84. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/__main__.py +0 -0
  85. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/config.py +0 -0
  86. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/messages_api.py +0 -0
  87. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/payload_processors.py +0 -0
  88. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/server.py +0 -0
  89. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/session_store.py +0 -0
  90. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/tools/__init__.py +0 -0
  91. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/tools/custom_adapter.py +0 -0
  92. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/tools/web_search.py +0 -0
  93. {python_codex-0.1.10 → python_codex-0.1.12}/responses_server/trajectory_dump.py +0 -0
  94. {python_codex-0.1.10 → python_codex-0.1.12}/tests/TESTS.md +0 -0
  95. {python_codex-0.1.10 → python_codex-0.1.12}/tests/__init__.py +0 -0
  96. {python_codex-0.1.10 → python_codex-0.1.12}/tests/compare_request_user_input_roundtrip.py +0 -0
  97. {python_codex-0.1.10 → python_codex-0.1.12}/tests/compare_steer_request_bodies.py +0 -0
  98. {python_codex-0.1.10 → python_codex-0.1.12}/tests/compare_tool_schemas.py +0 -0
  99. {python_codex-0.1.10 → python_codex-0.1.12}/tests/fake_responses_server.py +0 -0
  100. {python_codex-0.1.10 → python_codex-0.1.12}/tests/fakes.py +0 -0
  101. {python_codex-0.1.10 → python_codex-0.1.12}/tests/responses_server/fake_chat_completions_server.py +0 -0
  102. {python_codex-0.1.10 → python_codex-0.1.12}/tests/test_builtin_tools.py +0 -0
  103. {python_codex-0.1.10 → python_codex-0.1.12}/tests/test_doctor.py +0 -0
  104. {python_codex-0.1.10 → python_codex-0.1.12}/tests/test_fake_responses_server.py +0 -0
  105. {python_codex-0.1.10 → python_codex-0.1.12}/tests/test_portable.py +0 -0
  106. {python_codex-0.1.10 → python_codex-0.1.12}/tests/test_py36_syntax.py +0 -0
@@ -12,6 +12,7 @@
12
12
  - 现在 `ResponsesModelClient` 默认会对流式断连做 provider 级自动重试(`stream_max_retries` 默认 5);写 CLI/REPL 测试时如果断言“先向用户报错,再靠下一句 `go on` 继续”,必须在测试 provider 配置里显式设 `stream_max_retries = 0`,否则测试可能一直等不到预期错误而卡住。
13
13
  - `responses_server` compat 层应透传请求里的 `model`;不要再做 “取 downstream /models 第一个 id 并强制覆盖请求模型” 这种兜底兼容。
14
14
  - 对 `model_provider = "vllm"`,`responses_server` 仍然走 `/v1/chat/completions` compat 路径,但要保留 reasoning:把 chat chunk 里的 `reasoning` / `reasoning_content` 翻回 Responses `reasoning` item,并把历史里的 Responses `reasoning` item 回放成下游 assistant message 的 `reasoning` 字段。
15
+ - `responses_server` 不能把 terminal reasoning-only chat output 当成成功回复:如果 downstream 一轮结束时只返回 `reasoning` / `reasoning_content`,没有 assistant `content` 且没有 tool call,先丢弃本次 partial reasoning 并用原样 downstream request 静默重试一次;若仍然 reasoning-only,再发 `response.failed(type=model_output_invalid)`,避免把 partial reasoning 写进 rollout 后在下一轮变成 chat 后端拒绝的裸 assistant message。
15
16
  - `responses_server` 的 provider-specific chat payload 定制统一放在 `responses_server/payload_processors.py`:使用 `CompatServerConfig.model_provider` 选择 `provider_name -> proc_fn(outcomming_request)` 映射,并且只在真正发出 downstream `/v1/chat/completions` 前 post-process;`StreamRouter` 内部继续保留 canonical payload,避免 tool hydration / mock web_search follow-up 被 provider 改写污染。
16
17
  - `responses_server` 如果要兼容下游 `/v1/messages`,也优先保持这条边界:内部继续用 canonical chat request / chat-like chunk 流,只有真正发请求和读取 SSE 时才做 messages 适配,这样 tool hydration、mock `web_search` follow-up、provider payload post-process 都能复用。
17
18
  - 真实 vLLM `0.19.0` 的 `/v1/messages` 会对缺失 `max_tokens` 直接返回 `400`;messages 适配层必须总是补这个字段。当前约定是优先透传请求里的 `max_output_tokens`/`max_tokens`,否则回退到默认 `32000`。
@@ -19,9 +20,10 @@
19
20
  - `pycodex` 默认是最小交互 CLI;无 prompt 时进入 REPL,并通过 `AgentRuntime` 跑外层提交循环。当前会显示最小事件流、assistant 流式输出、简单 title/history(`/title`, `/history`),并默认注册一组与原版一一对应的本地工具子集。
20
21
  - 交互 CLI 的事件流展示优先表达用户可感知的阶段(例如工具开始/完成、模型回看工具结果),不要直接把内部 `iteration` 计数暴露成主要状态文案;`iterations` 应继续保留在 `TurnResult` 等程序化结果里。
21
22
  - prompt/context 相关逻辑统一放在 `pycodex/context.py`:`AgentLoop` 只维护真实会话历史;每轮请求前由 `ContextManager` 注入 base instructions、developer message、`AGENTS.md` 指令和 `<environment_context>`,且这些注入项不写回 history。
22
- - 对需要 model-specific prompt 的本地 model slug,直接在 vendored `pycodex/prompts/models.json` 补条目;当前 `step-3.5-flash` / `step-3.5-flash-2603` 已按这个方式接入。
23
+ - 对需要 model-specific prompt 的本地 model slug,直接在 vendored `pycodex/prompts/models.json` 补条目;当前 `step-3.5-flash` / `step-3.5-flash-2603` / `step-3.6` 已按这个方式接入。
23
24
  - 交互 REPL 的 context 用量提示也应尽量贴近上游语义:展示“剩余 context 百分比”而不是原始 token 数;计算时按上游同款 `BASELINE_TOKENS=12000` 做归一化,并在模型元数据只有 `context_window` 时默认按 `95%` effective window 处理。只要当前模型能解析出 context window,初始 prompt 就先显示 `100%`,等首个 usage 回来后再刷新成真实值。
24
25
  - 对交互 REPL 的 context 指示器,`model_context_window` 的取值优先级也要贴近上游:先吃 `config.toml` / profile 里的 `model_context_window` override,再回退到 vendored `models.json` 的 `context_window`;effective percent 继续沿用模型元数据,没有时默认 `95%`。
26
+ - `pyco(<percent>)` 正常只来自模型流里最近一次 `response.completed.response.usage.total_tokens`;如果大 tool output 之后的下一次请求被下游 `context_length_exceeded` 拒绝,rollout 不会单独记录 usage。遇到这类错误时应从错误文案的 `requested ... tokens (... in the messages, ... in the completion)` 提取真实请求 token,作为失败请求的 `token_count` 事件回灌,并立即触发 compact 后重试一次。若 compact 请求本身也超长,先循环删除最旧的 `ToolResult` 及其配对 `ToolCall` 再重试 compact。
25
27
  - `AgentLoop` 的 turn-loop 语义要跟上游 `codex-rs/core/src/codex.rs` 一致:按 follow-up / tool handoff 自然收敛,不要加固定 12 轮之类的 hard cap,也不要保留本地专用的 iteration-limit 参数。
26
28
  - `README.md` 和 `docs/` 属于对齐工作的一部分:只要实现状态、对齐结论或使用方式发生实质变化,就应及时更新,不要让文档滞后于当前代码。
27
29
  - 新工具必须继承 `BaseTool`,然后通过 `ToolRegistry.register(tool_instance)` 接入;不要再给 registry 传散装 name/description/handler 参数。
@@ -43,6 +45,7 @@
43
45
  - 在当前 `pycodex` CLI 里,普通输入与 `/queue <message>` 只负责选择 runtime queue;真正的 steer/queue 差别由 `AgentRuntime.enqueue_user_turn(..., queue=...)` 决定。runtime 内部也应保持成两个同构 queue,而不是一个普通 queue 再叠一个 steer 专用旁路状态机。
44
46
  - 对上游 steer 语义要非常谨慎:正常 active-turn steer 首先走的是 `inject_input(...)` + `pending_input`,不是立刻 `spawn_task(...)` / `TurnAbortReason::Replaced`。更准确的理解是“在最近一次 sampling 边界插入”,而不是“任意时刻硬打断当前模型/工具调用”。
45
47
  - 用 `tests/fake_responses_server.py` 做 steer 时序对比时,不要把 proxy capture 文件的生成时刻当成“请求已到达 upstream”的信号;`build_proxy_handler(...)` 会等整条 upstream response 读完后才 `write_capture(...)`。如果要在第一条 request 仍未完成时注入 steer,应该同步等待 fake origin 自己收到第 1 条 POST。
48
+ - `--use-chat-completion` 不再只能靠 CLI flag 启用:也可以从 `~/.codex/config.toml` 的 provider 段读取持久配置,在对应 `[model_providers.<name>]` 下加 `use_chat_completion = true` 即可对该 provider 默认启用本地 `responses_server` compat 层;CLI 仍可显式传 `--use-chat-completion` 强制启用,未传该 flag 时才回退到配置值(flag 只能开启,不能关闭配置里已启用的值)。
46
49
  - 在本机做 steer fake-server 对比时,不要把用户本地 `config.toml` 里的 `service_tier` / fast-mode 设置混进“默认 steer”结论。`tests/compare_steer_request_bodies.py` 现在会给 upstream 和 `pycodex` 都生成临时 config,并去掉顶层 `service_tier` 后再比较 request body。
47
50
  - `x-codex-turn-metadata.workspaces` 的时机不是“整个 session 只发第一条请求”。当前对齐结论是:首个 turn 的后续 steer/follow-up request 也继续带 `workspaces`;切到后续新 turn 才省略。
48
51
  - 远端 Codex home 存储模式当前仍刻意只挂在 `pycodex/cli.py` 启动前:`--put`/`--call` 只负责上传或落本地 `CODEX_HOME` 并重写 `args.config`,`model/context/runtime` 继续完全按 `config_path.parent` 读取 `.env`、`AGENTS.md`、`skills/`;后续扩展时优先保持这个隔离边界,不要把分支判断散到运行时各模块里。
@@ -53,3 +56,4 @@
53
56
  - `--call` / portable storage paths must not rely on the process default text encoding. Always pass `encoding="utf-8"` when reading config, prompts, AGENTS files, skills, dotenv, and session history; for user-authored instructions/history, prefer `errors="replace"` so a Windows GBK locale cannot crash on UTF-8 punctuation such as U+2264 or em dash.
54
57
  - 对接真实 `~/.codex/sessions/.../rollout-*.jsonl` 时,不要假设它一定是严格的一行一个 JSON object:本机样本可能包含 pretty-printed 多行对象,且文件尾部偶尔带未完成记录。恢复历史时用 concatenated-JSON 方式读取,并容忍尾部残缺。
55
58
  - `pycodex` 本地 session 保存现在也按上游思路走:新 session 一开始就分配稳定的 uuidv7 thread/session id,并把历史增量追加到 `CODEX_HOME/sessions/.../rollout-*.jsonl`;`/resume` 列表应只展示至少有真实 user message 的 rollout,避免空白新 session 污染恢复列表。
59
+ - auto-compact 对齐上游配置名 `model_auto_compact_token_limit`;未配置时回退到 vendored `models.json` 里当前模型的 `auto_compact_token_limit`,两者都没有才关闭。触发依据是最近一次模型上报的 `usage.total_tokens`:pre-turn 压缩上一轮历史,mid-turn 压缩工具 follow-up 前的当前历史,并继续复用现有 compacted rollout 记录。
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-codex
3
- Version: 0.1.10
3
+ Version: 0.1.12
4
4
  Summary: A minimal Python extraction of Codex's main agent loop
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.6.2
@@ -185,6 +185,14 @@ Current behavior:
185
185
  - `/compact` synthesizes a local handoff summary, replaces the in-memory
186
186
  conversation history with the compacted view, and appends a compacted-history
187
187
  entry to the rollout so later `/resume` sees the same state
188
+ - `model_auto_compact_token_limit = <tokens>` in `config.toml` enables the same
189
+ compaction path automatically: once the latest reported usage reaches that
190
+ threshold, compaction runs before the next follow-up sampling request or user turn
191
+ - if a model request fails with `context_length_exceeded`, pycodex now treats
192
+ the provider-reported requested token count as a failed-request usage sample,
193
+ triggers the same compact path immediately, and retries the request once; if
194
+ the compact request is also over the limit, it repeatedly drops the oldest
195
+ tool response plus its matching tool call before retrying compact
188
196
  - new sessions are now recorded under `CODEX_HOME/sessions/.../rollout-*.jsonl`
189
197
  with a stable session/thread id and per-item append+flush semantics so
190
198
  `/resume` reads back the same rollout format
@@ -211,7 +219,12 @@ Current behavior:
211
219
  `reasoning_content` are translated back into Responses `reasoning` items, and
212
220
  historical `reasoning` items are replayed into downstream assistant messages
213
221
  via the `reasoning` field. Streaming token usage is also requested from vLLM
214
- and forwarded to the final `response.completed.response.usage`
222
+ and forwarded to the final `response.completed.response.usage`. If a
223
+ downstream chat stream terminates after emitting only reasoning, with no
224
+ assistant content and no tool call, the compat layer discards that partial
225
+ reasoning, retries the same downstream request once, and only then emits
226
+ `response.failed` with `type = "model_output_invalid"` if the retry is still
227
+ reasoning-only
215
228
  - standalone `responses_server` now also supports downstream `/v1/messages`
216
229
  backends via `--outcomming-api messages`, while keeping the internal
217
230
  canonical request/route logic in chat-completions shape
@@ -164,6 +164,14 @@ Current behavior:
164
164
  - `/compact` synthesizes a local handoff summary, replaces the in-memory
165
165
  conversation history with the compacted view, and appends a compacted-history
166
166
  entry to the rollout so later `/resume` sees the same state
167
+ - `model_auto_compact_token_limit = <tokens>` in `config.toml` enables the same
168
+ compaction path automatically: once the latest reported usage reaches that
169
+ threshold, compaction runs before the next follow-up sampling request or user turn
170
+ - if a model request fails with `context_length_exceeded`, pycodex now treats
171
+ the provider-reported requested token count as a failed-request usage sample,
172
+ triggers the same compact path immediately, and retries the request once; if
173
+ the compact request is also over the limit, it repeatedly drops the oldest
174
+ tool response plus its matching tool call before retrying compact
167
175
  - new sessions are now recorded under `CODEX_HOME/sessions/.../rollout-*.jsonl`
168
176
  with a stable session/thread id and per-item append+flush semantics so
169
177
  `/resume` reads back the same rollout format
@@ -190,7 +198,12 @@ Current behavior:
190
198
  `reasoning_content` are translated back into Responses `reasoning` items, and
191
199
  historical `reasoning` items are replayed into downstream assistant messages
192
200
  via the `reasoning` field. Streaming token usage is also requested from vLLM
193
- and forwarded to the final `response.completed.response.usage`
201
+ and forwarded to the final `response.completed.response.usage`. If a
202
+ downstream chat stream terminates after emitting only reasoning, with no
203
+ assistant content and no tool call, the compat layer discards that partial
204
+ reasoning, retries the same downstream request once, and only then emits
205
+ `response.failed` with `type = "model_output_invalid"` if the retry is still
206
+ reasoning-only
194
207
  - standalone `responses_server` now also supports downstream `/v1/messages`
195
208
  backends via `--outcomming-api messages`, while keeping the internal
196
209
  canonical request/route logic in chat-completions shape
@@ -97,6 +97,13 @@ trajectory 追加到 `${PYCODEX_DUMP}/dump.jsonl`,当前记录格式是:
97
97
  当前内置规则里,`vllm` 仍走 chat-completions compat 路径,但会额外保留
98
98
  reasoning;`stepfun` 会删除所有 `developer` role。
99
99
 
100
+ 如果下游 chat stream 一轮结束时只给了 `reasoning` / `reasoning_content`,
101
+ 没有 assistant `content` 且没有 tool call,server 会丢弃这次 partial reasoning 并用
102
+ 原样 downstream request 静默重试一次。若 retry 后仍是 reasoning-only,才发
103
+ `response.failed(type=model_output_invalid)`。这样可以避免 interrupted 或
104
+ length-stopped thinking 被持久化成 terminal reasoning-only history,并在下一轮转换成
105
+ 下游 chat 后端不接受的裸 assistant message。
106
+
100
107
  `messages` compat 则故意不改这层 canonical request:仍然先构造 chat 风格
101
108
  `outcomming_request`,只有在真正发请求和读 SSE 时,才在边界把它翻译成
102
109
  messages request / event。这样 tool hydration、mock `web_search`
@@ -1,6 +1,7 @@
1
1
 
2
2
  import asyncio
3
3
  import json
4
+ import re
4
5
  from typing import Callable
5
6
 
6
7
  from .context import ContextManager
@@ -26,6 +27,18 @@ if typing.TYPE_CHECKING:
26
27
 
27
28
  EventHandler = Callable[[AgentEvent], None]
28
29
  NOOP_EVENT_HANDLER: 'EventHandler' = lambda _event: None
30
+ _REQUESTED_TOKENS_RE = re.compile(
31
+ r"requested\s+([0-9,]+)\s+tokens",
32
+ re.IGNORECASE,
33
+ )
34
+ _REQUESTED_TOKEN_SPLIT_RE = re.compile(
35
+ r"\(([0-9,]+)\s+in\s+the\s+messages,\s+([0-9,]+)\s+in\s+the\s+completion\)",
36
+ re.IGNORECASE,
37
+ )
38
+ _MAX_CONTEXT_TOKENS_RE = re.compile(
39
+ r"maximum\s+context\s+length\s+is\s+([0-9,]+)\s+tokens",
40
+ re.IGNORECASE,
41
+ )
29
42
 
30
43
 
31
44
  class TurnInterrupted(RuntimeError):
@@ -58,6 +71,10 @@ class AgentLoop:
58
71
  self._event_handler = event_handler
59
72
  self._history: 'typing.List[ConversationItem]' = list(initial_history)
60
73
  self._rollout_recorder = rollout_recorder
74
+ self._auto_compact_token_limit = (
75
+ self._context_manager.resolve_auto_compact_token_limit()
76
+ )
77
+ self._last_total_usage_tokens: 'typing.Union[int, None]' = None
61
78
  self.interrupt_asap = False
62
79
 
63
80
  @property
@@ -101,8 +118,6 @@ class AgentLoop:
101
118
  turn_id = turn_id or uuid7_string()
102
119
  self.interrupt_asap = False
103
120
  new_user_messages = [UserMessage(text=text) for text in texts]
104
- self._history.extend(new_user_messages)
105
- self._persist_history_items(new_user_messages)
106
121
 
107
122
  self._emit(
108
123
  "turn_started",
@@ -110,6 +125,9 @@ class AgentLoop:
110
125
  user_text="\n".join(texts),
111
126
  user_texts=list(texts),
112
127
  )
128
+ await self._maybe_auto_compact(turn_id, phase="pre_turn")
129
+ self._history.extend(new_user_messages)
130
+ self._persist_history_items(new_user_messages)
113
131
 
114
132
  last_assistant_message: 'typing.Union[str, None]' = None
115
133
  final_response_items: 'typing.Tuple[\n typing.Union[typing.Union[AssistantMessage, ToolCall], ReasoningItem], ...\n]' = ()
@@ -122,23 +140,11 @@ class AgentLoop:
122
140
  iteration,
123
141
  output_text=last_assistant_message,
124
142
  )
143
+ await self._maybe_auto_compact(turn_id, phase="mid_turn")
125
144
  iteration += 1
126
- prompt = self._context_manager.build_prompt(
127
- self._history,
128
- self._tool_registry.model_visible_specs(),
129
- self._parallel_tool_calls,
130
- turn_id=turn_id,
131
- )
132
- self._emit(
133
- "model_called",
145
+ response = await self._complete_model_request(
134
146
  turn_id,
135
- iteration=iteration,
136
- history_size=len(prompt.input),
137
- tool_count=len(prompt.tools),
138
- )
139
- response = await self._model_client.complete(
140
- prompt,
141
- lambda event: self._handle_model_stream_event(turn_id, event),
147
+ iteration,
142
148
  )
143
149
  final_response_items = tuple(response.items)
144
150
  self._emit(
@@ -193,6 +199,10 @@ class AgentLoop:
193
199
  except TurnInterrupted:
194
200
  raise
195
201
  except Exception as exc:
202
+ context_usage = _usage_from_context_length_error(str(exc))
203
+ if context_usage is not None:
204
+ self._remember_token_usage(context_usage)
205
+ self._emit("token_count", turn_id, usage=context_usage)
196
206
  self._emit(
197
207
  "turn_failed",
198
208
  turn_id,
@@ -287,6 +297,8 @@ class AgentLoop:
287
297
  return
288
298
 
289
299
  def _handle_model_stream_event(self, turn_id: 'str', event: 'ModelStreamEvent') -> 'None':
300
+ if event.kind == "token_count":
301
+ self._remember_token_usage(event.payload.get("usage"))
290
302
  if event.kind == "assistant_delta":
291
303
  self._emit("assistant_delta", turn_id, **event.payload)
292
304
  elif event.kind == "tool_call":
@@ -296,6 +308,140 @@ class AgentLoop:
296
308
  elif event.kind == "stream_error":
297
309
  self._emit("stream_error", turn_id, **event.payload)
298
310
 
311
+ def _remember_token_usage(self, usage: 'object') -> 'None':
312
+ if not isinstance(usage, dict):
313
+ return
314
+ try:
315
+ self._last_total_usage_tokens = int(usage["total_tokens"])
316
+ except (KeyError, TypeError, ValueError):
317
+ return
318
+
319
+ async def _complete_model_request(
320
+ self,
321
+ turn_id: 'str',
322
+ iteration: 'int',
323
+ ) -> 'typing.Any':
324
+ attempted_context_compact = False
325
+ while True:
326
+ prompt = self._context_manager.build_prompt(
327
+ self._history,
328
+ self._tool_registry.model_visible_specs(),
329
+ self._parallel_tool_calls,
330
+ turn_id=turn_id,
331
+ )
332
+ self._emit(
333
+ "model_called",
334
+ turn_id,
335
+ iteration=iteration,
336
+ history_size=len(prompt.input),
337
+ tool_count=len(prompt.tools),
338
+ )
339
+ try:
340
+ return await self._model_client.complete(
341
+ prompt,
342
+ lambda event: self._handle_model_stream_event(turn_id, event),
343
+ )
344
+ except Exception as exc:
345
+ context_usage = _usage_from_context_length_error(str(exc))
346
+ if context_usage is None or attempted_context_compact:
347
+ raise
348
+ attempted_context_compact = True
349
+ self._remember_token_usage(context_usage)
350
+ self._emit("token_count", turn_id, usage=context_usage)
351
+ await self._run_auto_compact(
352
+ turn_id,
353
+ phase="context_length_exceeded",
354
+ total_tokens=context_usage.get("total_tokens"),
355
+ token_limit=_context_length_error_token_limit(str(exc)),
356
+ prune_tool_results_on_context_error=True,
357
+ )
358
+ self._raise_if_interrupt_requested(turn_id, iteration)
359
+
360
+ async def _maybe_auto_compact(
361
+ self,
362
+ turn_id: 'str',
363
+ phase: 'str',
364
+ ) -> 'None':
365
+ limit = self._auto_compact_token_limit
366
+ total_tokens = self._last_total_usage_tokens
367
+ if limit is None or total_tokens is None:
368
+ return
369
+ if total_tokens < limit or not self._history:
370
+ return
371
+
372
+ await self._run_auto_compact(
373
+ turn_id,
374
+ phase=phase,
375
+ total_tokens=total_tokens,
376
+ token_limit=limit,
377
+ prune_tool_results_on_context_error=True,
378
+ )
379
+
380
+ async def _run_auto_compact(
381
+ self,
382
+ turn_id: 'str',
383
+ phase: 'str',
384
+ total_tokens: 'typing.Union[int, None]' = None,
385
+ token_limit: 'typing.Union[int, None]' = None,
386
+ prune_tool_results_on_context_error: 'bool' = False,
387
+ ) -> 'None':
388
+ from .utils.compactor import compact_agent_loop
389
+
390
+ payload: 'typing.Dict[str, object]' = {"phase": phase}
391
+ if total_tokens is not None:
392
+ payload["total_tokens"] = total_tokens
393
+ if token_limit is not None:
394
+ payload["token_limit"] = token_limit
395
+ self._emit(
396
+ "auto_compact_started",
397
+ turn_id,
398
+ **payload,
399
+ )
400
+
401
+ def handle_compact_stream_event(event: 'ModelStreamEvent') -> 'None':
402
+ if event.kind == "stream_error":
403
+ self._emit("stream_error", turn_id, **event.payload)
404
+
405
+ try:
406
+ compact_result = await compact_agent_loop(
407
+ self,
408
+ handle_compact_stream_event,
409
+ prune_tool_results_on_context_error,
410
+ )
411
+ except Exception as exc:
412
+ failed_payload = dict(payload)
413
+ failed_payload.update(
414
+ {
415
+ "error": str(exc),
416
+ "error_type": type(exc).__name__,
417
+ }
418
+ )
419
+ self._emit(
420
+ "auto_compact_failed",
421
+ turn_id,
422
+ **failed_payload,
423
+ )
424
+ raise
425
+
426
+ self._last_total_usage_tokens = None
427
+ if compact_result is None:
428
+ return
429
+ completed_payload = dict(payload)
430
+ completed_payload.update(
431
+ {
432
+ "original_item_count": compact_result.original_item_count,
433
+ "retained_item_count": compact_result.retained_item_count,
434
+ "summary": compact_result.display_text(),
435
+ }
436
+ )
437
+ if compact_result.pruned_tool_results:
438
+ completed_payload["pruned_tool_results"] = compact_result.pruned_tool_results
439
+ self._emit(
440
+ "auto_compact_completed",
441
+ turn_id,
442
+ **completed_payload,
443
+ )
444
+
299
445
  def _build_follow_up_messages(
300
446
  self,
301
447
  tool_results: 'typing.List[ToolResult]',
@@ -326,3 +472,38 @@ class AgentLoop:
326
472
  )
327
473
  )
328
474
  return follow_ups
475
+
476
+
477
+ def _usage_from_context_length_error(
478
+ message: 'str',
479
+ ) -> 'typing.Union[typing.Dict[str, int], None]':
480
+ lower = message.lower()
481
+ if (
482
+ "context_length_exceeded" not in lower
483
+ and "maximum context length" not in lower
484
+ ):
485
+ return None
486
+
487
+ requested_match = _REQUESTED_TOKENS_RE.search(message)
488
+ if requested_match is None:
489
+ return None
490
+
491
+ usage = {"total_tokens": _parse_token_count(requested_match.group(1))}
492
+ split_match = _REQUESTED_TOKEN_SPLIT_RE.search(message)
493
+ if split_match is not None:
494
+ usage["input_tokens"] = _parse_token_count(split_match.group(1))
495
+ usage["output_tokens"] = _parse_token_count(split_match.group(2))
496
+ else:
497
+ usage["input_tokens"] = usage["total_tokens"]
498
+ return usage
499
+
500
+
501
+ def _context_length_error_token_limit(message: 'str') -> 'typing.Union[int, None]':
502
+ limit_match = _MAX_CONTEXT_TOKENS_RE.search(message)
503
+ if limit_match is None:
504
+ return None
505
+ return _parse_token_count(limit_match.group(1))
506
+
507
+
508
+ def _parse_token_count(value: 'str') -> 'int':
509
+ return int(value.replace(",", ""))
@@ -381,7 +381,7 @@ def _build_model_client(
381
381
  timeout_seconds: 'float',
382
382
  managed_responses_base_url: 'typing.Union[str, None]' = None,
383
383
  vllm_endpoint: 'typing.Union[str, None]' = None,
384
- use_chat_completion: 'bool' = False,
384
+ use_chat_completion: 'typing.Union[bool, None]' = None,
385
385
  use_messages: 'bool' = False,
386
386
  ):
387
387
  load_codex_dotenv(config_path)
@@ -389,6 +389,8 @@ def _build_model_client(
389
389
  config_path,
390
390
  profile,
391
391
  )
392
+ if use_chat_completion is None:
393
+ use_chat_completion = bool(provider_config.use_chat_completion)
392
394
  if use_chat_completion and use_messages:
393
395
  raise ValueError("--use-chat-completion and --use-messages cannot be combined")
394
396
  if vllm_endpoint and use_messages:
@@ -592,6 +594,7 @@ async def run_interactive_session(
592
594
  compact_result = await compact_agent_loop(
593
595
  agent_loop,
594
596
  handle_compact_stream_event,
597
+ True,
595
598
  )
596
599
  if compact_result is None:
597
600
  view.write_line("Nothing to compact.")
@@ -782,7 +785,7 @@ async def run_cli(args: 'argparse.Namespace') -> 'int':
782
785
  args.profile,
783
786
  args.timeout_seconds,
784
787
  vllm_endpoint=args.vllm_endpoint,
785
- use_chat_completion=args.use_chat_completion,
788
+ use_chat_completion=args.use_chat_completion or None,
786
789
  use_messages=args.use_messages,
787
790
  )
788
791
  if phase_handle is not None:
@@ -78,6 +78,7 @@ class ContextConfig:
78
78
  project_doc_max_bytes: 'typing.Union[int, None]' = None
79
79
  model: 'typing.Union[str, None]' = None
80
80
  model_context_window: 'typing.Union[int, None]' = None
81
+ model_auto_compact_token_limit: 'typing.Union[int, None]' = None
81
82
  personality: 'typing.Union[str, None]' = None
82
83
  approval_policy: 'typing.Union[str, None]' = None
83
84
  sandbox_mode: 'typing.Union[str, None]' = None
@@ -120,6 +121,9 @@ class ContextConfig:
120
121
  project_doc_max_bytes=_normalize_int(selected.get("project_doc_max_bytes")),
121
122
  model=_normalize_text(selected.get("model")),
122
123
  model_context_window=_normalize_int(selected.get("model_context_window")),
124
+ model_auto_compact_token_limit=_normalize_int(
125
+ selected.get("model_auto_compact_token_limit")
126
+ ),
123
127
  personality=_normalize_text(selected.get("personality")),
124
128
  approval_policy=_normalize_text(selected.get("approval_policy")),
125
129
  sandbox_mode=_normalize_text(selected.get("sandbox_mode")),
@@ -268,6 +272,18 @@ class ContextManager:
268
272
  effective_percent = DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT
269
273
  return context_window * max(effective_percent, 0) // 100
270
274
 
275
def resolve_auto_compact_token_limit(self) -> 'typing.Union[int, None]':
    """Return the auto-compaction token limit for the configured model.

    An explicit ``model_auto_compact_token_limit`` on the config takes
    precedence. Otherwise the ``auto_compact_token_limit`` entry of the
    model's metadata (looked up by slug via ``_load_models_by_slug()``) is
    normalized with ``_normalize_int`` and returned.

    Returns:
        The resolved limit, or ``None`` when no explicit limit is set and
        either no model is configured or the model has no metadata entry.
    """
    explicit_limit = self._config.model_auto_compact_token_limit
    if explicit_limit is not None:
        return explicit_limit

    slug = self._config.model
    # Only consult the model catalogue when a model is actually configured.
    metadata = None if slug is None else _load_models_by_slug().get(slug)
    if metadata is None:
        return None
    return _normalize_int(metadata.get("auto_compact_token_limit"))
286
+
271
287
  def _resolve_model_instructions(self) -> 'typing.Union[str, None]':
272
288
  model_slug = self._config.model
273
289
  if model_slug is None:
@@ -55,6 +55,7 @@ class ResponsesProviderConfig:
55
55
  provider_name: 'str'
56
56
  base_url: 'str'
57
57
  api_key_env: 'typing.Union[str, None]'
58
+ use_chat_completion: 'bool' = False
58
59
  wire_api: 'str' = "responses"
59
60
  query_params: 'typing.Dict[str, str]' = field(default_factory=dict)
60
61
  reasoning_effort: 'typing.Union[str, None]' = None
@@ -95,11 +96,21 @@ class ResponsesProviderConfig:
95
96
  beta_features: 'typing.List[str]' = []
96
97
  if isinstance(features, dict) and features.get("guardian_approval") is True:
97
98
  beta_features.append("guardian_approval")
99
+ use_chat_completion = _optional_bool(
100
+ selected.get("use_chat_completion")
101
+ )
102
+ if use_chat_completion is None:
103
+ use_chat_completion = _optional_bool(
104
+ provider.get("use_chat_completion")
105
+ )
106
+ if use_chat_completion is None:
107
+ use_chat_completion = False
98
108
  return cls(
99
109
  model=selected["model"],
100
110
  provider_name=provider_name,
101
111
  base_url=provider["base_url"],
102
112
  api_key_env=api_key_env,
113
+ use_chat_completion=use_chat_completion,
103
114
  wire_api=wire_api,
104
115
  query_params=query_params,
105
116
  reasoning_effort=selected.get("model_reasoning_effort"),
@@ -147,6 +158,19 @@ class ResponsesProviderConfig:
147
158
  return max(int(self.stream_idle_timeout_ms), 1) / 1000.0
148
159
 
149
160
 
161
+ def _optional_bool(value: 'typing.Union[bool, str, int, None]') -> 'typing.Union[bool, None]':
162
+ if value is None:
163
+ return None
164
+ if isinstance(value, bool):
165
+ return value
166
+ text = str(value).strip().lower()
167
+ if text in {"1", "true", "yes", "on"}:
168
+ return True
169
+ if text in {"0", "false", "no", "off"}:
170
+ return False
171
+ raise ValueError(f"invalid boolean config value: {value!r}")
172
+
173
+
150
174
  class ResponsesApiError(RuntimeError):
151
175
  pass
152
176
 
@@ -263,6 +287,8 @@ class ResponsesModelClient:
263
287
  event_handler,
264
288
  )
265
289
  except ResponsesRetryableError as exc:
290
+ if _is_context_length_error_message(str(exc)):
291
+ raise ResponsesApiError(str(exc)) from exc
266
292
  if retries >= max_retries:
267
293
  raise
268
294
  retries += 1
@@ -756,11 +782,14 @@ class ResponsesModelClient:
756
782
  )
757
783
 
758
784
  message = str(error.get("message") or "responses stream failed")
759
- code = str(error.get("code") or "").strip()
785
+ code = str(error.get("code") or error.get("type") or "").strip()
786
+ if _is_context_length_error_message(message):
787
+ raise ResponsesApiError(self._format_response_failed_error(message))
760
788
  if code in {
761
789
  "context_length_exceeded",
762
790
  "insufficient_quota",
763
791
  "invalid_prompt",
792
+ "model_output_invalid",
764
793
  "usage_not_included",
765
794
  }:
766
795
  raise ResponsesApiError(self._format_response_failed_error(message))
@@ -864,6 +893,14 @@ def _optional_int(value: 'object') -> 'typing.Union[int, None]':
864
893
  return int(value)
865
894
 
866
895
 
896
+ def _is_context_length_error_message(message: 'str') -> 'bool':
897
+ lower = message.lower()
898
+ return (
899
+ "context_length_exceeded" in lower
900
+ or "maximum context length" in lower
901
+ )
902
+
903
+
867
904
  def _requests_verify_setting() -> 'typing.Union[typing.Union[str, bool], None]':
868
905
  for env_name in ("REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE", "SSL_CERT_FILE"):
869
906
  value = os.environ.get(env_name, "").strip()