python-codex 0.1.13__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. {python_codex-0.1.13 → python_codex-0.1.14}/AGENTS.md +7 -0
  2. {python_codex-0.1.13 → python_codex-0.1.14}/PKG-INFO +4 -1
  3. {python_codex-0.1.13 → python_codex-0.1.14}/README.md +3 -0
  4. {python_codex-0.1.13 → python_codex-0.1.14}/docs/ALIGNMENT.md +175 -56
  5. {python_codex-0.1.13 → python_codex-0.1.14}/docs/CONTEXT.md +3 -3
  6. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/agent.py +77 -12
  7. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/cli.py +13 -356
  8. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/feishu_card.py +76 -30
  9. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/feishu_link.py +131 -11
  10. python_codex-0.1.14/pycodex/interactive_session.py +397 -0
  11. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/model.py +1 -19
  12. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/protocol.py +0 -5
  13. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/runtime.py +13 -0
  14. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/runtime_services.py +2 -2
  15. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/agent_tool_schemas.py +1 -1
  16. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/apply_patch_tool.py +1 -1
  17. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/base_tool.py +1 -27
  18. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/close_agent_tool.py +11 -4
  19. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/exec_command_tool.py +40 -16
  20. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/exec_tool.py +18 -2
  21. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/grep_files_tool.py +19 -6
  22. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/ipython_tool.py +3 -2
  23. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/list_dir_tool.py +19 -6
  24. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/read_file_tool.py +39 -9
  25. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/request_permissions_tool.py +12 -1
  26. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/request_user_input_tool.py +28 -1
  27. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/send_input_tool.py +4 -2
  28. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/shell_command_tool.py +23 -6
  29. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/shell_tool.py +13 -4
  30. python_codex-0.1.14/pycodex/tools/spawn_agent_tool.py +119 -0
  31. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/unified_exec_manager.py +42 -1
  32. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/update_plan_tool.py +14 -6
  33. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/view_image_tool.py +17 -16
  34. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/wait_agent_tool.py +15 -3
  35. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/wait_tool.py +18 -4
  36. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/web_search_tool.py +2 -1
  37. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/write_stdin_tool.py +42 -10
  38. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/utils/compactor.py +7 -1
  39. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/utils/visualize.py +34 -15
  40. {python_codex-0.1.13 → python_codex-0.1.14}/pyproject.toml +4 -2
  41. {python_codex-0.1.13 → python_codex-0.1.14}/tests/TESTS.md +4 -4
  42. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_agent.py +218 -2
  43. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_builtin_tools.py +235 -4
  44. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_cli.py +290 -16
  45. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_compactor.py +21 -0
  46. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_feishu_card.py +48 -4
  47. python_codex-0.1.14/tests/test_feishu_link.py +208 -0
  48. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_model.py +36 -13
  49. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_py36_syntax.py +1 -1
  50. python_codex-0.1.14/tests/test_workspace_server.py +781 -0
  51. python_codex-0.1.14/workspace_server/__init__.py +21 -0
  52. python_codex-0.1.14/workspace_server/__main__.py +5 -0
  53. python_codex-0.1.14/workspace_server/app.py +983 -0
  54. python_codex-0.1.14/workspace_server/workspace.html +790 -0
  55. python_codex-0.1.13/pycodex/prompts/exec_tools.json +0 -411
  56. python_codex-0.1.13/pycodex/prompts/subagent_tools.json +0 -163
  57. python_codex-0.1.13/pycodex/tools/spawn_agent_tool.py +0 -96
  58. python_codex-0.1.13/tests/test_feishu_link.py +0 -25
  59. {python_codex-0.1.13 → python_codex-0.1.14}/.github/workflows/publish.yml +0 -0
  60. {python_codex-0.1.13 → python_codex-0.1.14}/.github/workflows/test.yml +0 -0
  61. {python_codex-0.1.13 → python_codex-0.1.14}/.gitignore +0 -0
  62. {python_codex-0.1.13 → python_codex-0.1.14}/LICENSE +0 -0
  63. {python_codex-0.1.13 → python_codex-0.1.14}/README_ZH.md +0 -0
  64. {python_codex-0.1.13 → python_codex-0.1.14}/docs/responses_server/README.md +0 -0
  65. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/__init__.py +0 -0
  66. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/collaboration.py +0 -0
  67. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/compat.py +0 -0
  68. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/context.py +0 -0
  69. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/doctor.py +0 -0
  70. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/portable.py +0 -0
  71. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/portable_server.py +0 -0
  72. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/collaboration_default.md +0 -0
  73. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/collaboration_plan.md +0 -0
  74. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/default_base_instructions.md +0 -0
  75. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/models.json +0 -0
  76. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/permissions/approval_policy/never.md +0 -0
  77. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/permissions/approval_policy/on_failure.md +0 -0
  78. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/permissions/approval_policy/on_request.md +0 -0
  79. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/permissions/approval_policy/on_request_rule_request_permission.md +0 -0
  80. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/permissions/approval_policy/unless_trusted.md +0 -0
  81. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/permissions/sandbox_mode/danger_full_access.md +0 -0
  82. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/permissions/sandbox_mode/read_only.md +0 -0
  83. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/prompts/permissions/sandbox_mode/workspace_write.md +0 -0
  84. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/__init__.py +0 -0
  85. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/code_mode_manager.py +0 -0
  86. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/exec_runtime.js +0 -0
  87. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/tools/resume_agent_tool.py +0 -0
  88. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/utils/__init__.py +0 -0
  89. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/utils/async_bridge.py +0 -0
  90. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/utils/debug.py +0 -0
  91. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/utils/dotenv.py +0 -0
  92. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/utils/get_env.py +0 -0
  93. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/utils/random_ids.py +0 -0
  94. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/utils/session_persist.py +0 -0
  95. {python_codex-0.1.13 → python_codex-0.1.14}/pycodex/utils/toolcall_visualize.py +0 -0
  96. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/__init__.py +0 -0
  97. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/__main__.py +0 -0
  98. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/app.py +0 -0
  99. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/config.py +0 -0
  100. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/messages_api.py +0 -0
  101. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/payload_processors.py +0 -0
  102. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/server.py +0 -0
  103. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/session_store.py +0 -0
  104. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/stream_router.py +0 -0
  105. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/tools/__init__.py +0 -0
  106. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/tools/custom_adapter.py +0 -0
  107. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/tools/web_search.py +0 -0
  108. {python_codex-0.1.13 → python_codex-0.1.14}/responses_server/trajectory_dump.py +0 -0
  109. {python_codex-0.1.13 → python_codex-0.1.14}/tests/__init__.py +0 -0
  110. {python_codex-0.1.13 → python_codex-0.1.14}/tests/compare_request_user_input_roundtrip.py +0 -0
  111. {python_codex-0.1.13 → python_codex-0.1.14}/tests/compare_steer_request_bodies.py +0 -0
  112. {python_codex-0.1.13 → python_codex-0.1.14}/tests/compare_tool_schemas.py +0 -0
  113. {python_codex-0.1.13 → python_codex-0.1.14}/tests/fake_responses_server.py +0 -0
  114. {python_codex-0.1.13 → python_codex-0.1.14}/tests/fakes.py +0 -0
  115. {python_codex-0.1.13 → python_codex-0.1.14}/tests/responses_server/fake_chat_completions_server.py +0 -0
  116. {python_codex-0.1.13 → python_codex-0.1.14}/tests/responses_server/test_server.py +0 -0
  117. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_async_bridge.py +0 -0
  118. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_context.py +0 -0
  119. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_doctor.py +0 -0
  120. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_fake_responses_server.py +0 -0
  121. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_ipython_tool.py +0 -0
  122. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_portable.py +0 -0
  123. {python_codex-0.1.13 → python_codex-0.1.14}/tests/test_visualize.py +0 -0
  124. {python_codex-0.1.13 → python_codex-0.1.14}/tools/feishu_oauth.py +0 -0
@@ -18,7 +18,9 @@
18
18
  - 真实 vLLM `0.19.0` 的 `/v1/messages` 会对缺失 `max_tokens` 直接返回 `400`;messages 适配层必须总是补这个字段。当前约定是优先透传请求里的 `max_output_tokens`/`max_tokens`,否则回退到默认 `32000`。
19
19
  - 对 vLLM chat-completions 打开 `return_token_ids=true` 时,streaming `prompt_token_ids` 只出现在首个 chunk,后续每个 chunk 的 `choices[*].token_ids` 都是 decode delta;要在 `responses_server` 侧导出 trajectory 时,按“首个 `prompt_token_ids` + 按序拼接所有 chunk 的 `token_ids`”重建即可。
20
20
  - `pycodex` 默认是最小交互 CLI;无 prompt 时进入 REPL,并通过 `AgentRuntime` 跑外层提交循环。当前会显示最小事件流、assistant 流式输出、简单 title/history(`/title`, `/history`),并默认注册一组与原版一一对应的本地工具子集。
21
+ - Web workspace lives in the standalone `workspace_server/` package and is launched with `pycodex-ws --listen <host:port> --board <html>`, not through `pycodex` CLI dispatch. CLI and web share `pycodex.interactive_session.run_interactive_session`; slash-command semantics such as `/resume`, `/compact`, `/model`, and `/link` belong to that shared interactive shell loop, while workspace only supplies a web view/input adapter and tab/session lifecycle.
21
22
  - 交互 CLI 的事件流展示优先表达用户可感知的阶段(例如工具开始/完成、模型回看工具结果),不要直接把内部 `iteration` 计数暴露成主要状态文案;`iterations` 应继续保留在 `TurnResult` 等程序化结果里。
23
+ - 在交互 CLI 里,`stream_error` 表示当前 Responses stream attempt 失败且模型客户端可能马上自动重试;不要在这个事件上 `finish_stream()` 输出当前 assistant delta buffer,否则第一次失败 attempt 的文本和重试成功后的最终回复会重复显示。真正 fatal 的失败仍由 `turn_failed` 走通用 flush,保留 partial 输出。
22
24
  - prompt/context 相关逻辑统一放在 `pycodex/context.py`:`AgentLoop` 只维护真实会话历史;每轮请求前由 `ContextManager` 注入 base instructions、developer message、`AGENTS.md` 指令和 `<environment_context>`,且这些注入项不写回 history。
23
25
  - 对需要 model-specific prompt 的本地 model slug,直接在 vendored `pycodex/prompts/models.json` 补条目;当前 `step-3.5-flash` / `step-3.5-flash-2603` / `step-3.6` 已按这个方式接入。
24
26
  - 交互 REPL 的 context 用量提示也应尽量贴近上游语义:展示“剩余 context 百分比”而不是原始 token 数;计算时按上游同款 `BASELINE_TOKENS=12000` 做归一化,并在模型元数据只有 `context_window` 时默认按 `95%` effective window 处理。只要当前模型能解析出 context window,初始 prompt 就先显示 `100%`,等首个 usage 回来后再刷新成真实值。
@@ -58,4 +60,9 @@
58
60
  - `pycodex` 本地 session 保存现在也按上游思路走:新 session 一开始就分配稳定的 uuidv7 thread/session id,并把历史增量追加到 `CODEX_HOME/sessions/.../rollout-*.jsonl`;`/resume` 列表应只展示至少有真实 user message 的 rollout,避免空白新 session 污染恢复列表。
59
61
  - auto-compact 对齐上游配置名 `model_auto_compact_token_limit`;为空时关闭,触发依据是最近一次模型上报的 `usage.total_tokens`,pre-turn 压缩上一轮历史,mid-turn 压缩工具 follow-up 前的当前历史,并继续复用现有 compacted rollout 记录。
60
62
  - Responses streaming 里的 `response.incomplete` 不是连接断开:不要让 `ResponsesModelClient` 把它当 retryable incomplete stream 反复重连。普通 turn 应明确报 `response.incomplete`;compact 请求如果已经收到 assistant partial summary,可以用这个 partial summary 完成 replacement history,避免 midturn auto-compact 卡在 5 次 retry。
63
+ - 上游 Codex Responses 请求当前不传模型级 `max_output_tokens`,也没有读取 `model_max_output_tokens` 这个 config key;这个名字在上游主要用于工具输出截断,不要为了上游对齐把它加进模型请求。
64
+ - 普通 turn 遇到 `ResponsesIncompleteError(reason="max_output_tokens")` 时,上游语义是保留异常前已经收到的 `response.output_item.done`;pycodex 因为模型客户端按整轮返回,需要在异常路径把这些 done assistant/reasoning items 写入 history 和 rollout,才能让用户下一句 `continue` 接上。不要把纯 `response.output_text.delta` 合成 history,也不要持久化没有 tool result 的 tool call。
61
65
  - Feishu card tests read `~/.codex/.feishu_refresh_token` through production code; when running `tests/test_feishu_card.py` locally, isolate HOME (for example `HOME=/tmp/pycodex-empty-home env -u VIRTUAL_ENV uv run pytest tests/test_feishu_card.py tests/test_feishu_link.py`) unless the test itself controls `HOME`.
66
+ - `lark_oapi.ws.client` creates a module-level asyncio `loop` at import time and `Client.start()` always uses that global. For `/link` long-connection listeners, bind that SDK global to a listener-thread-owned loop before constructing the client, and stop it through private `_disconnect()` plus `loop.stop()` on `/unlink`; otherwise unlink/link can reuse a still-running SDK loop and fail with `RuntimeError: This event loop is already running`.
67
+ - `exec_command` background completion auto-resume is intentionally Agent-idle-only: when a session exits, it may call `Agent.maybe_invoke(...)` and start a synthetic `<exec_command_completed>` turn only if that Agent is not already running a turn. Do not enqueue/cache these events in `CliSubmissionQueue`; direct Agent/IPython use should share the same Agent-level hook.
68
+ - The tool description JSON fallbacks (`pycodex/prompts/exec_tools.json` and `pycodex/prompts/subagent_tools.json`) were deleted after moving schemas into class-level `BaseTool` specs. `ToolSpec.serialize()` intentionally skips function-tool `output_schema`, matching upstream `ResponsesApiTool.output_schema #[serde(skip)]`; keep output schemas as local metadata only unless upstream wire format changes.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-codex
3
- Version: 0.1.13
3
+ Version: 0.1.14
4
4
  Summary: A minimal Python extraction of Codex's main agent loop
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.6.2
@@ -165,12 +165,15 @@ pycodex --put @127.0.0.1:5577
165
165
  pycodex --put /data/.codex/@127.0.0.1:5577
166
166
  pycodex --call SECRET-CALLID@127.0.0.1:5577 "Reply with exactly OK."
167
167
  pycodex doctor
168
+ pycodex-ws --listen 0.0.0.0:6007 --board ./board.html
168
169
  ```
169
170
 
170
171
  Current behavior:
171
172
 
172
173
  - with no argv prompt and a TTY stdin, enter interactive mode
173
174
  - with an argv prompt or piped stdin, run a single turn
175
+ - `pycodex-ws` starts the standalone browser workspace with a board pane and a
176
+ pycodex session pane
174
177
  - interactive mode supports `/exit` and `/quit`
175
178
  - interactive mode shows a compact event stream for user-visible phases such as
176
179
  tool execution and model follow-up after tool results
@@ -144,12 +144,15 @@ pycodex --put @127.0.0.1:5577
144
144
  pycodex --put /data/.codex/@127.0.0.1:5577
145
145
  pycodex --call SECRET-CALLID@127.0.0.1:5577 "Reply with exactly OK."
146
146
  pycodex doctor
147
+ pycodex-ws --listen 0.0.0.0:6007 --board ./board.html
147
148
  ```
148
149
 
149
150
  Current behavior:
150
151
 
151
152
  - with no argv prompt and a TTY stdin, enter interactive mode
152
153
  - with an argv prompt or piped stdin, run a single turn
154
+ - `pycodex-ws` starts the standalone browser workspace with a board pane and a
155
+ pycodex session pane
153
156
  - interactive mode supports `/exit` and `/quit`
154
157
  - interactive mode shows a compact event stream for user-visible phases such as
155
158
  tool execution and model follow-up after tool results
@@ -1,17 +1,23 @@
1
1
  # Alignment
2
2
 
3
- This document records the current prompt/context alignment work between
4
- `pycodex` and upstream Codex from `https://github.com/openai/codex`.
3
+ This document records the current alignment work between `pycodex` and upstream
4
+ Codex from `https://github.com/openai/codex`: prompt/context assembly,
5
+ model-visible tool schemas, and observed tool round-trip behavior.
5
6
 
6
7
  ## Scope
7
8
 
8
- The comparison in this pass focuses on the model-visible prompt assembly:
9
+ The original comparison pass focused on the model-visible prompt assembly:
9
10
 
10
11
  - `instructions`
11
12
  - `input` items
12
13
  - developer/contextual user message shape
13
14
  - `AGENTS.md` / environment-context injection
14
15
 
16
+ The current document also tracks tool alignment at two different layers:
17
+
18
+ - request-visible payloads captured from real outbound `/responses` requests
19
+ - class-level `BaseTool` descriptions, schemas, and runtime result shapes
20
+
15
21
  It does not claim full request parity for every runtime mode yet.
16
22
 
17
23
  ## Comparison method
@@ -37,10 +43,47 @@ The repository copy of that helper server now lives at
37
43
  The temporary capture artifacts used during debugging are intentionally not part
38
44
  of the repository contract and are not documented here as stable project files.
39
45
 
46
+ Tool-specific status uses two inputs:
47
+
48
+ - proxy captures of actual upstream Codex and `pycodex` requests/results
49
+ - source inspection against the latest upstream tool specifications
50
+
51
+ ## Status checkpoint (2026-06-23)
52
+
53
+ - Latest upstream source checked: `openai/codex` `83c4934`
54
+ (`2026-06-23 00:31:56 -0700`, `Remove redundant Codex Apps cache guard`).
55
+ - Live request captures in this pass used installed `codex-cli 0.138.0`.
56
+ - Prompt/context parity remains aligned for the compared non-interactive `exec`
57
+ path and the captured default two-turn `codex-tui` path, modulo dynamic ids.
58
+ - The raw JSON tool fallback files have been deleted:
59
+ `pycodex/prompts/exec_tools.json` and
60
+ `pycodex/prompts/subagent_tools.json`.
61
+ - Class-level descriptions/schemas/runtime result shapes have now been refreshed
62
+ across the default local tool set, not only the tools previously hidden by the
63
+ JSON fallback.
64
+ - Request-visible tool serialization now comes from class-level `BaseTool`
65
+ specs. Function-tool `output_schema` remains available as local metadata but
66
+ is intentionally not serialized into `/responses` requests, matching upstream
67
+ `ResponsesApiTool.output_schema #[serde(skip)]`.
68
+ - Latest upstream-facing fixes included in the class/runtime layer:
69
+ `request_user_input.autoResolutionMs` with `[60000, 240000]` clamping,
70
+ `view_image.detail` (`high` default, `original` opt-in), `close_agent`
71
+ returning `previous_status`, and `spawn_agent` guidance that spawned agents
72
+ inherit the current model by default.
73
+ - Post-delete proxy compare:
74
+ `uv run python tests/compare_tool_schemas.py --root .tmp/tool_schema_proxy_compare_after_fallback_delete_2 --timeout-seconds 240`.
75
+ The request-visible payloads are now equal for `write_stdin`, `web_search`,
76
+ `update_plan`, `apply_patch`, and `view_image` on the captured default path.
77
+ `exec_command` intentionally omits upstream approval/sandbox parameters
78
+ (`sandbox_permissions`, `justification`, `prefix_rule`) because pycodex skips
79
+ that authorization path by design; its description also tells the agent that
80
+ it can reply first for long tasks and will be invoked to continue when the
81
+ task finishes.
82
+
40
83
  ## Result
41
84
 
42
- As of this snapshot, prompt/context parity is achieved for the non-interactive
43
- `exec` comparison:
85
+ As of this snapshot, prompt/context parity is achieved for the compared
86
+ non-interactive `exec` path:
44
87
 
45
88
  - `instructions` match exactly
46
89
  - `input` match exactly
@@ -48,10 +91,19 @@ As of this snapshot, prompt/context parity is achieved for the non-interactive
48
91
  In other words, the model-visible prompt dump for `pycodex` and upstream Codex
49
92
  is currently identical for this comparison scenario.
50
93
 
94
+ Tool alignment is also materially improved: all default local tool classes have
95
+ been reviewed/refreshed against the latest upstream-facing specs where a current
96
+ upstream builtin exists. The request-visible payload for the compared official
97
+ tools now comes from class-level specs rather than vendored JSON snapshots.
98
+
51
99
  ## Current non-prompt status
52
100
 
53
101
  After prompt/context parity, the next comparison layer is the full outbound
54
- request shape. That work is in progress.
102
+ request shape. That work is still in progress, in layers:
103
+
104
+ - request-visible parity for captured paths
105
+ - class-level tool spec/runtime parity after deleting the JSON fallback
106
+ - broader runtime parity for uncaptured modes
55
107
 
56
108
  At the time of writing:
57
109
 
@@ -69,7 +121,10 @@ At the time of writing:
69
121
  - transport/header parity is now aligned for the compared path, including the
70
122
  sub-agent `x-openai-subagent` header and the observed `workspaces` omission
71
123
  on later sub-agent turns
72
- - tool schema parity is aligned for the compared exec-mode tool subset
124
+ - request-visible tool schema parity is aligned for the compared exec-mode and
125
+ default TUI captured paths where upstream still exposes the same tool names
126
+ - class-level tool descriptions, input schemas, output schemas, and notable
127
+ runtime result shapes have been refreshed across the default local tool set
73
128
 
74
129
  The current implementation already matches:
75
130
 
@@ -80,7 +135,8 @@ The current implementation already matches:
80
135
  - session-scoped request id headers
81
136
  - turn metadata header shape (`turn_id` + `sandbox`)
82
137
  - mode-aware `originator` header
83
- - exact exec-mode tool schema payloads via vendored snapshot at the tool layer
138
+ - exact exec-mode tool schema payloads on the compared path, now generated from
139
+ class-level tool specs rather than vendored JSON snapshots
84
140
  - `User-Agent` string for the compared non-interactive path
85
141
 
86
142
  The main remaining deltas are now outside the prompt dump itself:
@@ -88,6 +144,9 @@ The main remaining deltas are now outside the prompt dump itself:
88
144
  - dynamic run-specific values such as generated session ids and turn ids
89
145
  - behavior outside the compared non-interactive `exec` path and the captured
90
146
  default two-turn TUI path, especially other runtime modes not yet captured
147
+ - upstream's current default-path migration to `tool_search` / deferred
148
+ multi-agent tools and goal tools, while `pycodex` still exposes the legacy
149
+ flat sub-agent tools on the first request
91
150
 
92
151
  ## Proxy tool-schema compare
93
152
 
@@ -102,21 +161,45 @@ The main remaining deltas are now outside the prompt dump itself:
102
161
  - 从 `tests/TESTS.md` 的真实 smoke tool 表读取工具顺序
103
162
  - 逐个比较这条被抓到的 request path 里真正暴露给模型的 tool schema
104
163
 
105
- 在当前默认 CLI non-exec / `codex-tui` 这条被抓到的路径上,已经确认 schema
106
- 一致的工具有:
164
+ 注意:这项比较验证的是“这条 request 上模型实际看到的 payload”。raw JSON fallback
165
+ 删除后,这项比较已经能证明当前被抓路径里的 official tool payload 来自类内
166
+ `BaseTool` spec。
167
+
168
+ 删除 fallback 后的最新结果:
169
+
170
+ - command:
171
+ `env -u VIRTUAL_ENV uv run python tests/compare_tool_schemas.py --root .tmp/tool_schema_proxy_compare_after_fallback_delete_2 --timeout-seconds 240`
172
+ - upstream request:
173
+ `.tmp/tool_schema_proxy_compare_after_fallback_delete_2/upstream/008_POST_v1_responses.json`
174
+ - `pycodex` request:
175
+ `.tmp/tool_schema_proxy_compare_after_fallback_delete_2/pycodex/001_POST_v1_responses.json`
176
+ - comparison:
177
+ `.tmp/tool_schema_proxy_compare_after_fallback_delete_2/comparison.json`
178
+
179
+ 在当前默认 CLI non-exec / `codex-tui` 这条被抓到的路径上,已经确认 request-visible
180
+ schema 一致的工具有:
107
181
 
108
- - `exec_command`
109
182
  - `write_stdin`
110
183
  - `update_plan`
111
- - `request_user_input`
112
184
  - `apply_patch`
113
185
  - `web_search`
114
186
  - `view_image`
115
- - `spawn_agent`
116
- - `send_input`
117
- - `resume_agent`
118
- - `wait_agent`
119
- - `close_agent`
187
+
188
+ 仍需分层解释的差异:
189
+
190
+ - `exec_command`:`pycodex` 刻意不暴露 upstream 的
191
+ `sandbox_permissions` / `justification` / `prefix_rule`,因为当前实现明确跳过
192
+ approval/sandbox escalation 逻辑;description 还额外提示本地差异:长任务可以先回复
193
+ 用户,任务完成时 agent 会被 invoke 来继续处理;其余参数和运行时默认/范围约束按类内
194
+ schema 对齐。
195
+ - `request_user_input`:`pycodex` 按 upstream source main 建模,带
196
+ `autoResolutionMs`;installed `codex-cli 0.138.0` 的 live capture 仍未带该字段。
197
+ - `spawn_agent` / `send_input` / `resume_agent` / `wait_agent` /
198
+ `close_agent`:upstream 当前首轮 request 不再平铺暴露这些工具,而是暴露
199
+ `tool_search`,并由 deferred discovery 加载 Multi-agent tools。`pycodex` 仍在首轮
200
+ request 平铺暴露 legacy sub-agent tools。
201
+ - upstream 当前还额外暴露 `get_goal` / `create_goal` / `update_goal`;`pycodex`
202
+ 尚未实现 goal tools。
120
203
 
121
204
  同一条被抓到的路径下,当前 upstream Codex 和 `pycodex` 都没有暴露这些工具:
122
205
 
@@ -131,6 +214,7 @@ The main remaining deltas are now outside the prompt dump itself:
131
214
 
132
215
  这里的结论只针对当前被抓到的默认 `codex-tui` request path;它不等价于说这些
133
216
  工具在上游全局不存在,只说明这次实际 context capture 没把它们带进首轮请求。
217
+ 这些工具的类内 description/schema 状态见下面的 per-tool 表。
134
218
 
135
219
  ## Tool-call / tool-result schema compare
136
220
 
@@ -185,6 +269,8 @@ The main remaining deltas are now outside the prompt dump itself:
185
269
  - Plan-mode happy path 现在也已按 upstream 源码建模:handler 会要求每个问题都带
186
270
  非空 `options`、自动给每个问题补 `isOther=true`,并把结构化答案序列化成
187
271
  JSON 字符串回传到下一轮 `function_call_output.output`,同时补 `success=true`
272
+ - 类内 schema 已补齐 upstream 最新的 `autoResolutionMs` 字段;runtime 会把非空值
273
+ clamp 到 `[60000, 240000]` 后交给交互层
188
274
  - 当前仓库已经新增 deterministic proxy compare 脚本
189
275
  `uv run python tests/compare_request_user_input_roundtrip.py`
190
276
  - 该脚本会用同一套固定 origin SSE + proxy capture,同步比较 upstream Codex
@@ -200,7 +286,9 @@ The main remaining deltas are now outside the prompt dump itself:
200
286
  - `function_call` item schema 一致
201
287
  - 下一轮里的 `function_call_output` schema 一致
202
288
  - 当前样本里,两边都会把 tool result 回传成同一个 `input_image` 列表,
203
- `image_url` data URL 也一致;当前抓到的默认样本没有显式 `detail`
289
+ `image_url` data URL 也一致
290
+ - 类内 schema/runtime 已补齐 upstream 最新的 `detail` 参数:省略时按 `high`
291
+ 返回,显式 `original` 时保留并回传到 `input_image.detail`
204
292
  - `spawn_agent`
205
293
  - 当前先补齐了一个最小 validation-path:当模型在没有 `message` / `items` 的情况下
206
294
  强制调用 `spawn_agent` 时,upstream Codex 和 `pycodex` 现在都会回传同一个固定错误:
@@ -209,6 +297,8 @@ The main remaining deltas are now outside the prompt dump itself:
209
297
  `agent_id` / `nickname`
210
298
  - 当前 `pycodex` 也已经改成 uuid7 agent id,并接上了与 upstream 同一批候选名的
211
299
  默认昵称池;剩余差异主要只在具体抽到哪个昵称这类动态值
300
+ - 类内 description 已刷新到 upstream 最新方向:spawned agents 默认继承当前模型,
301
+ 不再在 tool desc 里硬编码模型 picker 列表
212
302
  - `send_input`
213
303
  - `function_call` item schema 一致
214
304
  - 下一轮里的 `function_call_output` schema 一致
@@ -221,8 +311,8 @@ The main remaining deltas are now outside the prompt dump itself:
221
311
  - `close_agent`
222
312
  - `function_call` item schema 一致
223
313
  - 下一轮里的 `function_call_output` schema 一致
224
- - 当前仓库已把返回键名从 `previous_status` 改成 `status`,与 upstream 当前 happy
225
- path 对齐
314
+ - upstream 最新源码里的输出键名是 `previous_status`;当前仓库的 schema/runtime
315
+ 回到 `previous_status`
226
316
  - `resume_agent`
227
317
  - 真实 happy path 已补抓:子 agent 完成、`close_agent`、`resume_agent`、再
228
318
  `send_input` 的完整链路现在已经对齐
@@ -236,8 +326,8 @@ The main remaining deltas are now outside the prompt dump itself:
236
326
  - request body 里的 `prompt_cache_key` 现在也改成和 upstream 一样:
237
327
  parent thread 维持自己的稳定 session id,而 sub-agent thread 则改用
238
328
  `agent_id` 本身,不再错误复用 parent 的 cache key
239
- - 这 6 个 sub-agent tool schema 现在也已经固化到
240
- `pycodex/prompts/subagent_tools.json`,并由测试逐字节锁定
329
+ - 这 6 个 sub-agent tool schema 现在来自类内 `BaseTool` spec,并由 CLI
330
+ serialization 测试覆盖;`pycodex/prompts/subagent_tools.json` 已删除
241
331
  - `sub-agent notification`
242
332
  - 在 `wait_agent` 之后,upstream 会向 parent thread history 额外注入一条
243
333
  `user` message:
@@ -271,30 +361,37 @@ The main remaining deltas are now outside the prompt dump itself:
271
361
  - `not exposed`:在当前默认 `codex-tui` 首轮 request path 下两边都没把这个工具带进 `tools`
272
362
  - `first-request same`:首轮 `tools` schema 已确认一致
273
363
  - `round-trip same`:真实触发后的 `tool_call` / `tool_result` 外层 schema 已确认一致
364
+ - `class aligned`:类内 description/schema/runtime 已按当前 upstream-facing spec 刷新,
365
+ 且不再依赖 raw JSON fallback
366
+ - `local shim`:本地工具有实现和 smoke 覆盖,但当前 upstream 默认 CLI 抓包没有同名
367
+ official model-visible tool 可直接逐字节对齐
368
+ - `legacy-flat mismatch`:`pycodex` 首轮仍直接暴露 legacy flat tool;upstream 首轮
369
+ 已迁移到 `tool_search` / deferred discovery
274
370
  - `pending`:这条工具链还没有补完真实触发对比
275
371
 
276
- | tool | current status | note |
277
- |---|---|---|
278
- | `shell` | `not exposed` | 当前默认 `codex-tui` 首轮路径不带这个工具 |
279
- | `shell_command` | `not exposed` | 当前默认 `codex-tui` 首轮路径不带这个工具 |
280
- | `exec_command` | `round-trip same` | `function_call` / `function_call_output` 外层 shape 一致;默认 `10_000` token 截断和未读输出 `1 MiB` head/tail cap 也已补齐,仅剩动态值差异 |
281
- | `write_stdin` | `round-trip same` | `function_call` / `function_call_output` 外层 shape 一致;默认 `10_000` token 截断和未读输出 `1 MiB` head/tail cap 也已补齐,仅剩动态值差异 |
282
- | `exec` | `not exposed` | 当前默认 `codex-tui` 首轮路径不带这个工具 |
283
- | `wait` | `not exposed` | 当前默认 `codex-tui` 首轮路径不带这个工具 |
284
- | `web_search` | `round-trip same` | `web_search_call` shape 一致;provider-native tool 无单独客户端 `tool_result` |
285
- | `update_plan` | `round-trip same` | `function_call` / `function_call_output` 外层 shape 一致 |
286
- | `request_user_input` | `round-trip same (Default mode); Plan mode delta` | Default-mode unavailable 路径已 capture 对齐;Plan-mode deterministic proxy compare 已补做:本机 installed `codex-cli 0.115.0` live capture 里,`function_call` 已一致,`function_call_output` 仅差 `pycodex` 多带 `success=true` |
287
- | `request_permissions` | `not exposed` | 当前默认 `codex-tui` 首轮路径不带这个工具 |
288
- | `apply_patch` | `round-trip same` | `custom_tool_call` / `custom_tool_call_output` 外层 shape 一致;当前样本里输出包装也已对齐,仅剩具体文件路径差异 |
289
- | `grep_files` | `not exposed` | 当前默认 `codex-tui` 首轮路径不带这个工具 |
290
- | `read_file` | `not exposed` | 当前默认 `codex-tui` 首轮路径不带这个工具 |
291
- | `list_dir` | `not exposed` | 当前默认 `codex-tui` 首轮路径不带这个工具 |
292
- | `view_image` | `round-trip same` | `function_call` / `function_call_output` 外层 shape 一致;当前样本里 `input_image` data URL 也一致 |
293
- | `spawn_agent` | `round-trip same` | validation-path 与 happy-path 都已补抓;剩余主要是动态 agent id / nickname 值 |
294
- | `send_input` | `round-trip same` | `function_call` / `function_call_output` 外层 shape 一致;仅剩动态 `submission_id` |
295
- | `resume_agent` | `round-trip same` | 已补抓真实 happy path;`resume_agent` 后的 `pending_init` 返回值、sub-agent tool 子集、sub-agent context 都已对齐 |
296
- | `wait_agent` | `round-trip same` | `function_call` / `function_call_output` 外层 shape 一致;仅剩动态 agent id |
297
- | `close_agent` | `round-trip same` | `function_call` / `function_call_output` 外层 shape 一致;parent-thread notification message 也已补齐 |
372
+ | tool | request-visible status | class/runtime status | note |
373
+ |---|---|---|---|
374
+ | `shell` | `not exposed` | `local shim` | 默认 `codex-tui` 首轮路径不带这个工具;类内 argv 执行语义和 schema 已整理,但没有当前 upstream 默认 CLI 同名 payload 可逐字节对齐 |
375
+ | `shell_command` | `not exposed` | `class aligned` | 默认首轮路径不带;类内 desc/schema 已刷新为 shell-string command 语义 |
376
+ | `exec_command` | `intentional approval-field/description delta; round-trip same` | `class aligned except skipped auth + local idle resume` | 删除 fallback 后不再暴露 `sandbox_permissions` / `justification` / `prefix_rule`,这是 pycodex 刻意跳过鉴权逻辑的差异;description 额外提示长任务可以先回复用户,任务完成时 agent 会被 invoke 来继续处理;其余参数按 schema 执行,`function_call` / `function_call_output` 外层 shape 一致;默认 `10_000` token 截断和未读输出 `1 MiB` head/tail cap 已补齐,仅剩动态值差异 |
377
+ | `write_stdin` | `first-request same; round-trip same` | `class aligned` | 删除 fallback 后首轮 schema 相等;`function_call` / `function_call_output` 外层 shape 一致;默认 `10_000` token 截断和未读输出 `1 MiB` head/tail cap 已补齐,仅剩动态值差异 |
378
+ | `exec` | `not exposed` | `class aligned` | 默认首轮路径不带;code-mode custom/freeform desc 和 grammar 已刷新,仍需 code-mode request-visible 抓包复测 |
379
+ | `wait` | `not exposed` | `class aligned` | 默认首轮路径不带;code-mode wait schema/runtime 已刷新,仍需 code-mode request-visible 抓包复测 |
380
+ | `web_search` | `first-request same; round-trip same` | `class aligned` | 删除 fallback 后 provider-native payload 相等,包含 `search_content_types=["text","image"]`;`web_search_call` shape 一致;provider-native tool 无单独客户端 `tool_result` |
381
+ | `update_plan` | `first-request same; round-trip same` | `class aligned` | 删除 fallback 后首轮 schema 相等;`function_call` / `function_call_output` 外层 shape 一致 |
382
+ | `request_user_input` | `round-trip same (Default mode); Plan mode source-aligned` | `class aligned` | Default-mode unavailable 路径已 capture 对齐;Plan-mode 已按 upstream main 建模,包含 `success=true` 和 `autoResolutionMs` clamp;本机 installed `codex-cli 0.115.0` live capture 仍少 `success=true` |
383
+ | `request_permissions` | `not exposed` | `class aligned` | 默认首轮路径不带;类内 desc/schema 已补 `environment_id` passthrough,交互 handler 仍是最小实现 |
384
+ | `apply_patch` | `first-request same; round-trip same` | `class aligned` | 删除 fallback 后 custom grammar 相等;`custom_tool_call` / `custom_tool_call_output` 外层 shape 一致;输出包装已对齐,仅剩具体文件路径差异 |
385
+ | `grep_files` | `not exposed` | `local shim` | 默认首轮路径不带;本地文件搜索 helper 有 schema/smoke,但当前 upstream 默认 CLI 没有同名 official payload 可直接对齐 |
386
+ | `read_file` | `not exposed` | `local shim` | 默认首轮路径不带;本地 slice/indentation 读文件 helper 有 schema/smoke,但当前 upstream 默认 CLI 没有同名 official payload 可直接对齐 |
387
+ | `list_dir` | `not exposed` | `local shim` | 默认首轮路径不带;本地目录树 helper 有 schema/smoke,但当前 upstream 默认 CLI 没有同名 official payload 可直接对齐 |
388
+ | `view_image` | `first-request same; round-trip same` | `class aligned` | 删除 fallback 后首轮 schema 相等;`function_call` / `function_call_output` 外层 shape 一致;类内已支持 `detail=high|original`,默认回传 `high` |
389
+ | `spawn_agent` | `legacy-flat mismatch; round-trip same` | `class aligned` | pycodex 首轮仍平铺暴露此工具,upstream 首轮改用 `tool_search`;历史 validation-path 与 happy-path 已补抓;类内 desc 已去掉硬编码模型 picker |
390
+ | `send_input` | `legacy-flat mismatch; round-trip same` | `class aligned` | pycodex 首轮仍平铺暴露此工具,upstream 首轮改用 `tool_search`;历史 round-trip 外层 shape 一致,仅剩动态 `submission_id` |
391
+ | `resume_agent` | `legacy-flat mismatch; round-trip same` | `class aligned` | pycodex 首轮仍平铺暴露此工具,upstream 首轮改用 `tool_search`;已补抓真实 happy path;`pending_init` 返回值、sub-agent tool 子集、sub-agent context 都已对齐 |
392
+ | `wait_agent` | `legacy-flat mismatch; round-trip same` | `class aligned` | pycodex 首轮仍平铺暴露此工具,upstream 首轮改用 `tool_search`;历史 round-trip 外层 shape 一致,仅剩动态 agent id |
393
+ | `close_agent` | `legacy-flat mismatch; round-trip same` | `class aligned` | pycodex 首轮仍平铺暴露此工具,upstream 首轮改用 `tool_search`;schema/runtime 输出为 upstream 当前的 `previous_status`;parent-thread notification message 也已补齐 |
394
+ | `ipython` | `not in default registry` | `local shim` | 这是可选 IPython attach helper,不属于默认 CLI tool 集合;当前没有 upstream default CLI 同名 payload 对齐目标 |
298
395
 
299
396
  ### Redacted example: request-level diff categories
300
397
 
@@ -308,12 +405,20 @@ same:
308
405
  - exec-mode tool subset membership
309
406
  - request context field presence
310
407
  - exec-mode tool schemas
408
+ - current default-path schemas for `write_stdin`, `web_search`, `update_plan`,
409
+ `apply_patch`, and `view_image`
311
410
  - user-agent semantics and compared string
312
411
 
313
412
  different:
314
413
  - dynamic request metadata values
414
+ - intentional `exec_command` approval/sandbox field omission and idle-resume
415
+ description in pycodex
315
416
  - transport-layer header casing / normalization
316
- - paths and modes not yet aligned beyond non-interactive `exec`
417
+ - paths and modes not yet aligned beyond captured `exec` / default TUI paths
418
+ - installed upstream `request_user_input` lacks source-main `autoResolutionMs`
419
+ - upstream default-path tool discovery now exposes `tool_search` instead of
420
+ legacy flat sub-agent tools
421
+ - upstream default-path goal tools are not implemented locally yet
317
422
  ```
318
423
 
319
424
  ## Redacted examples
@@ -457,7 +562,6 @@ Vendored upstream prompt data:
457
562
  - `pycodex/prompts/models.json`
458
563
  - `pycodex/prompts/permissions/sandbox_mode/`
459
564
  - `pycodex/prompts/permissions/approval_policy/`
460
- - `pycodex/prompts/exec_tools.json`
461
565
 
462
566
  Tests:
463
567
 
@@ -467,16 +571,31 @@ Tests:
467
571
 
468
572
  ## What is still out of scope here
469
573
 
470
- Prompt parity is not the same thing as full request parity.
471
-
472
- At the time this file was written, the remaining request-level differences are
473
- outside the prompt/context dump itself, for example:
474
-
475
- - dynamic request metadata values such as generated session ids and turn ids
476
- - behavior outside the currently aligned non-interactive `exec` path
477
- - broader runtime features such as sandbox / approvals / compact / memory
478
-
479
- Those are the next alignment target after the prompt/context pass.
574
+ Prompt/tool-schema parity is not the same thing as full Codex parity.
575
+
576
+ The remaining explicit alignment work is:
577
+
578
+ - Implement upstream's current default-path `tool_search` / deferred multi-agent
579
+ discovery, or explicitly decide to keep legacy flat agent tools as a
580
+ compatibility surface.
581
+ - Add goal tools (`get_goal`, `create_goal`, `update_goal`) if the local CLI is
582
+ intended to match the current upstream default tool set.
583
+ - Capture/compare code-mode request-visible payloads for `exec` / `wait`, not
584
+ only their class-level schemas and smoke behavior.
585
+ - Decide whether local helper tools (`shell`, `grep_files`, `read_file`,
586
+ `list_dir`, optional `ipython`) are intended to stay as local shims or should
587
+ be replaced/renamed as upstream evolves.
588
+ - Broaden runtime parity beyond the currently aligned non-interactive `exec`
589
+ path and captured default two-turn TUI path.
590
+ - App-server `AdditionalTools` handling from upstream `6e0c8b4` is not
591
+ implemented locally; this pass only verified it is not a builtin CLI tool spec
592
+ change for the local path.
593
+ - Newer multi-agent v2-style tools such as `send_message`, `followup_task`,
594
+ `interrupt_agent`, and `list_agents` are not implemented in this local tool
595
+ registry yet.
596
+ - Broader runtime features such as sandbox/approval enforcement, WebSocket/HTTP
597
+ transport fallback, cancellation markers, MCP/connectors/plugins, memory, and
598
+ review flows remain partial or out of scope for this pass.
480
599
 
481
600
  ## Steer semantics
482
601
 
@@ -228,7 +228,7 @@ A skill is a set of local instructions to follow that is stored in a `SKILL.md`
228
228
  - `permissions` prompt 的来源目录不同:Codex 从 `codex-rs/protocol/src/prompts/permissions/...` 取,`pycodex` 从 `./pycodex/prompts/permissions/...` 取。
229
229
  - `collaboration_mode` block 的来源不同:Codex 用上游协作提示模板,`pycodex` 用 `./pycodex/prompts/collaboration_default.md` / `./pycodex/prompts/collaboration_plan.md`。
230
230
  - `skills guidance` 的来源不同:Codex 用上游固定 guidance,`pycodex` 用 `./pycodex/context.py::SKILLS_GUIDANCE`。
231
- - `tools` 的构造来源不同:Codex 从上游 runtime tool registry 出来,`pycodex` `./pycodex/prompts/exec_tools.json + ToolSpec.serialize()` 出来。
231
+ - `tools` 的构造来源不同:Codex 从上游 runtime tool registry 出来,`pycodex` 从本地 `BaseTool` class specs 经 `ToolSpec.serialize()` 出来。
232
232
 
233
233
  ### 1.3 首轮请求不变量
234
234
 
@@ -462,5 +462,5 @@ ProviderBuiltinToolSchema = {
462
462
  当前实现方式:
463
463
 
464
464
  - 不再使用 prompt 级别的 `serialized_tools` override。
465
- - 在工具层直接复用上游 snapshot
466
- - snapshot 文件位于 `./pycodex/prompts/exec_tools.json`。
465
+ - 不再使用 `pycodex/prompts/exec_tools.json` 这类 raw JSON fallback。
466
+ - 在工具类内维护 description / input schema;`ToolSpec.serialize()` 负责生成 request-visible payload。
@@ -5,7 +5,7 @@ import re
5
5
  from typing import Callable
6
6
 
7
7
  from .context import ContextManager
8
- from .model import ModelClient
8
+ from .model import ModelClient, ResponsesIncompleteError
9
9
  from .protocol import (
10
10
  AgentEvent,
11
11
  AssistantMessage,
@@ -17,7 +17,7 @@ from .protocol import (
17
17
  TurnResult,
18
18
  UserMessage,
19
19
  )
20
- from .tools import ToolContext, ToolRegistry
20
+ from .tools import ExecCommandTool, ToolContext, ToolRegistry, UnifiedExecManager
21
21
  from .utils import uuid7_string
22
22
  import typing
23
23
 
@@ -46,6 +46,7 @@ _CONTEXT_LENGTH_ERROR_MARKERS = (
46
46
  "exceeds the context window",
47
47
  "exceeded the context window",
48
48
  )
49
+ TERMINAL_TURN_EVENTS = {"turn_completed", "turn_failed", "turn_interrupted"}
49
50
 
50
51
 
51
52
  class TurnInterrupted(RuntimeError):
@@ -85,6 +86,15 @@ class Agent:
85
86
  self._last_total_usage_tokens: 'typing.Union[int, None]' = None
86
87
  self.runtime_environment = runtime_environment
87
88
  self.interrupt_asap = False
89
+ self._turn_running = False
90
+ exec_command_tool = self._tool_registry.get_tool("exec_command")
91
+ self._exec_manager = (
92
+ exec_command_tool._manager
93
+ if isinstance(exec_command_tool, ExecCommandTool)
94
+ else None
95
+ )
96
+ if self._exec_manager is not None:
97
+ self._exec_manager.set_notify_hook(self.maybe_invoke)
88
98
 
89
99
  @property
90
100
  def history(self) -> 'typing.Tuple[ConversationItem, ...]':
@@ -129,6 +139,7 @@ class Agent:
129
139
  async def run_turn(
130
140
  self, texts: 'typing.List[str]', turn_id: 'typing.Union[str, None]' = None
131
141
  ) -> 'TurnResult':
142
+ self._turn_running = True
132
143
  turn_id = turn_id or uuid7_string()
133
144
  self.interrupt_asap = False
134
145
  new_user_messages = [UserMessage(text=text) for text in texts]
@@ -168,16 +179,10 @@ class Agent:
168
179
  item_count=len(response.items),
169
180
  )
170
181
 
171
- tool_calls: 'typing.List[ToolCall]' = []
172
- persisted_response_items: 'typing.List[ConversationItem]' = []
173
- for item in response.items:
174
- self._history.append(item)
175
- persisted_response_items.append(item)
176
- if isinstance(item, AssistantMessage):
177
- last_assistant_message = item.text
178
- elif isinstance(item, ToolCall):
179
- tool_calls.append(item)
180
- self._persist_history_items(persisted_response_items)
182
+ recorded_items = self._record_model_response_items(response.items)
183
+ tool_calls = recorded_items[1]
184
+ if recorded_items[2] is not None:
185
+ last_assistant_message = recorded_items[2]
181
186
 
182
187
  if not tool_calls:
183
188
  self._raise_if_interrupt_requested(
@@ -191,6 +196,7 @@ class Agent:
191
196
  iteration=iteration,
192
197
  output_text=last_assistant_message,
193
198
  )
199
+ self._turn_running = False
194
200
  return TurnResult(
195
201
  turn_id=turn_id,
196
202
  output_text=last_assistant_message,
@@ -211,6 +217,7 @@ class Agent:
211
217
  output_text=last_assistant_message,
212
218
  )
213
219
  except TurnInterrupted:
220
+ self._turn_running = False
214
221
  raise
215
222
  except Exception as exc:
216
223
  context_usage = _usage_from_context_length_error(str(exc))
@@ -224,8 +231,29 @@ class Agent:
224
231
  error=str(exc),
225
232
  error_type=type(exc).__name__,
226
233
  )
234
+ self._turn_running = False
227
235
  raise
228
236
 
237
+ async def maybe_invoke(self, event: 'typing.Dict[str, object]') -> 'bool':
238
+ if self._turn_running or event.get("type") != "exec_command_completed":
239
+ return False
240
+ payload = {
241
+ "session_id": event.get("session_id"),
242
+ "exit_code": event.get("exit_code"),
243
+ "command": event.get("command"),
244
+ }
245
+ text = (
246
+ "<exec_command_completed>\n"
247
+ f"{json.dumps(payload, ensure_ascii=False, separators=(',', ':'))}\n"
248
+ "</exec_command_completed>"
249
+ )
250
+ self._turn_running = True
251
+ task = asyncio.create_task(self.run_turn([text]))
252
+ task.add_done_callback(
253
+ lambda task: None if task.cancelled() else task.exception()
254
+ )
255
+ return True
256
+
229
257
  async def _execute_tool_batch(
230
258
  self,
231
259
  turn_id: 'str',
@@ -294,10 +322,18 @@ class Agent:
294
322
  return result
295
323
 
296
324
  def _emit(self, kind: 'str', turn_id: 'str', **payload: 'object') -> 'None':
325
+ if kind in TERMINAL_TURN_EVENTS:
326
+ payload["background_exec_count"] = self._background_exec_count()
297
327
  self._event_handler(
298
328
  AgentEvent(kind=kind, turn_id=turn_id, payload=dict(payload))
299
329
  )
300
330
 
331
+ def _background_exec_count(self) -> 'int':
332
+ manager: 'typing.Union[UnifiedExecManager, None]' = self._exec_manager
333
+ if manager is None:
334
+ return 0
335
+ return manager.running_session_count()
336
+
301
337
  def _persist_history_items(
302
338
  self,
303
339
  items: 'typing.Iterable[ConversationItem]',
@@ -310,6 +346,28 @@ class Agent:
310
346
  except Exception: # pragma: no cover - persistence should not break turns
311
347
  return
312
348
 
349
+ def _record_model_response_items(
350
+ self,
351
+ items: 'typing.Iterable[object]',
352
+ include_tool_calls: 'bool' = True,
353
+ ) -> 'typing.Tuple[typing.Tuple[ConversationItem, ...], typing.List[ToolCall], typing.Union[str, None]]':
354
+ persisted_response_items: 'typing.List[ConversationItem]' = []
355
+ tool_calls: 'typing.List[ToolCall]' = []
356
+ last_assistant_message = None
357
+ for item in items:
358
+ if isinstance(item, ToolCall) and not include_tool_calls:
359
+ continue
360
+ if not isinstance(item, (AssistantMessage, ToolCall, ReasoningItem)):
361
+ continue
362
+ self._history.append(item)
363
+ persisted_response_items.append(item)
364
+ if isinstance(item, AssistantMessage):
365
+ last_assistant_message = item.text
366
+ elif isinstance(item, ToolCall):
367
+ tool_calls.append(item)
368
+ self._persist_history_items(persisted_response_items)
369
+ return tuple(persisted_response_items), tool_calls, last_assistant_message
370
+
313
371
  def _handle_model_stream_event(self, turn_id: 'str', event: 'ModelStreamEvent') -> 'None':
314
372
  if event.kind == "token_count":
315
373
  self._remember_token_usage(event.payload.get("usage"))
@@ -355,6 +413,13 @@ class Agent:
355
413
  prompt,
356
414
  lambda event: self._handle_model_stream_event(turn_id, event),
357
415
  )
416
+ except ResponsesIncompleteError as exc:
417
+ if exc.reason == "max_output_tokens":
418
+ self._record_model_response_items(
419
+ exc.partial_items,
420
+ include_tool_calls=False,
421
+ )
422
+ raise
358
423
  except Exception as exc:
359
424
  error_message = str(exc)
360
425
  if (