python-codex 0.1.13__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_codex-0.1.13 → python_codex-0.2.0}/AGENTS.md +7 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/PKG-INFO +4 -1
- {python_codex-0.1.13 → python_codex-0.2.0}/README.md +3 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/docs/ALIGNMENT.md +175 -56
- {python_codex-0.1.13 → python_codex-0.2.0}/docs/CONTEXT.md +3 -3
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/agent.py +71 -11
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/cli.py +16 -356
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/context.py +12 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/feishu_card.py +76 -30
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/feishu_link.py +131 -11
- python_codex-0.2.0/pycodex/interactive_session.py +397 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/model.py +11 -22
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/protocol.py +0 -5
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/runtime.py +23 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/runtime_services.py +2 -2
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/agent_tool_schemas.py +1 -1
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/apply_patch_tool.py +1 -1
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/base_tool.py +1 -27
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/close_agent_tool.py +11 -4
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/code_mode_manager.py +1 -1
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/exec_command_tool.py +40 -16
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/exec_tool.py +18 -2
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/grep_files_tool.py +19 -6
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/ipython_tool.py +3 -2
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/list_dir_tool.py +19 -6
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/read_file_tool.py +39 -9
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/request_permissions_tool.py +12 -1
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/request_user_input_tool.py +28 -1
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/send_input_tool.py +4 -2
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/shell_command_tool.py +23 -6
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/shell_tool.py +13 -4
- python_codex-0.2.0/pycodex/tools/spawn_agent_tool.py +119 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/unified_exec_manager.py +49 -93
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/update_plan_tool.py +14 -6
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/view_image_tool.py +17 -16
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/wait_agent_tool.py +15 -3
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/wait_tool.py +18 -4
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/web_search_tool.py +2 -1
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/write_stdin_tool.py +42 -10
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/utils/compactor.py +7 -1
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/utils/session_persist.py +42 -1
- python_codex-0.2.0/pycodex/utils/truncation.py +206 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/utils/visualize.py +34 -15
- {python_codex-0.1.13 → python_codex-0.2.0}/pyproject.toml +7 -2
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/TESTS.md +4 -4
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_agent.py +442 -2
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_builtin_tools.py +277 -4
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_cli.py +390 -17
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_compactor.py +21 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_context.py +20 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_feishu_card.py +48 -4
- python_codex-0.2.0/tests/test_feishu_link.py +208 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_model.py +130 -13
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_py36_syntax.py +1 -1
- python_codex-0.2.0/tests/test_workspace_server.py +1431 -0
- python_codex-0.2.0/workspace_server/__init__.py +23 -0
- python_codex-0.2.0/workspace_server/__main__.py +5 -0
- python_codex-0.2.0/workspace_server/app.py +1347 -0
- python_codex-0.2.0/workspace_server/workspace.html +866 -0
- python_codex-0.1.13/pycodex/prompts/exec_tools.json +0 -411
- python_codex-0.1.13/pycodex/prompts/subagent_tools.json +0 -163
- python_codex-0.1.13/pycodex/tools/spawn_agent_tool.py +0 -96
- python_codex-0.1.13/tests/test_feishu_link.py +0 -25
- {python_codex-0.1.13 → python_codex-0.2.0}/.github/workflows/publish.yml +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/.github/workflows/test.yml +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/.gitignore +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/LICENSE +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/README_ZH.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/docs/responses_server/README.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/__init__.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/collaboration.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/compat.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/doctor.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/portable.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/portable_server.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/collaboration_default.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/collaboration_plan.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/default_base_instructions.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/models.json +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/permissions/approval_policy/never.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/permissions/approval_policy/on_failure.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/permissions/approval_policy/on_request.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/permissions/approval_policy/on_request_rule_request_permission.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/permissions/approval_policy/unless_trusted.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/permissions/sandbox_mode/danger_full_access.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/permissions/sandbox_mode/read_only.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/prompts/permissions/sandbox_mode/workspace_write.md +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/__init__.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/exec_runtime.js +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/tools/resume_agent_tool.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/utils/__init__.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/utils/async_bridge.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/utils/debug.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/utils/dotenv.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/utils/get_env.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/utils/random_ids.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/pycodex/utils/toolcall_visualize.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/__init__.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/__main__.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/app.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/config.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/messages_api.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/payload_processors.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/server.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/session_store.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/stream_router.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/tools/__init__.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/tools/custom_adapter.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/tools/web_search.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/responses_server/trajectory_dump.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/__init__.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/compare_request_user_input_roundtrip.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/compare_steer_request_bodies.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/compare_tool_schemas.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/fake_responses_server.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/fakes.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/responses_server/fake_chat_completions_server.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/responses_server/test_server.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_async_bridge.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_doctor.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_fake_responses_server.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_ipython_tool.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_portable.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tests/test_visualize.py +0 -0
- {python_codex-0.1.13 → python_codex-0.2.0}/tools/feishu_oauth.py +0 -0
|
@@ -18,7 +18,9 @@
|
|
|
18
18
|
- 真实 vLLM `0.19.0` 的 `/v1/messages` 会对缺失 `max_tokens` 直接返回 `400`;messages 适配层必须总是补这个字段。当前约定是优先透传请求里的 `max_output_tokens`/`max_tokens`,否则回退到默认 `32000`。
|
|
19
19
|
- 对 vLLM chat-completions 打开 `return_token_ids=true` 时,streaming `prompt_token_ids` 只出现在首个 chunk,后续每个 chunk 的 `choices[*].token_ids` 都是 decode delta;要在 `responses_server` 侧导出 trajectory 时,按“首个 `prompt_token_ids` + 按序拼接所有 chunk 的 `token_ids`”重建即可。
|
|
20
20
|
- `pycodex` 默认是最小交互 CLI;无 prompt 时进入 REPL,并通过 `AgentRuntime` 跑外层提交循环。当前会显示最小事件流、assistant 流式输出、简单 title/history(`/title`, `/history`),并默认注册一组与原版一一对应的本地工具子集。
|
|
21
|
+
- Web workspace lives in the standalone `workspace_server/` package and is launched with `pycodex-ws --listen <host:port> --board <html>`, not through `pycodex` CLI dispatch. CLI and web share `pycodex.interactive_session.run_interactive_session`; slash-command semantics such as `/resume`, `/compact`, `/model`, and `/link` belong to that shared interactive shell loop, while workspace only supplies a web view/input adapter and tab/session lifecycle.
|
|
21
22
|
- 交互 CLI 的事件流展示优先表达用户可感知的阶段(例如工具开始/完成、模型回看工具结果),不要直接把内部 `iteration` 计数暴露成主要状态文案;`iterations` 应继续保留在 `TurnResult` 等程序化结果里。
|
|
23
|
+
- 在交互 CLI 里,`stream_error` 表示当前 Responses stream attempt 失败且模型客户端可能马上自动重试;不要在这个事件上 `finish_stream()` 输出当前 assistant delta buffer,否则第一次失败 attempt 的文本和重试成功后的最终回复会重复显示。真正 fatal 的失败仍由 `turn_failed` 走通用 flush,保留 partial 输出。
|
|
22
24
|
- prompt/context 相关逻辑统一放在 `pycodex/context.py`:`AgentLoop` 只维护真实会话历史;每轮请求前由 `ContextManager` 注入 base instructions、developer message、`AGENTS.md` 指令和 `<environment_context>`,且这些注入项不写回 history。
|
|
23
25
|
- 对需要 model-specific prompt 的本地 model slug,直接在 vendored `pycodex/prompts/models.json` 补条目;当前 `step-3.5-flash` / `step-3.5-flash-2603` / `step-3.6` 已按这个方式接入。
|
|
24
26
|
- 交互 REPL 的 context 用量提示也应尽量贴近上游语义:展示“剩余 context 百分比”而不是原始 token 数;计算时按上游同款 `BASELINE_TOKENS=12000` 做归一化,并在模型元数据只有 `context_window` 时默认按 `95%` effective window 处理。只要当前模型能解析出 context window,初始 prompt 就先显示 `100%`,等首个 usage 回来后再刷新成真实值。
|
|
@@ -58,4 +60,9 @@
|
|
|
58
60
|
- `pycodex` 本地 session 保存现在也按上游思路走:新 session 一开始就分配稳定的 uuidv7 thread/session id,并把历史增量追加到 `CODEX_HOME/sessions/.../rollout-*.jsonl`;`/resume` 列表应只展示至少有真实 user message 的 rollout,避免空白新 session 污染恢复列表。
|
|
59
61
|
- auto-compact 对齐上游配置名 `model_auto_compact_token_limit`;为空时关闭,触发依据是最近一次模型上报的 `usage.total_tokens`,pre-turn 压缩上一轮历史,mid-turn 压缩工具 follow-up 前的当前历史,并继续复用现有 compacted rollout 记录。
|
|
60
62
|
- Responses streaming 里的 `response.incomplete` 不是连接断开:不要让 `ResponsesModelClient` 把它当 retryable incomplete stream 反复重连。普通 turn 应明确报 `response.incomplete`;compact 请求如果已经收到 assistant partial summary,可以用这个 partial summary 完成 replacement history,避免 midturn auto-compact 卡在 5 次 retry。
|
|
63
|
+
- 上游 Codex Responses 请求当前不传模型级 `max_output_tokens`,也没有读取 `model_max_output_tokens` 这个 config key;这个名字在上游主要用于工具输出截断,不要为了上游对齐把它加进模型请求。
|
|
64
|
+
- 普通 turn 遇到 `ResponsesIncompleteError(reason="max_output_tokens")` 时,上游语义是保留异常前已经收到的 `response.output_item.done`;pycodex 因为模型客户端按整轮返回,需要在异常路径把这些 done assistant/reasoning items 写入 history 和 rollout,才能让用户下一句 `continue` 接上。不要把纯 `response.output_text.delta` 合成 history,也不要持久化没有 tool result 的 tool call。
|
|
61
65
|
- Feishu card tests read `~/.codex/.feishu_refresh_token` through production code; when running `tests/test_feishu_card.py` locally, isolate HOME (for example `HOME=/tmp/pycodex-empty-home env -u VIRTUAL_ENV uv run pytest tests/test_feishu_card.py tests/test_feishu_link.py`) unless the test itself controls `HOME`.
|
|
66
|
+
- `lark_oapi.ws.client` creates a module-level asyncio `loop` at import time and `Client.start()` always uses that global. For `/link` long-connection listeners, bind that SDK global to a listener-thread-owned loop before constructing the client, and stop it through private `_disconnect()` plus `loop.stop()` on `/unlink`; otherwise unlink/link can reuse a still-running SDK loop and fail with `RuntimeError: This event loop is already running`.
|
|
67
|
+
- `exec_command` background completion auto-resume is intentionally Agent-idle-only: when a session exits, it may call `Agent.maybe_invoke(...)` and start a synthetic `<exec_command_completed>` turn only if that Agent is not already running a turn. Do not enqueue/cache these events in `CliSubmissionQueue`; direct Agent/IPython use should share the same Agent-level hook.
|
|
68
|
+
- The tool description JSON fallbacks (`pycodex/prompts/exec_tools.json` and `pycodex/prompts/subagent_tools.json`) were deleted after moving schemas into class-level `BaseTool` specs. `ToolSpec.serialize()` intentionally skips function-tool `output_schema`, matching upstream `ResponsesApiTool.output_schema #[serde(skip)]`; keep output schemas as local metadata only unless upstream wire format changes.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: python-codex
|
|
3
|
-
Version: 0.1.13
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: A minimal Python extraction of Codex's main agent loop
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.6.2
|
|
@@ -165,12 +165,15 @@ pycodex --put @127.0.0.1:5577
|
|
|
165
165
|
pycodex --put /data/.codex/@127.0.0.1:5577
|
|
166
166
|
pycodex --call SECRET-CALLID@127.0.0.1:5577 "Reply with exactly OK."
|
|
167
167
|
pycodex doctor
|
|
168
|
+
pycodex-ws --listen 0.0.0.0:6007 --board ./board.html
|
|
168
169
|
```
|
|
169
170
|
|
|
170
171
|
Current behavior:
|
|
171
172
|
|
|
172
173
|
- with no argv prompt and a TTY stdin, enter interactive mode
|
|
173
174
|
- with an argv prompt or piped stdin, run a single turn
|
|
175
|
+
- `pycodex-ws` starts the standalone browser workspace with a board pane and a
|
|
176
|
+
pycodex session pane
|
|
174
177
|
- interactive mode supports `/exit` and `/quit`
|
|
175
178
|
- interactive mode shows a compact event stream for user-visible phases such as
|
|
176
179
|
tool execution and model follow-up after tool results
|
|
@@ -144,12 +144,15 @@ pycodex --put @127.0.0.1:5577
|
|
|
144
144
|
pycodex --put /data/.codex/@127.0.0.1:5577
|
|
145
145
|
pycodex --call SECRET-CALLID@127.0.0.1:5577 "Reply with exactly OK."
|
|
146
146
|
pycodex doctor
|
|
147
|
+
pycodex-ws --listen 0.0.0.0:6007 --board ./board.html
|
|
147
148
|
```
|
|
148
149
|
|
|
149
150
|
Current behavior:
|
|
150
151
|
|
|
151
152
|
- with no argv prompt and a TTY stdin, enter interactive mode
|
|
152
153
|
- with an argv prompt or piped stdin, run a single turn
|
|
154
|
+
- `pycodex-ws` starts the standalone browser workspace with a board pane and a
|
|
155
|
+
pycodex session pane
|
|
153
156
|
- interactive mode supports `/exit` and `/quit`
|
|
154
157
|
- interactive mode shows a compact event stream for user-visible phases such as
|
|
155
158
|
tool execution and model follow-up after tool results
|
|
@@ -1,17 +1,23 @@
|
|
|
1
1
|
# Alignment
|
|
2
2
|
|
|
3
|
-
This document records the current
|
|
4
|
-
|
|
3
|
+
This document records the current alignment work between `pycodex` and upstream
|
|
4
|
+
Codex from `https://github.com/openai/codex`: prompt/context assembly,
|
|
5
|
+
model-visible tool schemas, and observed tool round-trip behavior.
|
|
5
6
|
|
|
6
7
|
## Scope
|
|
7
8
|
|
|
8
|
-
The comparison
|
|
9
|
+
The original comparison pass focused on the model-visible prompt assembly:
|
|
9
10
|
|
|
10
11
|
- `instructions`
|
|
11
12
|
- `input` items
|
|
12
13
|
- developer/contextual user message shape
|
|
13
14
|
- `AGENTS.md` / environment-context injection
|
|
14
15
|
|
|
16
|
+
The current document also tracks tool alignment at two different layers:
|
|
17
|
+
|
|
18
|
+
- request-visible payloads captured from real outbound `/responses` requests
|
|
19
|
+
- class-level `BaseTool` descriptions, schemas, and runtime result shapes
|
|
20
|
+
|
|
15
21
|
It does not claim full request parity for every runtime mode yet.
|
|
16
22
|
|
|
17
23
|
## Comparison method
|
|
@@ -37,10 +43,47 @@ The repository copy of that helper server now lives at
|
|
|
37
43
|
The temporary capture artifacts used during debugging are intentionally not part
|
|
38
44
|
of the repository contract and are not documented here as stable project files.
|
|
39
45
|
|
|
46
|
+
Tool-specific status uses two inputs:
|
|
47
|
+
|
|
48
|
+
- proxy captures of actual upstream Codex and `pycodex` requests/results
|
|
49
|
+
- source inspection against the latest upstream tool specifications
|
|
50
|
+
|
|
51
|
+
## Status checkpoint (2026-06-23)
|
|
52
|
+
|
|
53
|
+
- Latest upstream source checked: `openai/codex` `83c4934`
|
|
54
|
+
(`2026-06-23 00:31:56 -0700`, `Remove redundant Codex Apps cache guard`).
|
|
55
|
+
- Live request captures in this pass used installed `codex-cli 0.138.0`.
|
|
56
|
+
- Prompt/context parity remains aligned for the compared non-interactive `exec`
|
|
57
|
+
path and the captured default two-turn `codex-tui` path, modulo dynamic ids.
|
|
58
|
+
- The raw JSON tool fallback files have been deleted:
|
|
59
|
+
`pycodex/prompts/exec_tools.json` and
|
|
60
|
+
`pycodex/prompts/subagent_tools.json`.
|
|
61
|
+
- Class-level descriptions/schemas/runtime result shapes have now been refreshed
|
|
62
|
+
across the default local tool set, not only the tools previously hidden by the
|
|
63
|
+
JSON fallback.
|
|
64
|
+
- Request-visible tool serialization now comes from class-level `BaseTool`
|
|
65
|
+
specs. Function-tool `output_schema` remains available as local metadata but
|
|
66
|
+
is intentionally not serialized into `/responses` requests, matching upstream
|
|
67
|
+
`ResponsesApiTool.output_schema #[serde(skip)]`.
|
|
68
|
+
- Latest upstream-facing fixes included in the class/runtime layer:
|
|
69
|
+
`request_user_input.autoResolutionMs` with `[60000, 240000]` clamping,
|
|
70
|
+
`view_image.detail` (`high` default, `original` opt-in), `close_agent`
|
|
71
|
+
returning `previous_status`, and `spawn_agent` guidance that spawned agents
|
|
72
|
+
inherit the current model by default.
|
|
73
|
+
- Post-delete proxy compare:
|
|
74
|
+
`uv run python tests/compare_tool_schemas.py --root .tmp/tool_schema_proxy_compare_after_fallback_delete_2 --timeout-seconds 240`.
|
|
75
|
+
The request-visible payloads are now equal for `write_stdin`, `web_search`,
|
|
76
|
+
`update_plan`, `apply_patch`, and `view_image` on the captured default path.
|
|
77
|
+
`exec_command` intentionally omits upstream approval/sandbox parameters
|
|
78
|
+
(`sandbox_permissions`, `justification`, `prefix_rule`) because pycodex skips
|
|
79
|
+
that authorization path by design; its description also tells the agent that
|
|
80
|
+
it can reply first for long tasks and will be invoked to continue when the
|
|
81
|
+
task finishes.
|
|
82
|
+
|
|
40
83
|
## Result
|
|
41
84
|
|
|
42
|
-
As of this snapshot, prompt/context parity is achieved for the non-interactive
|
|
43
|
-
`exec` comparison:
|
|
85
|
+
As of this snapshot, prompt/context parity is achieved for the compared
|
|
86
|
+
non-interactive `exec` path:
|
|
44
87
|
|
|
45
88
|
- `instructions` match exactly
|
|
46
89
|
- `input` match exactly
|
|
@@ -48,10 +91,19 @@ As of this snapshot, prompt/context parity is achieved for the non-interactive
|
|
|
48
91
|
In other words, the model-visible prompt dump for `pycodex` and upstream Codex
|
|
49
92
|
is currently identical for this comparison scenario.
|
|
50
93
|
|
|
94
|
+
Tool alignment is also materially improved: all default local tool classes have
|
|
95
|
+
been reviewed/refreshed against the latest upstream-facing specs where a current
|
|
96
|
+
upstream builtin exists. The request-visible payload for the compared official
|
|
97
|
+
tools now comes from class-level specs rather than vendored JSON snapshots.
|
|
98
|
+
|
|
51
99
|
## Current non-prompt status
|
|
52
100
|
|
|
53
101
|
After prompt/context parity, the next comparison layer is the full outbound
|
|
54
|
-
request shape. That work is
|
|
102
|
+
request shape. That work is still layered:
|
|
103
|
+
|
|
104
|
+
- request-visible parity for captured paths
|
|
105
|
+
- class-level tool spec/runtime parity after deleting the JSON fallback
|
|
106
|
+
- broader runtime parity for uncaptured modes
|
|
55
107
|
|
|
56
108
|
At the time of writing:
|
|
57
109
|
|
|
@@ -69,7 +121,10 @@ At the time of writing:
|
|
|
69
121
|
- transport/header parity is now aligned for the compared path, including the
|
|
70
122
|
sub-agent `x-openai-subagent` header and the observed `workspaces` omission
|
|
71
123
|
on later sub-agent turns
|
|
72
|
-
- tool schema parity is aligned for the compared exec-mode
|
|
124
|
+
- request-visible tool schema parity is aligned for the compared exec-mode and
|
|
125
|
+
default TUI captured paths where upstream still exposes the same tool names
|
|
126
|
+
- class-level tool descriptions, input schemas, output schemas, and notable
|
|
127
|
+
runtime result shapes have been refreshed across the default local tool set
|
|
73
128
|
|
|
74
129
|
The current implementation already matches:
|
|
75
130
|
|
|
@@ -80,7 +135,8 @@ The current implementation already matches:
|
|
|
80
135
|
- session-scoped request id headers
|
|
81
136
|
- turn metadata header shape (`turn_id` + `sandbox`)
|
|
82
137
|
- mode-aware `originator` header
|
|
83
|
-
- exact exec-mode tool schema payloads
|
|
138
|
+
- exact exec-mode tool schema payloads on the compared path, now generated from
|
|
139
|
+
class-level tool specs rather than vendored JSON snapshots
|
|
84
140
|
- `User-Agent` string for the compared non-interactive path
|
|
85
141
|
|
|
86
142
|
The main remaining deltas are now outside the prompt dump itself:
|
|
@@ -88,6 +144,9 @@ The main remaining deltas are now outside the prompt dump itself:
|
|
|
88
144
|
- dynamic run-specific values such as generated session ids and turn ids
|
|
89
145
|
- behavior outside the compared non-interactive `exec` path and the captured
|
|
90
146
|
default two-turn TUI path, especially other runtime modes not yet captured
|
|
147
|
+
- upstream's current default-path migration to `tool_search` / deferred
|
|
148
|
+
multi-agent tools and goal tools, while `pycodex` still exposes the legacy
|
|
149
|
+
flat sub-agent tools on the first request
|
|
91
150
|
|
|
92
151
|
## Proxy tool-schema compare
|
|
93
152
|
|
|
@@ -102,21 +161,45 @@ The main remaining deltas are now outside the prompt dump itself:
|
|
|
102
161
|
- 从 `tests/TESTS.md` 的真实 smoke tool 表读取工具顺序
|
|
103
162
|
- 逐个比较这条被抓到的 request path 里真正暴露给模型的 tool schema
|
|
104
163
|
|
|
105
|
-
|
|
106
|
-
|
|
164
|
+
注意:这项比较验证的是“这条 request 上模型实际看到的 payload”。raw JSON fallback
|
|
165
|
+
删除后,这项比较已经能证明当前被抓路径里的 official tool payload 来自类内
|
|
166
|
+
`BaseTool` spec。
|
|
167
|
+
|
|
168
|
+
删除 fallback 后的最新结果:
|
|
169
|
+
|
|
170
|
+
- command:
|
|
171
|
+
`env -u VIRTUAL_ENV uv run python tests/compare_tool_schemas.py --root .tmp/tool_schema_proxy_compare_after_fallback_delete_2 --timeout-seconds 240`
|
|
172
|
+
- upstream request:
|
|
173
|
+
`.tmp/tool_schema_proxy_compare_after_fallback_delete_2/upstream/008_POST_v1_responses.json`
|
|
174
|
+
- `pycodex` request:
|
|
175
|
+
`.tmp/tool_schema_proxy_compare_after_fallback_delete_2/pycodex/001_POST_v1_responses.json`
|
|
176
|
+
- comparison:
|
|
177
|
+
`.tmp/tool_schema_proxy_compare_after_fallback_delete_2/comparison.json`
|
|
178
|
+
|
|
179
|
+
在当前默认 CLI non-exec / `codex-tui` 这条被抓到的路径上,已经确认 request-visible
|
|
180
|
+
schema 一致的工具有:
|
|
107
181
|
|
|
108
|
-
- `exec_command`
|
|
109
182
|
- `write_stdin`
|
|
110
183
|
- `update_plan`
|
|
111
|
-
- `request_user_input`
|
|
112
184
|
- `apply_patch`
|
|
113
185
|
- `web_search`
|
|
114
186
|
- `view_image`
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
- `
|
|
119
|
-
|
|
187
|
+
|
|
188
|
+
仍需分层解释的差异:
|
|
189
|
+
|
|
190
|
+
- `exec_command`:`pycodex` 刻意不暴露 upstream 的
|
|
191
|
+
`sandbox_permissions` / `justification` / `prefix_rule`,因为当前实现明确跳过
|
|
192
|
+
approval/sandbox escalation 逻辑;description 还额外提示本地差异:长任务可以先回复
|
|
193
|
+
用户,任务完成时 agent 会被 invoke 来继续处理;其余参数和运行时默认/范围约束按类内
|
|
194
|
+
schema 对齐。
|
|
195
|
+
- `request_user_input`:`pycodex` 按 upstream source main 建模,带
|
|
196
|
+
`autoResolutionMs`;installed `codex-cli 0.138.0` 的 live capture 仍未带该字段。
|
|
197
|
+
- `spawn_agent` / `send_input` / `resume_agent` / `wait_agent` /
|
|
198
|
+
`close_agent`:upstream 当前首轮 request 不再平铺暴露这些工具,而是暴露
|
|
199
|
+
`tool_search`,并由 deferred discovery 加载 Multi-agent tools。`pycodex` 仍在首轮
|
|
200
|
+
request 平铺暴露 legacy sub-agent tools。
|
|
201
|
+
- upstream 当前还额外暴露 `get_goal` / `create_goal` / `update_goal`;`pycodex`
|
|
202
|
+
尚未实现 goal tools。
|
|
120
203
|
|
|
121
204
|
同一条被抓到的路径下,当前 upstream Codex 和 `pycodex` 都没有暴露这些工具:
|
|
122
205
|
|
|
@@ -131,6 +214,7 @@ The main remaining deltas are now outside the prompt dump itself:
|
|
|
131
214
|
|
|
132
215
|
这里的结论只针对当前被抓到的默认 `codex-tui` request path;它不等价于说这些
|
|
133
216
|
工具在上游全局不存在,只说明这次实际 context capture 没把它们带进首轮请求。
|
|
217
|
+
这些工具的类内 description/schema 状态见下面的 per-tool 表。
|
|
134
218
|
|
|
135
219
|
## Tool-call / tool-result schema compare
|
|
136
220
|
|
|
@@ -185,6 +269,8 @@ The main remaining deltas are now outside the prompt dump itself:
|
|
|
185
269
|
- Plan-mode happy path 现在也已按 upstream 源码建模:handler 会要求每个问题都带
|
|
186
270
|
非空 `options`、自动给每个问题补 `isOther=true`,并把结构化答案序列化成
|
|
187
271
|
JSON 字符串回传到下一轮 `function_call_output.output`,同时补 `success=true`
|
|
272
|
+
- 类内 schema 已补齐 upstream 最新的 `autoResolutionMs` 字段;runtime 会把非空值
|
|
273
|
+
clamp 到 `[60000, 240000]` 后交给交互层
|
|
188
274
|
- 当前仓库已经新增 deterministic proxy compare 脚本
|
|
189
275
|
`uv run python tests/compare_request_user_input_roundtrip.py`
|
|
190
276
|
- 该脚本会用同一套固定 origin SSE + proxy capture,同步比较 upstream Codex
|
|
@@ -200,7 +286,9 @@ The main remaining deltas are now outside the prompt dump itself:
|
|
|
200
286
|
- `function_call` item schema 一致
|
|
201
287
|
- 下一轮里的 `function_call_output` schema 一致
|
|
202
288
|
- 当前样本里,两边都会把 tool result 回传成同一个 `input_image` 列表,
|
|
203
|
-
`image_url` data URL
|
|
289
|
+
`image_url` data URL 也一致
|
|
290
|
+
- 类内 schema/runtime 已补齐 upstream 最新的 `detail` 参数:省略时按 `high`
|
|
291
|
+
返回,显式 `original` 时保留并回传到 `input_image.detail`
|
|
204
292
|
- `spawn_agent`
|
|
205
293
|
- 当前先补齐了一个最小 validation-path:当模型在没有 `message` / `items` 的情况下
|
|
206
294
|
强制调用 `spawn_agent` 时,upstream Codex 和 `pycodex` 现在都会回传同一个固定错误:
|
|
@@ -209,6 +297,8 @@ The main remaining deltas are now outside the prompt dump itself:
|
|
|
209
297
|
`agent_id` / `nickname`
|
|
210
298
|
- 当前 `pycodex` 也已经改成 uuid7 agent id,并接上了与 upstream 同一批候选名的
|
|
211
299
|
默认昵称池;剩余差异主要只在具体抽到哪个昵称这类动态值
|
|
300
|
+
- 类内 description 已刷新到 upstream 最新方向:spawned agents 默认继承当前模型,
|
|
301
|
+
不再在 tool desc 里硬编码模型 picker 列表
|
|
212
302
|
- `send_input`
|
|
213
303
|
- `function_call` item schema 一致
|
|
214
304
|
- 下一轮里的 `function_call_output` schema 一致
|
|
@@ -221,8 +311,8 @@ The main remaining deltas are now outside the prompt dump itself:
|
|
|
221
311
|
- `close_agent`
|
|
222
312
|
- `function_call` item schema 一致
|
|
223
313
|
- 下一轮里的 `function_call_output` schema 一致
|
|
224
|
-
-
|
|
225
|
-
|
|
314
|
+
- upstream 最新源码里的输出键名是 `previous_status`;当前仓库的 schema/runtime 已
|
|
315
|
+
回到 `previous_status`
|
|
226
316
|
- `resume_agent`
|
|
227
317
|
- 真实 happy path 已补抓:子 agent 完成、`close_agent`、`resume_agent`、再
|
|
228
318
|
`send_input` 的完整链路现在已经对齐
|
|
@@ -236,8 +326,8 @@ The main remaining deltas are now outside the prompt dump itself:
|
|
|
236
326
|
- request body 里的 `prompt_cache_key` 现在也改成和 upstream 一样:
|
|
237
327
|
parent thread 维持自己的稳定 session id,而 sub-agent thread 则改用
|
|
238
328
|
`agent_id` 本身,不再错误复用 parent 的 cache key
|
|
239
|
-
- 这 6 个 sub-agent tool schema
|
|
240
|
-
|
|
329
|
+
- 这 6 个 sub-agent tool schema 现在来自类内 `BaseTool` spec,并由 CLI
|
|
330
|
+
serialization 测试覆盖;`pycodex/prompts/subagent_tools.json` 已删除
|
|
241
331
|
- `sub-agent notification`
|
|
242
332
|
- 在 `wait_agent` 之后,upstream 会向 parent thread history 额外注入一条
|
|
243
333
|
`user` message:
|
|
@@ -271,30 +361,37 @@ The main remaining deltas are now outside the prompt dump itself:
|
|
|
271
361
|
- `not exposed`:在当前默认 `codex-tui` 首轮 request path 下两边都没把这个工具带进 `tools`
|
|
272
362
|
- `first-request same`:首轮 `tools` schema 已确认一致
|
|
273
363
|
- `round-trip same`:真实触发后的 `tool_call` / `tool_result` 外层 schema 已确认一致
|
|
364
|
+
- `class aligned`:类内 description/schema/runtime 已按当前 upstream-facing spec 刷新,
|
|
365
|
+
且不再依赖 raw JSON fallback
|
|
366
|
+
- `local shim`:本地工具有实现和 smoke 覆盖,但当前 upstream 默认 CLI 抓包没有同名
|
|
367
|
+
official model-visible tool 可直接逐字节对齐
|
|
368
|
+
- `legacy-flat mismatch`:`pycodex` 首轮仍直接暴露 legacy flat tool;upstream 首轮
|
|
369
|
+
已迁移到 `tool_search` / deferred discovery
|
|
274
370
|
- `pending`:这条工具链还没有补完真实触发对比
|
|
275
371
|
|
|
276
|
-
| tool |
|
|
277
|
-
|
|
278
|
-
| `shell` | `not exposed` |
|
|
279
|
-
| `shell_command` | `not exposed` |
|
|
280
|
-
| `exec_command` | `round-trip same` | `function_call` / `function_call_output` 外层 shape 一致;默认 `10_000` token 截断和未读输出 `1 MiB` head/tail cap
|
|
281
|
-
| `write_stdin` | `round-trip same` | `function_call` / `function_call_output` 外层 shape 一致;默认 `10_000` token 截断和未读输出 `1 MiB` head/tail cap
|
|
282
|
-
| `exec` | `not exposed` |
|
|
283
|
-
| `wait` | `not exposed` |
|
|
284
|
-
| `web_search` | `round-trip same` | `web_search_call` shape 一致;provider-native tool 无单独客户端 `tool_result` |
|
|
285
|
-
| `update_plan` | `round-trip same` | `function_call` / `function_call_output` 外层 shape 一致 |
|
|
286
|
-
| `request_user_input` | `round-trip same (Default mode); Plan mode
|
|
287
|
-
| `request_permissions` | `not exposed` |
|
|
288
|
-
| `apply_patch` | `round-trip same` | `custom_tool_call` / `custom_tool_call_output` 外层 shape
|
|
289
|
-
| `grep_files` | `not exposed` |
|
|
290
|
-
| `read_file` | `not exposed` |
|
|
291
|
-
| `list_dir` | `not exposed` |
|
|
292
|
-
| `view_image` | `round-trip same` | `function_call` / `function_call_output` 外层 shape
|
|
293
|
-
| `spawn_agent` | `round-trip same` | validation-path 与 happy-path
|
|
294
|
-
| `send_input` | `round-trip same` | `
|
|
295
|
-
| `resume_agent` | `round-trip same` |
|
|
296
|
-
| `wait_agent` | `round-trip same` | `
|
|
297
|
-
| `close_agent` | `round-trip same` | `
|
|
372
|
+
| tool | request-visible status | class/runtime status | note |
|
|
373
|
+
|---|---|---|---|
|
|
374
|
+
| `shell` | `not exposed` | `local shim` | 默认 `codex-tui` 首轮路径不带这个工具;类内 argv 执行语义和 schema 已整理,但没有当前 upstream 默认 CLI 同名 payload 可逐字节对齐 |
|
|
375
|
+
| `shell_command` | `not exposed` | `class aligned` | 默认首轮路径不带;类内 desc/schema 已刷新为 shell-string command 语义 |
|
|
376
|
+
| `exec_command` | `intentional approval-field/description delta; round-trip same` | `class aligned except skipped auth + local idle resume` | 删除 fallback 后不再暴露 `sandbox_permissions` / `justification` / `prefix_rule`,这是 pycodex 刻意跳过鉴权逻辑的差异;description 额外提示长任务可以先回复用户,任务完成时 agent 会被 invoke 来继续处理;其余参数按 schema 执行,`function_call` / `function_call_output` 外层 shape 一致;默认 `10_000` token 截断和未读输出 `1 MiB` head/tail cap 已补齐,仅剩动态值差异 |
|
|
377
|
+
| `write_stdin` | `first-request same; round-trip same` | `class aligned` | 删除 fallback 后首轮 schema 相等;`function_call` / `function_call_output` 外层 shape 一致;默认 `10_000` token 截断和未读输出 `1 MiB` head/tail cap 已补齐,仅剩动态值差异 |
|
|
378
|
+
| `exec` | `not exposed` | `class aligned` | 默认首轮路径不带;code-mode custom/freeform desc 和 grammar 已刷新,仍需 code-mode request-visible 抓包复测 |
|
|
379
|
+
| `wait` | `not exposed` | `class aligned` | 默认首轮路径不带;code-mode wait schema/runtime 已刷新,仍需 code-mode request-visible 抓包复测 |
|
|
380
|
+
| `web_search` | `first-request same; round-trip same` | `class aligned` | 删除 fallback 后 provider-native payload 相等,包含 `search_content_types=["text","image"]`;`web_search_call` shape 一致;provider-native tool 无单独客户端 `tool_result` |
|
|
381
|
+
| `update_plan` | `first-request same; round-trip same` | `class aligned` | 删除 fallback 后首轮 schema 相等;`function_call` / `function_call_output` 外层 shape 一致 |
|
|
382
|
+
| `request_user_input` | `round-trip same (Default mode); Plan mode source-aligned` | `class aligned` | Default-mode unavailable 路径已 capture 对齐;Plan-mode 按 upstream main 建模,包含 `success=true` 和 `autoResolutionMs` clamp;本机 installed `codex-cli 0.115.0` live capture 仍少 `success=true` |
|
|
383
|
+
| `request_permissions` | `not exposed` | `class aligned` | 默认首轮路径不带;类内 desc/schema 已补 `environment_id` passthrough,交互 handler 仍是最小实现 |
|
|
384
|
+
| `apply_patch` | `first-request same; round-trip same` | `class aligned` | 删除 fallback 后 custom grammar 相等;`custom_tool_call` / `custom_tool_call_output` 外层 shape 一致;输出包装已对齐,仅剩具体文件路径差异 |
|
|
385
|
+
| `grep_files` | `not exposed` | `local shim` | 默认首轮路径不带;本地文件搜索 helper 有 schema/smoke,但当前 upstream 默认 CLI 没有同名 official payload 可直接对齐 |
|
|
386
|
+
| `read_file` | `not exposed` | `local shim` | 默认首轮路径不带;本地 slice/indentation 读文件 helper 有 schema/smoke,但当前 upstream 默认 CLI 没有同名 official payload 可直接对齐 |
|
|
387
|
+
| `list_dir` | `not exposed` | `local shim` | 默认首轮路径不带;本地目录树 helper 有 schema/smoke,但当前 upstream 默认 CLI 没有同名 official payload 可直接对齐 |
|
|
388
|
+
| `view_image` | `first-request same; round-trip same` | `class aligned` | 删除 fallback 后首轮 schema 相等;`function_call` / `function_call_output` 外层 shape 一致;类内已支持 `detail=high|original`,默认回传 `high` |
|
|
389
|
+
| `spawn_agent` | `legacy-flat mismatch; round-trip same` | `class aligned` | pycodex 首轮仍平铺暴露此工具,upstream 首轮改用 `tool_search`;历史 validation-path 与 happy-path 已补抓;类内 desc 已去掉硬编码模型 picker |
|
|
390
|
+
| `send_input` | `legacy-flat mismatch; round-trip same` | `class aligned` | pycodex 首轮仍平铺暴露此工具,upstream 首轮改用 `tool_search`;历史 round-trip 外层 shape 一致,仅剩动态 `submission_id` |
|
|
391
|
+
| `resume_agent` | `legacy-flat mismatch; round-trip same` | `class aligned` | pycodex 首轮仍平铺暴露此工具,upstream 首轮改用 `tool_search`;已补抓真实 happy path;`pending_init` 返回值、sub-agent tool 子集、sub-agent context 都已对齐 |
|
|
392
|
+
| `wait_agent` | `legacy-flat mismatch; round-trip same` | `class aligned` | pycodex 首轮仍平铺暴露此工具,upstream 首轮改用 `tool_search`;历史 round-trip 外层 shape 一致,仅剩动态 agent id |
|
|
393
|
+
| `close_agent` | `legacy-flat mismatch; round-trip same` | `class aligned` | pycodex 首轮仍平铺暴露此工具,upstream 首轮改用 `tool_search`;schema/runtime 输出为 upstream 当前的 `previous_status`;parent-thread notification message 也已补齐 |
|
|
394
|
+
| `ipython` | `not in default registry` | `local shim` | 这是可选 IPython attach helper,不属于默认 CLI tool 集合;当前没有 upstream default CLI 同名 payload 对齐目标 |
|
|
298
395
|
|
|
299
396
|
### Redacted example: request-level diff categories
|
|
300
397
|
|
|
@@ -308,12 +405,20 @@ same:
|
|
|
308
405
|
- exec-mode tool subset membership
|
|
309
406
|
- request context field presence
|
|
310
407
|
- exec-mode tool schemas
|
|
408
|
+
- current default-path schemas for `write_stdin`, `web_search`, `update_plan`,
|
|
409
|
+
`apply_patch`, and `view_image`
|
|
311
410
|
- user-agent semantics and compared string
|
|
312
411
|
|
|
313
412
|
different:
|
|
314
413
|
- dynamic request metadata values
|
|
414
|
+
- intentional `exec_command` approval/sandbox field omission and idle-resume
|
|
415
|
+
description in pycodex
|
|
315
416
|
- transport-layer header casing / normalization
|
|
316
|
-
- paths and modes not yet aligned beyond
|
|
417
|
+
- paths and modes not yet aligned beyond captured `exec` / default TUI paths
|
|
418
|
+
- installed upstream `request_user_input` lacks source-main `autoResolutionMs`
|
|
419
|
+
- upstream default-path tool discovery now exposes `tool_search` instead of
|
|
420
|
+
legacy flat sub-agent tools
|
|
421
|
+
- upstream default-path goal tools are not implemented locally yet
|
|
317
422
|
```
|
|
318
423
|
|
|
319
424
|
## Redacted examples
|
|
@@ -457,7 +562,6 @@ Vendored upstream prompt data:
|
|
|
457
562
|
- `pycodex/prompts/models.json`
|
|
458
563
|
- `pycodex/prompts/permissions/sandbox_mode/`
|
|
459
564
|
- `pycodex/prompts/permissions/approval_policy/`
|
|
460
|
-
- `pycodex/prompts/exec_tools.json`
|
|
461
565
|
|
|
462
566
|
Tests:
|
|
463
567
|
|
|
@@ -467,16 +571,31 @@ Tests:
|
|
|
467
571
|
|
|
468
572
|
## What is still out of scope here
|
|
469
573
|
|
|
470
|
-
Prompt parity is not the same thing as full
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
-
|
|
478
|
-
|
|
479
|
-
|
|
574
|
+
Prompt/tool-schema parity is not the same thing as full Codex parity.
|
|
575
|
+
|
|
576
|
+
The remaining explicit alignment work is:
|
|
577
|
+
|
|
578
|
+
- Implement upstream's current default-path `tool_search` / deferred multi-agent
|
|
579
|
+
discovery, or explicitly decide to keep legacy flat agent tools as a
|
|
580
|
+
compatibility surface.
|
|
581
|
+
- Add goal tools (`get_goal`, `create_goal`, `update_goal`) if the local CLI is
|
|
582
|
+
intended to match the current upstream default tool set.
|
|
583
|
+
- Capture/compare code-mode request-visible payloads for `exec` / `wait`, not
|
|
584
|
+
only their class-level schemas and smoke behavior.
|
|
585
|
+
- Decide whether local helper tools (`shell`, `grep_files`, `read_file`,
|
|
586
|
+
`list_dir`, optional `ipython`) are intended to stay as local shims or should
|
|
587
|
+
be replaced/renamed as upstream evolves.
|
|
588
|
+
- Broaden runtime parity beyond the currently aligned non-interactive `exec`
|
|
589
|
+
path and captured default two-turn TUI path.
|
|
590
|
+
- App-server `AdditionalTools` handling from upstream `6e0c8b4` is not
|
|
591
|
+
implemented locally; this pass only verified it is not a builtin CLI tool spec
|
|
592
|
+
change for the local path.
|
|
593
|
+
- Newer multi-agent v2-style tools such as `send_message`, `followup_task`,
|
|
594
|
+
`interrupt_agent`, and `list_agents` are not implemented in this local tool
|
|
595
|
+
registry yet.
|
|
596
|
+
- Broader runtime features such as sandbox/approval enforcement, WebSocket/HTTP
|
|
597
|
+
transport fallback, cancellation markers, MCP/connectors/plugins, memory, and
|
|
598
|
+
review flows remain partial or out of scope for this pass.
|
|
480
599
|
|
|
481
600
|
## Steer semantics
|
|
482
601
|
|
|
@@ -228,7 +228,7 @@ A skill is a set of local instructions to follow that is stored in a `SKILL.md`
|
|
|
228
228
|
- `permissions` prompt 的来源目录不同:Codex 从 `codex-rs/protocol/src/prompts/permissions/...` 取,`pycodex` 从 `./pycodex/prompts/permissions/...` 取。
|
|
229
229
|
- `collaboration_mode` block 的来源不同:Codex 用上游协作提示模板,`pycodex` 用 `./pycodex/prompts/collaboration_default.md` / `./pycodex/prompts/collaboration_plan.md`。
|
|
230
230
|
- `skills guidance` 的来源不同:Codex 用上游固定 guidance,`pycodex` 用 `./pycodex/context.py::SKILLS_GUIDANCE`。
|
|
231
|
-
- `tools` 的构造来源不同:Codex 从上游 runtime tool registry 出来,`pycodex`
|
|
231
|
+
- `tools` 的构造来源不同:Codex 从上游 runtime tool registry 出来,`pycodex` 从本地 `BaseTool` class specs 经 `ToolSpec.serialize()` 出来。
|
|
232
232
|
|
|
233
233
|
### 1.3 首轮请求不变量
|
|
234
234
|
|
|
@@ -462,5 +462,5 @@ ProviderBuiltinToolSchema = {
|
|
|
462
462
|
当前实现方式:
|
|
463
463
|
|
|
464
464
|
- 不再使用 prompt 级别的 `serialized_tools` override。
|
|
465
|
-
-
|
|
466
|
-
-
|
|
465
|
+
- 不再使用 `pycodex/prompts/exec_tools.json` 这类 raw JSON fallback。
|
|
466
|
+
- 在工具类内维护 description / input schema;`ToolSpec.serialize()` 负责生成 request-visible payload。
|
|
@@ -17,7 +17,8 @@ from .protocol import (
|
|
|
17
17
|
TurnResult,
|
|
18
18
|
UserMessage,
|
|
19
19
|
)
|
|
20
|
-
from .tools import ToolContext, ToolRegistry
|
|
20
|
+
from .tools import ExecCommandTool, ToolContext, ToolRegistry, UnifiedExecManager
|
|
21
|
+
from .utils.truncation import truncate_tool_results_for_history
|
|
21
22
|
from .utils import uuid7_string
|
|
22
23
|
import typing
|
|
23
24
|
|
|
@@ -46,6 +47,7 @@ _CONTEXT_LENGTH_ERROR_MARKERS = (
|
|
|
46
47
|
"exceeds the context window",
|
|
47
48
|
"exceeded the context window",
|
|
48
49
|
)
|
|
50
|
+
TERMINAL_TURN_EVENTS = {"turn_completed", "turn_failed", "turn_interrupted"}
|
|
49
51
|
|
|
50
52
|
|
|
51
53
|
class TurnInterrupted(RuntimeError):
|
|
@@ -85,6 +87,15 @@ class Agent:
|
|
|
85
87
|
self._last_total_usage_tokens: 'typing.Union[int, None]' = None
|
|
86
88
|
self.runtime_environment = runtime_environment
|
|
87
89
|
self.interrupt_asap = False
|
|
90
|
+
self._turn_running = False
|
|
91
|
+
exec_command_tool = self._tool_registry.get_tool("exec_command")
|
|
92
|
+
self._exec_manager = (
|
|
93
|
+
exec_command_tool._manager
|
|
94
|
+
if isinstance(exec_command_tool, ExecCommandTool)
|
|
95
|
+
else None
|
|
96
|
+
)
|
|
97
|
+
if self._exec_manager is not None:
|
|
98
|
+
self._exec_manager.set_notify_hook(self.maybe_invoke)
|
|
88
99
|
|
|
89
100
|
@property
|
|
90
101
|
def history(self) -> 'typing.Tuple[ConversationItem, ...]':
|
|
@@ -129,6 +140,7 @@ class Agent:
|
|
|
129
140
|
async def run_turn(
|
|
130
141
|
self, texts: 'typing.List[str]', turn_id: 'typing.Union[str, None]' = None
|
|
131
142
|
) -> 'TurnResult':
|
|
143
|
+
self._turn_running = True
|
|
132
144
|
turn_id = turn_id or uuid7_string()
|
|
133
145
|
self.interrupt_asap = False
|
|
134
146
|
new_user_messages = [UserMessage(text=text) for text in texts]
|
|
@@ -168,16 +180,10 @@ class Agent:
|
|
|
168
180
|
item_count=len(response.items),
|
|
169
181
|
)
|
|
170
182
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
persisted_response_items.append(item)
|
|
176
|
-
if isinstance(item, AssistantMessage):
|
|
177
|
-
last_assistant_message = item.text
|
|
178
|
-
elif isinstance(item, ToolCall):
|
|
179
|
-
tool_calls.append(item)
|
|
180
|
-
self._persist_history_items(persisted_response_items)
|
|
183
|
+
recorded_items = self._record_model_response_items(response.items)
|
|
184
|
+
tool_calls = recorded_items[1]
|
|
185
|
+
if recorded_items[2] is not None:
|
|
186
|
+
last_assistant_message = recorded_items[2]
|
|
181
187
|
|
|
182
188
|
if not tool_calls:
|
|
183
189
|
self._raise_if_interrupt_requested(
|
|
@@ -191,6 +197,7 @@ class Agent:
|
|
|
191
197
|
iteration=iteration,
|
|
192
198
|
output_text=last_assistant_message,
|
|
193
199
|
)
|
|
200
|
+
self._turn_running = False
|
|
194
201
|
return TurnResult(
|
|
195
202
|
turn_id=turn_id,
|
|
196
203
|
output_text=last_assistant_message,
|
|
@@ -200,6 +207,7 @@ class Agent:
|
|
|
200
207
|
)
|
|
201
208
|
|
|
202
209
|
tool_results = await self._execute_tool_batch(turn_id, tool_calls)
|
|
210
|
+
tool_results = truncate_tool_results_for_history(tool_results)
|
|
203
211
|
self._history.extend(tool_results)
|
|
204
212
|
self._persist_history_items(tool_results)
|
|
205
213
|
follow_up_messages = self._build_follow_up_messages(tool_results)
|
|
@@ -211,6 +219,10 @@ class Agent:
|
|
|
211
219
|
output_text=last_assistant_message,
|
|
212
220
|
)
|
|
213
221
|
except TurnInterrupted:
|
|
222
|
+
self._turn_running = False
|
|
223
|
+
raise
|
|
224
|
+
except asyncio.CancelledError:
|
|
225
|
+
self._turn_running = False
|
|
214
226
|
raise
|
|
215
227
|
except Exception as exc:
|
|
216
228
|
context_usage = _usage_from_context_length_error(str(exc))
|
|
@@ -224,8 +236,29 @@ class Agent:
|
|
|
224
236
|
error=str(exc),
|
|
225
237
|
error_type=type(exc).__name__,
|
|
226
238
|
)
|
|
239
|
+
self._turn_running = False
|
|
227
240
|
raise
|
|
228
241
|
|
|
242
|
+
async def maybe_invoke(self, event: 'typing.Dict[str, object]') -> 'bool':
|
|
243
|
+
if self._turn_running or event.get("type") != "exec_command_completed":
|
|
244
|
+
return False
|
|
245
|
+
payload = {
|
|
246
|
+
"session_id": event.get("session_id"),
|
|
247
|
+
"exit_code": event.get("exit_code"),
|
|
248
|
+
"command": event.get("command"),
|
|
249
|
+
}
|
|
250
|
+
text = (
|
|
251
|
+
"<exec_command_completed>\n"
|
|
252
|
+
f"{json.dumps(payload, ensure_ascii=False, separators=(',', ':'))}\n"
|
|
253
|
+
"</exec_command_completed>"
|
|
254
|
+
)
|
|
255
|
+
self._turn_running = True
|
|
256
|
+
task = asyncio.create_task(self.run_turn([text]))
|
|
257
|
+
task.add_done_callback(
|
|
258
|
+
lambda task: None if task.cancelled() else task.exception()
|
|
259
|
+
)
|
|
260
|
+
return True
|
|
261
|
+
|
|
229
262
|
async def _execute_tool_batch(
|
|
230
263
|
self,
|
|
231
264
|
turn_id: 'str',
|
|
@@ -294,10 +327,18 @@ class Agent:
|
|
|
294
327
|
return result
|
|
295
328
|
|
|
296
329
|
def _emit(self, kind: 'str', turn_id: 'str', **payload: 'object') -> 'None':
|
|
330
|
+
if kind in TERMINAL_TURN_EVENTS:
|
|
331
|
+
payload["background_exec_count"] = self._background_exec_count()
|
|
297
332
|
self._event_handler(
|
|
298
333
|
AgentEvent(kind=kind, turn_id=turn_id, payload=dict(payload))
|
|
299
334
|
)
|
|
300
335
|
|
|
336
|
+
def _background_exec_count(self) -> 'int':
|
|
337
|
+
manager: 'typing.Union[UnifiedExecManager, None]' = self._exec_manager
|
|
338
|
+
if manager is None:
|
|
339
|
+
return 0
|
|
340
|
+
return manager.running_session_count()
|
|
341
|
+
|
|
301
342
|
def _persist_history_items(
|
|
302
343
|
self,
|
|
303
344
|
items: 'typing.Iterable[ConversationItem]',
|
|
@@ -310,6 +351,25 @@ class Agent:
|
|
|
310
351
|
except Exception: # pragma: no cover - persistence should not break turns
|
|
311
352
|
return
|
|
312
353
|
|
|
354
|
+
def _record_model_response_items(
|
|
355
|
+
self,
|
|
356
|
+
items: 'typing.Iterable[object]',
|
|
357
|
+
) -> 'typing.Tuple[typing.Tuple[ConversationItem, ...], typing.List[ToolCall], typing.Union[str, None]]':
|
|
358
|
+
persisted_response_items: 'typing.List[ConversationItem]' = []
|
|
359
|
+
tool_calls: 'typing.List[ToolCall]' = []
|
|
360
|
+
last_assistant_message = None
|
|
361
|
+
for item in items:
|
|
362
|
+
if not isinstance(item, (AssistantMessage, ToolCall, ReasoningItem)):
|
|
363
|
+
continue
|
|
364
|
+
self._history.append(item)
|
|
365
|
+
persisted_response_items.append(item)
|
|
366
|
+
if isinstance(item, AssistantMessage):
|
|
367
|
+
last_assistant_message = item.text
|
|
368
|
+
elif isinstance(item, ToolCall):
|
|
369
|
+
tool_calls.append(item)
|
|
370
|
+
self._persist_history_items(persisted_response_items)
|
|
371
|
+
return tuple(persisted_response_items), tool_calls, last_assistant_message
|
|
372
|
+
|
|
313
373
|
def _handle_model_stream_event(self, turn_id: 'str', event: 'ModelStreamEvent') -> 'None':
|
|
314
374
|
if event.kind == "token_count":
|
|
315
375
|
self._remember_token_usage(event.payload.get("usage"))
|