python-codex 0.1.11__tar.gz → 0.1.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_codex-0.1.11 → python_codex-0.1.13}/.gitignore +1 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/AGENTS.md +5 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/PKG-INFO +15 -2
- {python_codex-0.1.11 → python_codex-0.1.13}/README.md +14 -1
- {python_codex-0.1.11 → python_codex-0.1.13}/docs/ALIGNMENT.md +1 -1
- {python_codex-0.1.11 → python_codex-0.1.13}/docs/responses_server/README.md +7 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/__init__.py +10 -8
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/agent.py +226 -21
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/cli.py +199 -145
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/compat.py +8 -4
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/context.py +16 -0
- python_codex-0.1.13/pycodex/feishu_card.py +693 -0
- python_codex-0.1.13/pycodex/feishu_link.py +342 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/model.py +102 -7
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/models.json +4 -4
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/protocol.py +17 -17
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/runtime.py +9 -14
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/runtime_services.py +45 -23
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/apply_patch_tool.py +11 -12
- python_codex-0.1.13/pycodex/tools/ipython_tool.py +144 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/unified_exec_manager.py +3 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/utils/__init__.py +2 -13
- python_codex-0.1.13/pycodex/utils/async_bridge.py +54 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/utils/compactor.py +96 -19
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/utils/session_persist.py +57 -38
- python_codex-0.1.13/pycodex/utils/toolcall_visualize.py +713 -0
- python_codex-0.1.13/pycodex/utils/visualize.py +534 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pyproject.toml +1 -1
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/app.py +7 -3
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/stream_router.py +39 -1
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/compare_request_user_input_roundtrip.py +114 -100
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/responses_server/test_server.py +167 -0
- python_codex-0.1.13/tests/test_agent.py +857 -0
- python_codex-0.1.13/tests/test_async_bridge.py +38 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/test_builtin_tools.py +183 -8
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/test_cli.py +391 -285
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/test_compactor.py +27 -1
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/test_context.py +35 -4
- python_codex-0.1.13/tests/test_feishu_card.py +338 -0
- python_codex-0.1.13/tests/test_feishu_link.py +25 -0
- python_codex-0.1.13/tests/test_ipython_tool.py +121 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/test_model.py +213 -0
- python_codex-0.1.13/tests/test_visualize.py +37 -0
- python_codex-0.1.13/tools/feishu_oauth.py +188 -0
- python_codex-0.1.11/pycodex/utils/visualize.py +0 -1119
- python_codex-0.1.11/tests/test_agent.py +0 -349
- {python_codex-0.1.11 → python_codex-0.1.13}/.github/workflows/publish.yml +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/.github/workflows/test.yml +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/LICENSE +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/README_ZH.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/docs/CONTEXT.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/collaboration.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/doctor.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/portable.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/portable_server.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/collaboration_default.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/collaboration_plan.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/default_base_instructions.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/exec_tools.json +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/permissions/approval_policy/never.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/permissions/approval_policy/on_failure.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/permissions/approval_policy/on_request.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/permissions/approval_policy/on_request_rule_request_permission.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/permissions/approval_policy/unless_trusted.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/permissions/sandbox_mode/danger_full_access.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/permissions/sandbox_mode/read_only.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/permissions/sandbox_mode/workspace_write.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/prompts/subagent_tools.json +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/__init__.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/agent_tool_schemas.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/base_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/close_agent_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/code_mode_manager.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/exec_command_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/exec_runtime.js +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/exec_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/grep_files_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/list_dir_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/read_file_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/request_permissions_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/request_user_input_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/resume_agent_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/send_input_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/shell_command_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/shell_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/spawn_agent_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/update_plan_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/view_image_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/wait_agent_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/wait_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/web_search_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/tools/write_stdin_tool.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/utils/debug.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/utils/dotenv.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/utils/get_env.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/pycodex/utils/random_ids.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/__init__.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/__main__.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/config.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/messages_api.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/payload_processors.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/server.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/session_store.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/tools/__init__.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/tools/custom_adapter.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/tools/web_search.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/responses_server/trajectory_dump.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/TESTS.md +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/__init__.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/compare_steer_request_bodies.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/compare_tool_schemas.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/fake_responses_server.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/fakes.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/responses_server/fake_chat_completions_server.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/test_doctor.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/test_fake_responses_server.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/test_portable.py +0 -0
- {python_codex-0.1.11 → python_codex-0.1.13}/tests/test_py36_syntax.py +0 -0
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
- 现在 `ResponsesModelClient` 默认会对流式断连做 provider 级自动重试(`stream_max_retries` 默认 5);写 CLI/REPL 测试时如果断言“先向用户报错,再靠下一句 `go on` 继续”,必须在测试 provider 配置里显式设 `stream_max_retries = 0`,否则测试可能一直等不到预期错误而卡住。
|
|
13
13
|
- `responses_server` compat 层应透传请求里的 `model`;不要再做 “取 downstream /models 第一个 id 并强制覆盖请求模型” 这种兜底兼容。
|
|
14
14
|
- 对 `model_provider = "vllm"`,`responses_server` 仍然走 `/v1/chat/completions` compat 路径,但要保留 reasoning:把 chat chunk 里的 `reasoning` / `reasoning_content` 翻回 Responses `reasoning` item,并把历史里的 Responses `reasoning` item 回放成下游 assistant message 的 `reasoning` 字段。
|
|
15
|
+
- `responses_server` 不能把 terminal reasoning-only chat output 当成成功回复:如果 downstream 一轮结束时只返回 `reasoning` / `reasoning_content`,没有 assistant `content` 且没有 tool call,先丢弃本次 partial reasoning 并用原样 downstream request 静默重试一次;若仍然 reasoning-only,再发 `response.failed(type=model_output_invalid)`,避免把 partial reasoning 写进 rollout 后在下一轮变成 chat 后端拒绝的裸 assistant message。
|
|
15
16
|
- `responses_server` 的 provider-specific chat payload 定制统一放在 `responses_server/payload_processors.py`:使用 `CompatServerConfig.model_provider` 选择 `provider_name -> proc_fn(outcomming_request)` 映射,并且只在真正发出 downstream `/v1/chat/completions` 前 post-process;`StreamRouter` 内部继续保留 canonical payload,避免 tool hydration / mock web_search follow-up 被 provider 改写污染。
|
|
16
17
|
- `responses_server` 如果要兼容下游 `/v1/messages`,也优先保持这条边界:内部继续用 canonical chat request / chat-like chunk 流,只有真正发请求和读取 SSE 时才做 messages 适配,这样 tool hydration、mock `web_search` follow-up、provider payload post-process 都能复用。
|
|
17
18
|
- 真实 vLLM `0.19.0` 的 `/v1/messages` 会对缺失 `max_tokens` 直接返回 `400`;messages 适配层必须总是补这个字段。当前约定是优先透传请求里的 `max_output_tokens`/`max_tokens`,否则回退到默认 `32000`。
|
|
@@ -22,6 +23,7 @@
|
|
|
22
23
|
- 对需要 model-specific prompt 的本地 model slug,直接在 vendored `pycodex/prompts/models.json` 补条目;当前 `step-3.5-flash` / `step-3.5-flash-2603` / `step-3.6` 已按这个方式接入。
|
|
23
24
|
- 交互 REPL 的 context 用量提示也应尽量贴近上游语义:展示“剩余 context 百分比”而不是原始 token 数;计算时按上游同款 `BASELINE_TOKENS=12000` 做归一化,并在模型元数据只有 `context_window` 时默认按 `95%` effective window 处理。只要当前模型能解析出 context window,初始 prompt 就先显示 `100%`,等首个 usage 回来后再刷新成真实值。
|
|
24
25
|
- 对交互 REPL 的 context 指示器,`model_context_window` 的取值优先级也要贴近上游:先吃 `config.toml` / profile 里的 `model_context_window` override,再回退到 vendored `models.json` 的 `context_window`;effective percent 继续沿用模型元数据,没有时默认 `95%`。
|
|
26
|
+
- `pyco(<percent>)` 正常只来自模型流里最近一次 `response.completed.response.usage.total_tokens`;如果大 tool output 之后的下一次请求被下游 `context_length_exceeded` 拒绝,rollout 不会单独记录 usage。遇到这类错误时应从错误文案的 `requested ... tokens (... in the messages, ... in the completion)` 提取真实请求 token,作为失败请求的 `token_count` 事件回灌,并立即触发 compact 后重试一次。若服务端只返回 `Your input exceeds the context window...` 这类无 token 数的 `response.failed`,仍应触发 compact+retry,只是不要伪造 `token_count`。若 compact 请求本身也超长,先循环删除最旧的 `ToolResult` 及其配对 `ToolCall` 再重试 compact。
|
|
25
27
|
- `AgentLoop` 的 turn-loop 语义要跟上游 `codex-rs/core/src/codex.rs` 一致:按 follow-up / tool handoff 自然收敛,不要加固定 12 轮之类的 hard cap,也不要保留本地专用的 iteration-limit 参数。
|
|
26
28
|
- `README.md` 和 `docs/` 属于对齐工作的一部分:只要实现状态、对齐结论或使用方式发生实质变化,就应及时更新,不要让文档滞后于当前代码。
|
|
27
29
|
- 新工具必须继承 `BaseTool`,然后通过 `ToolRegistry.register(tool_instance)` 接入;不要再给 registry 传散装 name/description/handler 参数。
|
|
@@ -54,3 +56,6 @@
|
|
|
54
56
|
- `--call` / portable storage paths must not rely on the process default text encoding. Always pass `encoding="utf-8"` when reading config, prompts, AGENTS files, skills, dotenv, and session history; for user-authored instructions/history, prefer `errors="replace"` so a Windows GBK locale cannot crash on UTF-8 punctuation such as U+2264 or em dash.
|
|
55
57
|
- 对接真实 `~/.codex/sessions/.../rollout-*.jsonl` 时,不要假设它一定是严格的一行一个 JSON object:本机样本可能包含 pretty-printed 多行对象,且文件尾部偶尔带未完成记录。恢复历史时用 concatenated-JSON 方式读取,并容忍尾部残缺。
|
|
56
58
|
- `pycodex` 本地 session 保存现在也按上游思路走:新 session 一开始就分配稳定的 uuidv7 thread/session id,并把历史增量追加到 `CODEX_HOME/sessions/.../rollout-*.jsonl`;`/resume` 列表应只展示至少有真实 user message 的 rollout,避免空白新 session 污染恢复列表。
|
|
59
|
+
- auto-compact 对齐上游配置名 `model_auto_compact_token_limit`;为空时关闭,触发依据是最近一次模型上报的 `usage.total_tokens`,pre-turn 压缩上一轮历史,mid-turn 压缩工具 follow-up 前的当前历史,并继续复用现有 compacted rollout 记录。
|
|
60
|
+
- Responses streaming 里的 `response.incomplete` 不是连接断开:不要让 `ResponsesModelClient` 把它当 retryable incomplete stream 反复重连。普通 turn 应明确报 `response.incomplete`;compact 请求如果已经收到 assistant partial summary,可以用这个 partial summary 完成 replacement history,避免 midturn auto-compact 卡在 5 次 retry。
|
|
61
|
+
- Feishu card tests read `~/.codex/.feishu_refresh_token` through production code; when running `tests/test_feishu_card.py` locally, isolate HOME (for example `HOME=/tmp/pycodex-empty-home env -u VIRTUAL_ENV uv run pytest tests/test_feishu_card.py tests/test_feishu_link.py`) unless the test itself controls `HOME`.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: python-codex
|
|
3
|
-
Version: 0.1.11
|
|
3
|
+
Version: 0.1.13
|
|
4
4
|
Summary: A minimal Python extraction of Codex's main agent loop
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.6.2
|
|
@@ -185,6 +185,14 @@ Current behavior:
|
|
|
185
185
|
- `/compact` synthesizes a local handoff summary, replaces the in-memory
|
|
186
186
|
conversation history with the compacted view, and appends a compacted-history
|
|
187
187
|
entry to the rollout so later `/resume` sees the same state
|
|
188
|
+
- `model_auto_compact_token_limit = <tokens>` in `config.toml` enables the same
|
|
189
|
+
compaction path automatically when the latest reported usage reaches that
|
|
190
|
+
threshold before a follow-up sampling request or the next user turn
|
|
191
|
+
- if a model request fails with `context_length_exceeded`, pycodex now treats
|
|
192
|
+
the provider-reported requested token count as a failed-request usage sample,
|
|
193
|
+
triggers the same compact path immediately, and retries the request once; if
|
|
194
|
+
the compact request is also over the limit, it repeatedly drops the oldest
|
|
195
|
+
tool response plus its matching tool call before retrying compact
|
|
188
196
|
- new sessions are now recorded under `CODEX_HOME/sessions/.../rollout-*.jsonl`
|
|
189
197
|
with a stable session/thread id and per-item append+flush semantics so
|
|
190
198
|
`/resume` reads back the same rollout format
|
|
@@ -211,7 +219,12 @@ Current behavior:
|
|
|
211
219
|
`reasoning_content` are translated back into Responses `reasoning` items, and
|
|
212
220
|
historical `reasoning` items are replayed into downstream assistant messages
|
|
213
221
|
via the `reasoning` field. Streaming token usage is also requested from vLLM
|
|
214
|
-
and forwarded to the final `response.completed.response.usage`
|
|
222
|
+
and forwarded to the final `response.completed.response.usage`. If a
|
|
223
|
+
downstream chat stream terminates after emitting only reasoning, with no
|
|
224
|
+
assistant content and no tool call, the compat layer discards that partial
|
|
225
|
+
reasoning, retries the same downstream request once, and only then emits
|
|
226
|
+
`response.failed` with `type = "model_output_invalid"` if the retry is still
|
|
227
|
+
reasoning-only
|
|
215
228
|
- standalone `responses_server` now also supports downstream `/v1/messages`
|
|
216
229
|
backends via `--outcomming-api messages`, while keeping the internal
|
|
217
230
|
canonical request/route logic in chat-completions shape
|
|
@@ -164,6 +164,14 @@ Current behavior:
|
|
|
164
164
|
- `/compact` synthesizes a local handoff summary, replaces the in-memory
|
|
165
165
|
conversation history with the compacted view, and appends a compacted-history
|
|
166
166
|
entry to the rollout so later `/resume` sees the same state
|
|
167
|
+
- `model_auto_compact_token_limit = <tokens>` in `config.toml` enables the same
|
|
168
|
+
compaction path automatically when the latest reported usage reaches that
|
|
169
|
+
threshold before a follow-up sampling request or the next user turn
|
|
170
|
+
- if a model request fails with `context_length_exceeded`, pycodex now treats
|
|
171
|
+
the provider-reported requested token count as a failed-request usage sample,
|
|
172
|
+
triggers the same compact path immediately, and retries the request once; if
|
|
173
|
+
the compact request is also over the limit, it repeatedly drops the oldest
|
|
174
|
+
tool response plus its matching tool call before retrying compact
|
|
167
175
|
- new sessions are now recorded under `CODEX_HOME/sessions/.../rollout-*.jsonl`
|
|
168
176
|
with a stable session/thread id and per-item append+flush semantics so
|
|
169
177
|
`/resume` reads back the same rollout format
|
|
@@ -190,7 +198,12 @@ Current behavior:
|
|
|
190
198
|
`reasoning_content` are translated back into Responses `reasoning` items, and
|
|
191
199
|
historical `reasoning` items are replayed into downstream assistant messages
|
|
192
200
|
via the `reasoning` field. Streaming token usage is also requested from vLLM
|
|
193
|
-
and forwarded to the final `response.completed.response.usage`
|
|
201
|
+
and forwarded to the final `response.completed.response.usage`. If a
|
|
202
|
+
downstream chat stream terminates after emitting only reasoning, with no
|
|
203
|
+
assistant content and no tool call, the compat layer discards that partial
|
|
204
|
+
reasoning, retries the same downstream request once, and only then emits
|
|
205
|
+
`response.failed` with `type = "model_output_invalid"` if the retry is still
|
|
206
|
+
reasoning-only
|
|
194
207
|
- standalone `responses_server` now also supports downstream `/v1/messages`
|
|
195
208
|
backends via `--outcomming-api messages`, while keeping the internal
|
|
196
209
|
canonical request/route logic in chat-completions shape
|
|
@@ -554,7 +554,7 @@ Those are the next alignment target after the prompt/context pass.
|
|
|
554
554
|
|
|
555
555
|
- `pycodex` 现在已经补上最小的 provider 级 stream retry:`ResponsesProviderConfig`
|
|
556
556
|
支持 `stream_max_retries` / `stream_idle_timeout_ms`,默认值对齐 upstream 的
|
|
557
|
-
`
|
|
557
|
+
`300_000 ms` SSE idle timeout;代码在 `pycodex/model.py`
|
|
558
558
|
- 当前实现会把 `response.failed`、stream 在 `response.completed` 前断开、以及
|
|
559
559
|
`requests` 侧的读流异常统一视为 retryable stream error,并在
|
|
560
560
|
`ResponsesModelClient.complete(...)` 里按 backoff 重试;重试前会向外发
|
|
@@ -97,6 +97,13 @@ trajectory 追加到 `${PYCODEX_DUMP}/dump.jsonl`,当前记录格式是:
|
|
|
97
97
|
当前内置规则里,`vllm` 仍走 chat-completions compat 路径,但会额外保留
|
|
98
98
|
reasoning;`stepfun` 会删除所有 `developer` role。
|
|
99
99
|
|
|
100
|
+
如果下游 chat stream 一轮结束时只给了 `reasoning` / `reasoning_content`,
|
|
101
|
+
没有 assistant `content` 且没有 tool call,server 会丢弃这次 partial reasoning 并用
|
|
102
|
+
原样 downstream request 静默重试一次。若 retry 后仍是 reasoning-only,才发
|
|
103
|
+
`response.failed(type=model_output_invalid)`。这样可以避免 interrupted 或
|
|
104
|
+
length-stopped thinking 被持久化成 terminal reasoning-only history,并在下一轮转换成
|
|
105
|
+
下游 chat 后端不接受的裸 assistant message。
|
|
106
|
+
|
|
100
107
|
`messages` compat 则故意不改这层 canonical request:仍然先构造 chat 风格
|
|
101
108
|
`outcomming_request`,只有在真正发请求和读 SSE 时,才在边界把它翻译成
|
|
102
109
|
messages request / event。这样 tool hydration、mock `web_search`
|
|
@@ -2,12 +2,13 @@ from .compat import patch_asyncio
|
|
|
2
2
|
|
|
3
3
|
patch_asyncio()
|
|
4
4
|
|
|
5
|
-
from .agent import
|
|
5
|
+
from .agent import Agent
|
|
6
6
|
from .context import ContextConfig, ContextManager
|
|
7
7
|
from .model import (
|
|
8
8
|
ModelClient,
|
|
9
9
|
NOOP_MODEL_STREAM_EVENT_HANDLER,
|
|
10
10
|
ResponsesApiError,
|
|
11
|
+
ResponsesIncompleteError,
|
|
11
12
|
ResponsesModelClient,
|
|
12
13
|
ResponsesProviderConfig,
|
|
13
14
|
)
|
|
@@ -26,14 +27,14 @@ from .protocol import (
|
|
|
26
27
|
TurnResult,
|
|
27
28
|
UserMessage,
|
|
28
29
|
)
|
|
29
|
-
from .runtime import
|
|
30
|
+
from .runtime import CliSubmissionQueue
|
|
30
31
|
from .runtime_services import (
|
|
31
32
|
PlanStore,
|
|
32
33
|
RequestPermissionsManager,
|
|
33
34
|
RequestUserInputManager,
|
|
34
35
|
SubAgentManager,
|
|
35
|
-
|
|
36
|
-
|
|
36
|
+
create_agent_runtime_environment,
|
|
37
|
+
get_agent_runtime_environment,
|
|
37
38
|
)
|
|
38
39
|
from .tools import (
|
|
39
40
|
ApplyPatchTool,
|
|
@@ -90,13 +91,13 @@ def debug(stop: 'bool' = False):
|
|
|
90
91
|
|
|
91
92
|
__all__ = [
|
|
92
93
|
"AgentEvent",
|
|
93
|
-
"
|
|
94
|
-
"
|
|
94
|
+
"Agent",
|
|
95
|
+
"CliSubmissionQueue",
|
|
95
96
|
"ApplyPatchTool",
|
|
96
97
|
"AssistantMessage",
|
|
97
98
|
"BaseTool",
|
|
98
99
|
"CloseAgentTool",
|
|
99
|
-
"
|
|
100
|
+
"create_agent_runtime_environment",
|
|
100
101
|
"CodeModeManager",
|
|
101
102
|
"ContextConfig",
|
|
102
103
|
"ContextManager",
|
|
@@ -120,6 +121,7 @@ __all__ = [
|
|
|
120
121
|
"RequestUserInputManager",
|
|
121
122
|
"ResumeAgentTool",
|
|
122
123
|
"ResponsesApiError",
|
|
124
|
+
"ResponsesIncompleteError",
|
|
123
125
|
"ResponsesModelClient",
|
|
124
126
|
"ResponsesProviderConfig",
|
|
125
127
|
"SendInputTool",
|
|
@@ -142,5 +144,5 @@ __all__ = [
|
|
|
142
144
|
"WaitTool",
|
|
143
145
|
"WebSearchTool",
|
|
144
146
|
"WriteStdinTool",
|
|
145
|
-
"
|
|
147
|
+
"get_agent_runtime_environment",
|
|
146
148
|
]
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
import asyncio
|
|
3
3
|
import json
|
|
4
|
+
import re
|
|
4
5
|
from typing import Callable
|
|
5
6
|
|
|
6
7
|
from .context import ContextManager
|
|
@@ -22,17 +23,36 @@ import typing
|
|
|
22
23
|
|
|
23
24
|
if typing.TYPE_CHECKING:
|
|
24
25
|
from .utils.session_persist import SessionRolloutRecorder
|
|
26
|
+
from .runtime_services import AgentRuntimeEnvironment
|
|
25
27
|
|
|
26
28
|
|
|
27
29
|
EventHandler = Callable[[AgentEvent], None]
|
|
28
|
-
|
|
30
|
+
BASE_EVENT_HANDLER: 'EventHandler' = lambda _event: None
|
|
31
|
+
_REQUESTED_TOKENS_RE = re.compile(
|
|
32
|
+
r"requested\s+([0-9,]+)\s+tokens",
|
|
33
|
+
re.IGNORECASE,
|
|
34
|
+
)
|
|
35
|
+
_REQUESTED_TOKEN_SPLIT_RE = re.compile(
|
|
36
|
+
r"\(([0-9,]+)\s+in\s+the\s+messages,\s+([0-9,]+)\s+in\s+the\s+completion\)",
|
|
37
|
+
re.IGNORECASE,
|
|
38
|
+
)
|
|
39
|
+
_MAX_CONTEXT_TOKENS_RE = re.compile(
|
|
40
|
+
r"maximum\s+context\s+length\s+is\s+([0-9,]+)\s+tokens",
|
|
41
|
+
re.IGNORECASE,
|
|
42
|
+
)
|
|
43
|
+
_CONTEXT_LENGTH_ERROR_MARKERS = (
|
|
44
|
+
"context_length_exceeded",
|
|
45
|
+
"maximum context length",
|
|
46
|
+
"exceeds the context window",
|
|
47
|
+
"exceeded the context window",
|
|
48
|
+
)
|
|
29
49
|
|
|
30
50
|
|
|
31
51
|
class TurnInterrupted(RuntimeError):
|
|
32
52
|
pass
|
|
33
53
|
|
|
34
54
|
|
|
35
|
-
class
|
|
55
|
+
class Agent:
|
|
36
56
|
"""Minimal Python port of Codex's turn loop.
|
|
37
57
|
|
|
38
58
|
The core idea mirrors the Rust implementation:
|
|
@@ -47,9 +67,10 @@ class AgentLoop:
|
|
|
47
67
|
tool_registry: 'ToolRegistry',
|
|
48
68
|
context_manager: 'typing.Union[ContextManager, None]' = None,
|
|
49
69
|
parallel_tool_calls: 'bool' = True,
|
|
50
|
-
event_handler: 'EventHandler' =
|
|
70
|
+
event_handler: 'EventHandler' = BASE_EVENT_HANDLER,
|
|
51
71
|
initial_history: 'typing.Tuple[ConversationItem, ...]' = (),
|
|
52
72
|
rollout_recorder: 'typing.Union[SessionRolloutRecorder, None]' = None,
|
|
73
|
+
runtime_environment: 'AgentRuntimeEnvironment' = None,
|
|
53
74
|
) -> 'None':
|
|
54
75
|
self._model_client = model_client
|
|
55
76
|
self._tool_registry = tool_registry
|
|
@@ -58,6 +79,11 @@ class AgentLoop:
|
|
|
58
79
|
self._event_handler = event_handler
|
|
59
80
|
self._history: 'typing.List[ConversationItem]' = list(initial_history)
|
|
60
81
|
self._rollout_recorder = rollout_recorder
|
|
82
|
+
self._auto_compact_token_limit = (
|
|
83
|
+
self._context_manager.resolve_auto_compact_token_limit()
|
|
84
|
+
)
|
|
85
|
+
self._last_total_usage_tokens: 'typing.Union[int, None]' = None
|
|
86
|
+
self.runtime_environment = runtime_environment
|
|
61
87
|
self.interrupt_asap = False
|
|
62
88
|
|
|
63
89
|
@property
|
|
@@ -65,7 +91,7 @@ class AgentLoop:
|
|
|
65
91
|
return tuple(self._history)
|
|
66
92
|
|
|
67
93
|
def set_event_handler(
|
|
68
|
-
self, event_handler: 'EventHandler' =
|
|
94
|
+
self, event_handler: 'EventHandler' = BASE_EVENT_HANDLER
|
|
69
95
|
) -> 'None':
|
|
70
96
|
self._event_handler = event_handler
|
|
71
97
|
|
|
@@ -81,6 +107,11 @@ class AgentLoop:
|
|
|
81
107
|
) -> 'None':
|
|
82
108
|
self._rollout_recorder = rollout_recorder
|
|
83
109
|
|
|
110
|
+
def ask(self, text: 'str') -> 'TurnResult':
|
|
111
|
+
from .utils.async_bridge import run_async
|
|
112
|
+
|
|
113
|
+
return run_async(self.run_turn([text]))
|
|
114
|
+
|
|
84
115
|
def _raise_if_interrupt_requested(
|
|
85
116
|
self,
|
|
86
117
|
turn_id: 'str',
|
|
@@ -101,8 +132,6 @@ class AgentLoop:
|
|
|
101
132
|
turn_id = turn_id or uuid7_string()
|
|
102
133
|
self.interrupt_asap = False
|
|
103
134
|
new_user_messages = [UserMessage(text=text) for text in texts]
|
|
104
|
-
self._history.extend(new_user_messages)
|
|
105
|
-
self._persist_history_items(new_user_messages)
|
|
106
135
|
|
|
107
136
|
self._emit(
|
|
108
137
|
"turn_started",
|
|
@@ -110,6 +139,9 @@ class AgentLoop:
|
|
|
110
139
|
user_text="\n".join(texts),
|
|
111
140
|
user_texts=list(texts),
|
|
112
141
|
)
|
|
142
|
+
await self._maybe_auto_compact(turn_id, phase="pre_turn")
|
|
143
|
+
self._history.extend(new_user_messages)
|
|
144
|
+
self._persist_history_items(new_user_messages)
|
|
113
145
|
|
|
114
146
|
last_assistant_message: 'typing.Union[str, None]' = None
|
|
115
147
|
final_response_items: 'typing.Tuple[\n typing.Union[typing.Union[AssistantMessage, ToolCall], ReasoningItem], ...\n]' = ()
|
|
@@ -122,23 +154,11 @@ class AgentLoop:
|
|
|
122
154
|
iteration,
|
|
123
155
|
output_text=last_assistant_message,
|
|
124
156
|
)
|
|
157
|
+
await self._maybe_auto_compact(turn_id, phase="mid_turn")
|
|
125
158
|
iteration += 1
|
|
126
|
-
|
|
127
|
-
self._history,
|
|
128
|
-
self._tool_registry.model_visible_specs(),
|
|
129
|
-
self._parallel_tool_calls,
|
|
130
|
-
turn_id=turn_id,
|
|
131
|
-
)
|
|
132
|
-
self._emit(
|
|
133
|
-
"model_called",
|
|
159
|
+
response = await self._complete_model_request(
|
|
134
160
|
turn_id,
|
|
135
|
-
iteration
|
|
136
|
-
history_size=len(prompt.input),
|
|
137
|
-
tool_count=len(prompt.tools),
|
|
138
|
-
)
|
|
139
|
-
response = await self._model_client.complete(
|
|
140
|
-
prompt,
|
|
141
|
-
lambda event: self._handle_model_stream_event(turn_id, event),
|
|
161
|
+
iteration,
|
|
142
162
|
)
|
|
143
163
|
final_response_items = tuple(response.items)
|
|
144
164
|
self._emit(
|
|
@@ -193,6 +213,10 @@ class AgentLoop:
|
|
|
193
213
|
except TurnInterrupted:
|
|
194
214
|
raise
|
|
195
215
|
except Exception as exc:
|
|
216
|
+
context_usage = _usage_from_context_length_error(str(exc))
|
|
217
|
+
if context_usage is not None:
|
|
218
|
+
self._remember_token_usage(context_usage)
|
|
219
|
+
self._emit("token_count", turn_id, usage=context_usage)
|
|
196
220
|
self._emit(
|
|
197
221
|
"turn_failed",
|
|
198
222
|
turn_id,
|
|
@@ -287,6 +311,8 @@ class AgentLoop:
|
|
|
287
311
|
return
|
|
288
312
|
|
|
289
313
|
def _handle_model_stream_event(self, turn_id: 'str', event: 'ModelStreamEvent') -> 'None':
|
|
314
|
+
if event.kind == "token_count":
|
|
315
|
+
self._remember_token_usage(event.payload.get("usage"))
|
|
290
316
|
if event.kind == "assistant_delta":
|
|
291
317
|
self._emit("assistant_delta", turn_id, **event.payload)
|
|
292
318
|
elif event.kind == "tool_call":
|
|
@@ -296,6 +322,149 @@ class AgentLoop:
|
|
|
296
322
|
elif event.kind == "stream_error":
|
|
297
323
|
self._emit("stream_error", turn_id, **event.payload)
|
|
298
324
|
|
|
325
|
+
def _remember_token_usage(self, usage: 'object') -> 'None':
|
|
326
|
+
if not isinstance(usage, dict):
|
|
327
|
+
return
|
|
328
|
+
try:
|
|
329
|
+
self._last_total_usage_tokens = int(usage["total_tokens"])
|
|
330
|
+
except (KeyError, TypeError, ValueError):
|
|
331
|
+
return
|
|
332
|
+
|
|
333
|
+
async def _complete_model_request(
|
|
334
|
+
self,
|
|
335
|
+
turn_id: 'str',
|
|
336
|
+
iteration: 'int',
|
|
337
|
+
) -> 'typing.Any':
|
|
338
|
+
attempted_context_compact = False
|
|
339
|
+
while True:
|
|
340
|
+
prompt = self._context_manager.build_prompt(
|
|
341
|
+
self._history,
|
|
342
|
+
self._tool_registry.model_visible_specs(),
|
|
343
|
+
self._parallel_tool_calls,
|
|
344
|
+
turn_id=turn_id,
|
|
345
|
+
)
|
|
346
|
+
self._emit(
|
|
347
|
+
"model_called",
|
|
348
|
+
turn_id,
|
|
349
|
+
iteration=iteration,
|
|
350
|
+
history_size=len(prompt.input),
|
|
351
|
+
tool_count=len(prompt.tools),
|
|
352
|
+
)
|
|
353
|
+
try:
|
|
354
|
+
return await self._model_client.complete(
|
|
355
|
+
prompt,
|
|
356
|
+
lambda event: self._handle_model_stream_event(turn_id, event),
|
|
357
|
+
)
|
|
358
|
+
except Exception as exc:
|
|
359
|
+
error_message = str(exc)
|
|
360
|
+
if (
|
|
361
|
+
not _is_context_length_error_message(error_message)
|
|
362
|
+
or attempted_context_compact
|
|
363
|
+
):
|
|
364
|
+
raise
|
|
365
|
+
attempted_context_compact = True
|
|
366
|
+
context_usage = _usage_from_context_length_error(error_message)
|
|
367
|
+
if context_usage is not None:
|
|
368
|
+
self._remember_token_usage(context_usage)
|
|
369
|
+
self._emit("token_count", turn_id, usage=context_usage)
|
|
370
|
+
await self._run_auto_compact(
|
|
371
|
+
turn_id,
|
|
372
|
+
phase="context_length_exceeded",
|
|
373
|
+
total_tokens=(
|
|
374
|
+
context_usage.get("total_tokens")
|
|
375
|
+
if context_usage is not None
|
|
376
|
+
else None
|
|
377
|
+
),
|
|
378
|
+
token_limit=_context_length_error_token_limit(error_message),
|
|
379
|
+
prune_tool_results_on_context_error=True,
|
|
380
|
+
)
|
|
381
|
+
self._raise_if_interrupt_requested(turn_id, iteration)
|
|
382
|
+
|
|
383
|
+
async def _maybe_auto_compact(
|
|
384
|
+
self,
|
|
385
|
+
turn_id: 'str',
|
|
386
|
+
phase: 'str',
|
|
387
|
+
) -> 'None':
|
|
388
|
+
limit = self._auto_compact_token_limit
|
|
389
|
+
total_tokens = self._last_total_usage_tokens
|
|
390
|
+
if limit is None or total_tokens is None:
|
|
391
|
+
return
|
|
392
|
+
if total_tokens < limit or not self._history:
|
|
393
|
+
return
|
|
394
|
+
|
|
395
|
+
await self._run_auto_compact(
|
|
396
|
+
turn_id,
|
|
397
|
+
phase=phase,
|
|
398
|
+
total_tokens=total_tokens,
|
|
399
|
+
token_limit=limit,
|
|
400
|
+
prune_tool_results_on_context_error=True,
|
|
401
|
+
)
|
|
402
|
+
|
|
403
|
+
async def _run_auto_compact(
    self,
    turn_id: 'str',
    phase: 'str',
    total_tokens: 'typing.Union[int, None]' = None,
    token_limit: 'typing.Union[int, None]' = None,
    prune_tool_results_on_context_error: 'bool' = False,
) -> 'None':
    """Compact the agent via ``compact_agent`` and emit progress events.

    Emits ``auto_compact_started`` first. If ``compact_agent`` raises,
    ``auto_compact_failed`` is emitted with the error text and type and the
    exception is re-raised. On success ``self._last_total_usage_tokens`` is
    reset to ``None`` and, unless the result is ``None``,
    ``auto_compact_completed`` is emitted.

    Args:
        turn_id: Identifier of the turn, attached to every emitted event.
        phase: Label included in every event payload as ``phase``.
        total_tokens: Included in the payloads when not ``None``.
        token_limit: Included in the payloads when not ``None``.
        prune_tool_results_on_context_error: Passed through unchanged as the
            third positional argument of ``compact_agent``.

    Raises:
        Exception: Whatever ``compact_agent`` raises, after the failure
            event has been emitted.
    """
    from .utils.compactor import compact_agent

    # Fields shared by the started / failed / completed events.
    base_fields: 'typing.Dict[str, object]' = {"phase": phase}
    for field_name, field_value in (
        ("total_tokens", total_tokens),
        ("token_limit", token_limit),
    ):
        if field_value is not None:
            base_fields[field_name] = field_value
    self._emit("auto_compact_started", turn_id, **base_fields)

    def forward_stream_error(event: 'ModelStreamEvent') -> 'None':
        # Only stream errors are surfaced; every other event kind is ignored.
        if event.kind != "stream_error":
            return
        self._emit("stream_error", turn_id, **event.payload)

    try:
        outcome = await compact_agent(
            self,
            forward_stream_error,
            prune_tool_results_on_context_error,
        )
    except Exception as exc:
        failure_fields = {
            **base_fields,
            "error": str(exc),
            "error_type": type(exc).__name__,
        }
        self._emit("auto_compact_failed", turn_id, **failure_fields)
        raise

    # The stored usage total is cleared even when there is no result to report.
    self._last_total_usage_tokens = None
    if outcome is None:
        return

    success_fields = {
        **base_fields,
        "original_item_count": outcome.original_item_count,
        "retained_item_count": outcome.retained_item_count,
        "summary": outcome.display_text(),
    }
    if outcome.pruned_tool_results:
        success_fields["pruned_tool_results"] = outcome.pruned_tool_results
    self._emit("auto_compact_completed", turn_id, **success_fields)
|
|
467
|
+
|
|
299
468
|
def _build_follow_up_messages(
|
|
300
469
|
self,
|
|
301
470
|
tool_results: 'typing.List[ToolResult]',
|
|
@@ -326,3 +495,39 @@ class AgentLoop:
|
|
|
326
495
|
)
|
|
327
496
|
)
|
|
328
497
|
return follow_ups
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def _usage_from_context_length_error(
    message: 'str',
) -> 'typing.Union[typing.Dict[str, int], None]':
    """Extract token usage numbers from a context-length error message.

    Args:
        message: Error text to inspect.

    Returns:
        ``None`` when the message is not recognised as a context-length
        error or ``_REQUESTED_TOKENS_RE`` finds no match. Otherwise a dict
        with ``total_tokens`` and ``input_tokens``; ``output_tokens`` is
        present only when ``_REQUESTED_TOKEN_SPLIT_RE`` matches. Without a
        split match, ``input_tokens`` equals ``total_tokens``.
    """
    requested = (
        _REQUESTED_TOKENS_RE.search(message)
        if _is_context_length_error_message(message)
        else None
    )
    if requested is None:
        return None

    total = _parse_token_count(requested.group(1))
    split = _REQUESTED_TOKEN_SPLIT_RE.search(message)
    if split is None:
        # No input/output breakdown available: the whole request counts as input.
        return {"total_tokens": total, "input_tokens": total}
    return {
        "total_tokens": total,
        "input_tokens": _parse_token_count(split.group(1)),
        "output_tokens": _parse_token_count(split.group(2)),
    }
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def _is_context_length_error_message(message: 'str') -> 'bool':
    """Return True when the lowercased message contains any known marker.

    Markers come from ``_CONTEXT_LENGTH_ERROR_MARKERS`` and are compared
    against ``message.lower()`` by substring containment.
    """
    lowered = message.lower()
    for marker in _CONTEXT_LENGTH_ERROR_MARKERS:
        if marker in lowered:
            return True
    return False
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def _context_length_error_token_limit(message: 'str') -> 'typing.Union[int, None]':
    """Return the token limit quoted in an error message, if any.

    The first capture group of ``_MAX_CONTEXT_TOKENS_RE`` is parsed with
    ``_parse_token_count``; ``None`` is returned when the pattern does not
    match.
    """
    found = _MAX_CONTEXT_TOKENS_RE.search(message)
    return None if found is None else _parse_token_count(found.group(1))
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def _parse_token_count(value: 'str') -> 'int':
|
|
533
|
+
return int(value.replace(",", ""))
|