pythinker-code 2.3.0__py3-none-any.whl → 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. pythinker_code/CHANGELOG.md +82 -0
  2. pythinker_code/acp/tools.py +7 -0
  3. pythinker_code/agents/default/agent.yaml +10 -0
  4. pythinker_code/agents/default/coder.yaml +17 -0
  5. pythinker_code/agents/default/explore.yaml +15 -2
  6. pythinker_code/agents/default/implementer.yaml +46 -0
  7. pythinker_code/agents/default/plan.yaml +16 -4
  8. pythinker_code/agents/default/review.yaml +47 -0
  9. pythinker_code/agents/default/system.md +17 -1
  10. pythinker_code/agents/default/verifier.yaml +46 -0
  11. pythinker_code/app.py +18 -10
  12. pythinker_code/background/manager.py +22 -4
  13. pythinker_code/background/models.py +14 -1
  14. pythinker_code/background/store.py +7 -1
  15. pythinker_code/config.py +30 -2
  16. pythinker_code/llm.py +35 -6
  17. pythinker_code/plugin/manager.py +19 -6
  18. pythinker_code/soul/agent.py +1 -1
  19. pythinker_code/soul/permission.py +341 -0
  20. pythinker_code/soul/pythinkersoul.py +104 -65
  21. pythinker_code/soul/toolset.py +35 -31
  22. pythinker_code/subagents/builder.py +1 -0
  23. pythinker_code/subagents/models.py +2 -0
  24. pythinker_code/subagents/runner.py +16 -8
  25. pythinker_code/subagents/store.py +4 -0
  26. pythinker_code/telemetry/config.py +27 -4
  27. pythinker_code/telemetry/crash.py +15 -2
  28. pythinker_code/telemetry/otel.py +2 -1
  29. pythinker_code/telemetry/sentry.py +46 -1
  30. pythinker_code/tools/agent/__init__.py +47 -1
  31. pythinker_code/tools/agent/description.md +11 -4
  32. pythinker_code/tools/file/__init__.py +2 -1
  33. pythinker_code/tools/file/grep_local.py +325 -13
  34. pythinker_code/tools/file/replace.py +6 -0
  35. pythinker_code/tools/file/write.py +6 -0
  36. pythinker_code/tools/plan/__init__.py +32 -0
  37. pythinker_code/tools/plan/handoff.py +69 -0
  38. pythinker_code/tools/shell/__init__.py +4 -0
  39. pythinker_code/tools/web/fetch.py +74 -2
  40. pythinker_code/ui/shell/__init__.py +8 -0
  41. pythinker_code/ui/shell/prompt.py +27 -0
  42. pythinker_code/ui/shell/slash.py +49 -32
  43. pythinker_code/ui/shell/update.py +116 -2
  44. pythinker_code/vis/api/sessions.py +9 -6
  45. pythinker_code/vis/app.py +26 -2
  46. pythinker_code/vis/static/assets/{highlighted-body-B3W2YXNL-D2MTYyJz.js → highlighted-body-B3W2YXNL-CY1rtwrX.js} +1 -1
  47. pythinker_code/vis/static/assets/{index-CezafTt_.js → index-DgmTI2M_.js} +70 -70
  48. pythinker_code/vis/static/index.html +1 -1
  49. pythinker_code/web/api/open_in.py +34 -19
  50. pythinker_code/web/api/sessions.py +18 -13
  51. pythinker_code/web/app.py +2 -4
  52. pythinker_code/web/static/assets/{_baseUniq-DYwtr3m4.js → _baseUniq-Bv26EHIE.js} +1 -1
  53. pythinker_code/web/static/assets/{arc-CNhBgyVb.js → arc-DuCCCcUZ.js} +1 -1
  54. pythinker_code/web/static/assets/{architectureDiagram-VXUJARFQ-DpvaxB3Y.js → architectureDiagram-VXUJARFQ-CVZ131zn.js} +1 -1
  55. pythinker_code/web/static/assets/{blockDiagram-VD42YOAC-IlYHIkrW.js → blockDiagram-VD42YOAC-BAC2VOip.js} +1 -1
  56. pythinker_code/web/static/assets/{c4Diagram-YG6GDRKO-D_jGrUIu.js → c4Diagram-YG6GDRKO-9uwamEIP.js} +1 -1
  57. pythinker_code/web/static/assets/channel-aVyB491s.js +1 -0
  58. pythinker_code/web/static/assets/{chunk-4BX2VUAB-uYRqFG6q.js → chunk-4BX2VUAB-BTdhSGW0.js} +1 -1
  59. pythinker_code/web/static/assets/{chunk-55IACEB6-5K_8Tvtf.js → chunk-55IACEB6-CYDI0p8Q.js} +1 -1
  60. pythinker_code/web/static/assets/{chunk-B4BG7PRW-BAp2tokd.js → chunk-B4BG7PRW-b8oi1KW8.js} +1 -1
  61. pythinker_code/web/static/assets/{chunk-DI55MBZ5-C3ICALbg.js → chunk-DI55MBZ5-DGaf6dom.js} +1 -1
  62. pythinker_code/web/static/assets/{chunk-FMBD7UC4-B3ntDoat.js → chunk-FMBD7UC4-C1R9DMCj.js} +1 -1
  63. pythinker_code/web/static/assets/{chunk-QN33PNHL-Dy8y3fp6.js → chunk-QN33PNHL-DjSDLitQ.js} +1 -1
  64. pythinker_code/web/static/assets/{chunk-QZHKN3VN-BXmiK1aE.js → chunk-QZHKN3VN-_zmK8SCU.js} +1 -1
  65. pythinker_code/web/static/assets/{chunk-TZMSLE5B-BbI6RHhP.js → chunk-TZMSLE5B-DURAXY_D.js} +1 -1
  66. pythinker_code/web/static/assets/classDiagram-2ON5EDUG-cvkDF0Mx.js +1 -0
  67. pythinker_code/web/static/assets/classDiagram-v2-WZHVMYZB-cvkDF0Mx.js +1 -0
  68. pythinker_code/web/static/assets/clone-DQNyDB_s.js +1 -0
  69. pythinker_code/web/static/assets/{code-block-IT6T5CEO-C09u1ZPS.js → code-block-IT6T5CEO-DL4aF17r.js} +1 -1
  70. pythinker_code/web/static/assets/{cose-bilkent-S5V4N54A-OfdgQa9b.js → cose-bilkent-S5V4N54A-BuUIleVc.js} +1 -1
  71. pythinker_code/web/static/assets/{cytoscape.esm-BHPoE92Y.js → cytoscape.esm-C-OXuR4H.js} +1 -1
  72. pythinker_code/web/static/assets/{dagre-6UL2VRFP-Dqsjg8sJ.js → dagre-6UL2VRFP-CFMQD_BU.js} +1 -1
  73. pythinker_code/web/static/assets/{diagram-PSM6KHXK-DxkId0Z8.js → diagram-PSM6KHXK-BlZG6Knx.js} +1 -1
  74. pythinker_code/web/static/assets/{diagram-QEK2KX5R-CkPNihvj.js → diagram-QEK2KX5R-Bx6pGAz_.js} +1 -1
  75. pythinker_code/web/static/assets/{diagram-S2PKOQOG-C_N5Jjql.js → diagram-S2PKOQOG-C3k3j5WT.js} +1 -1
  76. pythinker_code/web/static/assets/{erDiagram-Q2GNP2WA-C8_5yrCr.js → erDiagram-Q2GNP2WA-ZXF9DMQm.js} +1 -1
  77. pythinker_code/web/static/assets/{flowDiagram-NV44I4VS-BfV7xDb8.js → flowDiagram-NV44I4VS-BbSLxWgp.js} +1 -1
  78. pythinker_code/web/static/assets/{ganttDiagram-JELNMOA3-Cld5kwhV.js → ganttDiagram-JELNMOA3-NOlUrkQp.js} +1 -1
  79. pythinker_code/web/static/assets/{gitGraphDiagram-NY62KEGX-F3FwjYQD.js → gitGraphDiagram-NY62KEGX-BAlT86AC.js} +1 -1
  80. pythinker_code/web/static/assets/{graph-BWlEpfBO.js → graph-BzHVPchG.js} +1 -1
  81. pythinker_code/web/static/assets/{index-BqPJMGF-.js → index-Cm_lwIyA.js} +2 -2
  82. pythinker_code/web/static/assets/{index-DpudRZuI.js → index-CnM44gk-.js} +1 -1
  83. pythinker_code/web/static/assets/{index-DYUDz2ym.js → index-Cqg-K1YV.js} +1 -1
  84. pythinker_code/web/static/assets/{infoDiagram-WHAUD3N6-BJOGXqn7.js → infoDiagram-WHAUD3N6-DBX3JCDO.js} +1 -1
  85. pythinker_code/web/static/assets/{journeyDiagram-XKPGCS4Q-BZdlH-JG.js → journeyDiagram-XKPGCS4Q-yaCqhNqw.js} +1 -1
  86. pythinker_code/web/static/assets/{kanban-definition-3W4ZIXB7-CmgmSsYi.js → kanban-definition-3W4ZIXB7-Cw2loPy6.js} +1 -1
  87. pythinker_code/web/static/assets/{layout-CWaYhVVo.js → layout-DC8qmv-q.js} +1 -1
  88. pythinker_code/web/static/assets/{linear-Bw6Dncma.js → linear-Bpra1Fqc.js} +1 -1
  89. pythinker_code/web/static/assets/{mermaid-VLURNSYL-CzjjwzDB.js → mermaid-VLURNSYL-BvHbNBJJ.js} +7 -7
  90. pythinker_code/web/static/assets/{mermaid.core-Bb0_1h52.js → mermaid.core-QA4yHgUs.js} +5 -5
  91. pythinker_code/web/static/assets/{min-Df20Er5m.js → min-sLtZymTB.js} +1 -1
  92. pythinker_code/web/static/assets/{mindmap-definition-VGOIOE7T-CAe0siLd.js → mindmap-definition-VGOIOE7T-BycLAQjs.js} +1 -1
  93. pythinker_code/web/static/assets/{pieDiagram-ADFJNKIX-CLMBAwjU.js → pieDiagram-ADFJNKIX-CpFv1-B_.js} +1 -1
  94. pythinker_code/web/static/assets/{quadrantDiagram-AYHSOK5B-B9vjzD3o.js → quadrantDiagram-AYHSOK5B-C8HyfNyW.js} +1 -1
  95. pythinker_code/web/static/assets/{requirementDiagram-UZGBJVZJ-Bbjo8TGX.js → requirementDiagram-UZGBJVZJ-DyTuOubd.js} +1 -1
  96. pythinker_code/web/static/assets/{sankeyDiagram-TZEHDZUN-xnxkDnDQ.js → sankeyDiagram-TZEHDZUN-BlUGUAE1.js} +1 -1
  97. pythinker_code/web/static/assets/{sequenceDiagram-WL72ISMW-qkafBa71.js → sequenceDiagram-WL72ISMW-jqMJUHqm.js} +1 -1
  98. pythinker_code/web/static/assets/{stateDiagram-FKZM4ZOC-BzTcRJpG.js → stateDiagram-FKZM4ZOC-zShF28En.js} +1 -1
  99. pythinker_code/web/static/assets/stateDiagram-v2-4FDKWEC3-lllXGHDb.js +1 -0
  100. pythinker_code/web/static/assets/{timeline-definition-IT6M3QCI-0ls9u7xd.js → timeline-definition-IT6M3QCI-Cvekw0Hb.js} +1 -1
  101. pythinker_code/web/static/assets/{treemap-KMMF4GRG-C1ChVaOv.js → treemap-KMMF4GRG-DuFTYtWm.js} +1 -1
  102. pythinker_code/web/static/assets/{xychartDiagram-PRI3JC2R-Ba9eacUU.js → xychartDiagram-PRI3JC2R-BVeBFpa0.js} +1 -1
  103. pythinker_code/web/static/index.html +1 -1
  104. pythinker_code/wire/server.py +16 -13
  105. {pythinker_code-2.3.0.dist-info → pythinker_code-2.5.0.dist-info}/METADATA +34 -5
  106. {pythinker_code-2.3.0.dist-info → pythinker_code-2.5.0.dist-info}/RECORD +110 -105
  107. pythinker_code/web/static/assets/channel-BPOuE91b.js +0 -1
  108. pythinker_code/web/static/assets/classDiagram-2ON5EDUG-C1S9FRV4.js +0 -1
  109. pythinker_code/web/static/assets/classDiagram-v2-WZHVMYZB-C1S9FRV4.js +0 -1
  110. pythinker_code/web/static/assets/clone-D2vuslet.js +0 -1
  111. pythinker_code/web/static/assets/stateDiagram-v2-4FDKWEC3-TyAV0qk_.js +0 -1
  112. {pythinker_code-2.3.0.dist-info → pythinker_code-2.5.0.dist-info}/WHEEL +0 -0
  113. {pythinker_code-2.3.0.dist-info → pythinker_code-2.5.0.dist-info}/entry_points.txt +0 -0
  114. {pythinker_code-2.3.0.dist-info → pythinker_code-2.5.0.dist-info}/licenses/LICENSE +0 -0
  115. {pythinker_code-2.3.0.dist-info → pythinker_code-2.5.0.dist-info}/licenses/NOTICE +0 -0
@@ -2,6 +2,88 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 2.5.0 (2026-05-13)
6
+
7
+ bk_box_main coding-agent runtime port, Windows self-upgrade fix, FetchURL SSRF hardening, and a broad reliability/security pass.
8
+
9
+ ### Subagent runtime & permissions
10
+
11
+ - Runtime-enforced permission profiles for every built-in role: **read-only**, **plan**, **ask**, **implement**, **review**, **verify**. Profiles are snapshotted per LLM step in the new `src/pythinker_code/soul/permission.py`, so a mid-step model switch can't escalate permissions. Plan mode now **hard-denies** non-plan writes and dangerous shell mutations instead of relying on prompt-deny.
12
+ - New plan-handoff workflow in `src/pythinker_code/tools/plan/handoff.py` with dynamic injection through `soul/dynamic_injections/plan_mode.py`. Smooth handoff from `plan` → `implement` without re-priming the context.
13
+ - New smart-search grep variant; new subagent metadata plumbing (`subagents/models.py`, `subagents/store.py`, `subagents/builder.py`, `subagents/runner.py`).
14
+
15
+ ### Background tasks
16
+
17
+ - Recovery distinguishes **`recoverable`** (resumable via a stored `agent_id`) from **`lost`** (worker is gone with no resume target). Agent instances are parked as `idle` rather than failed when the underlying task is recoverable.
18
+ - Added guards against overwriting terminal task states; closed subagent races on instance transitions.
19
+ - `pythinker-host`: subprocess teardown now kills the **entire child process tree** and creates a new session group, so background workers can no longer survive their parent on Linux/macOS.
20
+
21
+ ### FetchURL — SSRF + resource-exhaustion hardening
22
+
23
+ - `pythinker_code.tools.web.fetch._validate_fetch_url` blocks **private, loopback, link-local, multicast, and reserved** IPv4/IPv6 ranges; rejects non-`http`/`https` schemes and host-less URLs up front.
24
+ - Responses are streamed with a hard **5 MB** ceiling (`_read_limited`) honoring `Content-Length`. Both the direct path and the configured fetch-service path enforce the same caps.
25
+
26
+ ### Web / vis surface
27
+
28
+ - Upload limits, open-in path escaping, and vis auth all hardened (`src/pythinker_code/web/`, `src/pythinker_code/vis/`, `vis/src/lib/api.ts`).
29
+
30
+ ### Plugin
31
+
32
+ - Plugin definitions no longer persist host credentials. Plugin **name validation** tightened to reject path-traversal and shell-meta characters.
33
+
34
+ ### Telemetry & observability
35
+
36
+ - OTel `service.name` normalized to a stable value, decoupled from the configured display name, so SigNoz dashboards keep working across rebrands.
37
+ - Sentry filters drop test-process noise and benign shutdown errors; `pythinker_code/telemetry/config.py` and `pythinker_code/telemetry/crash.py` updated accordingly.
38
+ - New `tests/telemetry/test_otel_resource.py` asserts the resource identity used by the dashboards.
39
+
40
+ ### Windows
41
+
42
+ - `pythinker update` on Windows now spawns the upgrade in a **detached console** and exits the parent process before `uv tool upgrade` runs, releasing the lock on the running `pythinker.exe`. Fixes the `os error 32: The process cannot access the file because it is being used by another process` error that blocked self-upgrade.
43
+ - New CI matrix entry on **`windows-2025-vs2026`** (experimental, non-blocking) for the pythinker-host and pythinker-cli build, validating Visual Studio 2026 / MSVC v144 forward-compat before GitHub eventually deprecates `windows-2022`.
44
+
45
+ ### Feedback
46
+
47
+ - New `feedback` config block: `endpoint_url`, `api_key`, `custom_headers`. The `/feedback` slash command now routes user submissions to a user-configured HTTP endpoint instead of being a no-op.
48
+
49
+ ### UI
50
+
51
+ - Pythinker version is shown on the welcome screen.
52
+
53
+ ### CI
54
+
55
+ - Pre-push hooks mirror CI's `check` target (`ruff format --check`, `ruff check`, `pyright`) so local pushes catch the same regressions CI does.
56
+ - README + CHANGELOG release-validate gate hardened; the GitHub Release publish step is now resilient to transient upstream failures.
57
+ - Spell-check vocabulary fix in `soul/permission.py` for an internal error string the typos crate flagged; experimental `windows-2025-vs2026` build no longer collides with `windows-2022` on the shared `pythinker-x86_64-pc-windows-msvc` artifact name.
58
+
59
+ ### Compatibility
60
+
61
+ - `pythinker_core.contrib.chat_provider.anthropic`: handle the six new tool-result block types added by anthropic SDK 0.101 (`web_fetch_tool_result`, `code_execution_tool_result`, `bash_code_execution_tool_result`, `text_editor_code_execution_tool_result`, `tool_search_tool_result`, `container_upload`). pyright is exhaustive again.
62
+
63
+ Upgrade with `pythinker update` or `pip install --upgrade pythinker-code==2.5.0`.
64
+
65
+ ## 2.4.0 (2026-05-11)
66
+
67
+ Subagent roles overhaul, Moonshot/Kimi K2 provider support, and a ripgrep-free Grep fallback.
68
+
69
+ - New built-in subagents under `src/pythinker_code/agents/default/`:
70
+ - `implementer.yaml` — scoped code changes with minimum surrounding edits and a quick verification pass.
71
+ - `review.yaml` — read-only code review with severity-scored findings (BLOCKER / MAJOR / MINOR / NIT).
72
+ - `verifier.yaml` — read-only validation runner that reports `PASS` / `FAIL` / `FLAKY` without applying fixes.
73
+ - `coder.yaml`, `explore.yaml`, and `plan.yaml` now emit a standard `### SUMMARY / EVIDENCE / CHANGES / RISKS / BLOCKERS` response contract so the parent agent can consume subagent output without re-parsing prose.
74
+ - `agent.yaml` registers the three new roles; `tools/agent/description.md` documents the Scout → Plan → Implement → Review → Verify workflow and the parallel review/verification pattern.
75
+ - `agents/default/system.md`: adds decomposition guidance (preview → todo list → parallel chunks), enforces post-tool-call verification before acting on results, and tells the agent to cross-check at least one load-bearing subagent finding before editing from it.
76
+ - Kimi K2.5 / K2.6 (Moonshot) and other strict interleaved-thinking providers:
77
+ - `packages/pythinker-core/.../chat_provider/pythinker.py`: always emit `reasoning_content` on assistant tool-call replays so Moonshot's "thinking is enabled but reasoning_content is missing in assistant tool call message at index N" error no longer trips multi-step tool flows.
78
+ - `packages/pythinker-core/.../contrib/chat_provider/openai_legacy.py`: replay reasoning metadata on every assistant turn for `kimi-k2*` / `deepseek*` models (falls back to the assistant text or `"[reasoning unavailable]"` when reasoning content was not retained).
79
+ - `src/pythinker_code/llm.py`: route Kimi K2 thinking through the provider-specific `extra_body={"thinking": {"type": "enabled"|"disabled"}}` body field instead of OpenAI's `reasoning_effort` (which Kimi ignores), and persist `LLM.thinking` across `clone_llm_with_model_alias` so model switches preserve the user's thinking choice.
80
+ - `tools/file/grep_local.py`:
81
+ - Pure-Python `rg`-free fallback (`_python_grep`) honoring `pattern`, `path`, `glob`, `type` (bash / c / cpp / go / java / js / json / md / py / rust / sh / toml / ts / txt / yaml / zsh), `ignore_case`, `multiline`, `context` / `before_context` / `after_context`, `line_number`, `output_mode` (`content` / `files_with_matches` / `count_matches`), `offset`, `head_limit`, and the standard sensitive-file redaction. `.gitignore` / `.ignore` and the VCS metadata directories (`.git`, `.svn`, `.hg`, `.bzr`, `.jj`, `.sl`) are respected unless `include_ignored=true`.
82
+ - `_find_existing_rg` now honors `PYTHINKER_RG_PATH` and additionally probes `/usr/bin`, `/usr/local/bin`, `~/.cargo/bin`, `~/.local/bin`, and `~/.pi/agent/bin` before falling through to download.
83
+ - Downloader retries against the upstream GitHub releases mirror (`https://github.com/BurntSushi/ripgrep/releases/download/<version>/...`) when the CDN mirror is unreachable, and the failure path now degrades into the Python fallback instead of raising.
84
+ - `.gitignore`: ignore `graphify-out*/`, `.graphify_*.json`, `.graphify_*.txt`, and the local `blackbox/` scratch area.
85
+ - `AGENTS.md` rewritten to reflect the new subagent roster and workflow.
86
+
5
87
  ## 2.3.0 (2026-05-09)
6
88
 
7
89
  Telemetry & observability audit.
@@ -8,6 +8,7 @@ from pythinker_host.local import local_host
8
8
 
9
9
  from pythinker_code.soul.agent import Runtime
10
10
  from pythinker_code.soul.approval import Approval
11
+ from pythinker_code.soul.permission import check_shell_command_allowed
11
12
  from pythinker_code.soul.toolset import PythinkerToolset
12
13
  from pythinker_code.tools.shell import Params as ShellParams
13
14
  from pythinker_code.tools.shell import Shell
@@ -35,6 +36,7 @@ def replace_tools(
35
36
  acp_conn,
36
37
  acp_session_id,
37
38
  runtime.approval,
39
+ runtime,
38
40
  )
39
41
  )
40
42
 
@@ -52,6 +54,7 @@ class Terminal(CallableTool2[ShellParams]):
52
54
  acp_conn: acp.Client,
53
55
  acp_session_id: str,
54
56
  approval: Approval,
57
+ runtime: Runtime,
55
58
  ) -> None:
56
59
  # Use the `name`, `description`, and `params` from the existing Shell tool,
57
60
  # so that when this is added to the toolset, it replaces the original Shell tool.
@@ -59,6 +62,7 @@ class Terminal(CallableTool2[ShellParams]):
59
62
  self._acp_conn = acp_conn
60
63
  self._acp_session_id = acp_session_id
61
64
  self._approval = approval
65
+ self._runtime = runtime
62
66
 
63
67
  async def __call__(self, params: ShellParams) -> ToolReturnValue:
64
68
  from pythinker_code.acp.session import get_current_acp_tool_call_id_or_none
@@ -71,6 +75,9 @@ class Terminal(CallableTool2[ShellParams]):
71
75
  if not params.command:
72
76
  return builder.error("Command cannot be empty.", brief="Empty command")
73
77
 
78
+ if err := check_shell_command_allowed(self._runtime, params.command):
79
+ return err
80
+
74
81
  approval_result = await self._approval.request(
75
82
  self.name,
76
83
  "run shell command",
@@ -18,6 +18,7 @@ agent:
18
18
  - "pythinker_code.tools.file:ReadMediaFile"
19
19
  - "pythinker_code.tools.file:Glob"
20
20
  - "pythinker_code.tools.file:Grep"
21
+ - "pythinker_code.tools.file:SmartSearch"
21
22
  - "pythinker_code.tools.file:WriteFile"
22
23
  - "pythinker_code.tools.file:StrReplaceFile"
23
24
  - "pythinker_code.tools.web:SearchWeb"
@@ -34,3 +35,12 @@ agent:
34
35
  plan:
35
36
  path: ./plan.yaml
36
37
  description: "Read-only implementation planning and architecture design."
38
+ review:
39
+ path: ./review.yaml
40
+ description: "Read-only code review with severity-scored findings."
41
+ implementer:
42
+ path: ./implementer.yaml
43
+ description: "Scoped implementation with minimal edits and verification."
44
+ verifier:
45
+ path: ./verifier.yaml
46
+ description: "Read-only validation runner for tests, lint, and builds."
@@ -4,6 +4,22 @@ agent:
4
4
  system_prompt_args:
5
5
  ROLE_ADDITIONAL: |
6
6
  You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. You must treat the parent agent as your caller. Do not directly ask the end user questions. If something is unclear, explain the ambiguity in your final summary to the parent agent.
7
+
8
+ Stay tightly scoped to exactly what the parent assigned. Do not expand into adjacent cleanup or refactors. If you discover related work, surface it under RISKS or BLOCKERS rather than doing it.
9
+
10
+ Before editing, read the target files and confirm the line ranges/patterns you will change. Prefer the minimum edit that satisfies the brief. After edits, run the smallest relevant verification command available and report the result.
11
+
12
+ Final response contract:
13
+ ### SUMMARY
14
+ One paragraph with what you did and the outcome.
15
+ ### EVIDENCE
16
+ Bullet list of concrete file paths, command results, or observed errors that support the outcome.
17
+ ### CHANGES
18
+ Bullet list of every file you modified, or `None.` if read-only.
19
+ ### RISKS
20
+ Bullet list of remaining risks or `None observed.`.
21
+ ### BLOCKERS
22
+ Bullet list of anything that stopped completion, or `None.`.
7
23
  when_to_use: |
8
24
  Use this agent for non-trivial software engineering work that may require reading files, editing code, running commands, and returning a compact but technically complete summary to the parent agent.
9
25
  allowed_tools:
@@ -12,6 +28,7 @@ agent:
12
28
  - "pythinker_code.tools.file:ReadMediaFile"
13
29
  - "pythinker_code.tools.file:Glob"
14
30
  - "pythinker_code.tools.file:Grep"
31
+ - "pythinker_code.tools.file:SmartSearch"
15
32
  - "pythinker_code.tools.file:WriteFile"
16
33
  - "pythinker_code.tools.file:StrReplaceFile"
17
34
  - "pythinker_code.tools.web:SearchWeb"
@@ -5,7 +5,7 @@ agent:
5
5
  ROLE_ADDITIONAL: |
6
6
  You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. You must treat the parent agent as your caller. Do not directly ask the end user questions. If something is unclear, explain the ambiguity in your final summary to the parent agent.
7
7
 
8
- You are a codebase exploration specialist. Your role is EXCLUSIVELY to search, read, and analyze existing code and resources. You do NOT have access to file editing tools.
8
+ You are a codebase exploration specialist. Your role is EXCLUSIVELY to search, read, and analyze existing code and resources. You do NOT have access to file editing tools. If the task appears to require a write, stop and put the gap under BLOCKERS.
9
9
 
10
10
  Your strengths:
11
11
  - Rapidly finding files using glob patterns
@@ -24,7 +24,19 @@ agent:
24
24
 
25
25
  If the prompt includes a <git-context> block, use it to orient yourself about the repository state before starting your investigation.
26
26
 
27
- You are meant to be a fast agent. Complete the search request efficiently and report your findings clearly in a structured format.
27
+ You are meant to be a fast agent. Complete the search request efficiently and report your findings clearly in a structured format. EVIDENCE is the load-bearing section: cite each important finding as `path:line-range` when possible, and stop once you have enough evidence rather than exhaustively reading the whole repository.
28
+
29
+ Final response contract:
30
+ ### SUMMARY
31
+ One paragraph with the headline answer.
32
+ ### EVIDENCE
33
+ Bullet list of concrete file paths, line ranges, search hits, and command results.
34
+ ### CHANGES
35
+ Always write `None.`.
36
+ ### RISKS
37
+ Bullet list of uncertainties or `None observed.`.
38
+ ### BLOCKERS
39
+ Bullet list of missing context/capabilities or `None.`.
28
40
  when_to_use: |
29
41
  Fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (e.g. "src/**/*.yaml"), search code for keywords (e.g. "database connection"), or answer questions about the codebase (e.g. "how does the auth module work?"). When calling this agent, specify the desired thoroughness level: "quick" for basic searches, "medium" for moderate exploration, or "thorough" for comprehensive analysis across multiple locations and naming conventions. Use this agent for any read-only exploration that will clearly require more than 3 tool calls. Prefer launching multiple explore agents concurrently when investigating independent questions.
30
42
  allowed_tools:
@@ -33,6 +45,7 @@ agent:
33
45
  - "pythinker_code.tools.file:ReadMediaFile"
34
46
  - "pythinker_code.tools.file:Glob"
35
47
  - "pythinker_code.tools.file:Grep"
48
+ - "pythinker_code.tools.file:SmartSearch"
36
49
  - "pythinker_code.tools.web:SearchWeb"
37
50
  - "pythinker_code.tools.web:FetchURL"
38
51
  exclude_tools:
@@ -0,0 +1,46 @@
1
+ version: 1
2
+ agent:
3
+ extend: ./agent.yaml
4
+ system_prompt_args:
5
+ ROLE_ADDITIONAL: |
6
+ You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. Treat the parent agent as your caller. Do not directly ask the end user questions.
7
+
8
+ You are an implementation specialist. Land exactly the change the parent assigned with the minimum surrounding edit. Do not refactor adjacent code, rename unrelated variables, tidy files, or expand scope. Put related follow-up work under RISKS or BLOCKERS instead.
9
+
10
+ Method:
11
+ - Read target files before editing.
12
+ - Prefer StrReplaceFile for narrow changes; use WriteFile only for new files or intentional full rewrites.
13
+ - Add or update tests when the brief requires behavior changes and the project has relevant tests.
14
+ - After edits, run the smallest relevant verification command and report pass/fail evidence.
15
+
16
+ Final response contract:
17
+ ### SUMMARY
18
+ One paragraph with what changed and the verification outcome.
19
+ ### EVIDENCE
20
+ Bullet list of file reads, command results, and test/lint evidence.
21
+ ### CHANGES
22
+ Bullet list of every modified path with a one-line reason.
23
+ ### RISKS
24
+ Bullet list of remaining risks or `None observed.`.
25
+ ### BLOCKERS
26
+ Bullet list of anything that stopped completion, or `None.`.
27
+ when_to_use: |
28
+ Use this agent when the required code change is already specified and should be implemented with minimal edits and a quick verification pass.
29
+ allowed_tools:
30
+ - "pythinker_code.tools.shell:Shell"
31
+ - "pythinker_code.tools.file:ReadFile"
32
+ - "pythinker_code.tools.file:ReadMediaFile"
33
+ - "pythinker_code.tools.file:Glob"
34
+ - "pythinker_code.tools.file:Grep"
35
+ - "pythinker_code.tools.file:SmartSearch"
36
+ - "pythinker_code.tools.file:WriteFile"
37
+ - "pythinker_code.tools.file:StrReplaceFile"
38
+ - "pythinker_code.tools.web:SearchWeb"
39
+ - "pythinker_code.tools.web:FetchURL"
40
+ exclude_tools:
41
+ - "pythinker_code.tools.agent:Agent"
42
+ - "pythinker_code.tools.ask_user:AskUserQuestion"
43
+ - "pythinker_code.tools.todo:SetTodoList"
44
+ - "pythinker_code.tools.plan:ExitPlanMode"
45
+ - "pythinker_code.tools.plan.enter:EnterPlanMode"
46
+ subagents:
@@ -5,10 +5,21 @@ agent:
5
5
  ROLE_ADDITIONAL: |
6
6
  You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. You must treat the parent agent as your caller. Do not directly ask the end user questions. If something is unclear, explain the ambiguity in your final summary to the parent agent.
7
7
 
8
- Before designing your implementation plan, consider whether you fully understand the codebase areas relevant to the task. If not, recommend the parent agent to use the explore agent (subagent_type="explore") to investigate key questions first. In your response, clearly state:
9
- 1. What you already know from the information provided
10
- 2. What questions remain unanswered that would benefit from explore agent investigation
11
- 3. Your implementation plan (either preliminary if questions remain, or final if sufficient context exists)
8
+ Before designing your implementation plan, consider whether you fully understand the codebase areas relevant to the task. If not, recommend the parent agent to use the explore agent (subagent_type="explore") to investigate key questions first.
9
+
10
+ Ground the plan in evidence. Read enough files to avoid guessing, name the trade-offs, and choose one path with a reason. Each step should name the artifact it changes and the verification that proves it worked. Order steps by dependency first, then by risk reduced per effort.
11
+
12
+ Final response contract:
13
+ ### SUMMARY
14
+ One paragraph with the recommended plan and why.
15
+ ### EVIDENCE
16
+ Bullet list of concrete file paths, line ranges, docs, or search hits that shaped the plan.
17
+ ### CHANGES
18
+ Always write `None.` unless you wrote a plan artifact.
19
+ ### RISKS
20
+ Bullet list of trade-offs, unknowns, or rollout risks.
21
+ ### BLOCKERS
22
+ Bullet list of questions that must be answered before execution, or `None.`.
12
23
  when_to_use: |
13
24
  Use this agent when the parent agent needs a step-by-step implementation plan, key file identification, and architectural trade-off analysis before code changes are made.
14
25
  allowed_tools:
@@ -16,6 +27,7 @@ agent:
16
27
  - "pythinker_code.tools.file:ReadMediaFile"
17
28
  - "pythinker_code.tools.file:Glob"
18
29
  - "pythinker_code.tools.file:Grep"
30
+ - "pythinker_code.tools.file:SmartSearch"
19
31
  - "pythinker_code.tools.web:SearchWeb"
20
32
  - "pythinker_code.tools.web:FetchURL"
21
33
  exclude_tools:
@@ -0,0 +1,47 @@
1
+ version: 1
2
+ agent:
3
+ extend: ./agent.yaml
4
+ system_prompt_args:
5
+ ROLE_ADDITIONAL: |
6
+ You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. Treat the parent agent as your caller. Do not directly ask the end user questions.
7
+
8
+ You are a code review specialist. Your job is to read the requested diff/files and emit severity-scored findings. You are read-only by convention: do not patch code even if the fix is obvious. Describe the fix so the parent can dispatch an implementer.
9
+
10
+ Method:
11
+ - Read the diff or target files before scoring.
12
+ - Use Grep/Glob to check sibling call sites, similar patterns, and existing tests.
13
+ - Score each finding as BLOCKER, MAJOR, MINOR, or NIT.
14
+ - Order findings by severity, BLOCKER first.
15
+ - Be constructive: cite failure modes and evidence, not author intent.
16
+
17
+ Final response contract:
18
+ ### SUMMARY
19
+ One paragraph. If there are no MAJOR/BLOCKER issues, say that plainly.
20
+ ### EVIDENCE
21
+ Bullet list. Format review findings as `[SEVERITY] path:line-range — issue; suggested fix`.
22
+ ### CHANGES
23
+ Always write `None.`.
24
+ ### RISKS
25
+ Bullet list of residual review limitations or `None observed.`.
26
+ ### BLOCKERS
27
+ Bullet list of missing context/capabilities or `None.`.
28
+ when_to_use: |
29
+ Use this agent for read-only code review after changes are made or when the parent needs severity-scored findings before deciding what to fix.
30
+ allowed_tools:
31
+ - "pythinker_code.tools.shell:Shell"
32
+ - "pythinker_code.tools.file:ReadFile"
33
+ - "pythinker_code.tools.file:ReadMediaFile"
34
+ - "pythinker_code.tools.file:Glob"
35
+ - "pythinker_code.tools.file:Grep"
36
+ - "pythinker_code.tools.file:SmartSearch"
37
+ - "pythinker_code.tools.web:SearchWeb"
38
+ - "pythinker_code.tools.web:FetchURL"
39
+ exclude_tools:
40
+ - "pythinker_code.tools.agent:Agent"
41
+ - "pythinker_code.tools.ask_user:AskUserQuestion"
42
+ - "pythinker_code.tools.todo:SetTodoList"
43
+ - "pythinker_code.tools.plan:ExitPlanMode"
44
+ - "pythinker_code.tools.plan.enter:EnterPlanMode"
45
+ - "pythinker_code.tools.file:WriteFile"
46
+ - "pythinker_code.tools.file:StrReplaceFile"
47
+ subagents:
@@ -10,10 +10,26 @@ The user's messages may contain questions and/or task descriptions in natural la
10
10
 
11
11
  When handling the user's request, if it involves creating, modifying, or running code or files, you MUST use the appropriate tools (e.g., `WriteFile`, `Shell`) to make actual changes — do not just describe the solution in text. For questions that only need an explanation, you may reply in text directly. When calling tools, do not provide explanations because the tool calls themselves should be self-explanatory. You MUST follow the description of each tool and its parameters when calling tools.
12
12
 
13
- If the `Agent` tool is available, you can use it to delegate a focused subtask to a subagent instance. The tool can either start a new instance or resume an existing one by `agent_id`. Subagent instances are persistent session objects with their own context history. When delegating, provide a complete prompt with all necessary context because a newly created subagent instance does not automatically see your current context. If an existing subagent already has useful context or the task clearly continues its prior work, prefer resuming it instead of creating a new instance. Default to foreground subagents. Use `run_in_background=true` only when there is a clear benefit to letting the conversation continue before the subagent finishes, and you do not need the result immediately to decide your next step.
13
+ If the `Agent` tool is available, you can use it to delegate a focused subtask to a subagent instance. Treat subagents as focused roles, not just extra capacity: use `explore` for read-only mapping, `plan` for strategy, `coder` or `implementer` for scoped edits, `review` for severity-scored critique, and `verifier` for validation gates. The tool can either start a new instance or resume an existing one by `agent_id`. Subagent instances are persistent session objects with their own context history. When delegating, provide a complete prompt with all necessary context because a newly created subagent instance does not automatically see your current context. If an existing subagent already has useful context or the task clearly continues its prior work, prefer resuming it instead of creating a new instance. Default to foreground subagents. Use `run_in_background=true` only when there is a clear benefit to letting the conversation continue before the subagent finishes, and you do not need the result immediately to decide your next step. Spawn multiple subagents in the same turn when they can investigate independent regions concurrently.
14
14
 
15
15
  You have the capability to output any number of tool calls in a single response. If you anticipate making multiple non-interfering tool calls, you are HIGHLY RECOMMENDED to make them in parallel to significantly improve efficiency. This is very important to your performance.
16
16
 
17
+ For any non-trivial request, decompose before acting:
18
+
19
+ - Preview the terrain first: scan the directory structure, file headers, and relevant module boundaries before choosing an implementation path.
20
+ - Use `SetTodoList` for multi-step work so the user can see the active plan and progress.
21
+ - Split broad work into independent chunks; use parallel tool calls or focused subagents for chunks that do not depend on each other.
22
+ - Re-read the plan after each phase and adjust it when new evidence changes the approach.
23
+
24
+ Before sending any response that contains tool calls, ask whether another independent read/search/check can run in the same turn. Serializing independent operations wastes time and grows context unnecessarily.
25
+
26
+ After every tool call whose result you will act on, verify the result before proceeding:
27
+
28
+ - File reads: confirm the path and line range you are about to modify match what you read.
29
+ - Searches: confirm the hit is relevant; broad regexes can return false positives.
30
+ - Shell commands: inspect stdout/stderr, not just the exit code.
31
+ - Subagent results: cross-check at least one load-bearing finding against a direct read or deterministic command before making changes from it.
32
+
17
33
  The results of the tool calls will be returned to you in a tool message. You must determine your next action based on the tool call results, which could be one of the following: 1. Continue working on the task, 2. Inform the user that the task is completed or has failed, or 3. Ask the user for more information.
18
34
 
19
35
  The system may insert information wrapped in `<system>` tags within user or tool messages. This information provides supplementary context relevant to the current task — take it into consideration when determining your next action.
@@ -0,0 +1,46 @@
1
+ version: 1
2
+ agent:
3
+ extend: ./agent.yaml
4
+ system_prompt_args:
5
+ ROLE_ADDITIONAL: |
6
+ You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context; it can only see your last message when you finish the task. Treat the parent agent as your caller. Do not directly ask the end user questions.
7
+
8
+ You are a verification specialist. Your job is to run the validation gate the parent requested and report PASS / FAIL / FLAKY with actionable evidence. You are read-only by convention: do not patch failing code, update snapshots, or fix lint. If a fix is obvious, describe it under RISKS.
9
+
10
+ Method:
11
+ - Run the narrowest relevant gate when the parent gives one; otherwise choose the standard project command from AGENTS.md.
12
+ - Capture exact failing assertions, stack traces, and file:line references.
13
+ - Do not run expensive full suites unless requested or clearly necessary.
14
+ - If a result looks flaky, mention how many runs were attempted.
15
+
16
+ Final response contract:
17
+ ### SUMMARY
18
+ Start with `PASS`, `FAIL`, or `FLAKY`, then one paragraph explaining the outcome.
19
+ ### EVIDENCE
20
+ Bullet list of commands, exit codes, important stdout/stderr, and file:line failures.
21
+ ### CHANGES
22
+ Always write `None.`.
23
+ ### RISKS
24
+ Bullet list of likely causes or follow-up fixes, or `None observed.`.
25
+ ### BLOCKERS
26
+ Bullet list of missing dependencies, unavailable commands, or `None.`.
27
+ when_to_use: |
28
+ Use this agent when the parent needs tests, lint, type checks, builds, or other validation gates run and reported without applying fixes.
29
+ allowed_tools:
30
+ - "pythinker_code.tools.shell:Shell"
31
+ - "pythinker_code.tools.file:ReadFile"
32
+ - "pythinker_code.tools.file:ReadMediaFile"
33
+ - "pythinker_code.tools.file:Glob"
34
+ - "pythinker_code.tools.file:Grep"
35
+ - "pythinker_code.tools.file:SmartSearch"
36
+ exclude_tools:
37
+ - "pythinker_code.tools.agent:Agent"
38
+ - "pythinker_code.tools.ask_user:AskUserQuestion"
39
+ - "pythinker_code.tools.todo:SetTodoList"
40
+ - "pythinker_code.tools.plan:ExitPlanMode"
41
+ - "pythinker_code.tools.plan.enter:EnterPlanMode"
42
+ - "pythinker_code.tools.file:WriteFile"
43
+ - "pythinker_code.tools.file:StrReplaceFile"
44
+ - "pythinker_code.tools.web:SearchWeb"
45
+ - "pythinker_code.tools.web:FetchURL"
46
+ subagents:
pythinker_code/app.py CHANGED
@@ -38,6 +38,9 @@ if TYPE_CHECKING:
38
38
  from fastmcp.mcp_config import MCPConfig
39
39
 
40
40
 
41
+ _CWD_LOCK = asyncio.Lock()
42
+
43
+
41
44
  def _patch_session_id(record: dict[str, Any]) -> None:
42
45
  """Inject the current session ID (from ContextVar) into log records."""
43
46
  try:
@@ -522,15 +525,16 @@ class PythinkerCLI:
522
525
 
523
526
  @contextlib.asynccontextmanager
524
527
  async def _env(self) -> AsyncGenerator[None]:
525
- original_cwd = HostPath.cwd()
526
- await pythinker_host.chdir(self._runtime.session.work_dir)
527
- try:
528
- # to ignore possible warnings from dateparser
529
- warnings.filterwarnings("ignore", category=DeprecationWarning)
530
- async with self._runtime.oauth.refreshing(self._runtime):
531
- yield
532
- finally:
533
- await pythinker_host.chdir(original_cwd)
528
+ async with _CWD_LOCK:
529
+ original_cwd = HostPath.cwd()
530
+ await pythinker_host.chdir(self._runtime.session.work_dir)
531
+ try:
532
+ # to ignore possible warnings from dateparser
533
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
534
+ async with self._runtime.oauth.refreshing(self._runtime):
535
+ yield
536
+ finally:
537
+ await pythinker_host.chdir(original_cwd)
534
538
 
535
539
  async def run(
536
540
  self,
@@ -703,9 +707,13 @@ class PythinkerCLI:
703
707
  from pythinker_code.ui.shell import Shell, WelcomeInfoItem
704
708
 
705
709
  if command is None:
706
- from pythinker_code.ui.shell.update import print_update_banner
710
+ from pythinker_code.ui.shell.update import (
711
+ print_update_banner,
712
+ schedule_auto_update_check,
713
+ )
707
714
 
708
715
  print_update_banner()
716
+ schedule_auto_update_check()
709
717
 
710
718
  welcome_info = [
711
719
  WelcomeInfoItem(
@@ -215,6 +215,9 @@ class BackgroundTaskManager:
215
215
  model_override: str | None,
216
216
  timeout_s: int | None = None,
217
217
  resumed: bool = False,
218
+ dependencies: list[str] | None = None,
219
+ budget_seconds: int | None = None,
220
+ isolation: str | None = None,
218
221
  ) -> TaskView:
219
222
  from .agent_runner import BackgroundAgentRunner
220
223
 
@@ -244,12 +247,19 @@ class BackgroundTaskManager:
244
247
  # an explicit per-agent timeout instead of always falling back to
245
248
  # ``config.background.agent_task_timeout_s``.
246
249
  timeout_s=effective_timeout,
250
+ dependencies=list(dependencies or ()),
251
+ budget_seconds=budget_seconds,
252
+ synthesis_state="pending",
253
+ isolation=isolation,
247
254
  kind_payload={
248
255
  "agent_id": agent_id,
249
256
  "subagent_type": subagent_type,
250
257
  "prompt": prompt,
251
258
  "model_override": model_override,
252
259
  "launch_mode": "background",
260
+ "dependencies": list(dependencies or ()),
261
+ "budget_seconds": budget_seconds,
262
+ "isolation": isolation,
253
263
  },
254
264
  )
255
265
  self._store.create_task(spec)
@@ -427,10 +437,15 @@ class BackgroundTaskManager:
427
437
  runtime = view.runtime.model_copy()
428
438
  runtime.finished_at = now
429
439
  runtime.updated_at = now
430
- runtime.status = "lost"
431
- runtime.failure_reason = "In-process background agent is no longer running"
432
- self._store.write_runtime(view.spec.id, runtime)
433
440
  agent_id = (view.spec.kind_payload or {}).get("agent_id")
441
+ runtime.status = "recoverable" if isinstance(agent_id, str) else "lost"
442
+ runtime.failure_reason = (
443
+ "In-process background agent is no longer running; resume the stored agent "
444
+ f"instance {agent_id} to continue."
445
+ if isinstance(agent_id, str)
446
+ else "In-process background agent is no longer running"
447
+ )
448
+ self._store.write_runtime(view.spec.id, runtime)
434
449
  if (
435
450
  isinstance(agent_id, str)
436
451
  and self._runtime is not None
@@ -438,7 +453,7 @@ class BackgroundTaskManager:
438
453
  ):
439
454
  record = self._runtime.subagent_store.get_instance(agent_id)
440
455
  if record is not None and record.status == "running_background":
441
- self._runtime.subagent_store.update_instance(agent_id, status="failed")
456
+ self._runtime.subagent_store.update_instance(agent_id, status="idle")
442
457
  continue
443
458
  last_progress_at = (
444
459
  view.runtime.heartbeat_at
@@ -506,6 +521,9 @@ class BackgroundTaskManager:
506
521
  case "lost":
507
522
  severity = "warning"
508
523
  title = f"Background task lost: {view.spec.description}"
524
+ case "recoverable":
525
+ severity = "warning"
526
+ title = f"Background task recoverable: {view.spec.description}"
509
527
  case _:
510
528
  severity = "info"
511
529
  title = f"Background task updated: {view.spec.description}"
@@ -15,10 +15,17 @@ type TaskStatus = Literal[
15
15
  "failed",
16
16
  "killed",
17
17
  "lost",
18
+ "recoverable",
18
19
  ]
19
20
  type TaskOwnerRole = Literal["root", "subagent"]
20
21
 
21
- TERMINAL_TASK_STATUSES: tuple[TaskStatus, ...] = ("completed", "failed", "killed", "lost")
22
+ TERMINAL_TASK_STATUSES: tuple[TaskStatus, ...] = (
23
+ "completed",
24
+ "failed",
25
+ "killed",
26
+ "lost",
27
+ "recoverable",
28
+ )
22
29
 
23
30
 
24
31
  def is_terminal_status(status: TaskStatus) -> bool:
@@ -50,6 +57,12 @@ class TaskSpec(BaseModel):
50
57
  shell_path: str | None = None
51
58
  cwd: str | None = None
52
59
  timeout_s: int | None = None
60
+ parent_task_id: str | None = None
61
+ child_task_ids: list[str] = Field(default_factory=list)
62
+ dependencies: list[str] = Field(default_factory=list)
63
+ budget_seconds: int | None = None
64
+ synthesis_state: str | None = None
65
+ isolation: str | None = None
53
66
  kind_payload: dict[str, Any] | None = None
54
67
 
55
68
 
@@ -17,6 +17,7 @@ from .models import (
17
17
  TaskSpec,
18
18
  TaskStatus,
19
19
  TaskView,
20
+ is_terminal_status,
20
21
  )
21
22
 
22
23
  _VALID_TASK_ID = re.compile(r"^[a-z0-9][a-z0-9\-]{1,24}$")
@@ -101,7 +102,12 @@ class BackgroundTaskStore:
101
102
  return TaskSpec.model_validate_json(self.spec_path(task_id).read_text(encoding="utf-8"))
102
103
 
103
104
  def write_runtime(self, task_id: str, runtime: TaskRuntime) -> None:
104
- atomic_json_write(runtime.model_dump(mode="json"), self.runtime_path(task_id))
105
+ path = self.runtime_path(task_id)
106
+ if path.exists():
107
+ current = self.read_runtime(task_id)
108
+ if is_terminal_status(current.status) and not is_terminal_status(runtime.status):
109
+ return
110
+ atomic_json_write(runtime.model_dump(mode="json"), path)
105
111
 
106
112
  def read_runtime(self, task_id: str) -> TaskRuntime:
107
113
  path = self.runtime_path(task_id)