pythinker-code 2.3.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. pythinker_code/CHANGELOG.md +22 -0
  2. pythinker_code/agents/default/agent.yaml +9 -0
  3. pythinker_code/agents/default/coder.yaml +16 -0
  4. pythinker_code/agents/default/explore.yaml +14 -2
  5. pythinker_code/agents/default/implementer.yaml +45 -0
  6. pythinker_code/agents/default/plan.yaml +15 -4
  7. pythinker_code/agents/default/review.yaml +46 -0
  8. pythinker_code/agents/default/system.md +17 -1
  9. pythinker_code/agents/default/verifier.yaml +45 -0
  10. pythinker_code/llm.py +33 -6
  11. pythinker_code/tools/agent/description.md +11 -4
  12. pythinker_code/tools/file/grep_local.py +222 -12
  13. pythinker_code/web/static/assets/{_baseUniq-DYwtr3m4.js → _baseUniq-CnjLtNBK.js} +1 -1
  14. pythinker_code/web/static/assets/{arc-CNhBgyVb.js → arc-p8Zl45yf.js} +1 -1
  15. pythinker_code/web/static/assets/{architectureDiagram-VXUJARFQ-DpvaxB3Y.js → architectureDiagram-VXUJARFQ-Lxm5mR82.js} +1 -1
  16. pythinker_code/web/static/assets/{blockDiagram-VD42YOAC-IlYHIkrW.js → blockDiagram-VD42YOAC-B7qw0bmu.js} +1 -1
  17. pythinker_code/web/static/assets/{c4Diagram-YG6GDRKO-D_jGrUIu.js → c4Diagram-YG6GDRKO-CdgJaayE.js} +1 -1
  18. pythinker_code/web/static/assets/channel-BZBK5lN6.js +1 -0
  19. pythinker_code/web/static/assets/{chunk-4BX2VUAB-uYRqFG6q.js → chunk-4BX2VUAB-DNr_hwHQ.js} +1 -1
  20. pythinker_code/web/static/assets/{chunk-55IACEB6-5K_8Tvtf.js → chunk-55IACEB6-BWRJeLuP.js} +1 -1
  21. pythinker_code/web/static/assets/{chunk-B4BG7PRW-BAp2tokd.js → chunk-B4BG7PRW-BRveHO02.js} +1 -1
  22. pythinker_code/web/static/assets/{chunk-DI55MBZ5-C3ICALbg.js → chunk-DI55MBZ5-CC8092Ai.js} +1 -1
  23. pythinker_code/web/static/assets/{chunk-FMBD7UC4-B3ntDoat.js → chunk-FMBD7UC4-BWlTTg4C.js} +1 -1
  24. pythinker_code/web/static/assets/{chunk-QN33PNHL-Dy8y3fp6.js → chunk-QN33PNHL-CQB7XXqV.js} +1 -1
  25. pythinker_code/web/static/assets/{chunk-QZHKN3VN-BXmiK1aE.js → chunk-QZHKN3VN-DR05TXzx.js} +1 -1
  26. pythinker_code/web/static/assets/{chunk-TZMSLE5B-BbI6RHhP.js → chunk-TZMSLE5B-BooWpSCF.js} +1 -1
  27. pythinker_code/web/static/assets/classDiagram-2ON5EDUG-BpVZZpbi.js +1 -0
  28. pythinker_code/web/static/assets/classDiagram-v2-WZHVMYZB-BpVZZpbi.js +1 -0
  29. pythinker_code/web/static/assets/clone-C-R24ClB.js +1 -0
  30. pythinker_code/web/static/assets/{code-block-IT6T5CEO-C09u1ZPS.js → code-block-IT6T5CEO-C0nbBxoU.js} +1 -1
  31. pythinker_code/web/static/assets/{cose-bilkent-S5V4N54A-OfdgQa9b.js → cose-bilkent-S5V4N54A---Tl6BDU.js} +1 -1
  32. pythinker_code/web/static/assets/{cytoscape.esm-BHPoE92Y.js → cytoscape.esm-Dlvswyl5.js} +1 -1
  33. pythinker_code/web/static/assets/{dagre-6UL2VRFP-Dqsjg8sJ.js → dagre-6UL2VRFP-CXURVXMQ.js} +1 -1
  34. pythinker_code/web/static/assets/{diagram-PSM6KHXK-DxkId0Z8.js → diagram-PSM6KHXK-DqPWQvWf.js} +1 -1
  35. pythinker_code/web/static/assets/{diagram-QEK2KX5R-CkPNihvj.js → diagram-QEK2KX5R-XG4wk_zx.js} +1 -1
  36. pythinker_code/web/static/assets/{diagram-S2PKOQOG-C_N5Jjql.js → diagram-S2PKOQOG-sNvTPY2M.js} +1 -1
  37. pythinker_code/web/static/assets/{erDiagram-Q2GNP2WA-C8_5yrCr.js → erDiagram-Q2GNP2WA-BOJaQQQU.js} +1 -1
  38. pythinker_code/web/static/assets/{flowDiagram-NV44I4VS-BfV7xDb8.js → flowDiagram-NV44I4VS-CbfuToSV.js} +1 -1
  39. pythinker_code/web/static/assets/{ganttDiagram-JELNMOA3-Cld5kwhV.js → ganttDiagram-JELNMOA3-ulVOoUco.js} +1 -1
  40. pythinker_code/web/static/assets/{gitGraphDiagram-NY62KEGX-F3FwjYQD.js → gitGraphDiagram-NY62KEGX-D2qIB9v4.js} +1 -1
  41. pythinker_code/web/static/assets/{graph-BWlEpfBO.js → graph-CMrFXUW3.js} +1 -1
  42. pythinker_code/web/static/assets/{index-BqPJMGF-.js → index-BORZhTVE.js} +2 -2
  43. pythinker_code/web/static/assets/{index-DYUDz2ym.js → index-Cw0e9z0j.js} +1 -1
  44. pythinker_code/web/static/assets/{index-DpudRZuI.js → index-nZJqxMTn.js} +1 -1
  45. pythinker_code/web/static/assets/{infoDiagram-WHAUD3N6-BJOGXqn7.js → infoDiagram-WHAUD3N6-B-DtK8KA.js} +1 -1
  46. pythinker_code/web/static/assets/{journeyDiagram-XKPGCS4Q-BZdlH-JG.js → journeyDiagram-XKPGCS4Q-CVpxG_1t.js} +1 -1
  47. pythinker_code/web/static/assets/{kanban-definition-3W4ZIXB7-CmgmSsYi.js → kanban-definition-3W4ZIXB7-DsoNxPLk.js} +1 -1
  48. pythinker_code/web/static/assets/{layout-CWaYhVVo.js → layout-C-IPsObI.js} +1 -1
  49. pythinker_code/web/static/assets/{linear-Bw6Dncma.js → linear-BoIapCU_.js} +1 -1
  50. pythinker_code/web/static/assets/{mermaid-VLURNSYL-CzjjwzDB.js → mermaid-VLURNSYL-BNd1nBm2.js} +7 -7
  51. pythinker_code/web/static/assets/{mermaid.core-Bb0_1h52.js → mermaid.core-CiwyVvHW.js} +5 -5
  52. pythinker_code/web/static/assets/{min-Df20Er5m.js → min-BCUh9ALv.js} +1 -1
  53. pythinker_code/web/static/assets/{mindmap-definition-VGOIOE7T-CAe0siLd.js → mindmap-definition-VGOIOE7T-CjwMDCnL.js} +1 -1
  54. pythinker_code/web/static/assets/{pieDiagram-ADFJNKIX-CLMBAwjU.js → pieDiagram-ADFJNKIX-fDnq6EaG.js} +1 -1
  55. pythinker_code/web/static/assets/{quadrantDiagram-AYHSOK5B-B9vjzD3o.js → quadrantDiagram-AYHSOK5B-DyHz9xJE.js} +1 -1
  56. pythinker_code/web/static/assets/{requirementDiagram-UZGBJVZJ-Bbjo8TGX.js → requirementDiagram-UZGBJVZJ-CtnhMmz_.js} +1 -1
  57. pythinker_code/web/static/assets/{sankeyDiagram-TZEHDZUN-xnxkDnDQ.js → sankeyDiagram-TZEHDZUN-DBjlbTUH.js} +1 -1
  58. pythinker_code/web/static/assets/{sequenceDiagram-WL72ISMW-qkafBa71.js → sequenceDiagram-WL72ISMW-B8vAgUxw.js} +1 -1
  59. pythinker_code/web/static/assets/{stateDiagram-FKZM4ZOC-BzTcRJpG.js → stateDiagram-FKZM4ZOC-CKckm--Z.js} +1 -1
  60. pythinker_code/web/static/assets/stateDiagram-v2-4FDKWEC3-DFvB4bvk.js +1 -0
  61. pythinker_code/web/static/assets/{timeline-definition-IT6M3QCI-0ls9u7xd.js → timeline-definition-IT6M3QCI-ByteGYaR.js} +1 -1
  62. pythinker_code/web/static/assets/{treemap-KMMF4GRG-C1ChVaOv.js → treemap-KMMF4GRG-CBb0li3c.js} +1 -1
  63. pythinker_code/web/static/assets/{xychartDiagram-PRI3JC2R-Ba9eacUU.js → xychartDiagram-PRI3JC2R-E_ri2DuP.js} +1 -1
  64. pythinker_code/web/static/index.html +1 -1
  65. {pythinker_code-2.3.0.dist-info → pythinker_code-2.4.0.dist-info}/METADATA +15 -5
  66. {pythinker_code-2.3.0.dist-info → pythinker_code-2.4.0.dist-info}/RECORD +70 -67
  67. pythinker_code/web/static/assets/channel-BPOuE91b.js +0 -1
  68. pythinker_code/web/static/assets/classDiagram-2ON5EDUG-C1S9FRV4.js +0 -1
  69. pythinker_code/web/static/assets/classDiagram-v2-WZHVMYZB-C1S9FRV4.js +0 -1
  70. pythinker_code/web/static/assets/clone-D2vuslet.js +0 -1
  71. pythinker_code/web/static/assets/stateDiagram-v2-4FDKWEC3-TyAV0qk_.js +0 -1
  72. {pythinker_code-2.3.0.dist-info → pythinker_code-2.4.0.dist-info}/WHEEL +0 -0
  73. {pythinker_code-2.3.0.dist-info → pythinker_code-2.4.0.dist-info}/entry_points.txt +0 -0
  74. {pythinker_code-2.3.0.dist-info → pythinker_code-2.4.0.dist-info}/licenses/LICENSE +0 -0
  75. {pythinker_code-2.3.0.dist-info → pythinker_code-2.4.0.dist-info}/licenses/NOTICE +0 -0
@@ -2,6 +2,28 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 2.4.0 (2026-05-11)
6
+
7
+ Subagent roles overhaul, Moonshot/Kimi K2 provider support, and a ripgrep-free Grep fallback.
8
+
9
+ - New built-in subagents under `src/pythinker_code/agents/default/`:
10
+ - `implementer.yaml` — scoped code changes with minimum surrounding edits and a quick verification pass.
11
+ - `review.yaml` — read-only code review with severity-scored findings (BLOCKER / MAJOR / MINOR / NIT).
12
+ - `verifier.yaml` — read-only validation runner that reports `PASS` / `FAIL` / `FLAKY` without applying fixes.
13
+ - `coder.yaml`, `explore.yaml`, and `plan.yaml` now emit a standard `### SUMMARY / EVIDENCE / CHANGES / RISKS / BLOCKERS` response contract so the parent agent can consume subagent output without re-parsing prose.
14
+ - `agent.yaml` registers the three new roles; `tools/agent/description.md` documents the Scout → Plan → Implement → Review → Verify workflow and the parallel review/verification pattern.
15
+ - `agents/default/system.md`: adds decomposition guidance (preview → todo list → parallel chunks), enforces post-tool-call verification before acting on results, and tells the agent to cross-check at least one load-bearing subagent finding before editing from it.
16
+ - Kimi K2.5 / K2.6 (Moonshot) and other strict interleaved-thinking providers:
17
+ - `packages/pythinker-core/.../chat_provider/pythinker.py`: always emit `reasoning_content` on assistant tool-call replays so Moonshot's "thinking is enabled but reasoning_content is missing in assistant tool call message at index N" error no longer trips multi-step tool flows.
18
+ - `packages/pythinker-core/.../contrib/chat_provider/openai_legacy.py`: replay reasoning metadata on every assistant turn for `kimi-k2*` / `deepseek*` models (falls back to the assistant text or `"[reasoning unavailable]"` when reasoning content was not retained).
19
+ - `src/pythinker_code/llm.py`: route Kimi K2 thinking through the provider-specific `extra_body={"thinking": {"type": "enabled"|"disabled"}}` body field instead of OpenAI's `reasoning_effort` (which Kimi ignores), and persist `LLM.thinking` across `clone_llm_with_model_alias` so model switches preserve the user's thinking choice.
20
+ - `tools/file/grep_local.py`:
21
+ - Pure-Python `rg`-free fallback (`_python_grep`) honoring `pattern`, `path`, `glob`, `type` (bash / c / cpp / go / java / js / json / md / py / rust / sh / toml / ts / txt / yaml / zsh), `ignore_case`, `multiline`, `context` / `before_context` / `after_context`, `line_number`, `output_mode` (`content` / `files_with_matches` / `count_matches`), `offset`, `head_limit`, and the standard sensitive-file redaction. `.gitignore` / `.ignore` and the VCS metadata directories (`.git`, `.svn`, `.hg`, `.bzr`, `.jj`, `.sl`) are respected unless `include_ignored=true`.
22
+ - `_find_existing_rg` now honors `PYTHINKER_RG_PATH` and additionally probes `/usr/bin`, `/usr/local/bin`, `~/.cargo/bin`, `~/.local/bin`, and `~/.pi/agent/bin` before falling through to download.
23
+ - Downloader retries against the upstream GitHub releases mirror (`https://github.com/BurntSushi/ripgrep/releases/download/<version>/...`) when the CDN mirror is unreachable, and the failure path now degrades into the Python fallback instead of raising.
24
+ - `.gitignore`: ignore `graphify-out*/`, `.graphify_*.json`, `.graphify_*.txt`, and the local `blackbox/` scratch area.
25
+ - `AGENTS.md` rewritten to reflect the new subagent roster and workflow.
26
+
5
27
  ## 2.3.0 (2026-05-09)
6
28
 
7
29
  Telemetry & observability audit.
@@ -34,3 +34,12 @@ agent:
34
34
  plan:
35
35
  path: ./plan.yaml
36
36
  description: "Read-only implementation planning and architecture design."
37
+ review:
38
+ path: ./review.yaml
39
+ description: "Read-only code review with severity-scored findings."
40
+ implementer:
41
+ path: ./implementer.yaml
42
+ description: "Scoped implementation with minimal edits and verification."
43
+ verifier:
44
+ path: ./verifier.yaml
45
+ description: "Read-only validation runner for tests, lint, and builds."
@@ -4,6 +4,22 @@ agent:
4
4
  system_prompt_args:
5
5
  ROLE_ADDITIONAL: |
6
6
  You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. You must treat the parent agent as your caller. Do not directly ask the end user questions. If something is unclear, explain the ambiguity in your final summary to the parent agent.
7
+
8
+ Stay tightly scoped to exactly what the parent assigned. Do not expand into adjacent cleanup or refactors. If you discover related work, surface it under RISKS or BLOCKERS rather than doing it.
9
+
10
+ Before editing, read the target files and confirm the line ranges/patterns you will change. Prefer the minimum edit that satisfies the brief. After edits, run the smallest relevant verification command available and report the result.
11
+
12
+ Final response contract:
13
+ ### SUMMARY
14
+ One paragraph with what you did and the outcome.
15
+ ### EVIDENCE
16
+ Bullet list of concrete file paths, command results, or observed errors that support the outcome.
17
+ ### CHANGES
18
+ Bullet list of every file you modified, or `None.` if read-only.
19
+ ### RISKS
20
+ Bullet list of remaining risks or `None observed.`.
21
+ ### BLOCKERS
22
+ Bullet list of anything that stopped completion, or `None.`.
7
23
  when_to_use: |
8
24
  Use this agent for non-trivial software engineering work that may require reading files, editing code, running commands, and returning a compact but technically complete summary to the parent agent.
9
25
  allowed_tools:
@@ -5,7 +5,7 @@ agent:
5
5
  ROLE_ADDITIONAL: |
6
6
  You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. You must treat the parent agent as your caller. Do not directly ask the end user questions. If something is unclear, explain the ambiguity in your final summary to the parent agent.
7
7
 
8
- You are a codebase exploration specialist. Your role is EXCLUSIVELY to search, read, and analyze existing code and resources. You do NOT have access to file editing tools.
8
+ You are a codebase exploration specialist. Your role is EXCLUSIVELY to search, read, and analyze existing code and resources. You do NOT have access to file editing tools. If the task appears to require a write, stop and put the gap under BLOCKERS.
9
9
 
10
10
  Your strengths:
11
11
  - Rapidly finding files using glob patterns
@@ -24,7 +24,19 @@ agent:
24
24
 
25
25
  If the prompt includes a <git-context> block, use it to orient yourself about the repository state before starting your investigation.
26
26
 
27
- You are meant to be a fast agent. Complete the search request efficiently and report your findings clearly in a structured format.
27
+ You are meant to be a fast agent. Complete the search request efficiently and report your findings clearly in a structured format. EVIDENCE is the load-bearing section: cite each important finding as `path:line-range` when possible, and stop once you have enough evidence rather than exhaustively reading the whole repository.
28
+
29
+ Final response contract:
30
+ ### SUMMARY
31
+ One paragraph with the headline answer.
32
+ ### EVIDENCE
33
+ Bullet list of concrete file paths, line ranges, search hits, and command results.
34
+ ### CHANGES
35
+ Always write `None.`.
36
+ ### RISKS
37
+ Bullet list of uncertainties or `None observed.`.
38
+ ### BLOCKERS
39
+ Bullet list of missing context/capabilities or `None.`.
28
40
  when_to_use: |
29
41
  Fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (e.g. "src/**/*.yaml"), search code for keywords (e.g. "database connection"), or answer questions about the codebase (e.g. "how does the auth module work?"). When calling this agent, specify the desired thoroughness level: "quick" for basic searches, "medium" for moderate exploration, or "thorough" for comprehensive analysis across multiple locations and naming conventions. Use this agent for any read-only exploration that will clearly require more than 3 tool calls. Prefer launching multiple explore agents concurrently when investigating independent questions.
30
42
  allowed_tools:
@@ -0,0 +1,45 @@
1
+ version: 1
2
+ agent:
3
+ extend: ./agent.yaml
4
+ system_prompt_args:
5
+ ROLE_ADDITIONAL: |
6
+ You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. Treat the parent agent as your caller. Do not directly ask the end user questions.
7
+
8
+ You are an implementation specialist. Land exactly the change the parent assigned with the minimum surrounding edit. Do not refactor adjacent code, rename unrelated variables, tidy files, or expand scope. Put related follow-up work under RISKS or BLOCKERS instead.
9
+
10
+ Method:
11
+ - Read target files before editing.
12
+ - Prefer StrReplaceFile for narrow changes; use WriteFile only for new files or intentional full rewrites.
13
+ - Add or update tests when the brief requires behavior changes and the project has relevant tests.
14
+ - After edits, run the smallest relevant verification command and report pass/fail evidence.
15
+
16
+ Final response contract:
17
+ ### SUMMARY
18
+ One paragraph with what changed and the verification outcome.
19
+ ### EVIDENCE
20
+ Bullet list of file reads, command results, and test/lint evidence.
21
+ ### CHANGES
22
+ Bullet list of every modified path with a one-line reason.
23
+ ### RISKS
24
+ Bullet list of remaining risks or `None observed.`.
25
+ ### BLOCKERS
26
+ Bullet list of anything that stopped completion, or `None.`.
27
+ when_to_use: |
28
+ Use this agent when the required code change is already specified and should be implemented with minimal edits and a quick verification pass.
29
+ allowed_tools:
30
+ - "pythinker_code.tools.shell:Shell"
31
+ - "pythinker_code.tools.file:ReadFile"
32
+ - "pythinker_code.tools.file:ReadMediaFile"
33
+ - "pythinker_code.tools.file:Glob"
34
+ - "pythinker_code.tools.file:Grep"
35
+ - "pythinker_code.tools.file:WriteFile"
36
+ - "pythinker_code.tools.file:StrReplaceFile"
37
+ - "pythinker_code.tools.web:SearchWeb"
38
+ - "pythinker_code.tools.web:FetchURL"
39
+ exclude_tools:
40
+ - "pythinker_code.tools.agent:Agent"
41
+ - "pythinker_code.tools.ask_user:AskUserQuestion"
42
+ - "pythinker_code.tools.todo:SetTodoList"
43
+ - "pythinker_code.tools.plan:ExitPlanMode"
44
+ - "pythinker_code.tools.plan.enter:EnterPlanMode"
45
+ subagents:
@@ -5,10 +5,21 @@ agent:
5
5
  ROLE_ADDITIONAL: |
6
6
  You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. You must treat the parent agent as your caller. Do not directly ask the end user questions. If something is unclear, explain the ambiguity in your final summary to the parent agent.
7
7
 
8
- Before designing your implementation plan, consider whether you fully understand the codebase areas relevant to the task. If not, recommend the parent agent to use the explore agent (subagent_type="explore") to investigate key questions first. In your response, clearly state:
9
- 1. What you already know from the information provided
10
- 2. What questions remain unanswered that would benefit from explore agent investigation
11
- 3. Your implementation plan (either preliminary if questions remain, or final if sufficient context exists)
8
+ Before designing your implementation plan, consider whether you fully understand the codebase areas relevant to the task. If not, recommend that the parent agent use the explore agent (subagent_type="explore") to investigate key questions first.
9
+
10
+ Ground the plan in evidence. Read enough files to avoid guessing, name the trade-offs, and choose one path with a reason. Each step should name the artifact it changes and the verification that proves it worked. Order steps by dependency first, then by risk reduced per effort.
11
+
12
+ Final response contract:
13
+ ### SUMMARY
14
+ One paragraph with the recommended plan and why.
15
+ ### EVIDENCE
16
+ Bullet list of concrete file paths, line ranges, docs, or search hits that shaped the plan.
17
+ ### CHANGES
18
+ Write `None.` unless you wrote a plan artifact.
19
+ ### RISKS
20
+ Bullet list of trade-offs, unknowns, or rollout risks.
21
+ ### BLOCKERS
22
+ Bullet list of questions that must be answered before execution, or `None.`.
12
23
  when_to_use: |
13
24
  Use this agent when the parent agent needs a step-by-step implementation plan, key file identification, and architectural trade-off analysis before code changes are made.
14
25
  allowed_tools:
@@ -0,0 +1,46 @@
1
+ version: 1
2
+ agent:
3
+ extend: ./agent.yaml
4
+ system_prompt_args:
5
+ ROLE_ADDITIONAL: |
6
+ You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. Treat the parent agent as your caller. Do not directly ask the end user questions.
7
+
8
+ You are a code review specialist. Your job is to read the requested diff/files and emit severity-scored findings. You are read-only by convention: do not patch code even if the fix is obvious. Describe the fix so the parent can dispatch an implementer.
9
+
10
+ Method:
11
+ - Read the diff or target files before scoring.
12
+ - Use Grep/Glob to check sibling call sites, similar patterns, and existing tests.
13
+ - Score each finding as BLOCKER, MAJOR, MINOR, or NIT.
14
+ - Order findings by severity, BLOCKER first.
15
+ - Be constructive: cite failure modes and evidence, not author intent.
16
+
17
+ Final response contract:
18
+ ### SUMMARY
19
+ One paragraph. If there are no MAJOR/BLOCKER issues, say that plainly.
20
+ ### EVIDENCE
21
+ Bullet list. Format review findings as `[SEVERITY] path:line-range — issue; suggested fix`.
22
+ ### CHANGES
23
+ Always write `None.`.
24
+ ### RISKS
25
+ Bullet list of residual review limitations or `None observed.`.
26
+ ### BLOCKERS
27
+ Bullet list of missing context/capabilities or `None.`.
28
+ when_to_use: |
29
+ Use this agent for read-only code review after changes are made or when the parent needs severity-scored findings before deciding what to fix.
30
+ allowed_tools:
31
+ - "pythinker_code.tools.shell:Shell"
32
+ - "pythinker_code.tools.file:ReadFile"
33
+ - "pythinker_code.tools.file:ReadMediaFile"
34
+ - "pythinker_code.tools.file:Glob"
35
+ - "pythinker_code.tools.file:Grep"
36
+ - "pythinker_code.tools.web:SearchWeb"
37
+ - "pythinker_code.tools.web:FetchURL"
38
+ exclude_tools:
39
+ - "pythinker_code.tools.agent:Agent"
40
+ - "pythinker_code.tools.ask_user:AskUserQuestion"
41
+ - "pythinker_code.tools.todo:SetTodoList"
42
+ - "pythinker_code.tools.plan:ExitPlanMode"
43
+ - "pythinker_code.tools.plan.enter:EnterPlanMode"
44
+ - "pythinker_code.tools.file:WriteFile"
45
+ - "pythinker_code.tools.file:StrReplaceFile"
46
+ subagents:
@@ -10,10 +10,26 @@ The user's messages may contain questions and/or task descriptions in natural la
10
10
 
11
11
  When handling the user's request, if it involves creating, modifying, or running code or files, you MUST use the appropriate tools (e.g., `WriteFile`, `Shell`) to make actual changes — do not just describe the solution in text. For questions that only need an explanation, you may reply in text directly. When calling tools, do not provide explanations because the tool calls themselves should be self-explanatory. You MUST follow the description of each tool and its parameters when calling tools.
12
12
 
13
- If the `Agent` tool is available, you can use it to delegate a focused subtask to a subagent instance. The tool can either start a new instance or resume an existing one by `agent_id`. Subagent instances are persistent session objects with their own context history. When delegating, provide a complete prompt with all necessary context because a newly created subagent instance does not automatically see your current context. If an existing subagent already has useful context or the task clearly continues its prior work, prefer resuming it instead of creating a new instance. Default to foreground subagents. Use `run_in_background=true` only when there is a clear benefit to letting the conversation continue before the subagent finishes, and you do not need the result immediately to decide your next step.
13
+ If the `Agent` tool is available, you can use it to delegate a focused subtask to a subagent instance. Treat subagents as focused roles, not just extra capacity: use `explore` for read-only mapping, `plan` for strategy, `coder` or `implementer` for scoped edits, `review` for severity-scored critique, and `verifier` for validation gates. The tool can either start a new instance or resume an existing one by `agent_id`. Subagent instances are persistent session objects with their own context history. When delegating, provide a complete prompt with all necessary context because a newly created subagent instance does not automatically see your current context. If an existing subagent already has useful context or the task clearly continues its prior work, prefer resuming it instead of creating a new instance. Default to foreground subagents. Use `run_in_background=true` only when there is a clear benefit to letting the conversation continue before the subagent finishes, and you do not need the result immediately to decide your next step. Spawn multiple subagents in the same turn when they can investigate independent regions concurrently.
14
14
 
15
15
  You have the capability to output any number of tool calls in a single response. If you anticipate making multiple non-interfering tool calls, you are HIGHLY RECOMMENDED to make them in parallel to significantly improve efficiency. This is very important to your performance.
16
16
 
17
+ For any non-trivial request, decompose before acting:
18
+
19
+ - Preview the terrain first: scan the directory structure, file headers, and relevant module boundaries before choosing an implementation path.
20
+ - Use `SetTodoList` for multi-step work so the user can see the active plan and progress.
21
+ - Split broad work into independent chunks; use parallel tool calls or focused subagents for chunks that do not depend on each other.
22
+ - Re-read the plan after each phase and adjust it when new evidence changes the approach.
23
+
24
+ Before every response that contains tool calls, ask whether another independent read/search/check can run in the same turn. Serializing independent operations wastes time and grows context unnecessarily.
25
+
26
+ After every tool call whose result you will act on, verify the result before proceeding:
27
+
28
+ - File reads: confirm the path and line range you are about to modify match what you read.
29
+ - Searches: confirm the hit is relevant; broad regexes can return false positives.
30
+ - Shell commands: inspect stdout/stderr, not just the exit code.
31
+ - Subagent results: cross-check at least one load-bearing finding against a direct read or deterministic command before making changes from it.
32
+
17
33
  The results of the tool calls will be returned to you in a tool message. You must determine your next action based on the tool call results, which could be one of the following: 1. Continue working on the task, 2. Inform the user that the task is completed or has failed, or 3. Ask the user for more information.
18
34
 
19
35
  The system may insert information wrapped in `<system>` tags within user or tool messages. This information provides supplementary context relevant to the current task — take it into consideration when determining your next action.
@@ -0,0 +1,45 @@
1
+ version: 1
2
+ agent:
3
+ extend: ./agent.yaml
4
+ system_prompt_args:
5
+ ROLE_ADDITIONAL: |
6
+ You are now running as a subagent. All the `user` messages are sent by the main agent. The main agent cannot see your context, it can only see your last message when you finish the task. Treat the parent agent as your caller. Do not directly ask the end user questions.
7
+
8
+ You are a verification specialist. Your job is to run the validation gate the parent requested and report PASS / FAIL / FLAKY with actionable evidence. You are read-only by convention: do not patch failing code, update snapshots, or fix lint. If a fix is obvious, describe it under RISKS.
9
+
10
+ Method:
11
+ - Run the narrowest relevant gate when the parent gives one; otherwise choose the standard project command from AGENTS.md.
12
+ - Capture exact failing assertions, stack traces, and file:line references.
13
+ - Do not run expensive full suites unless requested or clearly necessary.
14
+ - If a result looks flaky, mention how many runs were attempted.
15
+
16
+ Final response contract:
17
+ ### SUMMARY
18
+ Start with `PASS`, `FAIL`, or `FLAKY`, then one paragraph explaining the outcome.
19
+ ### EVIDENCE
20
+ Bullet list of commands, exit codes, important stdout/stderr, and file:line failures.
21
+ ### CHANGES
22
+ Always write `None.`.
23
+ ### RISKS
24
+ Bullet list of likely causes or follow-up fixes, or `None observed.`.
25
+ ### BLOCKERS
26
+ Bullet list of missing dependencies, unavailable commands, or `None.`.
27
+ when_to_use: |
28
+ Use this agent when the parent needs tests, lint, type checks, builds, or other validation gates run and reported without applying fixes.
29
+ allowed_tools:
30
+ - "pythinker_code.tools.shell:Shell"
31
+ - "pythinker_code.tools.file:ReadFile"
32
+ - "pythinker_code.tools.file:ReadMediaFile"
33
+ - "pythinker_code.tools.file:Glob"
34
+ - "pythinker_code.tools.file:Grep"
35
+ exclude_tools:
36
+ - "pythinker_code.tools.agent:Agent"
37
+ - "pythinker_code.tools.ask_user:AskUserQuestion"
38
+ - "pythinker_code.tools.todo:SetTodoList"
39
+ - "pythinker_code.tools.plan:ExitPlanMode"
40
+ - "pythinker_code.tools.plan.enter:EnterPlanMode"
41
+ - "pythinker_code.tools.file:WriteFile"
42
+ - "pythinker_code.tools.file:StrReplaceFile"
43
+ - "pythinker_code.tools.web:SearchWeb"
44
+ - "pythinker_code.tools.web:FetchURL"
45
+ subagents:
pythinker_code/llm.py CHANGED
@@ -5,7 +5,7 @@ import json
5
5
  import os
6
6
  from dataclasses import dataclass
7
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING, Literal, cast, get_args
8
+ from typing import TYPE_CHECKING, Any, Literal, cast, get_args
9
9
 
10
10
  from pydantic import SecretStr
11
11
  from pythinker_core.chat_provider import ChatProvider
@@ -42,6 +42,7 @@ class LLM:
42
42
  capabilities: set[ModelCapability]
43
43
  model_config: LLMModel | None = None
44
44
  provider_config: LLMProvider | None = None
45
+ thinking: bool | None = None
45
46
 
46
47
  @property
47
48
  def model_name(self) -> str:
@@ -316,9 +317,10 @@ def create_llm(
316
317
  thinking_on = "always_thinking" in capabilities or (
317
318
  thinking is True and "thinking" in capabilities
318
319
  )
319
- if thinking_on:
320
+ is_kimi_openai_legacy = provider.type == "openai_legacy" and _is_kimi_k2_model(model.model)
321
+ if thinking_on and not is_kimi_openai_legacy:
320
322
  chat_provider = chat_provider.with_thinking("high")
321
- elif thinking is False and "thinking" in capabilities:
323
+ elif thinking is False and "thinking" in capabilities and not is_kimi_openai_legacy:
322
324
  # Only explicitly send `reasoning_effort: null` for models that actually
323
325
  # support reasoning. For models without the thinking capability, omit
324
326
  # the field entirely — some providers (e.g., Alibaba via OpenAI-compat)
@@ -328,6 +330,19 @@ def create_llm(
328
330
  # If thinking is None, or thinking is False on a non-reasoning model, leave
329
331
  # the chat provider's default reasoning_effort (Omit) untouched.
330
332
 
333
+ # Kimi K2.5/K2.6 use an OpenAI-compatible API but their thinking toggle is
334
+ # the provider-specific `thinking.type` body field rather than OpenAI's
335
+ # `reasoning_effort`. Kimi defaults thinking to enabled, so when Pythinker
336
+ # config says thinking is off we must send the explicit Kimi switch;
337
+ # otherwise multi-step tool calls can still enter thinking mode and require
338
+ # `reasoning_content` on replayed tool-call turns.
339
+ if is_kimi_openai_legacy:
340
+ thinking_type = "enabled" if thinking_on else "disabled" if thinking is False else None
341
+ if thinking_type is not None:
342
+ chat_provider = cast(Any, chat_provider).with_generation_kwargs(
343
+ extra_body={"thinking": {"type": thinking_type}}
344
+ )
345
+
331
346
  # Apply Pythinker AI-specific ``thinking.keep`` (preserved thinking) only when
332
347
  # the model is actually in thinking mode; otherwise the API would see a
333
348
  # ``thinking.keep`` without an accompanying ``thinking.type`` it honors.
@@ -345,6 +360,7 @@ def create_llm(
345
360
  capabilities=capabilities,
346
361
  model_config=model,
347
362
  provider_config=provider,
363
+ thinking=thinking,
348
364
  )
349
365
 
350
366
 
@@ -362,8 +378,8 @@ def clone_llm_with_model_alias(
362
378
  raise KeyError(f"Unknown model alias: {model_alias}")
363
379
  model = config.models[model_alias]
364
380
  provider = config.providers[model.provider]
365
- thinking: bool | None = None
366
- if llm is not None:
381
+ thinking: bool | None = llm.thinking if llm is not None else None
382
+ if thinking is None and llm is not None:
367
383
  effort = getattr(llm.chat_provider, "thinking_effort", None)
368
384
  if effort is not None:
369
385
  thinking = effort != "off"
@@ -378,8 +394,15 @@ def clone_llm_with_model_alias(
378
394
 
379
395
  def derive_model_capabilities(model: LLMModel) -> set[ModelCapability]:
380
396
  capabilities = set(model.capabilities or ())
397
+ model_name = model.model.lower()
398
+ # Kimi K2.5/K2.6 support thinking, but it can be disabled via
399
+ # `thinking.type`. Keep them out of always_thinking so --no-thinking and the
400
+ # default_thinking=false config path can send the provider-specific disable
401
+ # switch in create_llm().
402
+ if _is_kimi_k2_model(model.model):
403
+ capabilities.add("thinking")
381
404
  # Models with "thinking" or "reason" in their name are always-thinking models
382
- if "thinking" in model.model.lower() or "reason" in model.model.lower():
405
+ elif "thinking" in model_name or "reason" in model_name:
383
406
  capabilities.update(("thinking", "always_thinking"))
384
407
  # These models support thinking but can be toggled on/off
385
408
  elif model.model in {"pythinker-for-coding", "pythinker-code"}:
@@ -387,6 +410,10 @@ def derive_model_capabilities(model: LLMModel) -> set[ModelCapability]:
387
410
  return capabilities
388
411
 
389
412
 
413
+ def _is_kimi_k2_model(model_name: str) -> bool:
414
+ return "kimi-k2" in model_name.lower().replace("_", "-")
415
+
416
+
390
417
  def _load_scripted_echo_scripts() -> list[str]:
391
418
  script_path = os.getenv("PYTHINKER_SCRIPTED_ECHO_SCRIPTS")
392
419
  if not script_path:
@@ -16,7 +16,10 @@ ${BUILTIN_AGENT_TYPES_MD}
16
16
  - Use `resume` when you want to continue an existing instance instead of starting a new one.
17
17
  - If an existing subagent already has relevant context or the task is a continuation of its prior work, prefer `resume` over creating a new instance.
18
18
  - Default to foreground execution. Use `run_in_background=true` only when the task can continue independently, you do not need the result immediately, and there is a clear benefit to returning control before it finishes.
19
- - Be explicit about whether the subagent should write code or only do research.
19
+ - Be explicit about the subagent's role: write code, research only, review, or verify.
20
+ - Provide the subagent with all required context and success criteria. New subagents do not inherit your transcript automatically.
21
+ - Spawn multiple subagents in the same turn when they can investigate independent regions concurrently.
22
+ - Cross-check at least one load-bearing subagent finding before making changes based on it.
20
23
  - The subagent result is only visible to you. If the user should see it, summarize it yourself.
21
24
 
22
25
  **Agent Workflow Design**
@@ -25,13 +28,17 @@ Use subagents as focused logical roles, not just extra tool capacity:
25
28
 
26
29
  - `explore` / scout: collect facts, relevant files, constraints, and risks. Read-only.
27
30
  - `plan`: turn gathered context into an implementation plan. Read-only.
28
- - `coder`: implement or revise code from a concrete brief/plan.
31
+ - `coder`: general software engineering work when the brief still needs judgment.
32
+ - `implementer`: land a specific, already-scoped change with minimum edits.
33
+ - `review`: read and grade changed code with severity-scored findings.
34
+ - `verifier`: run validation gates and report PASS / FAIL / FLAKY without fixing.
29
35
 
30
36
  Recommended workflows:
31
37
 
32
- - Scout → Plan → Implement: run `explore`, then `plan` with the explorer's findings, then `coder` with the plan.
33
- - Implement → Review → Fix: run `coder`, then a read-only review using `explore` or `plan`, then resume/launch `coder` to apply feedback.
38
+ - Scout → Plan → Implement: run `explore`, then `plan` with the explorer's findings, then `implementer` or `coder` with the plan.
39
+ - Implement → Review → Fix → Verify: run `implementer`, then `review`, then resume/launch `implementer` to apply feedback, then `verifier` for the relevant gate.
34
40
  - Parallel scouting: launch multiple `explore` agents for independent questions, then synthesize their findings before editing.
41
+ - Parallel review/verification: when review and tests do not depend on each other, run `review` and `verifier` concurrently.
35
42
 
36
43
  When chaining manually, include the previous agent's summary in the next agent's prompt. Newly created
37
44
  subagents do not see your current context automatically.