pi-subagents 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/CHANGELOG.md +14 -0
  2. package/README.md +18 -61
  3. package/package.json +1 -1
  4. package/skills/pi-subagents/SKILL.md +4 -35
  5. package/src/agents/agent-management.ts +10 -20
  6. package/src/agents/agent-selection.ts +2 -0
  7. package/src/agents/agent-serializer.ts +0 -10
  8. package/src/agents/agents.ts +304 -47
  9. package/src/agents/chain-serializer.ts +4 -9
  10. package/src/extension/doctor.ts +4 -3
  11. package/src/extension/fanout-child.ts +0 -2
  12. package/src/extension/index.ts +3 -8
  13. package/src/extension/schemas.ts +32 -22
  14. package/src/intercom/intercom-bridge.ts +11 -1
  15. package/src/intercom/result-intercom.ts +0 -5
  16. package/src/runs/background/async-execution.ts +20 -11
  17. package/src/runs/background/run-status.ts +1 -7
  18. package/src/runs/background/subagent-runner.ts +81 -211
  19. package/src/runs/foreground/chain-execution.ts +62 -58
  20. package/src/runs/foreground/execution.ts +38 -343
  21. package/src/runs/foreground/subagent-executor.ts +28 -99
  22. package/src/runs/shared/acceptance.ts +605 -22
  23. package/src/runs/shared/completion-guard.ts +3 -26
  24. package/src/runs/shared/model-fallback.ts +38 -0
  25. package/src/runs/shared/parallel-utils.ts +6 -10
  26. package/src/runs/shared/subagent-prompt-runtime.ts +3 -2
  27. package/src/runs/shared/workflow-graph.ts +2 -6
  28. package/src/shared/atomic-json.ts +68 -11
  29. package/src/shared/settings.ts +1 -0
  30. package/src/shared/types.ts +10 -48
  31. package/src/shared/utils.ts +2 -8
  32. package/src/tui/render.ts +14 -29
  33. package/src/runs/shared/acceptance-contract.ts +0 -318
  34. package/src/runs/shared/acceptance-evaluation.ts +0 -221
  35. package/src/runs/shared/acceptance-finalization.ts +0 -173
  36. package/src/runs/shared/acceptance-reports.ts +0 -127
package/CHANGELOG.md CHANGED
@@ -2,6 +2,20 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.29.0] - 2026-06-19
6
+
7
+ ### Added
8
+ - Added package-provided agent and chain discovery from installed Pi packages and package settings, including read-only management behavior, package source counts in doctor output, nested-cwd project package discovery, and package definitions that remain below user/project overrides. Thanks to Fabian Jocks (@iamfj) for #278.
9
+ - Added `PI_SUBAGENT_EXTRA_AGENT_DIRS` and `PI_INTERCOM_EXTENSION_DIR` overrides so bundled agents and `pi-intercom` can be loaded from read-only package locations. Thanks to David Barroso (@dbarrosop) for #288.
10
+
11
+ ### Fixed
12
+ - Show captured output from failed foreground subagents instead of returning only the failure summary. Thanks to Jürgen Schmied (@jschmied) for #277.
13
+ - Preserve nested fanout child subagent history when building child prompts. Thanks to James Wood (@jamesjwood) for the original #270 fix.
14
+ - Retry Windows atomic JSON renames on transient `EPERM`, `EBUSY`, and `EACCES` failures. Thanks to Wings Butterfly (@wings1848) for #269.
15
+ - Inherit the parent session model for subagents instead of falling back to global settings, including foreground, chain, async chain, async single, and resume/revive paths. Thanks to Rogerio Saulo (@rsaulo) for #266 and Nicolas Marchildon (@elecnix) for the original #283 fix.
16
+ - Avoid duplicate `subagent` tool registration in fanout-authorized child processes. Thanks to Aleksei Gurianov (@Guria) for #279.
17
+ - Hardened the parallel intercom integration test fixture after Windows CI exposed nondeterministic failure ordering.
18
+
5
19
  ## [0.28.0] - 2026-06-03
6
20
 
7
21
  ### Added
package/README.md CHANGED
@@ -145,7 +145,7 @@ Use `~/.pi/agent/settings.json` for a user override or `.pi/settings.json` for a
145
145
 
146
146
  ## Where running subagents show up
147
147
 
148
- Foreground runs stream progress in the conversation while they run. Use `timeoutMs` or its alias `maxRuntimeMs` when a foreground run must return within a wall-clock budget. When the timeout expires, running children are soft-interrupted, completed children stay in the result, and timed-out children return `timedOut: true` with a stable timeout message.
148
+ Foreground runs stream progress in the conversation while they run.
149
149
 
150
150
  Background runs keep working after control returns to you. Inspect active runs with `subagent({ action: "status" })`, or a specific run with `subagent({ action: "status", id: "..." })`.
151
151
 
@@ -358,10 +358,11 @@ Agent locations, lowest to highest priority:
358
358
  | Scope | Path |
359
359
  |-------|------|
360
360
  | Builtin | `~/.pi/agent/extensions/subagent/agents/` |
361
+ | Installed package | `package.json` `pi-subagents.agents` or `pi.subagents.agents` |
361
362
  | User | `~/.pi/agent/agents/**/*.md` |
362
363
  | Project | `.pi/agents/**/*.md` |
363
364
 
364
- Project discovery also reads legacy `.agents/**/*.md` files. Nested subdirectories are discovered recursively. `.chain.md` files do not define agents. If both `.agents/` and `.pi/agents/` define the same parsed runtime agent name, `.pi/agents/` wins. Use `agentScope: "user" | "project" | "both"` to control discovery; `both` is the default and project definitions win runtime-name collisions.
365
+ Project discovery also reads legacy `.agents/**/*.md` files. Nested subdirectories are discovered recursively. `.chain.md` files do not define agents. Installed Pi packages can expose agent directories from either `{"pi-subagents":{"agents":["./agents"]}}` or `{"pi":{"subagents":{"agents":["./agents"]}}}` in their package manifest. Package agents load above builtins and below user/project agents. If both `.agents/` and `.pi/agents/` define the same parsed runtime agent name, `.pi/agents/` wins. Use `agentScope: "user" | "project" | "both"` to control discovery; `both` is the default and project definitions win runtime-name collisions.
365
366
 
366
367
  Builtin agents load at the lowest priority, so a user or project agent with the same name overrides them. They do not pin a provider model; they inherit your current Pi default model unless you set `subagents.agentOverrides.<name>.model`. `oracle` is an advisory reviewer that critiques direction and proposes an execution prompt without editing files. `worker` is the implementation agent for normal tasks and approved oracle handoffs.
367
368
 
@@ -436,8 +437,6 @@ defaultProgress: true
436
437
  completionGuard: false
437
438
  interactive: true
438
439
  maxSubagentDepth: 1
439
- maxExecutionTimeMs: 600000
440
- maxTokens: 50000
441
440
  ---
442
441
 
443
442
  Your system prompt goes here.
@@ -464,8 +463,6 @@ Important fields:
464
463
  | `completionGuard` | Set `false` only for non-implementation agents that may mention implementation words while using mutation-capable tools such as `bash`. |
465
464
  | `interactive` | Parsed for compatibility but not enforced in v1. |
466
465
  | `maxSubagentDepth` | Tightens nested delegation for this agent’s children. |
467
- | `maxExecutionTimeMs` | Stops each foreground or async child run for this agent after the given number of milliseconds. |
468
- | `maxTokens` | Stops each foreground or async child run for this agent when observed input plus output tokens reach the limit. Token enforcement is best-effort because usage is reported after model events arrive. |
469
466
 
470
467
  ### Tool and extension selection
471
468
 
@@ -500,10 +497,11 @@ Chains are reusable workflows stored separately from agent files. Use `.chain.md
500
497
 
501
498
  | Scope | Path |
502
499
  |-------|------|
500
+ | Installed package | `package.json` `pi-subagents.chains` or `pi.subagents.chains` |
503
501
  | User | `~/.pi/agent/chains/**/*.chain.md`, `~/.pi/agent/chains/**/*.chain.json` |
504
502
  | Project | `.pi/chains/**/*.chain.md`, `.pi/chains/**/*.chain.json` |
505
503
 
506
- Nested subdirectories are discovered recursively. If both `.chain.md` and `.chain.json` define the same parsed runtime chain name in the same scope, `.chain.json` wins. If user and project scopes define the same parsed runtime chain name, the project chain wins. Chains support the same optional `package` frontmatter as agents; `name: review-flow` plus `package: code-analysis` runs as `code-analysis.review-flow`.
504
+ Nested subdirectories are discovered recursively. Installed Pi packages can expose chain directories from either `{"pi-subagents":{"chains":["./chains"]}}` or `{"pi":{"subagents":{"chains":["./chains"]}}}` in their package manifest. Package chains load below user/project chains. If both `.chain.md` and `.chain.json` define the same parsed runtime chain name in the same scope, `.chain.json` wins. If user and project scopes define the same parsed runtime chain name, the project chain wins. Chains support the same optional `package` frontmatter as agents; `name: review-flow` plus `package: code-analysis` runs as `code-analysis.review-flow`.
507
505
 
508
506
  Example:
509
507
 
@@ -797,9 +795,8 @@ Agent definitions are not loaded into context by default. Management actions let
797
795
  | `model` | string | agent default | Override model. |
798
796
  | `tasks` | array | - | Top-level parallel tasks. Supports `agent`, `task`, `cwd`, `count`, `output`, `outputMode`, `reads`, `progress`, `skill`, `model`, and `acceptance`. |
799
797
  | `concurrency` | number | config or `4` | Top-level parallel concurrency. |
800
- | `timeoutMs` / `maxRuntimeMs` | number | - | Foreground wall-clock timeout for single, parallel, and chain runs. Timed-out children return `timedOut: true`; async/background runs reject it. |
801
798
  | `worktree` | boolean | false | Create isolated git worktrees for parallel tasks. |
802
- | `chain` | array | - | Sequential, static parallel, and dynamic fanout chain steps. Sequential steps and parallel child tasks support `phase`, `label`, `as`, `outputSchema`, and `acceptance` in addition to the usual execution fields. Dynamic fanout uses `expand`, one child `parallel` template, and `collect`; group-level acceptance is not supported because there is no child session to finalize. |
799
+ | `chain` | array | - | Sequential, static parallel, and dynamic fanout chain steps. Steps and chain parallel tasks support `phase`, `label`, `as`, `outputSchema`, and `acceptance` in addition to the usual execution fields. Dynamic fanout uses `expand`, one child `parallel` template, and `collect`. |
803
800
  | `context` | `fresh \| fork` | agent default or `fresh` | `fork` creates real branched sessions from the parent leaf. Packaged `planner`, `worker`, and `oracle` default to `fork`. |
804
801
  | `chainDir` | string | temp chain dir | Persistent directory for chain artifacts. |
805
802
  | `clarify` | boolean | true for chains | Show TUI preview/edit flow. |
@@ -811,7 +808,7 @@ Agent definitions are not loaded into context by default. Management actions let
811
808
  | `includeProgress` | boolean | false | Include full progress in result. |
812
809
  | `share` | boolean | false | Upload session export to GitHub Gist. |
813
810
  | `sessionDir` | string | derived | Override session log directory. |
814
- | `acceptance` | object | omitted | Explicit acceptance contract. When present, the child gets a structured contract, then the runtime continues the same session for a bounded self-review/repair loop before evaluating acceptance. |
811
+ | `acceptance` | string/object/false | inferred | Override the run's inferred acceptance gates. Use `"auto"`, `"attested"`, `"checked"`, `"verified"`, `"reviewed"`, or `{ level: "none", reason: "..." }`. |
815
812
 
816
813
  `context: "fork"` fails fast when the parent session is not persisted, the current leaf is missing, or the branched child session cannot be created. It never silently downgrades to `fresh`. In multi-agent runs, if any requested agent has `defaultContext: fork` and the launch omits `context`, the whole invocation uses forked context; pass `context: "fresh"` when you intentionally want a fresh run.
817
814
 
@@ -916,19 +913,6 @@ Session directory precedence is: `params.sessionDir`, then `config.defaultSessio
916
913
 
917
914
  Controls nested delegation when no inherited `PI_SUBAGENT_MAX_DEPTH` is already in effect. Per-agent `maxSubagentDepth` can tighten the limit for that agent’s child runs, but cannot relax an inherited stricter limit. This applies even to children that explicitly declare `tools: subagent`; at the cap, execution fanout is blocked instead of silently hiding nested work.
918
915
 
919
- ### Agent resource limits
920
-
921
- Set `maxExecutionTimeMs` and `maxTokens` in agent frontmatter or through `subagent({ action: "create" | "update", config })` to bound a specific agent across foreground and async runs.
922
-
923
- ```yaml
924
- maxExecutionTimeMs: 600000
925
- maxTokens: 50000
926
- ```
927
-
928
- When a limit is reached, the child receives a soft interrupt, the run fails with a clear `Resource limit exceeded...` error, and the result includes `resourceLimitExceeded` with the limit kind, configured limit, and observed token count when available. Resource-limit failures do not trigger fallback model retries. `maxTokens` is best-effort because providers report usage after message events; a child may exceed the exact limit before the runtime can stop it.
929
-
930
- Spawn-count and per-agent child-concurrency quotas are not part of this release; use `maxSubagentDepth` and parallel `concurrency` for those boundaries today.
931
-
932
916
  ### `intercomBridge`
933
917
 
934
918
  ```json
@@ -987,7 +971,7 @@ Debug artifacts live under `{sessionDir}/subagent-artifacts/` or a user-scoped t
987
971
  - `{runId}_{agent}.jsonl`
988
972
  - `{runId}_{agent}_meta.json`
989
973
 
990
- Metadata records timing, usage, exit code, final model, attempted models, fallback attempt outcomes, and any resource-limit termination reason.
974
+ Metadata records timing, usage, exit code, final model, attempted models, and fallback attempt outcomes.
991
975
 
992
976
  Session files are stored under a per-run session directory. With `context: "fork"`, each child starts with `--session <branched-session-file>` produced from the parent’s current leaf. That is a real session fork, not an injected summary.
993
977
 
@@ -1007,60 +991,33 @@ Async runs write:
1007
991
 
1008
992
  ## Acceptance Gates
1009
993
 
1010
- `acceptance` is an explicit contract. Omit it for lightweight runs. Set it on single runs, top-level parallel task items, sequential chain steps, static parallel task items, and dynamic fanout child templates when the child must prove the work meets concrete criteria. Do not set it on static parallel groups or dynamic fanout aggregate groups; those groups do not own a same-session child turn.
1011
-
1012
- If you are coming from Codex Goals, `acceptance` is the subagent equivalent for one delegated run. When a user says `/goal`, “goal”, “active goal”, “continue until evidence says done”, or “verify against a goal”, translate that into an acceptance contract: `criteria` are the target, `evidence` and `verify` are proof, `stopRules` are constraints, and `maxFinalizationTurns` is the bounded loop budget.
994
+ Every run resolves an effective acceptance policy. Callers may omit `acceptance` for the inferred default, or set it on single runs, top-level parallel task items, chain steps, static parallel tasks, and dynamic fanout templates.
1013
995
 
1014
996
  ```ts
1015
997
  {
1016
998
  agent: "worker",
1017
999
  task: "Implement the fix",
1018
1000
  acceptance: {
1001
+ level: "verified",
1019
1002
  criteria: ["Patch the bug without widening scope"],
1020
1003
  evidence: ["changed-files", "tests-added", "commands-run", "residual-risks", "no-staged-files"],
1021
- verify: [{ id: "focused", command: "npm test", timeoutMs: 120000 }],
1022
- maxFinalizationTurns: 3
1004
+ verify: [{ id: "focused", command: "npm test", timeoutMs: 120000 }]
1023
1005
  }
1024
1006
  }
1025
1007
  ```
1026
1008
 
1027
- When `acceptance` is present, the initial child prompt includes a standardized acceptance section and asks for a fenced `acceptance-report` JSON block. After the child’s initial completion, the runtime continues the same persisted child session with an acceptance finalization prompt. The child can repair omissions in that same session, then must return the final `acceptance-report`. Missing or malformed finalization reports reject the run when the loop limit is reached.
1009
+ Accepted levels are `auto`, `none`, `attested`, `checked`, `verified`, and `reviewed`. `acceptance: "auto"` is the default. Read-only reviewer/scout tasks infer lightweight attestation, normal writer tasks infer checked evidence, and async/risky/dynamic writer contexts infer a reviewed gate. To disable gates, prefer `{ level: "none", reason: "..." }`.
1028
1010
 
1029
- Public acceptance config is evidence-driven. There is no public `level` field and no `acceptance: "checked"` shorthand. Runtime provenance is derived from what actually happened:
1011
+ Acceptance provenance is stored separately from child prose:
1030
1012
 
1031
- - `attested`: the child returned a structured acceptance report.
1032
- - `checked`: runtime structural checks passed, such as required criteria, required evidence, and no staged files.
1013
+ - `claimed`: child finished but did not provide structured evidence.
1014
+ - `attested`: child returned a structured acceptance report.
1015
+ - `checked`: runtime structural checks passed, such as required evidence and no staged files.
1033
1016
  - `verified`: configured runtime verification commands passed. Child-reported command success does not count.
1034
1017
  - `reviewed`: an independent reviewer result is present.
1035
- - `rejected`: attestation, structural checks, verification, review, or finalization failed.
1018
+ - `rejected`: attestation, structural checks, verification, or review failed.
1036
1019
 
1037
- Self-review finalization never counts as `reviewed`, and it never counts as `verified` unless configured runtime verification commands actually pass. The visible child output remains the initial answer; finalization reports and residual risks are stored in the acceptance ledger and async/status details.
1038
-
1039
- When delegating implementation from a plan or spec, keep the task focused on what to implement and put the definition of done in `acceptance` so the runtime can finalize and evaluate it:
1040
-
1041
- ```ts
1042
- subagent({
1043
- agent: "worker",
1044
- async: true,
1045
- task: "Implement the plan at /Users/me/docs/mcp-alignment-plan.md. Use scout artifacts in ./handoff/ as context. Do not commit the scout artifacts.",
1046
- acceptance: {
1047
- criteria: [
1048
- "Implementation follows /Users/me/docs/mcp-alignment-plan.md",
1049
- "Plan acceptance checks are addressed",
1050
- "Scout handoff artifacts are not committed",
1051
- "Focused validation for changed behavior passes",
1052
- "Residual risks or skipped checks are reported"
1053
- ],
1054
- evidence: ["changed-files", "commands-run", "validation-output", "residual-risks"],
1055
- verify: [{ id: "focused", command: "npm test -- --runInBand" }],
1056
- stopRules: [
1057
- "Do not edit unrelated files",
1058
- "Stop and report if the plan requires an unapproved product decision"
1059
- ],
1060
- maxFinalizationTurns: 3
1061
- }
1062
- })
1063
- ```
1020
+ For `attested` or stricter levels, the child prompt includes a standardized acceptance section and asks for a fenced `acceptance-report` JSON block. Explicit failed gates fail the run. Inferred gates are persisted for observability without breaking older calls that omit `acceptance`.
1064
1021
 
1065
1022
  ## Live progress
1066
1023
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-subagents",
3
- "version": "0.28.0",
3
+ "version": "0.29.0",
4
4
  "description": "Pi extension for delegating tasks to subagents with chains, parallel execution, and TUI clarification",
5
5
  "author": "Nico Bailon",
6
6
  "license": "MIT",
package/skills/pi-subagents/SKILL.md CHANGED
@@ -134,7 +134,7 @@ subagent({
134
134
  { agent: "reviewer", phase: "Planning", label: "Scheduler contract", as: "schedulerPlan", task: "Plan fixes for scheduler contract. Inspect the current diff. Do not modify project/source files; returning findings via the configured output artifact is allowed.", output: "plans/scheduler.md", outputMode: "file-only" },
135
135
  { agent: "reviewer", phase: "Planning", label: "Sandbox/security", as: "sandboxPlan", task: "Plan fixes for sandbox/security. Inspect the current diff. Do not modify project/source files; returning findings via the configured output artifact is allowed.", output: "plans/sandbox.md", outputMode: "file-only" }
136
136
  ], concurrency: 3 },
137
- { agent: "worker", phase: "Implementation", label: "Apply accepted fixes", as: "workerResult", task: "Apply only the accepted fixes from these planning summaries. You are the sole writer for the active worktree.\n\nDeploy plan:\n{outputs.deployPlan}\n\nScheduler plan:\n{outputs.schedulerPlan}\n\nSandbox plan:\n{outputs.sandboxPlan}", acceptance: { criteria: ["Accepted fixes from each planning summary are applied", "Focused validation for changed behavior passes", "Changed files, validation commands, failures, and residual risks are reported"], evidence: ["changed-files", "commands-run", "validation-output", "residual-risks"], stopRules: ["Do not expand product scope beyond accepted fixes", "Stop and report if a fix requires an unapproved decision"], maxFinalizationTurns: 3 }, output: "worker/fixes.md", outputMode: "file-only", progress: true },
137
+ { agent: "worker", phase: "Implementation", label: "Apply accepted fixes", as: "workerResult", task: "Apply only the accepted fixes from these planning summaries. You are the sole writer for the active worktree. Run focused validation and report changed files, commands, failures, and remaining issues.\n\nDeploy plan:\n{outputs.deployPlan}\n\nScheduler plan:\n{outputs.schedulerPlan}\n\nSandbox plan:\n{outputs.sandboxPlan}", output: "worker/fixes.md", outputMode: "file-only", progress: true },
138
138
  { parallel: [
139
139
  { agent: "reviewer", phase: "Validation", label: "Deploy/scheduler validation", task: "Validate the post-worker diff for deploy and scheduler fixes. Start from the worker result: {outputs.workerResult}. Do not modify project/source files; returning findings via the configured output artifact is allowed.", output: "validation/deploy-scheduler.md", outputMode: "file-only" },
140
140
  { agent: "reviewer", phase: "Validation", label: "Sandbox validation", task: "Validate the post-worker diff for sandbox/security fixes. Start from the worker result: {outputs.workerResult}. Do not modify project/source files; returning findings via the configured output artifact is allowed.", output: "validation/sandbox.md", outputMode: "file-only" }
@@ -692,37 +692,7 @@ clarify → validation contract → planner → async worker → parallel async
692
692
 
693
693
  The validation contract defines acceptance before code is written: expected behavior, acceptance checks, commands or user flows to exercise, and evidence the worker should return. Keep it lightweight for small tasks, but make it explicit enough that reviewers and validators are checking the intended outcome rather than the worker’s own assumptions.
694
694
 
695
- Use the structured `acceptance` field when the run should carry an explicit acceptance contract. If omitted, the run stays lightweight. When present, acceptance is object-only: define concrete `criteria`, required `evidence`, optional runtime `verify` commands, optional independent `review`, and optionally `maxFinalizationTurns`. The runtime continues the same child session for a bounded self-review/repair loop before evaluating the final report, so set `acceptance` on single runs, sequential chain steps, parallel task items, and dynamic fanout child templates, not on static parallel or dynamic fanout groups. Do not call a run reviewed just because the worker says it is done; reviewed means a reviewer gate returned a result. Child-reported command success is evidence, not runtime verification.
696
-
697
- Goal-style requests map to `acceptance`. If the user says `/goal`, “goal”, “active goal”, “continue until evidence says done”, or “verify against a goal” for a subagent run, create an explicit run-scoped acceptance contract: `criteria` for the target, `evidence` and `verify` for proof, `stopRules` for constraints, and `maxFinalizationTurns` for the bounded loop budget.
698
-
699
- When launching a writer/worker from a plan, PRD, spec, issue, or broad fix, set structured `acceptance` proactively. Put implementation instructions, plan paths, and handoff artifacts in `task`; put the definition of done in `acceptance.criteria`, proof requirements in `acceptance.evidence` and `acceptance.verify`, constraints in `acceptance.stopRules`, and usually set `maxFinalizationTurns: 3`. Do not bury all validation requirements only in the task prompt.
700
-
701
- Example writer handoff:
702
-
703
- ```typescript
704
- subagent({
705
- agent: "worker",
706
- async: true,
707
- task: "Implement the plan at /Users/me/docs/mcp-alignment-plan.md. Use scout artifacts in ./handoff/ as context. Do not commit the scout artifacts.",
708
- acceptance: {
709
- criteria: [
710
- "Implementation follows /Users/me/docs/mcp-alignment-plan.md",
711
- "Plan acceptance checks are addressed",
712
- "Scout handoff artifacts are not committed",
713
- "Focused validation for changed behavior passes",
714
- "Residual risks or skipped checks are reported"
715
- ],
716
- evidence: ["changed-files", "commands-run", "validation-output", "residual-risks"],
717
- verify: [{ id: "focused", command: "npm test -- --runInBand" }],
718
- stopRules: [
719
- "Do not edit unrelated files",
720
- "Stop and report if the plan requires an unapproved product decision"
721
- ],
722
- maxFinalizationTurns: 3
723
- }
724
- })
725
- ```
695
+ Use the structured `acceptance` field when the run should carry an explicit acceptance contract. If omitted, subagents infer an effective acceptance policy from role, mode, and risk. Use `level: "checked"` for ordinary writer evidence gates, `level: "verified"` when the runtime should run explicit validation commands, and `level: "reviewed"` only when an independent reviewer result is expected. Do not call a run reviewed just because the worker says it is done; reviewed means a reviewer gate returned a result. Child-reported command success is evidence, not runtime verification.
726
696
 
727
697
  The first `worker` implements the approved plan. The parent continues with independent inspection or validation prep while it runs, not parallel edits to the same worktree. When the async worker completes, treat its handoff as the transition into review, not as final completion, unless the user explicitly asked for worker-only work, review-only output, or to stop after implementation. Parallel reviewers inspect the resulting diff from fresh context. Validators check behavior with the best available evidence: commands, tests, browser/CLI interaction, screenshots, logs, or manual reproduction notes. The final `worker` applies synthesized review fixes in forked context, then the parent looks over the final diff before completing. The parent may launch these steps as an initial async chain when the workflow is already clear, or as follow-up subagent runs after each async completion. Initial chains should pass `async: true` so the main chat is unblocked; avoid `clarify: true` unless the user asked for foreground clarification. Do not stop after parallel review unless the user explicitly asked for review-only output or the review surfaced a decision that needs approval first.
728
698
 
@@ -751,9 +721,8 @@ subagent({
751
721
  agent: "worker",
752
722
  task: "Implement the approved feature.\n\nClarified requirements:\n- ...\n\nPlan: see ~/Documents/docs/...-plan.md\n\nValidation contract:\n- ...\n\nReturn a handoff with changed files, what was implemented, what was left undone, commands run with exit codes, validation evidence, surprises/new risks, and decisions needing parent approval.",
753
723
  acceptance: {
754
- criteria: ["Implement the approved feature without widening scope"],
755
- evidence: ["changed-files", "tests-added", "commands-run", "residual-risks", "no-staged-files"],
756
- maxFinalizationTurns: 3
724
+ level: "checked",
725
+ evidence: ["changed-files", "tests-added", "commands-run", "residual-risks", "no-staged-files"]
757
726
  },
758
727
  async: true
759
728
  })
package/src/agents/agent-management.ts CHANGED
@@ -78,8 +78,8 @@ function parsePackageConfig(value: unknown): { packageName?: string; error?: str
78
78
  return parsePackageName(value, "config.package");
79
79
  }
80
80
 
81
- function allAgents(d: { builtin: AgentConfig[]; user: AgentConfig[]; project: AgentConfig[] }): AgentConfig[] {
82
- return [...d.builtin, ...d.user, ...d.project];
81
+ function allAgents(d: { builtin: AgentConfig[]; package: AgentConfig[]; user: AgentConfig[]; project: AgentConfig[] }): AgentConfig[] {
82
+ return [...d.builtin, ...d.package, ...d.user, ...d.project];
83
83
  }
84
84
 
85
85
  function availableNames(cwd: string, kind: "agent" | "chain"): string[] {
@@ -116,6 +116,10 @@ function nameExistsInScope(cwd: string, scope: ManagementScope, name: string, ex
116
116
  return false;
117
117
  }
118
118
 
119
+ function isMutableSource(source: AgentSource): source is ManagementScope {
120
+ return source === "user" || source === "project";
121
+ }
122
+
119
123
  function unknownChainAgents(cwd: string, steps: ChainStepConfig[]): string[] {
120
124
  const d = discoverAgentsAll(cwd);
121
125
  const known = new Set(allAgents(d).map((a) => a.name));
@@ -313,18 +317,6 @@ function applyAgentConfig(target: AgentConfig, cfg: Record<string, unknown>): st
313
317
  target.maxSubagentDepth = cfg.maxSubagentDepth;
314
318
  } else return "config.maxSubagentDepth must be an integer >= 0 or false when provided.";
315
319
  }
316
- if (hasKey(cfg, "maxExecutionTimeMs")) {
317
- if (cfg.maxExecutionTimeMs === false || cfg.maxExecutionTimeMs === "") target.maxExecutionTimeMs = undefined;
318
- else if (typeof cfg.maxExecutionTimeMs === "number" && Number.isInteger(cfg.maxExecutionTimeMs) && cfg.maxExecutionTimeMs >= 1) {
319
- target.maxExecutionTimeMs = cfg.maxExecutionTimeMs;
320
- } else return "config.maxExecutionTimeMs must be an integer >= 1 or false when provided.";
321
- }
322
- if (hasKey(cfg, "maxTokens")) {
323
- if (cfg.maxTokens === false || cfg.maxTokens === "") target.maxTokens = undefined;
324
- else if (typeof cfg.maxTokens === "number" && Number.isInteger(cfg.maxTokens) && cfg.maxTokens >= 1) {
325
- target.maxTokens = cfg.maxTokens;
326
- } else return "config.maxTokens must be an integer >= 1 or false when provided.";
327
- }
328
320
  if (hasKey(cfg, "completionGuard")) {
329
321
  if (typeof cfg.completionGuard !== "boolean") return "config.completionGuard must be a boolean when provided.";
330
322
  target.completionGuard = cfg.completionGuard;
@@ -339,10 +331,10 @@ function resolveTarget<T extends { source: AgentSource; filePath: string }>(
339
331
  cwd: string,
340
332
  scopeHint?: string,
341
333
  ): T | AgentToolResult<Details> {
342
- const mutable = matches.filter((m) => m.source !== "builtin");
334
+ const mutable = matches.filter((m): m is T & { source: ManagementScope } => isMutableSource(m.source));
343
335
  if (mutable.length === 0) {
344
336
  if (matches.length > 0) {
345
- return result(`${kind === "agent" ? "Agent" : "Chain"} '${name}' is builtin and cannot be modified. Create a same-named ${kind} in user or project scope to override it.`, true);
337
+ return result(`${kind === "agent" ? "Agent" : "Chain"} '${name}' is read-only and cannot be modified. Create a same-named ${kind} in user or project scope to override it.`, true);
346
338
  }
347
339
  const available = availableNames(cwd, kind);
348
340
  return result(`${kind === "agent" ? "Agent" : "Chain"} '${name}' not found. Available: ${available.join(", ") || "none"}.`, true);
@@ -398,8 +390,6 @@ function formatAgentDetail(agent: AgentConfig): string {
398
390
  if (agent.defaultReads?.length) lines.push(`Reads: ${agent.defaultReads.join(", ")}`);
399
391
  if (agent.defaultProgress) lines.push("Progress: true");
400
392
  if (agent.maxSubagentDepth !== undefined) lines.push(`Max subagent depth: ${agent.maxSubagentDepth}`);
401
- if (agent.maxExecutionTimeMs !== undefined) lines.push(`Max execution time: ${agent.maxExecutionTimeMs}ms`);
402
- if (agent.maxTokens !== undefined) lines.push(`Max tokens: ${agent.maxTokens}`);
403
393
  if (agent.completionGuard === false) lines.push("Completion guard: false");
404
394
  if (agent.systemPrompt.trim()) lines.push("", "System Prompt:", agent.systemPrompt);
405
395
  return lines.join("\n");
@@ -456,9 +446,9 @@ function formatChainDetail(chain: ChainConfig): string {
456
446
  export function handleList(params: ManagementParams, ctx: ManagementContext): AgentToolResult<Details> {
457
447
  const scope = normalizeListScope(params.agentScope) ?? "both";
458
448
  const d = discoverAgentsAll(ctx.cwd);
459
- const scopedAgents = allAgents(d).filter((a) => scope === "both" || a.source === "builtin" || a.source === scope).sort((a, b) => a.name.localeCompare(b.name));
449
+ const scopedAgents = allAgents(d).filter((a) => scope === "both" || a.source === "builtin" || a.source === "package" || a.source === scope).sort((a, b) => a.name.localeCompare(b.name));
460
450
  const agents = scopedAgents.filter((a) => !a.disabled);
461
- const chains = d.chains.filter((c) => scope === "both" || c.source === scope).sort((a, b) => a.name.localeCompare(b.name));
451
+ const chains = d.chains.filter((c) => scope === "both" || c.source === "package" || c.source === scope).sort((a, b) => a.name.localeCompare(b.name));
462
452
  const diagnostics = d.chainDiagnostics.filter((entry) => scope === "both" || entry.source === scope);
463
453
  const lines = [
464
454
  "Executable agents:",
@@ -5,10 +5,12 @@ export function mergeAgentsForScope(
5
5
  userAgents: AgentConfig[],
6
6
  projectAgents: AgentConfig[],
7
7
  builtinAgents: AgentConfig[] = [],
8
+ packageAgents: AgentConfig[] = [],
8
9
  ): AgentConfig[] {
9
10
  const agentMap = new Map<string, AgentConfig>();
10
11
 
11
12
  for (const agent of builtinAgents) agentMap.set(agent.name, agent);
13
+ for (const agent of packageAgents) agentMap.set(agent.name, agent);
12
14
 
13
15
  if (scope === "both") {
14
16
  for (const agent of userAgents) agentMap.set(agent.name, agent);
@@ -21,8 +21,6 @@ export const KNOWN_FIELDS = new Set([
21
21
  "defaultProgress",
22
22
  "interactive",
23
23
  "maxSubagentDepth",
24
- "maxExecutionTimeMs",
25
- "maxTokens",
26
24
  "completionGuard",
27
25
  ]);
28
26
 
@@ -73,14 +71,6 @@ export function serializeAgent(config: AgentConfig): string {
73
71
  if (typeof maxSubagentDepth === "number" && Number.isInteger(maxSubagentDepth) && maxSubagentDepth >= 0) {
74
72
  lines.push(`maxSubagentDepth: ${maxSubagentDepth}`);
75
73
  }
76
- const maxExecutionTimeMs = config.maxExecutionTimeMs;
77
- if (typeof maxExecutionTimeMs === "number" && Number.isInteger(maxExecutionTimeMs) && maxExecutionTimeMs >= 1) {
78
- lines.push(`maxExecutionTimeMs: ${maxExecutionTimeMs}`);
79
- }
80
- const maxTokens = config.maxTokens;
81
- if (typeof maxTokens === "number" && Number.isInteger(maxTokens) && maxTokens >= 1) {
82
- lines.push(`maxTokens: ${maxTokens}`);
83
- }
84
74
  if (config.completionGuard === false) lines.push("completionGuard: false");
85
75
 
86
76
  if (config.extraFields) {