npm - pi-subagents - Versions diffs - 0.28.0 → 0.29.0 - Mend

pi-subagents 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/CHANGELOG.md +14 -0
package/README.md +18 -61
package/package.json +1 -1
package/skills/pi-subagents/SKILL.md +4 -35
package/src/agents/agent-management.ts +10 -20
package/src/agents/agent-selection.ts +2 -0
package/src/agents/agent-serializer.ts +0 -10
package/src/agents/agents.ts +304 -47
package/src/agents/chain-serializer.ts +4 -9
package/src/extension/doctor.ts +4 -3
package/src/extension/fanout-child.ts +0 -2
package/src/extension/index.ts +3 -8
package/src/extension/schemas.ts +32 -22
package/src/intercom/intercom-bridge.ts +11 -1
package/src/intercom/result-intercom.ts +0 -5
package/src/runs/background/async-execution.ts +20 -11
package/src/runs/background/run-status.ts +1 -7
package/src/runs/background/subagent-runner.ts +81 -211
package/src/runs/foreground/chain-execution.ts +62 -58
package/src/runs/foreground/execution.ts +38 -343
package/src/runs/foreground/subagent-executor.ts +28 -99
package/src/runs/shared/acceptance.ts +605 -22
package/src/runs/shared/completion-guard.ts +3 -26
package/src/runs/shared/model-fallback.ts +38 -0
package/src/runs/shared/parallel-utils.ts +6 -10
package/src/runs/shared/subagent-prompt-runtime.ts +3 -2
package/src/runs/shared/workflow-graph.ts +2 -6
package/src/shared/atomic-json.ts +68 -11
package/src/shared/settings.ts +1 -0
package/src/shared/types.ts +10 -48
package/src/shared/utils.ts +2 -8
package/src/tui/render.ts +14 -29
package/src/runs/shared/acceptance-contract.ts +0 -318
package/src/runs/shared/acceptance-evaluation.ts +0 -221
package/src/runs/shared/acceptance-finalization.ts +0 -173
package/src/runs/shared/acceptance-reports.ts +0 -127

package/CHANGELOG.md CHANGED

@@ -2,6 +2,20 @@
 ## [Unreleased]
+## [0.29.0] - 2026-06-19
+### Added
+- Added package-provided agent and chain discovery from installed Pi packages and package settings, including read-only management behavior, package source counts in doctor output, nested-cwd project package discovery, and package definitions that remain below user/project overrides. Thanks to Fabian Jocks (@iamfj) for #278.
+- Added `PI_SUBAGENT_EXTRA_AGENT_DIRS` and `PI_INTERCOM_EXTENSION_DIR` overrides so bundled agents and `pi-intercom` can be loaded from read-only package locations. Thanks to David Barroso (@dbarrosop) for #288.
+### Fixed
+- Show captured output from failed foreground subagents instead of returning only the failure summary. Thanks to Jürgen Schmied (@jschmied) for #277.
+- Preserve nested fanout child subagent history when building child prompts. Thanks to James Wood (@jamesjwood) for the original #270 fix.
+- Retry Windows atomic JSON renames on transient `EPERM`, `EBUSY`, and `EACCES` failures. Thanks to Wings Butterfly (@wings1848) for #269.
+- Inherit the parent session model for subagents instead of falling back to global settings, including foreground, chain, async chain, async single, and resume/revive paths. Thanks to Rogerio Saulo (@rsaulo) for #266 and Nicolas Marchildon (@elecnix) for the original #283 fix.
+- Avoid duplicate `subagent` tool registration in fanout-authorized child processes. Thanks to Aleksei Gurianov (@Guria) for #279.
+- Hardened the parallel intercom integration test fixture after Windows CI exposed nondeterministic failure ordering.
 ## [0.28.0] - 2026-06-03
 ### Added

package/README.md CHANGED

@@ -145,7 +145,7 @@ Use `~/.pi/agent/settings.json` for a user override or `.pi/settings.json` for a
 ## Where running subagents show up
-Foreground runs stream progress in the conversation while they run. Use `timeoutMs` or its alias `maxRuntimeMs` when a foreground run must return within a wall-clock budget. When the timeout expires, running children are soft-interrupted, completed children stay in the result, and timed-out children return `timedOut: true` with a stable timeout message.
+Foreground runs stream progress in the conversation while they run.
 Background runs keep working after control returns to you. Inspect active runs with `subagent({ action: "status" })`, or a specific run with `subagent({ action: "status", id: "..." })`.
@@ -358,10 +358,11 @@ Agent locations, lowest to highest priority:
 | Scope | Path |
 |-------|------|
 | Builtin | `~/.pi/agent/extensions/subagent/agents/` |
+| Installed package | `package.json` `pi-subagents.agents` or `pi.subagents.agents` |
 | User | `~/.pi/agent/agents/**/*.md` |
 | Project | `.pi/agents/**/*.md` |
-Project discovery also reads legacy `.agents/**/*.md` files. Nested subdirectories are discovered recursively. `.chain.md` files do not define agents. If both `.agents/` and `.pi/agents/` define the same parsed runtime agent name, `.pi/agents/` wins. Use `agentScope: "user" | "project" | "both"` to control discovery; `both` is the default and project definitions win runtime-name collisions.
+Project discovery also reads legacy `.agents/**/*.md` files. Nested subdirectories are discovered recursively. `.chain.md` files do not define agents. Installed Pi packages can expose agent directories from either `{"pi-subagents":{"agents":["./agents"]}}` or `{"pi":{"subagents":{"agents":["./agents"]}}}` in their package manifest. Package agents load above builtins and below user/project agents. If both `.agents/` and `.pi/agents/` define the same parsed runtime agent name, `.pi/agents/` wins. Use `agentScope: "user" | "project" | "both"` to control discovery; `both` is the default and project definitions win runtime-name collisions.
 Builtin agents load at the lowest priority, so a user or project agent with the same name overrides them. They do not pin a provider model; they inherit your current Pi default model unless you set `subagents.agentOverrides.<name>.model`. `oracle` is an advisory reviewer that critiques direction and proposes an execution prompt without editing files. `worker` is the implementation agent for normal tasks and approved oracle handoffs.
@@ -436,8 +437,6 @@ defaultProgress: true
 completionGuard: false
 interactive: true
 maxSubagentDepth: 1
-maxExecutionTimeMs: 600000
-maxTokens: 50000
 ---
 Your system prompt goes here.
@@ -464,8 +463,6 @@ Important fields:
 | `completionGuard` | Set `false` only for non-implementation agents that may mention implementation words while using mutation-capable tools such as `bash`. |
 | `interactive` | Parsed for compatibility but not enforced in v1. |
 | `maxSubagentDepth` | Tightens nested delegation for this agent’s children. |
-| `maxExecutionTimeMs` | Stops each foreground or async child run for this agent after the given number of milliseconds. |
-| `maxTokens` | Stops each foreground or async child run for this agent when observed input plus output tokens reach the limit. Token enforcement is best-effort because usage is reported after model events arrive. |
 ### Tool and extension selection
@@ -500,10 +497,11 @@ Chains are reusable workflows stored separately from agent files. Use `.chain.md
 | Scope | Path |
 |-------|------|
+| Installed package | `package.json` `pi-subagents.chains` or `pi.subagents.chains` |
 | User | `~/.pi/agent/chains/**/*.chain.md`, `~/.pi/agent/chains/**/*.chain.json` |
 | Project | `.pi/chains/**/*.chain.md`, `.pi/chains/**/*.chain.json` |
-Nested subdirectories are discovered recursively. If both `.chain.md` and `.chain.json` define the same parsed runtime chain name in the same scope, `.chain.json` wins. If user and project scopes define the same parsed runtime chain name, the project chain wins. Chains support the same optional `package` frontmatter as agents; `name: review-flow` plus `package: code-analysis` runs as `code-analysis.review-flow`.
+Nested subdirectories are discovered recursively. Installed Pi packages can expose chain directories from either `{"pi-subagents":{"chains":["./chains"]}}` or `{"pi":{"subagents":{"chains":["./chains"]}}}` in their package manifest. Package chains load below user/project chains. If both `.chain.md` and `.chain.json` define the same parsed runtime chain name in the same scope, `.chain.json` wins. If user and project scopes define the same parsed runtime chain name, the project chain wins. Chains support the same optional `package` frontmatter as agents; `name: review-flow` plus `package: code-analysis` runs as `code-analysis.review-flow`.
 Example:
@@ -797,9 +795,8 @@ Agent definitions are not loaded into context by default. Management actions let
 | `model` | string | agent default | Override model. |
 | `tasks` | array | - | Top-level parallel tasks. Supports `agent`, `task`, `cwd`, `count`, `output`, `outputMode`, `reads`, `progress`, `skill`, `model`, and `acceptance`. |
 | `concurrency` | number | config or `4` | Top-level parallel concurrency. |
-| `timeoutMs` / `maxRuntimeMs` | number | - | Foreground wall-clock timeout for single, parallel, and chain runs. Timed-out children return `timedOut: true`; async/background runs reject it. |
 | `worktree` | boolean | false | Create isolated git worktrees for parallel tasks. |
-| `chain` | array | - | Sequential, static parallel, and dynamic fanout chain steps. Sequential steps and parallel child tasks support `phase`, `label`, `as`, `outputSchema`, and `acceptance` in addition to the usual execution fields. Dynamic fanout uses `expand`, one child `parallel` template, and `collect`; group-level acceptance is not supported because there is no child session to finalize. |
+| `chain` | array | - | Sequential, static parallel, and dynamic fanout chain steps. Steps and chain parallel tasks support `phase`, `label`, `as`, `outputSchema`, and `acceptance` in addition to the usual execution fields. Dynamic fanout uses `expand`, one child `parallel` template, and `collect`. |
 | `context` | `fresh \| fork` | agent default or `fresh` | `fork` creates real branched sessions from the parent leaf. Packaged `planner`, `worker`, and `oracle` default to `fork`. |
 | `chainDir` | string | temp chain dir | Persistent directory for chain artifacts. |
 | `clarify` | boolean | true for chains | Show TUI preview/edit flow. |
@@ -811,7 +808,7 @@ Agent definitions are not loaded into context by default. Management actions let
 | `includeProgress` | boolean | false | Include full progress in result. |
 | `share` | boolean | false | Upload session export to GitHub Gist. |
 | `sessionDir` | string | derived | Override session log directory. |
-| `acceptance` | object | omitted | Explicit acceptance contract. When present, the child gets a structured contract, then the runtime continues the same session for a bounded self-review/repair loop before evaluating acceptance. |
+| `acceptance` | string/object/false | inferred | Override the run's inferred acceptance gates. Use `"auto"`, `"attested"`, `"checked"`, `"verified"`, `"reviewed"`, or `{ level: "none", reason: "..." }`. |
 `context: "fork"` fails fast when the parent session is not persisted, the current leaf is missing, or the branched child session cannot be created. It never silently downgrades to `fresh`. In multi-agent runs, if any requested agent has `defaultContext: fork` and the launch omits `context`, the whole invocation uses forked context; pass `context: "fresh"` when you intentionally want a fresh run.
@@ -916,19 +913,6 @@ Session directory precedence is: `params.sessionDir`, then `config.defaultSessio
 Controls nested delegation when no inherited `PI_SUBAGENT_MAX_DEPTH` is already in effect. Per-agent `maxSubagentDepth` can tighten the limit for that agent’s child runs, but cannot relax an inherited stricter limit. This applies even to children that explicitly declare `tools: subagent`; at the cap, execution fanout is blocked instead of silently hiding nested work.
-### Agent resource limits
-Set `maxExecutionTimeMs` and `maxTokens` in agent frontmatter or through `subagent({ action: "create" | "update", config })` to bound a specific agent across foreground and async runs.
-```yaml
-maxExecutionTimeMs: 600000
-maxTokens: 50000
-```
-When a limit is reached, the child receives a soft interrupt, the run fails with a clear `Resource limit exceeded...` error, and the result includes `resourceLimitExceeded` with the limit kind, configured limit, and observed token count when available. Resource-limit failures do not trigger fallback model retries. `maxTokens` is best-effort because providers report usage after message events; a child may exceed the exact limit before the runtime can stop it.
-Spawn-count and per-agent child-concurrency quotas are not part of this release; use `maxSubagentDepth` and parallel `concurrency` for those boundaries today.
 ### `intercomBridge`
 ```json
@@ -987,7 +971,7 @@ Debug artifacts live under `{sessionDir}/subagent-artifacts/` or a user-scoped t
 - `{runId}_{agent}.jsonl`
 - `{runId}_{agent}_meta.json`
-Metadata records timing, usage, exit code, final model, attempted models, fallback attempt outcomes, and any resource-limit termination reason.
+Metadata records timing, usage, exit code, final model, attempted models, and fallback attempt outcomes.
 Session files are stored under a per-run session directory. With `context: "fork"`, each child starts with `--session <branched-session-file>` produced from the parent’s current leaf. That is a real session fork, not an injected summary.
@@ -1007,60 +991,33 @@ Async runs write:
 ## Acceptance Gates
-`acceptance` is an explicit contract. Omit it for lightweight runs. Set it on single runs, top-level parallel task items, sequential chain steps, static parallel task items, and dynamic fanout child templates when the child must prove the work meets concrete criteria. Do not set it on static parallel groups or dynamic fanout aggregate groups; those groups do not own a same-session child turn.
-If you are coming from Codex Goals, `acceptance` is the subagent equivalent for one delegated run. When a user says `/goal`, “goal”, “active goal”, “continue until evidence says done”, or “verify against a goal”, translate that into an acceptance contract: `criteria` are the target, `evidence` and `verify` are proof, `stopRules` are constraints, and `maxFinalizationTurns` is the bounded loop budget.
+Every run resolves an effective acceptance policy. Callers may omit `acceptance` for the inferred default, or set it on single runs, top-level parallel task items, chain steps, static parallel tasks, and dynamic fanout templates.
 ```ts
 {
   agent: "worker",
   task: "Implement the fix",
   acceptance: {
+    level: "verified",
     criteria: ["Patch the bug without widening scope"],
     evidence: ["changed-files", "tests-added", "commands-run", "residual-risks", "no-staged-files"],
-    verify: [{ id: "focused", command: "npm test", timeoutMs: 120000 }],
-    maxFinalizationTurns: 3
+    verify: [{ id: "focused", command: "npm test", timeoutMs: 120000 }]
   }
 }
 ```
-When `acceptance` is present, the initial child prompt includes a standardized acceptance section and asks for a fenced `acceptance-report` JSON block. After the child’s initial completion, the runtime continues the same persisted child session with an acceptance finalization prompt. The child can repair omissions in that same session, then must return the final `acceptance-report`. Missing or malformed finalization reports reject the run when the loop limit is reached.
+Accepted levels are `auto`, `none`, `attested`, `checked`, `verified`, and `reviewed`. `acceptance: "auto"` is the default. Read-only reviewer/scout tasks infer lightweight attestation, normal writer tasks infer checked evidence, and async/risky/dynamic writer contexts infer a reviewed gate. To disable gates, prefer `{ level: "none", reason: "..." }`.
-Public acceptance config is evidence-driven. There is no public `level` field and no `acceptance: "checked"` shorthand. Runtime provenance is derived from what actually happened:
+Acceptance provenance is stored separately from child prose:
-- `attested`: the child returned a structured acceptance report.
-- `checked`: runtime structural checks passed, such as required criteria, required evidence, and no staged files.
+- `claimed`: child finished but did not provide structured evidence.
+- `attested`: child returned a structured acceptance report.
+- `checked`: runtime structural checks passed, such as required evidence and no staged files.
 - `verified`: configured runtime verification commands passed. Child-reported command success does not count.
 - `reviewed`: an independent reviewer result is present.
-- `rejected`: attestation, structural checks, verification, review, or finalization failed.
+- `rejected`: attestation, structural checks, verification, or review failed.
-Self-review finalization never counts as `reviewed`, and it never counts as `verified` unless configured runtime verification commands actually pass. The visible child output remains the initial answer; finalization reports and residual risks are stored in the acceptance ledger and async/status details.
-When delegating implementation from a plan or spec, keep the task focused on what to implement and put the definition of done in `acceptance` so the runtime can finalize and evaluate it:
-```ts
-subagent({
-  agent: "worker",
-  async: true,
-  task: "Implement the plan at /Users/me/docs/mcp-alignment-plan.md. Use scout artifacts in ./handoff/ as context. Do not commit the scout artifacts.",
-  acceptance: {
-    criteria: [
-      "Implementation follows /Users/me/docs/mcp-alignment-plan.md",
-      "Plan acceptance checks are addressed",
-      "Scout handoff artifacts are not committed",
-      "Focused validation for changed behavior passes",
-      "Residual risks or skipped checks are reported"
-    ],
-    evidence: ["changed-files", "commands-run", "validation-output", "residual-risks"],
-    verify: [{ id: "focused", command: "npm test -- --runInBand" }],
-    stopRules: [
-      "Do not edit unrelated files",
-      "Stop and report if the plan requires an unapproved product decision"
-    ],
-    maxFinalizationTurns: 3
-  }
-})
-```
+For `attested` or stricter levels, the child prompt includes a standardized acceptance section and asks for a fenced `acceptance-report` JSON block. Explicit failed gates fail the run. Inferred gates are persisted for observability without breaking older calls that omit `acceptance`.
 ## Live progress

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "pi-subagents",
-  "version": "0.28.0",
+  "version": "0.29.0",
   "description": "Pi extension for delegating tasks to subagents with chains, parallel execution, and TUI clarification",
   "author": "Nico Bailon",
   "license": "MIT",

package/skills/pi-subagents/SKILL.md CHANGED

@@ -134,7 +134,7 @@ subagent({
       { agent: "reviewer", phase: "Planning", label: "Scheduler contract", as: "schedulerPlan", task: "Plan fixes for scheduler contract. Inspect the current diff. Do not modify project/source files; returning findings via the configured output artifact is allowed.", output: "plans/scheduler.md", outputMode: "file-only" },
       { agent: "reviewer", phase: "Planning", label: "Sandbox/security", as: "sandboxPlan", task: "Plan fixes for sandbox/security. Inspect the current diff. Do not modify project/source files; returning findings via the configured output artifact is allowed.", output: "plans/sandbox.md", outputMode: "file-only" }
     ], concurrency: 3 },
-    { agent: "worker", phase: "Implementation", label: "Apply accepted fixes", as: "workerResult", task: "Apply only the accepted fixes from these planning summaries. You are the sole writer for the active worktree.\n\nDeploy plan:\n{outputs.deployPlan}\n\nScheduler plan:\n{outputs.schedulerPlan}\n\nSandbox plan:\n{outputs.sandboxPlan}", acceptance: { criteria: ["Accepted fixes from each planning summary are applied", "Focused validation for changed behavior passes", "Changed files, validation commands, failures, and residual risks are reported"], evidence: ["changed-files", "commands-run", "validation-output", "residual-risks"], stopRules: ["Do not expand product scope beyond accepted fixes", "Stop and report if a fix requires an unapproved decision"], maxFinalizationTurns: 3 }, output: "worker/fixes.md", outputMode: "file-only", progress: true },
+    { agent: "worker", phase: "Implementation", label: "Apply accepted fixes", as: "workerResult", task: "Apply only the accepted fixes from these planning summaries. You are the sole writer for the active worktree. Run focused validation and report changed files, commands, failures, and remaining issues.\n\nDeploy plan:\n{outputs.deployPlan}\n\nScheduler plan:\n{outputs.schedulerPlan}\n\nSandbox plan:\n{outputs.sandboxPlan}", output: "worker/fixes.md", outputMode: "file-only", progress: true },
     { parallel: [
       { agent: "reviewer", phase: "Validation", label: "Deploy/scheduler validation", task: "Validate the post-worker diff for deploy and scheduler fixes. Start from the worker result: {outputs.workerResult}. Do not modify project/source files; returning findings via the configured output artifact is allowed.", output: "validation/deploy-scheduler.md", outputMode: "file-only" },
       { agent: "reviewer", phase: "Validation", label: "Sandbox validation", task: "Validate the post-worker diff for sandbox/security fixes. Start from the worker result: {outputs.workerResult}. Do not modify project/source files; returning findings via the configured output artifact is allowed.", output: "validation/sandbox.md", outputMode: "file-only" }
@@ -692,37 +692,7 @@ clarify → validation contract → planner → async worker → parallel async
 The validation contract defines acceptance before code is written: expected behavior, acceptance checks, commands or user flows to exercise, and evidence the worker should return. Keep it lightweight for small tasks, but make it explicit enough that reviewers and validators are checking the intended outcome rather than the worker’s own assumptions.
-Use the structured `acceptance` field when the run should carry an explicit acceptance contract. If omitted, the run stays lightweight. When present, acceptance is object-only: define concrete `criteria`, required `evidence`, optional runtime `verify` commands, optional independent `review`, and optionally `maxFinalizationTurns`. The runtime continues the same child session for a bounded self-review/repair loop before evaluating the final report, so set `acceptance` on single runs, sequential chain steps, parallel task items, and dynamic fanout child templates, not on static parallel or dynamic fanout groups. Do not call a run reviewed just because the worker says it is done; reviewed means a reviewer gate returned a result. Child-reported command success is evidence, not runtime verification.
-Goal-style requests map to `acceptance`. If the user says `/goal`, “goal”, “active goal”, “continue until evidence says done”, or “verify against a goal” for a subagent run, create an explicit run-scoped acceptance contract: `criteria` for the target, `evidence` and `verify` for proof, `stopRules` for constraints, and `maxFinalizationTurns` for the bounded loop budget.
-When launching a writer/worker from a plan, PRD, spec, issue, or broad fix, set structured `acceptance` proactively. Put implementation instructions, plan paths, and handoff artifacts in `task`; put the definition of done in `acceptance.criteria`, proof requirements in `acceptance.evidence` and `acceptance.verify`, constraints in `acceptance.stopRules`, and usually set `maxFinalizationTurns: 3`. Do not bury all validation requirements only in the task prompt.
-Example writer handoff:
-```typescript
-subagent({
-  agent: "worker",
-  async: true,
-  task: "Implement the plan at /Users/me/docs/mcp-alignment-plan.md. Use scout artifacts in ./handoff/ as context. Do not commit the scout artifacts.",
-  acceptance: {
-    criteria: [
-      "Implementation follows /Users/me/docs/mcp-alignment-plan.md",
-      "Plan acceptance checks are addressed",
-      "Scout handoff artifacts are not committed",
-      "Focused validation for changed behavior passes",
-      "Residual risks or skipped checks are reported"
-    ],
-    evidence: ["changed-files", "commands-run", "validation-output", "residual-risks"],
-    verify: [{ id: "focused", command: "npm test -- --runInBand" }],
-    stopRules: [
-      "Do not edit unrelated files",
-      "Stop and report if the plan requires an unapproved product decision"
-    ],
-    maxFinalizationTurns: 3
-  }
-})
-```
+Use the structured `acceptance` field when the run should carry an explicit acceptance contract. If omitted, subagents infer an effective acceptance policy from role, mode, and risk. Use `level: "checked"` for ordinary writer evidence gates, `level: "verified"` when the runtime should run explicit validation commands, and `level: "reviewed"` only when an independent reviewer result is expected. Do not call a run reviewed just because the worker says it is done; reviewed means a reviewer gate returned a result. Child-reported command success is evidence, not runtime verification.
 The first `worker` implements the approved plan. The parent continues with independent inspection or validation prep while it runs, not parallel edits to the same worktree. When the async worker completes, treat its handoff as the transition into review, not as final completion, unless the user explicitly asked for worker-only work, review-only output, or to stop after implementation. Parallel reviewers inspect the resulting diff from fresh context. Validators check behavior with the best available evidence: commands, tests, browser/CLI interaction, screenshots, logs, or manual reproduction notes. The final `worker` applies synthesized review fixes in forked context, then the parent looks over the final diff before completing. The parent may launch these steps as an initial async chain when the workflow is already clear, or as follow-up subagent runs after each async completion. Initial chains should pass `async: true` so the main chat is unblocked; avoid `clarify: true` unless the user asked for foreground clarification. Do not stop after parallel review unless the user explicitly asked for review-only output or the review surfaced a decision that needs approval first.
@@ -751,9 +721,8 @@ subagent({
   agent: "worker",
   task: "Implement the approved feature.\n\nClarified requirements:\n- ...\n\nPlan: see ~/Documents/docs/...-plan.md\n\nValidation contract:\n- ...\n\nReturn a handoff with changed files, what was implemented, what was left undone, commands run with exit codes, validation evidence, surprises/new risks, and decisions needing parent approval.",
   acceptance: {
-    criteria: ["Implement the approved feature without widening scope"],
-    evidence: ["changed-files", "tests-added", "commands-run", "residual-risks", "no-staged-files"],
-    maxFinalizationTurns: 3
+    level: "checked",
+    evidence: ["changed-files", "tests-added", "commands-run", "residual-risks", "no-staged-files"]
   },
   async: true
 })

package/src/agents/agent-management.ts CHANGED

@@ -78,8 +78,8 @@ function parsePackageConfig(value: unknown): { packageName?: string; error?: str
 	return parsePackageName(value, "config.package");
 }
-function allAgents(d: { builtin: AgentConfig[]; user: AgentConfig[]; project: AgentConfig[] }): AgentConfig[] {
-	return [...d.builtin, ...d.user, ...d.project];
+function allAgents(d: { builtin: AgentConfig[]; package: AgentConfig[]; user: AgentConfig[]; project: AgentConfig[] }): AgentConfig[] {
+	return [...d.builtin, ...d.package, ...d.user, ...d.project];
 }
 function availableNames(cwd: string, kind: "agent" | "chain"): string[] {
@@ -116,6 +116,10 @@ function nameExistsInScope(cwd: string, scope: ManagementScope, name: string, ex
 	return false;
 }
+function isMutableSource(source: AgentSource): source is ManagementScope {
+	return source === "user" || source === "project";
+}
 function unknownChainAgents(cwd: string, steps: ChainStepConfig[]): string[] {
 	const d = discoverAgentsAll(cwd);
 	const known = new Set(allAgents(d).map((a) => a.name));
@@ -313,18 +317,6 @@ function applyAgentConfig(target: AgentConfig, cfg: Record<string, unknown>): st
 			target.maxSubagentDepth = cfg.maxSubagentDepth;
 		} else return "config.maxSubagentDepth must be an integer >= 0 or false when provided.";
 	}
-	if (hasKey(cfg, "maxExecutionTimeMs")) {
-		if (cfg.maxExecutionTimeMs === false || cfg.maxExecutionTimeMs === "") target.maxExecutionTimeMs = undefined;
-		else if (typeof cfg.maxExecutionTimeMs === "number" && Number.isInteger(cfg.maxExecutionTimeMs) && cfg.maxExecutionTimeMs >= 1) {
-			target.maxExecutionTimeMs = cfg.maxExecutionTimeMs;
-		} else return "config.maxExecutionTimeMs must be an integer >= 1 or false when provided.";
-	}
-	if (hasKey(cfg, "maxTokens")) {
-		if (cfg.maxTokens === false || cfg.maxTokens === "") target.maxTokens = undefined;
-		else if (typeof cfg.maxTokens === "number" && Number.isInteger(cfg.maxTokens) && cfg.maxTokens >= 1) {
-			target.maxTokens = cfg.maxTokens;
-		} else return "config.maxTokens must be an integer >= 1 or false when provided.";
-	}
 	if (hasKey(cfg, "completionGuard")) {
 		if (typeof cfg.completionGuard !== "boolean") return "config.completionGuard must be a boolean when provided.";
 		target.completionGuard = cfg.completionGuard;
@@ -339,10 +331,10 @@ function resolveTarget<T extends { source: AgentSource; filePath: string }>(
 	cwd: string,
 	scopeHint?: string,
 ): T | AgentToolResult<Details> {
-	const mutable = matches.filter((m) => m.source !== "builtin");
+	const mutable = matches.filter((m): m is T & { source: ManagementScope } => isMutableSource(m.source));
 	if (mutable.length === 0) {
 		if (matches.length > 0) {
-			return result(`${kind === "agent" ? "Agent" : "Chain"} '${name}' is builtin and cannot be modified. Create a same-named ${kind} in user or project scope to override it.`, true);
+			return result(`${kind === "agent" ? "Agent" : "Chain"} '${name}' is read-only and cannot be modified. Create a same-named ${kind} in user or project scope to override it.`, true);
 		}
 		const available = availableNames(cwd, kind);
 		return result(`${kind === "agent" ? "Agent" : "Chain"} '${name}' not found. Available: ${available.join(", ") || "none"}.`, true);
@@ -398,8 +390,6 @@ function formatAgentDetail(agent: AgentConfig): string {
 	if (agent.defaultReads?.length) lines.push(`Reads: ${agent.defaultReads.join(", ")}`);
 	if (agent.defaultProgress) lines.push("Progress: true");
 	if (agent.maxSubagentDepth !== undefined) lines.push(`Max subagent depth: ${agent.maxSubagentDepth}`);
-	if (agent.maxExecutionTimeMs !== undefined) lines.push(`Max execution time: ${agent.maxExecutionTimeMs}ms`);
-	if (agent.maxTokens !== undefined) lines.push(`Max tokens: ${agent.maxTokens}`);
 	if (agent.completionGuard === false) lines.push("Completion guard: false");
 	if (agent.systemPrompt.trim()) lines.push("", "System Prompt:", agent.systemPrompt);
 	return lines.join("\n");
@@ -456,9 +446,9 @@ function formatChainDetail(chain: ChainConfig): string {
 export function handleList(params: ManagementParams, ctx: ManagementContext): AgentToolResult<Details> {
 	const scope = normalizeListScope(params.agentScope) ?? "both";
 	const d = discoverAgentsAll(ctx.cwd);
-	const scopedAgents = allAgents(d).filter((a) => scope === "both" || a.source === "builtin" || a.source === scope).sort((a, b) => a.name.localeCompare(b.name));
+	const scopedAgents = allAgents(d).filter((a) => scope === "both" || a.source === "builtin" || a.source === "package" || a.source === scope).sort((a, b) => a.name.localeCompare(b.name));
 	const agents = scopedAgents.filter((a) => !a.disabled);
-	const chains = d.chains.filter((c) => scope === "both" || c.source === scope).sort((a, b) => a.name.localeCompare(b.name));
+	const chains = d.chains.filter((c) => scope === "both" || c.source === "package" || c.source === scope).sort((a, b) => a.name.localeCompare(b.name));
 	const diagnostics = d.chainDiagnostics.filter((entry) => scope === "both" || entry.source === scope);
 	const lines = [
 		"Executable agents:",

package/src/agents/agent-selection.ts CHANGED

@@ -5,10 +5,12 @@ export function mergeAgentsForScope(
 	userAgents: AgentConfig[],
 	projectAgents: AgentConfig[],
 	builtinAgents: AgentConfig[] = [],
+	packageAgents: AgentConfig[] = [],
 ): AgentConfig[] {
 	const agentMap = new Map<string, AgentConfig>();
 	for (const agent of builtinAgents) agentMap.set(agent.name, agent);
+	for (const agent of packageAgents) agentMap.set(agent.name, agent);
 	if (scope === "both") {
 		for (const agent of userAgents) agentMap.set(agent.name, agent);

package/src/agents/agent-serializer.ts CHANGED

@@ -21,8 +21,6 @@ export const KNOWN_FIELDS = new Set([
 	"defaultProgress",
 	"interactive",
 	"maxSubagentDepth",
-	"maxExecutionTimeMs",
-	"maxTokens",
 	"completionGuard",
 ]);
@@ -73,14 +71,6 @@ export function serializeAgent(config: AgentConfig): string {
 	if (typeof maxSubagentDepth === "number" && Number.isInteger(maxSubagentDepth) && maxSubagentDepth >= 0) {
 		lines.push(`maxSubagentDepth: ${maxSubagentDepth}`);
 	}
-	const maxExecutionTimeMs = config.maxExecutionTimeMs;
-	if (typeof maxExecutionTimeMs === "number" && Number.isInteger(maxExecutionTimeMs) && maxExecutionTimeMs >= 1) {
-		lines.push(`maxExecutionTimeMs: ${maxExecutionTimeMs}`);
-	}
-	const maxTokens = config.maxTokens;
-	if (typeof maxTokens === "number" && Number.isInteger(maxTokens) && maxTokens >= 1) {
-		lines.push(`maxTokens: ${maxTokens}`);
-	}
 	if (config.completionGuard === false) lines.push("completionGuard: false");
 	if (config.extraFields) {