npm - pi-crew - Versions diffs - 0.7.6 → 0.8.1 - Mend

pi-crew 0.7.6 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/CHANGELOG.md +289 -0
package/package.json +1 -1
package/src/agents/agent-config.ts +101 -1
package/src/agents/discover-agents.ts +34 -3
package/src/config/types.ts +8 -0
package/src/errors.ts +9 -0
package/src/extension/context-status-injection.ts +14 -5
package/src/extension/register.ts +4 -18
package/src/extension/registration/compaction-guard.ts +44 -13
package/src/extension/team-tool/handle-settings.ts +2 -0
package/src/runtime/crash-recovery.ts +21 -1
package/src/runtime/live-session-runtime.ts +69 -7
package/src/runtime/model-fallback.ts +39 -1
package/src/runtime/model-scope.ts +141 -0
package/src/runtime/pi-args.ts +21 -6
package/src/runtime/pi-spawn.ts +66 -0
package/src/runtime/skill-instructions.ts +14 -4
package/src/runtime/stale-reconciler.ts +30 -0
package/src/runtime/task-runner.ts +21 -0
package/src/skills/discover-skills.ts +31 -2
package/src/ui/agent-management-overlay.ts +1 -1
package/src/utils/session-utils.ts +30 -0

package/src/extension/registration/compaction-guard.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
 import { listRecentRuns } from "../run-index.ts";
+import { extractSessionId } from "../../utils/session-utils.ts";
 import type { ArtifactDescriptor, TeamRunManifest } from "../../state/types.ts";
 export interface RegisterCompactionGuardOptions {
@@ -71,11 +72,25 @@ function formatCrewArtifactIndex(entries: CrewArtifactIndexEntry[]): string {
 /**
  * Collect in-flight (non-terminal) crew runs that must be resumable after
  * compaction. These are runs the agent was actively working on or awaiting.
+ *
+ * @param cwd - project working directory (shared, per-project state root).
+ * @param currentSessionId - if provided, restrict to runs OWNED BY THIS
+ *   session (`run.ownerSessionId === currentSessionId`). The state store is
+ *   per-PROJECT, not per-SESSION — multiple sessions share `.crew/state/runs/`.
+ *   Without this filter, Session B's compaction would pick up Session A's
+ *   in-flight runs and wrongly resume them. Legacy runs with no
+ *   `ownerSessionId` are excluded under filtering (strict): a run with no
+ *   declared owner must not be auto-resumed by an arbitrary session; true
+ *   orphans are handled separately by crash-recovery. When omitted, returns
+ *   ALL in-flight runs (back-compat for callers that deliberately want the
+ *   cross-session view, e.g. diagnostics).
  */
-export function collectInFlightRuns(cwd: string): TeamRunManifest[] {
-	return listRecentRuns(cwd, MAX_ARTIFACT_INDEX_RUNS).filter((run) =>
-		IN_FLIGHT_RUN_STATUSES.has(run.status),
-	);
+export function collectInFlightRuns(cwd: string, currentSessionId?: string): TeamRunManifest[] {
+	return listRecentRuns(cwd, MAX_ARTIFACT_INDEX_RUNS).filter((run) => {
+		if (!IN_FLIGHT_RUN_STATUSES.has(run.status)) return false;
+		if (currentSessionId === undefined) return true; // no filter → back-compat
+		return run.ownerSessionId === currentSessionId; // strict: legacy ownerless runs excluded
+	});
 }
 /**
@@ -130,29 +145,43 @@ export function buildContinuationPrompt(runs: TeamRunManifest[]): string {
  * Trigger automatic agent continuation after compaction. Fire-and-forget the
  * promise — never block the compaction flow. The sendUserMessage type is
  * declared `void` but the runtime returns a Promise (it triggers an agent turn).
+ *
+ * During compaction the agent may still be mid-processing, so Pi can reject
+ * the queued message with "Agent is already processing a prompt...". This is
+ * BENIGN — the in-flight worker continues independently regardless — so we
+ * detect that specific race and downgrade it to a silent debug log instead of
+ * surfacing a scary warning to the user. Other errors still notify.
  */
 export function triggerContinuation(pi: ExtensionAPI, ctx: ExtensionContext, runs: TeamRunManifest[]): void {
 	if (!runs.length) return;
 	const prompt = buildContinuationPrompt(runs);
+	const isBenignProcessingRace = (err: unknown): boolean => {
+		const msg = err instanceof Error ? err.message : String(err ?? "");
+		return /already processing a prompt/i.test(msg) || /use steer\(\) or followUp\(\)/i.test(msg);
+	};
 	try {
 		const result = pi.sendUserMessage(prompt) as unknown;
-		Promise.resolve(result).catch(() => {
-			// best-effort: if continuation fails, at least notify
+		Promise.resolve(result).catch((err: unknown) => {
+			// Benign race: the worker keeps running independently — no need to alarm.
+			if (isBenignProcessingRace(err)) return;
+			// Real failure: surface a hint so the user can resume manually.
 			try {
 				ctx.ui.notify("pi-crew: auto-continuation after compaction failed — use team status to resume manually.", "warning");
 			} catch {
 				// swallow
 			}
 		});
-	} catch {
+	} catch (err: unknown) {
+		// Synchronous throw — same benign-race handling.
+		if (isBenignProcessingRace(err)) return;
 		// best-effort
 	}
 }
 /** Combined customInstructions injected into proactive compaction summaries. */
-function buildCompactionInstructions(cwd: string): string {
+function buildCompactionInstructions(cwd: string, currentSessionId?: string): string {
 	const artifactIndex = collectCrewArtifactIndex(cwd);
-	const inFlight = collectInFlightRuns(cwd);
+	const inFlight = collectInFlightRuns(cwd, currentSessionId);
 	const parts = [
 		"Prioritize keeping pi-crew run state, task results, artifact references, run IDs, and next actions. Keep completed-task detail concise.",
 	];
@@ -168,10 +197,11 @@ export function registerCompactionGuard(pi: ExtensionAPI, options: RegisterCompa
 	const startCompact = (ctx: ExtensionContext, reason: string): void => {
 		if (compactionInProgress) return;
 		compactionInProgress = true;
-		const customInstructions = buildCompactionInstructions(ctx.cwd);
+		const sessionId = extractSessionId(ctx);
+		const customInstructions = buildCompactionInstructions(ctx.cwd, sessionId);
 		// Append a durable resume entry so it appears in the post-compaction
 		// context regardless of how summarization treats customInstructions.
-		const inFlight = collectInFlightRuns(ctx.cwd);
+		const inFlight = collectInFlightRuns(ctx.cwd, sessionId);
 		if (inFlight.length > 0) {
 			pi.appendEntry("crew:resume-directive", {
 				reason,
@@ -192,7 +222,7 @@ export function registerCompactionGuard(pi: ExtensionAPI, options: RegisterCompa
 				// O10 FIX: Pi's threshold compaction does NOT auto-retry — it
 				// stops and waits for user input. Trigger automatic
 				// continuation so the agent resumes the in-flight crew task.
-				const runs = collectInFlightRuns(ctx.cwd);
+				const runs = collectInFlightRuns(ctx.cwd, extractSessionId(ctx));
 				triggerContinuation(pi, ctx, runs);
 				ctx.ui.notify(reason === "deferred" ? "Deferred compaction completed" : "Auto-compacted context during team run", "info");
 			},
@@ -219,7 +249,8 @@ export function registerCompactionGuard(pi: ExtensionAPI, options: RegisterCompa
 	// our proactive startCompact path.
 	pi.on("session_compact", (_event, ctx) => {
 		try {
-			const inFlight = collectInFlightRuns(ctx.cwd);
+			const sessionId = extractSessionId(ctx);
+			const inFlight = collectInFlightRuns(ctx.cwd, sessionId);
 			if (inFlight.length === 0) return;
 			// Re-append the resume directive entry for durable record.
 			pi.appendEntry("crew:resume-directive", {

package/src/extension/team-tool/handle-settings.ts CHANGED Viewed

@@ -205,6 +205,8 @@ const KNOWN_KEYS = new Set([
 	"reliability.retryPolicy.jitterRatio",
 	"reliability.retryPolicy.exponentialFactor",
 	"reliability.retryPolicy.retryableErrors",
+	// F7: opt-in model scope enforcement (hard-error caller out-of-scope, warn frontmatter).
+	"reliability.scopeModels",
 	// otlp
 	"otlp.enabled",
 	"otlp.endpoint",

package/src/runtime/crash-recovery.ts CHANGED Viewed

@@ -9,7 +9,7 @@ import type { TeamTaskState } from "../state/types.ts";
 import { isWorkerHeartbeatStale } from "./worker-heartbeat.ts";
 import type { ManifestCache } from "./manifest-cache.ts";
 import { checkProcessLiveness } from "./process-status.ts";
-import { reconcileStaleRun, type ReconcileResult } from "./stale-reconciler.ts";
+import { isPlanApprovalPending, reconcileStaleRun, type ReconcileResult } from "./stale-reconciler.ts";
 import { executeHook, appendHookEvent } from "../hooks/registry.ts";
 import { unregisterActiveRun, readActiveRunRegistry } from "../state/active-run-registry.ts";
 import { resolveRealContainedPath } from "../utils/safe-paths.ts";
@@ -38,6 +38,8 @@ export function detectInterruptedRuns(cwd: string, manifestCache: ManifestCache,
 	const plans: RecoveryPlan[] = [];
 	for (const manifest of manifestCache.list(50)) {
 		if (manifest.status !== "running" && manifest.status !== "blocked") continue;
+		// Preserve runs intentionally blocked on plan approval — not crashes.
+		if (isPlanApprovalPending(manifest)) continue;
 		if (manifest.async?.pid !== undefined && checkProcessLiveness(manifest.async.pid).alive) continue;
 		// NOTE: no withRunLock — best-effort only; concurrent writes may cause inconsistency
 		const loaded = loadRunManifestById(cwd, manifest.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
@@ -107,6 +109,12 @@ export function cancelOrphanedRuns(
 	// Phase 1: Scan project-level manifests via manifestCache
 	for (const manifest of manifestCache.list(50)) {
 		if (manifest.status !== "running" && manifest.status !== "blocked") continue;
+		// Preserve plan-approval-blocked runs — they belong to their owner and are
+		// waiting on a human decision, not orphaned by a dead owner process.
+		if (isPlanApprovalPending(manifest)) {
+			skipped.push(manifest.runId);
+			continue;
+		}
 		// Only consider runs owned by a different session
 		const ownerId = manifest.ownerSessionId;
@@ -340,6 +348,18 @@ export function reconcileAllStaleRuns(cwd: string, manifestCache: ManifestCache,
 			// Re-read inside lock to get freshest data
 			const fresh = loadRunManifestById(cwd, runId); // NOTE: inside withRunLockSync - consistent read
 			if (!fresh || (fresh.manifest.status !== "running" && fresh.manifest.status !== "blocked")) return;
+			// Belt-and-suspenders: reconcileStaleRun itself guards this, but the run
+			// may have flipped to blocked+plan-approval between cache-list and lock
+			// acquisition — re-check the freshest manifest under the lock.
+			if (isPlanApprovalPending(fresh.manifest)) {
+				results.push({
+					runId,
+					verdict: "blocked_awaiting_approval",
+					repaired: false,
+					detail: "Plan approval is pending; stale reconciliation skipped",
+				});
+				return;
+			}
 			const result = reconcileStaleRun(fresh.manifest, fresh.tasks, now);
 			if (result.repaired || result.verdict === "result_exists") {
 				if (result.repairedTasks) {

package/src/runtime/live-session-runtime.ts CHANGED Viewed

@@ -15,6 +15,9 @@ import type { WorkflowStep } from "../workflows/workflow-config.ts";
 import { isLiveSessionRuntimeAvailable } from "./runtime-resolver.ts";
 import { redactSecrets } from "../utils/redaction.ts";
 import { buildConfiguredModelRouting } from "./model-fallback.ts";
+import { readEnabledModelsPatterns } from "./model-scope.ts";
+import { resolveToolPolicy } from "../agents/agent-config.ts";
+import { loadConfig } from "../config/config.ts";
 import { DEFAULT_LIVE_SESSION } from "../config/defaults.ts";
 import { buildYieldReminder, hasYieldInOutput, isYieldEvent, extractYieldResult, validateYieldData, DEFAULT_YIELD_CONFIG, type YieldResult } from "./yield-handler.ts";
 import { buildMcpProxyFromSession } from "./mcp-proxy.ts";
@@ -28,6 +31,30 @@ import { buildSensitivePathConstraint } from "./sensitive-paths.ts";
 import { collectLiveSessionHealth, formatLiveSessionDiagnostics, type LiveSessionHealth } from "./live-session-health.ts";
 import { listLiveAgents } from "./live-agent-manager.ts";
+/**
+ * Module-scoped latch for the optional peer dependency import. When N
+ * in-process live-session subagents spawn CONCURRENTLY (e.g. several
+ * `Agent({run_in_background:true})` started at once), each used to call
+ * `await import("@earendil-works/pi-coding-agent")` independently. Under the
+ * tsx loader (registering load/resolve hooks), concurrent first-imports can
+ * each enter the loader and race module-record instantiation, yielding
+ * `Cannot read properties of undefined (reading 'existsSync')` /
+ * `'validateWorkflowForTeam'` as namespace bindings observed mid-evaluation.
+ * Sequential retries always succeed, so this is a cold-start race, not a logic
+ * bug. ESM engines memoize imports, but that memoization is not guaranteed
+ * to be observed synchronously across concurrent evaluation under transpiling
+ * loaders, so we add an explicit JS-level latch: the first caller wins, every
+ * later caller awaits the same in-flight promise. (Observed 2026-06-16 when 4
+ * explorer subagents launched together; 3 of 4 crashed.)
+ *
+ * The latch only memoizes SUCCESS. If the shared import rejects, the latch is
+ * cleared so the next caller starts a fresh import instead of receiving the
+ * same cached rejection for the rest of the process lifetime. Callers that
+ * were already awaiting the failed promise still observe the original error.
+ */
+let liveSessionModulePromise: Promise<LiveSessionModule> | undefined;
+/**
+ * Load the optional `@earendil-works/pi-coding-agent` peer dependency through
+ * the module-scoped latch.
+ *
+ * @returns the shared in-flight (or already settled) import promise.
+ * @throws (asynchronously) whatever the dynamic import rejects with.
+ */
+function loadLiveSessionModule(): Promise<LiveSessionModule> {
+	if (!liveSessionModulePromise) {
+		const pending: Promise<LiveSessionModule> = (import("@earendil-works/pi-coding-agent") as unknown as Promise<LiveSessionModule>).catch((err: unknown) => {
+			// Only clear the latch if it still points at THIS attempt; a newer
+			// attempt started after the failure must not be discarded.
+			if (liveSessionModulePromise === pending) liveSessionModulePromise = undefined;
+			throw err;
+		});
+		liveSessionModulePromise = pending;
+	}
+	return liveSessionModulePromise;
+}
 export interface LiveSessionSpawnInput {
 	manifest: TeamRunManifest;
 	task: TeamTaskState;
@@ -179,6 +206,25 @@ function numberField(obj: Record<string, unknown> | undefined, keys: string[]):
 	return undefined;
 }
+/**
+ * F7: produce the model allowlist patterns that the routing gate should
+ * enforce for `cwd`.
+ *
+ * The `reliability.scopeModels` value is read from `loadConfig(cwd).config`;
+ * only a literal `true` turns enforcement on. An empty array is returned when
+ * the toggle is not `true` or when loading the config throws, and the routing
+ * gate treats an empty pattern list as "no enforcement". When the toggle is
+ * on, the result of `readEnabledModelsPatterns(cwd, agentDir)` is returned
+ * as-is.
+ *
+ * @param cwd - project directory handed to `loadConfig` and the allowlist reader.
+ * @param agentDir - optional agent directory forwarded to the allowlist reader.
+ * @returns the allowlist patterns, or `[]` when the gate should be a no-op.
+ */
+async function resolveScopeModelsPatterns(cwd: string, agentDir?: string): Promise<string[]> {
+	let toggleOn: boolean;
+	try {
+		toggleOn = loadConfig(cwd).config.reliability?.scopeModels === true;
+	} catch {
+		// Best-effort: an unreadable config disables the gate rather than blocking spawn.
+		return [];
+	}
+	return toggleOn ? readEnabledModelsPatterns(cwd, agentDir) : [];
+}
 function modelFromRegistry(modelRegistry: unknown, modelId: string | undefined): unknown {
 	if (!modelId || !modelId.includes("/")) return undefined;
 	const registry = asRecord(modelRegistry);
@@ -298,11 +344,17 @@ function liveSystemPrompt(input: LiveSessionSpawnInput): string {
 	].filter(Boolean).join("\n");
 }
-function filterActiveTools(session: LiveSessionLike, agent: AgentConfig): void {
+function filterActiveTools(session: LiveSessionLike, agent: AgentConfig, role?: string): void {
 	if (typeof session.getActiveToolNames !== "function" || typeof session.setActiveToolsByName !== "function") return;
 	const recursiveTools = new Set(["team", "Team", "Agent", "get_subagent_result", "steer_subagent"]);
-	const disallowed = agent.disallowedTools?.length ? new Set(agent.disallowedTools) : undefined;
-	const allowed = agent.tools?.length ? new Set(agent.tools) : undefined;
+	// F1 unify (v0.8.0): use the shared resolveToolPolicy so this path agrees
+	// with child-pi (pi-args.ts). Before this, live-session used frontmatter
+	// only and ignored role-config entirely — so a builtin explorer on the
+	// live-session path wasn't bound by the role's read-only security constraint.
+	// Now allowlist precedence is source-aware and the denylist is additive.
+	const policy = resolveToolPolicy(agent, role);
+	const disallowed = policy.excludeTools?.length ? new Set(policy.excludeTools) : undefined;
+	const allowed = policy.tools?.length ? new Set(policy.tools) : undefined;
 	const active = session.getActiveToolNames().filter((name) => !recursiveTools.has(name) && (!disallowed || !disallowed.has(name)) && (!allowed || allowed.has(name)));
 	session.setActiveToolsByName(active);
 }
@@ -369,8 +421,11 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
 	}
 	const availability = await isLiveSessionRuntimeAvailable();
 	if (!availability.available) return { available: true, exitCode: 1, stdout: "", stderr: availability.reason ?? "Live-session runtime unavailable.", jsonEvents: 0, error: availability.reason };
-	// LAZY: optional peer dependency — only loaded when live-session runtime is chosen.
-	const mod = await import("@earendil-works/pi-coding-agent") as unknown as LiveSessionModule;
+	// LAZY: optional peer dependency — only loaded when live-session runtime is
+	// chosen. Goes through the module-scoped latch (loadLiveSessionModule) so
+	// concurrent first-imports share ONE in-flight promise instead of racing
+	// module-record instantiation under the tsx loader.
+	const mod = await loadLiveSessionModule();
 	if (typeof mod.createAgentSession !== "function") return { available: true, exitCode: 1, stdout: "", stderr: "createAgentSession export is unavailable.", jsonEvents: 0, error: "createAgentSession export is unavailable." };
 	let session: LiveSessionLike | undefined;
 	let unsubscribe: (() => void) | undefined;
@@ -393,6 +448,13 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
 	try {
 		const agentDir = typeof mod.getAgentDir === "function" ? mod.getAgentDir() : undefined;
 		let resourceLoader: unknown;
+		// F1 (v0.7.9) NOTE: `agent.excludeExtensions` is applied on the
+		// child-pi path (see `pi-args.ts`). The live-session path loads
+		// extensions via pi's `DefaultResourceLoader`, which has no explicit
+		// per-extension allow/deny API at the point we hand off. For
+		// v0.7.9, the denylist is honored on the default async path only;
+		// the live-session path (opt-in via `runtime.preferLiveSession`)
+		// ignores it. This is a documented limitation, not a silent bug.
 		if (mod.DefaultResourceLoader && agentDir) {
 			resourceLoader = new mod.DefaultResourceLoader({
 				cwd: input.task.cwd,
@@ -405,7 +467,7 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
 			});
 			await (resourceLoader as { reload?: () => Promise<void> }).reload?.();
 		}
-		const modelRouting = buildConfiguredModelRouting({ overrideModel: input.modelOverride, stepModel: input.step.model, teamRoleModel: input.teamRoleModel, agentModel: input.agent.model, fallbackModels: input.agent.fallbackModels, parentModel: input.parentModel, modelRegistry: input.modelRegistry, cwd: input.manifest.cwd });
+		const modelRouting = buildConfiguredModelRouting({ overrideModel: input.modelOverride, stepModel: input.step.model, teamRoleModel: input.teamRoleModel, agentModel: input.agent.model, fallbackModels: input.agent.fallbackModels, parentModel: input.parentModel, modelRegistry: input.modelRegistry, cwd: input.manifest.cwd, scopeModelsPatterns: await resolveScopeModelsPatterns(input.manifest.cwd) });
 		const resolvedModel = modelFromRegistry(input.modelRegistry, modelRouting.candidates[0] ?? modelRouting.requested) ?? input.parentModel;
 		// Phase 4: MCP proxy — will be determined after session creation
 		// (we check parent's MCP tools and share connections when available)
@@ -434,7 +496,7 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
 		});
 		session = created.session;
 		appendEvent(input.manifest.eventsPath, { type: "live-session.session_created", runId: input.manifest.runId, taskId: input.task.id, data: { elapsedMs: Date.now() - sessionCreateStart, modelFallbackMessage: created.modelFallbackMessage } });
-		filterActiveTools(session, input.agent);
+		filterActiveTools(session, input.agent, input.task.role);
 		// Diagnostic: log before bindExtensions so we can identify extension-loading hangs
 		const bindExtensionsStart = Date.now();

package/src/runtime/model-fallback.ts CHANGED Viewed

@@ -1,6 +1,8 @@
 import * as fs from "node:fs";
 import * as os from "node:os";
 import * as path from "node:path";
+import { errors } from "../errors.ts";
+import { checkModelScope } from "./model-scope.ts";
 import { fuzzyResolveModelId } from "./model-resolver.ts";
 export interface AvailableModelInfo {
@@ -241,6 +243,15 @@ export interface ConfiguredModelRouting {
 	requested?: string;
 	candidates: string[];
 	reason?: string;
+	/**
+	 * F7 scope gate verdict. Populated when the caller passed `scopeModelsPatterns`.
+	 * - `inScope: true` → the resolved model is inside the allowlist (or no allowlist).
+	 * - `inScope: false, source: "caller"` → caller override is out-of-scope; the
+	 *   function throws `errors.modelOutOfScope` (hard error before spawn) UNLESS
+	 *   the caller marked it as a frontmatter override (`isFrontmatterOverride: true`),
+	 *   in which case the verdict is returned for the caller to log as a warning.
+	 */
+	scopeVerdict?: import("./model-scope.ts").ModelScopeCheck;
 }
 export function buildConfiguredModelRouting(input: {
@@ -252,6 +263,19 @@ export function buildConfiguredModelRouting(input: {
 	parentModel?: unknown;
 	modelRegistry?: unknown;
 	cwd?: string;
+	/**
+	 * F7: when set, enforce the enabledModels allowlist. Caller-supplied out-of-
+	 * scope models throw `errors.modelOutOfScope`; frontmatter-pinned out-of-scope
+	 * models are returned as a `scopeVerdict` for the caller to log.
+	 */
+	scopeModelsPatterns?: string[];
+	/**
+	 * F7: when true, the `overrideModel` (if any) is treated as a frontmatter
+	 * (agent) override rather than a per-spawn caller override — out-of-scope
+	 * is a warning, not a hard error. Used when the agent config is the
+	 * authoritative source.
+	 */
+	isFrontmatterOverride?: boolean;
 }): ConfiguredModelRouting {
 	const registryModels = availableModelInfosFromRegistry(input.modelRegistry);
 	const configModels = configuredModelInfosFromPiConfig(input.cwd);
@@ -275,7 +299,21 @@ export function buildConfiguredModelRouting(input: {
 		: candidates.length > 1
 			? "configured Pi fallback chain"
 			: undefined;
-	return { requested, candidates, reason };
+	// F7 scope gate: when `scopeModelsPatterns` is configured, check the
+	// resolved model. Caller-supplied (override/step/team role) out-of-scope
+	// is a HARD ERROR (we surface it via the verdict AND throw, so spawn aborts
+	// before any cost is incurred). Frontmatter-pinned out-of-scope is a
+	// WARNING returned on the verdict for the caller to log.
+	let scopeVerdict: ConfiguredModelRouting["scopeVerdict"];
+	if (input.scopeModelsPatterns && input.scopeModelsPatterns.length > 0) {
+		const resolved = candidates[0] ?? requested;
+		const source = input.overrideModel ? "caller" : input.agentModel ? "frontmatter" : "resolved";
+		scopeVerdict = checkModelScope(resolved, input.scopeModelsPatterns, source);
+		if (!scopeVerdict.inScope && source === "caller" && !input.isFrontmatterOverride) {
+			throw errors.modelOutOfScope(resolved ?? "", input.scopeModelsPatterns);
+		}
+	}
+	return { requested, candidates, reason, scopeVerdict };
 }
 export function buildConfiguredModelCandidates(input: Parameters<typeof buildConfiguredModelRouting>[0]): string[] {

package/src/runtime/model-scope.ts ADDED Viewed

@@ -0,0 +1,141 @@
+/**
+ * model-scope.ts — Opt-in model-scope enforcement (F7).
+ *
+ * When `runtime.reliability.scopeModels` is enabled, subagent model choices
+ * that fall outside the user's pi `enabledModels` allowlist are flagged:
+ *   - Caller-supplied (per-spawn override / step / team role) out-of-scope
+ *     → HARD ERROR to orchestrator (fail fast before spawn).
+ *   - Frontmatter-pinned (AgentConfig.model) out-of-scope
+ *     → WARNING + runs anyway (frontmatter is authoritative; the agent
+ *     author made a deliberate choice).
+ *
+ * Pattern semantics match pi's `--models` CLI / `enabledModels` allowlist:
+ *   - `"anthropic/claude-opus-4-5"` — exact match (case-insensitive).
+ *   - `"claude-*"`, `"*sonnet*"`, `"github-copilot/*"` — glob (single `*`).
+ *   - Any other string — case-insensitive substring fallback (pi's
+ *     `tryMatchModel` behavior, model-resolver.ts).
+ *
+ * The matching helpers in this module are pure (no I/O, no globals). The one
+ * exception is `readEnabledModelsPatterns`, which dynamically imports pi's SDK
+ * and reads `SettingsManager.create(cwd, agentDir).getEnabledModels()` on a
+ * best-effort basis.
+ *
+ * The toggle itself lives in `config/defaults.ts` (`reliability.scopeModels`,
+ * default `false` = opt-in, fully back-compat).
+ */
+/** Label describing where a checked model id came from. */
+export type ModelScopeSource = "caller" | "frontmatter" | "resolved" | "fallback";
+/** Verdict returned by `checkModelScope` for a single model id. */
+export interface ModelScopeCheck {
+	/** True when the model is in scope, no allowlist is configured, or no model id was given. */
+	inScope: boolean;
+	/**
+	 * What the model came from. Informational only: `checkModelScope` copies it
+	 * through unchanged, and the caller decides whether an out-of-scope verdict
+	 * is a hard error or a warning.
+	 */
+	source: ModelScopeSource;
+	/** The model id that was checked (empty string when none was given). */
+	model: string;
+	/**
+	 * The first allowlist pattern that matched. Undefined when the model is out
+	 * of scope, when no model id was given, or when no allowlist was configured.
+	 */
+	matchedPattern?: string;
+	/**
+	 * Human-readable explanation. Set for out-of-scope verdicts (caller-facing
+	 * when rejected) and for the "no model specified" case.
+	 */
+	reason?: string;
+}
+/**
+ * Convert a glob pattern with `*` wildcards into a RegExp.
+ * Escape all regex meta-characters except `*`, which becomes `.*`.
+ * Anchored (^...$) and case-insensitive, so the WHOLE input must match the
+ * glob: `claude-*` accepts `claude-opus` but not `not-claude-opus`.
+ *
+ * @param pattern - glob text; every `*` is treated as a wildcard.
+ * @returns an anchored, case-insensitive RegExp equivalent to the glob.
+ */
+export function patternToRegExp(pattern: string): RegExp {
+	const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
+	// Without the anchors a glob degrades into a substring search
+	// (e.g. `gpt-4*o` would accept `gpt-4o-mini`).
+	return new RegExp(`^${escaped.replace(/\*/g, ".*")}$`, "i");
+}
+/**
+ * Does a model id match a single allowlist pattern?
+ * Semantics (in order):
+ *   1. Exact case-insensitive match.
+ *   2. Glob match (pattern contains `*`): the anchored glob is tested against
+ *      the full id and, when the id has a `provider/` prefix, against the part
+ *      after the first `/` as well, so `claude-*` still accepts
+ *      `anthropic/claude-opus-4-5` while `github-copilot/*` keeps matching on
+ *      the provider.
+ *   3. Case-insensitive substring match (pi's fallback) for patterns without `*`.
+ * Returns true on first hit; false otherwise.
+ *
+ * @param modelId - model id to test, usually `provider/model`.
+ * @param pattern - one allowlist entry.
+ * @returns whether the id is accepted by the pattern; blank inputs never match.
+ */
+export function matchesModelPattern(modelId: string, pattern: string): boolean {
+	if (!modelId || !pattern) return false;
+	const id = modelId.trim();
+	const pat = pattern.trim();
+	if (!id || !pat) return false;
+	const lowerId = id.toLowerCase();
+	const lowerPat = pat.toLowerCase();
+	if (lowerId === lowerPat) return true;
+	if (pat.includes("*")) {
+		let glob: RegExp;
+		try {
+			glob = patternToRegExp(pat);
+		} catch {
+			// An uncompilable pattern can never match.
+			return false;
+		}
+		if (glob.test(id)) return true;
+		// Retry against the provider-less id so bare-model globs keep working.
+		const slash = id.indexOf("/");
+		return slash >= 0 && glob.test(id.slice(slash + 1));
+	}
+	return lowerId.includes(lowerPat);
+}
+/**
+ * Report whether at least one allowlist pattern accepts the model id.
+ * A missing model id or a missing/empty pattern list yields false; callers
+ * interpret the empty-list case as "no scope configured".
+ *
+ * @param modelId - model id to test.
+ * @param patterns - allowlist entries.
+ * @returns true on the first pattern accepted by `matchesModelPattern`.
+ */
+export function isModelInScope(modelId: string | undefined, patterns: readonly string[] | undefined): boolean {
+	if (!modelId) return false;
+	if (!patterns || patterns.length === 0) return false;
+	for (const candidate of patterns) {
+		if (matchesModelPattern(modelId, candidate)) return true;
+	}
+	return false;
+}
+/**
+ * Produce a scope verdict for one model id.
+ *
+ * A missing model id and a missing/empty allowlist both yield `inScope: true`
+ * so callers can treat the gate as a no-op; only the missing-model case
+ * carries a `reason`. Otherwise the first pattern accepted by
+ * `matchesModelPattern` is reported as `matchedPattern`, and when none
+ * matches the verdict is `inScope: false` with a `reason` listing the
+ * allowlist.
+ *
+ * @param modelId - model id to check.
+ * @param patterns - allowlist entries.
+ * @param source - where the model came from; copied onto the verdict.
+ * @returns the verdict for this model.
+ */
+export function checkModelScope(
+	modelId: string | undefined,
+	patterns: readonly string[] | undefined,
+	source: ModelScopeSource,
+): ModelScopeCheck {
+	if (!modelId) return { inScope: true, source, model: "", reason: "no model specified" };
+	const allowlist = patterns ?? [];
+	// Nothing configured means nothing to enforce against.
+	if (allowlist.length === 0) return { inScope: true, source, model: modelId };
+	const matchedPattern = allowlist.find((candidate) => matchesModelPattern(modelId, candidate));
+	if (matchedPattern !== undefined) return { inScope: true, source, model: modelId, matchedPattern };
+	return {
+		inScope: false,
+		source,
+		model: modelId,
+		reason: `model "${modelId}" is not in enabledModels allowlist (${allowlist.join(", ")})`,
+	};
+}
+/**
+ * Read the user's `enabledModels` allowlist from pi's SettingsManager.
+ * Returns an empty array when the pi SDK or its `SettingsManager.create`
+ * export is unavailable, the allowlist is unset, or any error occurs
+ * (best-effort, never throws). Entries that are not non-blank strings are
+ * dropped so a malformed settings value cannot crash the matcher downstream.
+ * The caller should still gate on `runtime.reliability.scopeModels`; an empty
+ * patterns array is a no-op (nothing to enforce against).
+ *
+ * Not pure: this performs a dynamic import and lets pi read its settings.
+ *
+ * @internal Only the runtime spawn layers should call this.
+ * @param cwd - project directory passed to `SettingsManager.create`.
+ * @param agentDir - optional agent directory passed to `SettingsManager.create`.
+ * @returns the usable allowlist patterns, possibly empty.
+ */
+export async function readEnabledModelsPatterns(cwd: string, agentDir?: string): Promise<string[]> {
+	try {
+		// Same dynamic-import bridge to pi's SDK that live-session-runtime.ts
+		// uses. SettingsManager is dynamically imported because the module
+		// shape differs across pi versions; the create() factory is the
+		// canonical, version-stable entry point.
+		const mod = await import("@earendil-works/pi-coding-agent" as string).catch(() => null);
+		if (!mod) return [];
+		const SettingsManagerCtor = (mod as { SettingsManager?: { create?: (cwd: string, agentDir?: string) => { getEnabledModels?: () => string[] | undefined } } }).SettingsManager;
+		if (!SettingsManagerCtor?.create) return [];
+		const sm = SettingsManagerCtor.create(cwd, agentDir);
+		// Treat the settings value as untrusted: the declared type is only a hint.
+		const patterns: unknown = sm.getEnabledModels?.();
+		if (!Array.isArray(patterns)) return [];
+		return patterns.filter((entry): entry is string => typeof entry === "string" && entry.trim().length > 0);
+	} catch {
+		return [];
+	}
+}

package/src/runtime/pi-args.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import * as os from "node:os";
 import * as path from "node:path";
 import { fileURLToPath } from "node:url";
 import type { AgentConfig } from "../agents/agent-config.ts";
-import { getAgentSessionOptions } from "../agents/agent-config.ts";
+import { resolveToolPolicy } from "../agents/agent-config.ts";
 import { userPiRoot } from "../utils/paths.ts";
 const THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh"];
@@ -257,10 +257,17 @@ export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerA
 	}
 	// Apply role-based tool restrictions (from role-tools.ts)
-	// Role-specific config takes precedence over agent-defined tools
-	const toolConfig = input.role ? getAgentSessionOptions(input.role) : {};
-	const explicitTools = toolConfig.tools ?? input.agent.tools;
-	const excludeTools = toolConfig.excludeTools;
+	// F1 unify (v0.8.0): the tool policy is resolved by the shared
+	// `resolveToolPolicy` helper (same code as the live-session path), so the
+	// two spawn paths agree. Before this, child-pi used role-config
+	// authoritative and ignored `agent.disallowedTools`; live-session used
+	// frontmatter authoritative and ignored role-config. Now:
+	//   - allowlist precedence is source-aware (builtin → role authoritative;
+	//     user/project → frontmatter authoritative)
+	//   - denylist is additive (role excludeTools + agent disallowedTools merged)
+	const policy = resolveToolPolicy(input.agent, input.role);
+	const explicitTools = policy.tools;
+	const excludeTools = policy.excludeTools;
 	if (explicitTools?.length) args.push("--tools", explicitTools.join(","));
 	if (excludeTools?.length) args.push("--exclude-tools", excludeTools.join(","));
@@ -268,7 +275,15 @@ export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerA
 	// User extensions in ~/.pi/agent/extensions/ may fail due to missing dependencies.
 	args.push("--no-extensions");
 	if (input.agent.extensions !== undefined) {
-		for (const extension of [PROMPT_RUNTIME_EXTENSION_PATH, ...input.agent.extensions]) args.push("--extension", extension);
+		// F1 (v0.7.9): apply `excludeExtensions` denylist (case-insensitive
+		// basename match) BEFORE the trusted PROMPT_RUNTIME_EXTENSION_PATH is
+		// prepended. The prompt-runtime is a pi-crew internal and is never
+		// excludable. Unknown names in the denylist are tolerated (logged
+		// would be nice but this path is sync and minimal — keeping parity
+		// with the rest of the agent loader's best-effort semantics).
+		const excluded = new Set((input.agent.excludeExtensions ?? []).map((name) => path.basename(name).toLowerCase()));
+		const allowed = input.agent.extensions.filter((ext) => !excluded.has(path.basename(ext).toLowerCase()));
+		for (const extension of [PROMPT_RUNTIME_EXTENSION_PATH, ...allowed]) args.push("--extension", extension);
 	} else {
 		args.push("--extension", PROMPT_RUNTIME_EXTENSION_PATH);
 	}

package/src/runtime/pi-spawn.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import * as fs from "node:fs";
 import * as os from "node:os";
+import { execSync } from "node:child_process";
 import { fileURLToPath } from "node:url";
 import * as path from "node:path";
@@ -118,6 +119,63 @@ function findPiPackageJsonFrom(startDir: string): string | undefined {
 	return undefined;
 }
+/**
+ * Discover the real npm global node_modules directory at runtime.
+ *
+ * Why this exists (Issue #33): on Windows, pi may be installed somewhere
+ * other than %APPDATA%\npm — e.g. nvm-windows puts the global node_modules
+ * under %NVM_HOME%/<version>/node_modules, Volta under
+ * %LOCALAPPDATA%\Volta, fnm under %LOCALAPPDATA%\fnm_multishells. The static
+ * %APPDATA%\npm paths in resolvePiCliScript() miss all of those, and the
+ * fallback spawn("pi") then fails with ENOENT because child_process.spawn does
+ * NOT do PATHEXT resolution on Windows (only exec/execSync via cmd.exe do).
+ *
+ * `npm root -g` is the canonical way to find the global node_modules dir and
+ * works across every npm-based install layout. We run it via execSync, which
+ * DOES resolve `npm.cmd` through PATHEXT. Capped at 5s; any failure (npm not
+ * on PATH, slow start, etc.) just falls through to the other resolution roots.
+ *
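+ * Memoized: the npm global root does not change during a process lifetime, so
+ * a successful lookup is a one-time ~200ms cost rather than per-worker. A
+ * failed lookup is NOT cached (the memo stays null), so the next call runs
+ * `npm root -g` again.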
+ *
+ * @internal — exported for unit-test injection via __setNpmGlobalRootForTest.
+ */
+let cachedNpmGlobalRoot: string | undefined | null = null;
+export function resolveNpmGlobalRoot(): string | undefined {
+	if (cachedNpmGlobalRoot !== null) {
+		return cachedNpmGlobalRoot ?? undefined;
+	}
+	let resolved: string | undefined;
+	try {
+		const out = execSync("npm root -g", {
+			encoding: "utf-8",
+			timeout: 5000,
+			stdio: ["pipe", "pipe", "pipe"], // suppress npm's stderr chatter
+			windowsHide: true,
+		}).trim();
+		resolved = out.length > 0 ? out : undefined;
+	} catch {
+		resolved = undefined;
+	}
+	cachedNpmGlobalRoot = resolved ?? null;
+	return resolved;
+}
+/**
+ * Given an npm global node_modules root, derive the candidate package dirs for
+ * each supported pi scope. Pure + exported so the mapping is unit-testable
+ * without spawning npm.
+ * @internal
+ */
+export function buildNpmGlobalPackageDirs(npmGlobalRoot: string): string[] {
+	return PI_PACKAGE_NAMES.map((pkgName) => path.join(npmGlobalRoot, ...pkgName.split("/")));
+}
+/**
+ * @internal — test hook: inject a fake global root. Passing undefined clears
+ * the memo instead, so the next resolveNpmGlobalRoot() call re-runs `npm root -g`.
+ */
+export function __setNpmGlobalRootForTest(root: string | undefined): void {
+	cachedNpmGlobalRoot = root ?? null;
+}
 function resolvePiCliScript(): string | undefined {
 	const argv1 = process.argv[1];
 	if (argv1) {
@@ -125,8 +183,16 @@ function resolvePiCliScript(): string | undefined {
 		if (isRunnableNodeScript(argvPath)) return argvPath;
 	}
+	// npm-global package dirs derived from `npm root -g` — placed BEFORE the
+	// %APPDATA%\npm static paths and the cwd/import.meta fallbacks so that a pi
+	// install under nvm-windows / Volta / fnm is found even when %APPDATA%\npm
+	// doesn't contain it. Covers Issue #33.
+	const npmGlobalRoot = resolveNpmGlobalRoot();
+	const npmGlobalDirs = npmGlobalRoot ? buildNpmGlobalPackageDirs(npmGlobalRoot) : [];
 	const roots = [
 		resolvePiPackageRoot(),
+		...npmGlobalDirs,
 		process.env.APPDATA ? path.join(process.env.APPDATA, "npm", "node_modules", "@earendil-works", "pi-coding-agent") : undefined,
 		process.env.APPDATA ? path.join(process.env.APPDATA, "npm", "node_modules", "@mariozechner", "pi-coding-agent") : undefined,
 		path.dirname(fileURLToPath(import.meta.url)),