npm - @themoltnet/pi-extension - Versions diffs - 0.16.2 → 0.18.0 - Mend

@themoltnet/pi-extension 0.16.2 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -19,6 +19,7 @@ import { TObject } from '@sinclair/typebox';
 import { ToolDefinition } from '@earendil-works/pi-coding-agent';
 import { TOptional } from '@sinclair/typebox';
 import { TRecord } from '@sinclair/typebox';
+import { TSchema } from '@sinclair/typebox';
 import { TString } from '@sinclair/typebox';
 import { TUnion } from '@sinclair/typebox';
 import { TUnknown } from '@sinclair/typebox';
@@ -32,15 +33,19 @@ import { WriteOperations } from '@earendil-works/pi-coding-agent';
 export declare function activateAgentEnv(agentEnv: Record<string, string | undefined>, repoRoot: string): void;
 /**
- * Construct an in-memory `AgentSession`. The caller is responsible for
- * eventually invoking `session.prompt(...)` and for tearing down — the
- * helper does no lifecycle management beyond construction.
+ * Construct an `AgentSession`. By default it is in-memory; callers may opt
+ * parent sessions into daemon-owned file persistence via `sessionPersistence`.
+ * The caller is responsible for eventually invoking `session.prompt(...)` and
+ * for tearing down — the helper does no lifecycle management beyond
+ * construction.
  */
 export declare function buildAgentSession(args: BuildAgentSessionArgs): Promise<AgentSession>;
 declare interface BuildAgentSessionArgs {
     /** Host directory mounted at /workspace inside the VM. */
     mountPath: string;
+    /** Host working directory where the agent session should start. */
+    cwdPath: string;
     /** pi auth directory (resolved from `PI_CODING_AGENT_DIR` or `~/.pi/agent`). */
     piAuthDir: string;
     /** Resolved pi model handle (provider + model id). */
@@ -56,6 +61,13 @@ declare interface BuildAgentSessionArgs {
     otelSpanAttrs: Record<string, string | number | boolean>;
     /** Agent name for `gen_ai.agent.name` on the root span. */
     agentName: string;
+    /**
+     * Parent sessions may persist their conversation history in a daemon-owned
+     * directory. Subagents should leave this unset and stay in-memory.
+     */
+    sessionPersistence?: {
+        sessionDir: string;
+    };
 }
 declare interface ClaimedTask {
@@ -124,6 +136,8 @@ export declare function createSubagentTool(args: CreateSubagentToolArgs): Subage
 export declare interface CreateSubagentToolArgs {
     /** Host directory mounted at /workspace inside the VM. */
     mountPath: string;
+    /** Host working directory the subagent should start in. Defaults to mountPath. */
+    cwdPath?: string;
     /** pi auth directory the parent resolved. */
     piAuthDir: string;
     /** Resolved pi model handle — subagents share it. */
@@ -179,6 +193,16 @@ export declare interface CreateSubagentToolArgs {
      * exercise the tool's logic without booting a VM.
      */
     buildAgentSession?: (args: BuildAgentSessionArgs) => Promise<AgentSession>;
+    /**
+     * Contract registry for resolving output_schema names to TypeBox
+     * schemas at call time. The subagent tool reads ONLY via `.get()`
+     * and `.list()` — the registry is immutable after construction.
+     *
+     * Production callers (executePiTask) create the registry with
+     * built-in contracts at session-setup; tests inject a registry
+     * with whatever stubs they need.
+     */
+    contractRegistry: SubagentContractRegistry;
 }
 /**
@@ -231,6 +255,17 @@ export declare interface ExecutePiTaskOptions {
      * across tasks.
      */
     checkpointPath?: string;
+    /**
+     * Lazy checkpoint resolver used by `createPiTaskExecutor` so snapshot
+     * creation can happen after the reporter has been opened and can surface
+     * setup failures as task messages.
+     */
+    resolveCheckpointPath?: () => Promise<string>;
+    /**
+     * Set when the caller already opened the reporter before handing control
+     * to `executePiTask`.
+     */
+    reporterAlreadyOpened?: boolean;
     /**
      * Optional callback invoked alongside every `reporter.record()` so
      * the daemon can mirror task messages into its local logger.
@@ -277,6 +312,19 @@ export declare interface ExecutePiTaskOptions {
      * after HOST_EXEC_ALLOWED; an array limits auto-approval to matching rules.
      */
     hostExecAutoApprove?: HostExecAutoApproveConfig;
+    /**
+     * Optional daemon-supplied execution plan. Keeps task semantics out of
+     * `pi-extension` while still letting callers opt into stable worktrees and
+     * file-backed Pi sessions for selected task classes.
+     */
+    makeExecutionPlan?: PiTaskExecutionPlanFactory;
+    /**
+     * Immutable subagent contract registry used to resolve `output_schema`
+     * names at subagent tool call time. Constructed by the daemon (or
+     * tests) from static built-in schemas — `execute-pi-task` never hardcodes
+     * contracts. See #1106.
+     */
+    subagentContractRegistry?: SubagentContractRegistry;
 }
 /**
@@ -415,6 +463,42 @@ export declare interface PiOtelOptions {
     spanAttributes?: Record<string, string | number | boolean>;
 }
+export declare interface PiSessionPersistencePlan {
+    sessionDir: string;
+}
+export declare interface PiTaskExecutionPlan {
+    /**
+     * Daemon-local reuse key. When set alongside `workspaceScope: 'session'`,
+     * dedicated worktrees may be retained and reopened across related tasks.
+     */
+    sessionKey: string | null;
+    /**
+     * Workspace identity selected by the daemon. `null` means the task should
+     * run against the shared mount path.
+     */
+    workspaceId: string | null;
+    /**
+     * Branch to create or reopen for the workspace. `null` means no dedicated
+     * worktree is required.
+     */
+    worktreeBranch: string | null;
+    /**
+     * Lifetime of the task workspace from the daemon's point of view.
+     * `attempt` = disposable; `session` = keep stable for the reuse key.
+     */
+    workspaceScope: 'attempt' | 'session';
+    /**
+     * Optional location for file-backed Pi session history. When omitted,
+     * the executor keeps the conversation in memory for this attempt only.
+     */
+    sessionPersistence?: PiSessionPersistencePlan | null;
+}
+export declare type PiTaskExecutionPlanFactory = (claimedTask: ClaimedTask) => PiTaskExecutionPlan | null;
+export declare function resolveTaskWorktreePath(mainRepo: string, workspaceId: string): string;
 /**
  * Resume a VM from a checkpoint, inject credentials, configure egress +
  * TLS. Returns the managed VM handle.
@@ -476,6 +560,29 @@ export declare interface SandboxConfig {
 /** Extract snapshot-specific config for backwards compat with ensureSnapshot. */
 export declare type SnapshotConfig = NonNullable<SandboxConfig['snapshot']>;
+declare interface SubagentContractRegistry {
+    /** Resolve a contract by name. Returns `null` for unknown names. */
+    get(name: string): SubagentOutputContract | null;
+    /** List all registered contracts. */
+    list(): SubagentOutputContract[];
+}
+declare interface SubagentOutputContract {
+    /** Stable identifier the parent uses to reference this contract.
+     *  Lower-snake-case by convention (e.g. `judge_eval_variant_result`). */
+    readonly name: string;
+    /** Human-readable description shown in the subagent tool's help text
+     *  and in the inner session's submit-tool description. Useful when a
+     *  parent LLM has multiple contracts to choose from. */
+    readonly description: string;
+    /**
+     * TypeBox schema the subagent's submit-tool args MUST validate
+     * against. The args ARE the output payload (no `{ output: ... }`
+     * wrapping), so the LLM gets field-level guidance directly.
+     */
+    readonly parametersSchema: TSchema;
+}
 export declare interface SubagentToolHandle {
     /** ToolDefinition to register via `customTools` on the parent session. */
     readonly tool: ToolDefinition;
@@ -718,9 +825,10 @@ export declare interface VmCredentials {
     agentEnvRaw: string;
     /**
      * Pi OAuth/API-key auth blob. Null when neither `~/.pi/agent/auth.json`
-     * (or its `PI_AUTH_PATH` override) is present — in that case the daemon
-     * relies on Pi's env-var providers (`ANTHROPIC_API_KEY`, etc.) carried
-     * via `agentEnv` and the host environment instead. CI uses this path.
+     * (resolved via `PI_CODING_AGENT_DIR` when set) is present — in that
+     * case the daemon relies on Pi's env-var providers (`ANTHROPIC_API_KEY`,
+     * etc.) carried via `agentEnv` and the host environment instead. CI uses
+     * this path.
      */
     piAuthJson: string | null;
     agentEnv: Record<string, string | undefined>;

package/dist/index.js CHANGED Viewed

@@ -8133,7 +8133,8 @@ function findMainWorktree() {
 function loadCredentials(agentDir) {
 	const moltnetJson = readFileSync(path.join(agentDir, "moltnet.json"), "utf8");
 	const agentEnvRaw = readFileSync(path.join(agentDir, "env"), "utf8");
-	const piAuthPath = process.env.PI_AUTH_PATH ?? path.join(process.env.HOME ?? "", ".pi", "agent", "auth.json");
+	const piAgentDir = process.env.PI_CODING_AGENT_DIR ?? path.join(process.env.HOME ?? "", ".pi", "agent");
+	const piAuthPath = path.join(piAgentDir, "auth.json");
 	const piAuthJson = existsSync(piAuthPath) ? readFileSync(piAuthPath, "utf8") : null;
 	const gitconfigPath = path.join(agentDir, "gitconfig");
 	const gitconfig = existsSync(gitconfigPath) ? readFileSync(gitconfigPath, "utf8") : null;
@@ -8633,9 +8634,11 @@ var NO_SKILLS = () => ({
 	diagnostics: []
 });
 /**
-* Construct an in-memory `AgentSession`. The caller is responsible for
-* eventually invoking `session.prompt(...)` and for tearing down — the
-* helper does no lifecycle management beyond construction.
+* Construct an `AgentSession`. By default it is in-memory; callers may opt
+* parent sessions into daemon-owned file persistence via `sessionPersistence`.
+* The caller is responsible for eventually invoking `session.prompt(...)` and
+* for tearing down — the helper does no lifecycle management beyond
+* construction.
 */
 async function buildAgentSession(args) {
 	const piOtelExtension = createPiOtelExtension({
@@ -8643,22 +8646,85 @@ async function buildAgentSession(args) {
 		spanAttributes: args.otelSpanAttrs
 	});
 	const resourceLoader = new DefaultResourceLoader({
-		cwd: args.mountPath,
+		cwd: args.cwdPath,
 		agentDir: args.piAuthDir,
 		extensionFactories: [piOtelExtension],
 		appendSystemPrompt: args.appendSystemPrompt,
 		skillsOverride: args.skillsOverride ?? NO_SKILLS
 	});
 	await resourceLoader.reload();
+	const sessionManager = args.sessionPersistence ? await resolvePersistentSessionManager({
+		cwd: args.cwdPath,
+		sessionDir: args.sessionPersistence.sessionDir
+	}) : SessionManager.inMemory(args.cwdPath);
 	return (await createAgentSession({
 		agentDir: args.piAuthDir,
-		cwd: args.mountPath,
+		cwd: args.cwdPath,
 		model: args.modelHandle,
 		customTools: args.customTools,
-		sessionManager: SessionManager.inMemory(),
+		sessionManager,
 		resourceLoader
 	})).session;
 }
+async function resolvePersistentSessionManager(args) {
+	await SessionManager.list(args.cwd, args.sessionDir);
+	return SessionManager.continueRecent(args.cwd, args.sessionDir);
+}
+//#endregion
+//#region ../agent-runtime/src/context-bindings.ts
+var PROMPT_SEPARATOR = "\n\n---\n\n";
+/**
+* Resolve `task.input.context[]` into delivered side-effects (skills
+* persisted via `deliver.skill`) and prompt fragments
+* (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
+* built prompt.
+*
+* Per-binding semantics (V1):
+*   - `skill`         → `deliver.skill({ slug, content })` once per ref.
+*                       Slug collisions on distinct contents are
+*                       refused loudly.
+*   - `prompt_prefix` → content appended to `systemPromptPrefix` with
+*                       the canonical `\n\n---\n\n` separator (in
+*                       declared order).
+*   - `user_inline`   → content appended to `userInlineSuffix` in
+*                       declared order, same separator.
+*
+* No fetching, no hashing — bytes are inlined in `ContextRef.content`,
+* and the task's `inputCid` already pins the entire input. The imposer
+* chose these bytes; the resolver just dispatches them.
+*
+* The function is pure with respect to its arguments: file writes are
+* confined to the injected `deliver` callback, which makes the
+* resolver trivial to test.
+*/
+async function resolveTaskContext(args) {
+	const promptParts = [];
+	const userParts = [];
+	const injected = [];
+	const usedSlugs = /* @__PURE__ */ new Map();
+	for (const ref of args.context) {
+		if (ref.binding === "skill") {
+			const prior = usedSlugs.get(ref.slug);
+			if (prior !== void 0) {
+				if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
+				injected.push(ref);
+				continue;
+			}
+			usedSlugs.set(ref.slug, ref.content);
+			await args.deliver.skill({
+				slug: ref.slug,
+				content: ref.content
+			});
+		} else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
+		else userParts.push(ref.content);
+		injected.push(ref);
+	}
+	return {
+		injected,
+		systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
+		userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
+	};
+}
 //#endregion
 //#region ../tasks/src/formats.ts
 /**
@@ -8831,10 +8897,10 @@ function validateRubricWeights(rubric) {
 * complementary places.
 *
 * Before this envelope existed, criteria were scattered: a vestigial
-* `criteriaCid` column nobody resolved, an `acceptanceCriteria: string[]`
-* field on `fulfill_brief.input` that was "interpreted by the claiming
-* agent," and inline `rubric` / `criteria[]` fields on judgment-task
-* inputs. None of those were machine-verifiable end-to-end.
+* `criteriaCid` column nobody resolved, free-form prose on
+* `fulfill_brief.input`, and inline `rubric` / `criteria[]` fields on
+* judgment-task inputs. None of those were machine-verifiable
+* end-to-end.
 *
 * This module defines a single, content-addressable envelope an imposer
 * attaches to any task type. It has four orthogonal sections — pick
@@ -9130,7 +9196,6 @@ var FULFILL_BRIEF_TYPE = "fulfill_brief";
 var FulfillBriefInput = Type$1.Object({
 	brief: Type$1.String({ minLength: 1 }),
 	title: Type$1.Optional(Type$1.String()),
-	acceptanceCriteria: Type$1.Optional(Type$1.Array(Type$1.String())),
 	successCriteria: Type$1.Optional(SuccessCriteria),
 	seedFiles: Type$1.Optional(Type$1.Array(Type$1.String())),
 	scopeHint: Type$1.Optional(Type$1.String())
@@ -9702,7 +9767,10 @@ var BUILT_IN_TASK_TYPES = {
 		inputSchema: FulfillBriefInput,
 		outputSchema: FulfillBriefOutput,
 		outputKind: "artifact",
+		resumable: true,
 		workspaceMode: "dedicated_worktree",
+		workspaceScope: "session",
+		sessionScope: "correlation",
 		requiresReferences: false,
 		validateOutput: requireVerificationWhenCriteriaPresent
 	},
@@ -9712,6 +9780,8 @@ var BUILT_IN_TASK_TYPES = {
 		outputSchema: AssessBriefOutput,
 		outputKind: "judgment",
 		workspaceMode: "dedicated_worktree",
+		workspaceScope: "attempt",
+		sessionScope: "none",
 		requiresReferences: true,
 		validateInput: validateJudgmentInput,
 		validateInputAsync: validateAssessBriefInputAsync
@@ -9721,6 +9791,8 @@ var BUILT_IN_TASK_TYPES = {
 		inputSchema: CuratePackInput,
 		outputSchema: CuratePackOutput,
 		outputKind: "artifact",
+		workspaceScope: "attempt",
+		sessionScope: "none",
 		requiresReferences: false,
 		validateOutput: requireVerificationWhenCriteriaPresent
 	},
@@ -9729,6 +9801,8 @@ var BUILT_IN_TASK_TYPES = {
 		inputSchema: RenderPackInput,
 		outputSchema: RenderPackOutput,
 		outputKind: "artifact",
+		workspaceScope: "attempt",
+		sessionScope: "none",
 		requiresReferences: false,
 		validateOutput: requireVerificationWhenCriteriaPresent,
 		validateInputAsync: validateRenderPackInputAsync
@@ -9738,6 +9812,8 @@ var BUILT_IN_TASK_TYPES = {
 		inputSchema: JudgePackInput,
 		outputSchema: JudgePackOutput,
 		outputKind: "judgment",
+		workspaceScope: "attempt",
+		sessionScope: "none",
 		requiresReferences: true,
 		validateInput: validateJudgmentInput,
 		validateOutput: validateJudgePackOutput,
@@ -9748,6 +9824,8 @@ var BUILT_IN_TASK_TYPES = {
 		inputSchema: RunEvalInput,
 		outputSchema: RunEvalOutput,
 		outputKind: "artifact",
+		workspaceScope: "attempt",
+		sessionScope: "custom",
 		requiresReferences: false,
 		validateOutput: validateRunEvalOutput
 	},
@@ -9756,6 +9834,8 @@ var BUILT_IN_TASK_TYPES = {
 		inputSchema: JudgeEvalVariantInput,
 		outputSchema: JudgeEvalVariantOutput,
 		outputKind: "judgment",
+		workspaceScope: "attempt",
+		sessionScope: "custom",
 		requiresReferences: false,
 		validateInput: validateJudgeEvalVariantInput,
 		validateOutput: validateJudgeEvalVariantOutput,
@@ -9825,15 +9905,6 @@ function getTaskOutputSchema(taskType) {
 function taskTypeUsesSubagents(taskType) {
 	return getTaskTypeEntry(taskType)?.usesSubagents === true;
 }
-/**
-* Filesystem isolation policy requested by the task type.
-*
-* Unknown task types and task types without an explicit policy default to the
-* legacy/shared behaviour.
-*/
-function taskTypeWorkspaceMode(taskType) {
-	return getTaskTypeEntry(taskType)?.workspaceMode ?? "shared_mount";
-}
 //#endregion
 //#region ../tasks/src/wire.ts
 /**
@@ -10070,133 +10141,6 @@ Type$1.Object({
 	additionalProperties: false
 });
 //#endregion
-//#region ../agent-runtime/src/subagent-output-contracts.ts
-var REGISTRY = /* @__PURE__ */ new Map();
-/**
-* Register a subagent output contract. Idempotent: re-registering the
-* same name with a different schema throws — contracts are meant to
-* be stable. Re-registering with the identical contract object (same
-* reference) is a no-op for HMR and test convenience.
-*
-* Typically called at module-init time alongside task-type
-* registration. See task-types/index.ts in @moltnet/tasks for the
-* conventional pattern.
-*/
-function registerSubagentOutputContract(contract) {
-	if (!contract.name || contract.name.trim().length === 0) throw new Error("subagent output contract name is required");
-	if (!/^[a-z][a-z0-9_]*$/.test(contract.name)) throw new Error(`subagent output contract name '${contract.name}' must be lower_snake_case (starts with a letter, then [a-z0-9_]+)`);
-	const existing = REGISTRY.get(contract.name);
-	if (existing && existing !== contract) {
-		if (existing.parametersSchema !== contract.parametersSchema) throw new Error(`subagent output contract '${contract.name}' is already registered with a different schema; refusing to override`);
-	}
-	REGISTRY.set(contract.name, contract);
-}
-/**
-* Resolve a subagent output contract by name. Returns `null` for
-* unknown names — callers (the subagent custom tool) decide whether
-* that's a tool error the parent LLM can recover from or a hard fail.
-*/
-function getSubagentOutputContract(name) {
-	return REGISTRY.get(name) ?? null;
-}
-/**
-* List all registered contracts. Useful for diagnostics and for the
-* subagent tool's parameter description so a parent LLM can see what
-* contracts are available without enumerating them in its prompt.
-*/
-function listSubagentOutputContracts() {
-	return [...REGISTRY.values()];
-}
-//#endregion
-//#region ../agent-runtime/src/built-in-contract-registrations.ts
-/**
-* Built-in subagent output contracts (#1087, #943).
-*
-* Why this is an exported function and not a module-init side
-* effect:
-*
-*   - The registry is process-global. Module-init registration
-*     fires exactly once per Node process (ESM modules are cached
-*     by URL). Tests that call `__resetSubagentOutputContractsForTests()`
-*     to start from an empty registry have no way to repopulate
-*     the built-ins without re-evaluating the module — which the
-*     cache prevents. PR #1101 review M4.
-*   - An explicit `registerBuiltInSubagentContracts()` lets the
-*     package index call it once at module load AND lets test
-*     setup hooks call it again after `__reset...`.
-*   - `registerSubagentOutputContract` is itself idempotent for
-*     identical re-registrations, so calling this function twice
-*     in the same process is safe.
-*
-* Adding a new built-in: extend the body of this function. Do not
-* call `registerSubagentOutputContract` from anywhere else in the
-* package — keeping all built-ins in one function makes the set
-* auditable.
-*/
-function registerBuiltInSubagentContracts() {
-	registerSubagentOutputContract({
-		name: "judge_eval_variant_result",
-		description: "Per-variant grading result produced by a subagent of judge_eval_variant: scores against the shared rubric, composite, and a 1-3 sentence verdict for a single variant.",
-		parametersSchema: JudgeEvalVariantResult
-	});
-}
-registerBuiltInSubagentContracts();
-//#endregion
-//#region ../agent-runtime/src/context-bindings.ts
-var PROMPT_SEPARATOR = "\n\n---\n\n";
-/**
-* Resolve `task.input.context[]` into delivered side-effects (skills
-* persisted via `deliver.skill`) and prompt fragments
-* (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
-* built prompt.
-*
-* Per-binding semantics (V1):
-*   - `skill`         → `deliver.skill({ slug, content })` once per ref.
-*                       Slug collisions on distinct contents are
-*                       refused loudly.
-*   - `prompt_prefix` → content appended to `systemPromptPrefix` with
-*                       the canonical `\n\n---\n\n` separator (in
-*                       declared order).
-*   - `user_inline`   → content appended to `userInlineSuffix` in
-*                       declared order, same separator.
-*
-* No fetching, no hashing — bytes are inlined in `ContextRef.content`,
-* and the task's `inputCid` already pins the entire input. The imposer
-* chose these bytes; the resolver just dispatches them.
-*
-* The function is pure with respect to its arguments: file writes are
-* confined to the injected `deliver` callback, which makes the
-* resolver trivial to test.
-*/
-async function resolveTaskContext(args) {
-	const promptParts = [];
-	const userParts = [];
-	const injected = [];
-	const usedSlugs = /* @__PURE__ */ new Map();
-	for (const ref of args.context) {
-		if (ref.binding === "skill") {
-			const prior = usedSlugs.get(ref.slug);
-			if (prior !== void 0) {
-				if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
-				injected.push(ref);
-				continue;
-			}
-			usedSlugs.set(ref.slug, ref.content);
-			await args.deliver.skill({
-				slug: ref.slug,
-				content: ref.content
-			});
-		} else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
-		else userParts.push(ref.content);
-		injected.push(ref);
-	}
-	return {
-		injected,
-		systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
-		userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
-	};
-}
-//#endregion
 //#region ../agent-runtime/src/output-tools.ts
 /**
 * Submit-output tool contract.
@@ -10601,13 +10545,7 @@ function buildCuratePackUserPrompt(input, ctx) {
 * is told to inspect them itself.
 */
 function buildFulfillBriefUserPrompt(input, ctx) {
-	const { brief, title, acceptanceCriteria, seedFiles, scopeHint } = input;
-	const criteriaSection = acceptanceCriteria?.length ? [
-		"### Acceptance criteria",
-		"",
-		...acceptanceCriteria.map((c) => `- ${c}`),
-		""
-	].join("\n") : "";
+	const { brief, title, seedFiles, scopeHint } = input;
 	const seedSection = seedFiles?.length ? [
 		"### Seed files",
 		"",
@@ -10655,7 +10593,6 @@ function buildFulfillBriefUserPrompt(input, ctx) {
 		"",
 		brief,
 		"",
-		criteriaSection,
 		seedSection,
 		correlationSection,
 		workspaceSection,
@@ -14822,6 +14759,7 @@ var DEFAULT_SUBAGENT_TIMEOUT_MS = 300 * 1e3;
 */
 function createSubagentTool(args) {
 	const buildSession = args.buildAgentSession ?? buildAgentSession;
+	const { contractRegistry } = args;
 	let callCount = 0;
 	return {
 		tool: defineTool({
@@ -14832,8 +14770,8 @@ function createSubagentTool(args) {
 			async execute(_id, params) {
 				if (!Value.Check(SubagentToolParameters, params)) return toolError(`subagent: invalid parameters: ${JSON.stringify([...Value.Errors(SubagentToolParameters, params)].slice(0, 3))}`);
 				const { task, output_schema } = params;
-				const contract = getSubagentOutputContract(output_schema);
-				if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${listSubagentOutputContracts().map((c) => c.name).join(", ")}]`);
+				const contract = contractRegistry.get(output_schema);
+				if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${contractRegistry.list().map((c) => c.name).join(", ")}]`);
 				callCount += 1;
 				const callIndex = callCount;
 				let captured = null;
@@ -14863,6 +14801,7 @@ function createSubagentTool(args) {
 				});
 				const session = await buildSession({
 					mountPath: args.mountPath,
+					cwdPath: args.cwdPath ?? args.mountPath,
 					piAuthDir: args.piAuthDir,
 					modelHandle: args.modelHandle,
 					agentName: args.agentName,
@@ -15198,6 +15137,115 @@ function resolveSubmitTools(taskType, opts = {}) {
 	};
 }
 //#endregion
+//#region src/runtime/task-workspace.ts
+function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
+	const branch = executionPlan?.worktreeBranch ?? null;
+	if (!branch) return {
+		mountPath: requestedMountPath,
+		cwdPath: requestedMountPath,
+		mode: "shared_mount",
+		branch: null,
+		cleanup: () => {}
+	};
+	const mainRepo = findMainWorktree();
+	const worktreeDir = resolveTaskWorktreePath(mainRepo, executionPlan?.workspaceId ?? `task-${task.id}`);
+	const relMount = relative(mainRepo, requestedMountPath);
+	const cwdPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
+	const keepWorkspace = executionPlan?.workspaceScope === "session" && executionPlan.sessionKey !== null;
+	if (keepWorkspace) ensureReusableTaskWorktree(mainRepo, worktreeDir, branch);
+	else {
+		removeExistingTaskWorktree(mainRepo, worktreeDir);
+		addTaskWorktree(mainRepo, worktreeDir, branch);
+	}
+	return {
+		mountPath: mainRepo,
+		cwdPath,
+		mode: "dedicated_worktree",
+		branch,
+		cleanup: keepWorkspace ? () => {} : () => {
+			execFileSync("git", [
+				"-C",
+				mainRepo,
+				"worktree",
+				"remove",
+				"--force",
+				worktreeDir
+			], { stdio: "pipe" });
+		}
+	};
+}
+function resolveTaskWorktreePath(mainRepo, workspaceId) {
+	return join(mainRepo, ".worktrees", workspaceId);
+}
+function ensureReusableTaskWorktree(mainRepo, worktreeDir, branch) {
+	if (isRegisteredWorktree(mainRepo, worktreeDir)) return;
+	if (existsSync(worktreeDir)) throw new Error(`Expected reusable worktree ${worktreeDir} to be git-managed, but it exists outside git worktree metadata.`);
+	addTaskWorktree(mainRepo, worktreeDir, branch);
+}
+function addTaskWorktree(mainRepo, worktreeDir, branch) {
+	const baseRef = resolveWorktreeBaseRef(mainRepo);
+	execFileSync("git", gitRefExists(mainRepo, `refs/heads/${branch}`) ? [
+		"-C",
+		mainRepo,
+		"worktree",
+		"add",
+		worktreeDir,
+		branch
+	] : [
+		"-C",
+		mainRepo,
+		"worktree",
+		"add",
+		"-b",
+		branch,
+		worktreeDir,
+		baseRef
+	], { stdio: "pipe" });
+}
+function removeExistingTaskWorktree(mainRepo, worktreeDir) {
+	if (!existsSync(worktreeDir) || !isRegisteredWorktree(mainRepo, worktreeDir)) return;
+	execFileSync("git", [
+		"-C",
+		mainRepo,
+		"worktree",
+		"remove",
+		"--force",
+		worktreeDir
+	], { stdio: "pipe" });
+}
+function isRegisteredWorktree(mainRepo, worktreeDir) {
+	const list = execFileSync("git", [
+		"-C",
+		mainRepo,
+		"worktree",
+		"list",
+		"--porcelain"
+	], {
+		encoding: "utf8",
+		stdio: "pipe"
+	});
+	const marker = `worktree ${worktreeDir}\n`;
+	return list.includes(marker) || list.endsWith(`worktree ${worktreeDir}`);
+}
+function resolveWorktreeBaseRef(mainRepo) {
+	return gitRefExists(mainRepo, "refs/heads/main") ? "main" : "HEAD";
+}
+function gitRefExists(mainRepo, ref) {
+	try {
+		execFileSync("git", [
+			"-C",
+			mainRepo,
+			"show-ref",
+			"--verify",
+			"--quiet",
+			ref
+		], { stdio: "pipe" });
+		return true;
+	} catch {
+		return false;
+	}
+}
+//#endregion
 //#region src/runtime/execute-pi-task.ts
 /**
 * executePiTask — run a single Task attempt using pi-coding-agent inside a
@@ -15224,15 +15272,24 @@ var noopTurnEventHandler = () => {};
 function createPiTaskExecutor(opts) {
 	let cachedCheckpoint = opts.checkpointPath ?? null;
 	return async (claimedTask, reporter) => {
-		if (!cachedCheckpoint) cachedCheckpoint = await ensureSnapshot({
-			config: opts.sandboxConfig?.snapshot,
-			onProgress: opts.onSnapshotProgress ?? ((m) => {
-				process.stderr.write(`[snapshot] ${m}\n`);
-			})
+		const reporterWasOpened = !reporter.cancelSignal.aborted;
+		if (reporterWasOpened) await reporter.open({
+			taskId: claimedTask.task.id,
+			attemptN: claimedTask.attemptN
 		});
 		return executePiTask(claimedTask, reporter, {
 			...opts,
-			checkpointPath: cachedCheckpoint
+			checkpointPath: cachedCheckpoint ?? void 0,
+			resolveCheckpointPath: async () => {
+				if (!cachedCheckpoint) cachedCheckpoint = await ensureSnapshot({
+					config: opts.sandboxConfig?.snapshot,
+					onProgress: opts.onSnapshotProgress ?? ((m) => {
+						process.stderr.write(`[snapshot] ${m}\n`);
+					})
+				});
+				return cachedCheckpoint;
+			},
+			reporterAlreadyOpened: reporterWasOpened
 		});
 	};
 }
@@ -15246,8 +15303,11 @@ async function executePiTask(claimedTask, reporter, opts) {
 	const task = claimedTask.task;
 	const attemptN = claimedTask.attemptN;
 	const startTime = Date.now();
-	const workspace = prepareTaskWorkspace(task, opts.mountPath ?? process.cwd());
-	const mountPath = workspace.mountPath;
+	const requestedMountPath = opts.mountPath ?? process.cwd();
+	const executionPlan = opts.makeExecutionPlan?.(claimedTask) ?? null;
+	let workspace = null;
+	let mountPath = requestedMountPath;
+	let cwdPath = requestedMountPath;
 	if (reporter.cancelSignal.aborted) return {
 		taskId: task.id,
 		attemptN,
@@ -15262,33 +15322,8 @@ async function executePiTask(claimedTask, reporter, opts) {
 			retryable: false
 		}
 	};
-	const checkpointPath = opts.checkpointPath ?? await ensureSnapshot({
-		config: opts.sandboxConfig?.snapshot,
-		onProgress: opts.onSnapshotProgress ?? ((m) => {
-			process.stderr.write(`[snapshot] ${m}\n`);
-		})
-	});
-	const mainRepoForRepair = findMainWorktree();
-	try {
-		execFileSync("git", [
-			"-C",
-			mainRepoForRepair,
-			"worktree",
-			"repair",
-			"--relative-paths"
-		], { stdio: "pipe" });
-	} catch {}
+	let reporterOpen = opts.reporterAlreadyOpened ?? false;
 	let managed = null;
-	managed = await resumeVm({
-		checkpointPath,
-		agentName: opts.agentName,
-		mountPath,
-		extraAllowedHosts: opts.extraAllowedHosts,
-		sandboxConfig: opts.sandboxConfig
-	});
-	const diaryId = task.diaryId ?? "";
-	const taskTeamId = task.teamId ?? "";
-	let reporterOpen = false;
 	let session = null;
 	let subagentHandle = null;
 	const finalUsage = emptyUsage(opts.provider, opts.model);
@@ -15307,41 +15342,103 @@ async function executePiTask(claimedTask, reporter, opts) {
 			retryable: false
 		}
 	});
+	let onTurnEvent;
+	if (opts.makeOnTurnEvent) try {
+		onTurnEvent = opts.makeOnTurnEvent(claimedTask);
+	} catch (err) {
+		process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
+		onTurnEvent = noopTurnEventHandler;
+	}
+	else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
+	const emit = (kind, payload) => {
+		try {
+			onTurnEvent(kind, summarizePayloadForLog(kind, payload));
+		} catch (err) {
+			process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
+		}
+		return reporter.record({
+			kind,
+			payload
+		});
+	};
+	const emitError = async (phase, message, extra = {}) => {
+		await emit("error", {
+			phase,
+			message,
+			...extra
+		});
+	};
 	try {
-		const mainRepo = findMainWorktree();
-		activateAgentEnv(managed.credentials.agentEnv, mainRepo);
-		await reporter.open({
+		if (!opts.reporterAlreadyOpened) await reporter.open({
 			taskId: task.id,
 			attemptN
 		});
 		reporterOpen = true;
-		let onTurnEvent;
-		if (opts.makeOnTurnEvent) try {
-			onTurnEvent = opts.makeOnTurnEvent(claimedTask);
+		let checkpointPath;
+		try {
+			checkpointPath = opts.checkpointPath ?? (opts.resolveCheckpointPath ? await opts.resolveCheckpointPath() : await ensureSnapshot({
+				config: opts.sandboxConfig?.snapshot,
+				onProgress: opts.onSnapshotProgress ?? ((m) => {
+					process.stderr.write(`[snapshot] ${m}\n`);
+				})
+			}));
 		} catch (err) {
-			process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
-			onTurnEvent = noopTurnEventHandler;
+			const message = err instanceof Error ? err.message : String(err);
+			await emitError("snapshot", message);
+			return makeFailedOutput("snapshot_failed", message);
 		}
-		else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
-		const emit = (kind, payload) => {
+		try {
+			workspace = prepareTaskWorkspace(task, requestedMountPath, executionPlan);
+			mountPath = workspace.mountPath;
+			cwdPath = workspace.cwdPath;
+		} catch (err) {
+			const message = err instanceof Error ? err.message : String(err);
+			await emitError("worktree_setup", message);
+			return makeFailedOutput("worktree_setup_failed", message);
+		}
+		try {
+			const mainRepoForRepair = findMainWorktree();
 			try {
-				onTurnEvent(kind, summarizePayloadForLog(kind, payload));
-			} catch (err) {
-				process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
-			}
-			return reporter.record({
-				kind,
-				payload
+				execFileSync("git", [
+					"-C",
+					mainRepoForRepair,
+					"worktree",
+					"repair",
+					"--relative-paths"
+				], { stdio: "pipe" });
+			} catch {}
+		} catch (err) {
+			const message = err instanceof Error ? err.message : String(err);
+			await emitError("worktree_setup", message);
+			return makeFailedOutput("worktree_setup_failed", message);
+		}
+		try {
+			managed = await resumeVm({
+				checkpointPath,
+				agentName: opts.agentName,
+				mountPath,
+				extraAllowedHosts: opts.extraAllowedHosts,
+				sandboxConfig: opts.sandboxConfig
 			});
-		};
+		} catch (err) {
+			const message = err instanceof Error ? err.message : String(err);
+			await emitError("vm_resume", message);
+			return makeFailedOutput("vm_resume_failed", message);
+		}
+		const diaryId = task.diaryId ?? "";
+		const taskTeamId = task.teamId ?? "";
+		const mainRepo = findMainWorktree();
+		activateAgentEnv(managed.credentials.agentEnv, mainRepo);
+		const activeWorkspace = workspace;
+		if (!activeWorkspace) throw new Error("task workspace not prepared");
 		await emit("info", {
 			event: "execute_start",
 			taskType: task.taskType,
 			teamId: task.teamId,
 			provider: opts.provider,
 			model: opts.model,
-			workspaceMode: workspace.mode,
-			workspaceBranch: workspace.branch
+			workspaceMode: activeWorkspace.mode,
+			workspaceBranch: activeWorkspace.branch
 		});
 		let taskPrompt;
 		try {
@@ -15349,8 +15446,8 @@ async function executePiTask(claimedTask, reporter, opts) {
 				diaryId,
 				taskId: task.id,
 				workspace: {
-					mode: workspace.mode,
-					branch: workspace.branch
+					mode: activeWorkspace.mode,
+					branch: activeWorkspace.branch
 				},
 				extras: opts.promptExtras
 			});
@@ -15402,7 +15499,7 @@ async function executePiTask(claimedTask, reporter, opts) {
 				getTeamId: () => taskTeamId,
 				getSessionErrors: () => [],
 				clearSessionErrors: () => {},
-				getHostCwd: () => mountPath,
+				getHostCwd: () => cwdPath,
 				hostExecBaseEnv: new Set([...HOST_EXEC_DEFAULT_BASE_ENV, ...Object.keys(managed.credentials.agentEnv)]),
 				hostExecAutoApprove: opts.hostExecAutoApprove ?? opts.sandboxConfig?.hostExec?.autoApprove ?? false,
 				getTaskContext: () => ({
@@ -15430,6 +15527,7 @@ async function executePiTask(claimedTask, reporter, opts) {
 			if (taskTypeUsesSubagents(task.taskType)) {
 				subagentHandle = createSubagentTool({
 					mountPath,
+					cwdPath,
 					piAuthDir,
 					modelHandle,
 					agentName: opts.agentName,
@@ -15438,12 +15536,14 @@ async function executePiTask(claimedTask, reporter, opts) {
 					parentTaskId: task.id,
 					parentTaskType: task.taskType,
 					parentAttemptN: attemptN,
+					contractRegistry: opts.subagentContractRegistry,
 					parentCancelSignal: reporter.cancelSignal
 				});
 				parentSubagentTools.push(subagentHandle.tool);
 			}
 			session = await buildAgentSession({
 				mountPath,
+				cwdPath,
 				piAuthDir,
 				modelHandle,
 				agentName: opts.agentName,
@@ -15462,7 +15562,8 @@ async function executePiTask(claimedTask, reporter, opts) {
 					"moltnet.task.id": task.id,
 					"moltnet.task.attempt": attemptN,
 					"moltnet.task.type": task.taskType
-				}
+				},
+				sessionPersistence: executionPlan?.sessionPersistence ?? void 0
 			});
 		} catch (err) {
 			const message = err instanceof Error ? err.message : String(err);
@@ -15527,6 +15628,10 @@ async function executePiTask(claimedTask, reporter, opts) {
 					is_error: event.isError,
 					result: event.isError ? truncateForWire(event.result) : void 0
 				}));
+				if (event.isError) track(emitError("tool_call_error", describeToolErrorMessage(event.result), {
+					tool: event.toolName,
+					result: truncateForWire(event.result)
+				}));
 				if (maxBashTimeouts > 0 && event.toolName === "bash" && event.isError && isBashTimeoutResult(event.result)) {
 					bashTimeoutCount += 1;
 					if (bashTimeoutCount >= maxBashTimeouts) triggerCapAbort("max_bash_timeouts_exceeded", `Aborted after ${bashTimeoutCount} bash timeouts in this attempt (cap ${maxBashTimeouts}).`);
@@ -15682,7 +15787,7 @@ async function executePiTask(claimedTask, reporter, opts) {
 			}
 		}
 		if (managed) await managed.vm.close();
-		try {
+		if (workspace) try {
 			workspace.cleanup();
 		} catch (err) {
 			const detail = err instanceof Error ? err.message : String(err);
@@ -15690,107 +15795,6 @@ async function executePiTask(claimedTask, reporter, opts) {
 		}
 	}
 }
-function resolveTaskWorktreeBranch(task) {
-	if (taskTypeWorkspaceMode(task.taskType) !== "dedicated_worktree") return null;
-	if (task.taskType === "fulfill_brief") {
-		const input = task.input;
-		const slug = slugifyBranchComponent(typeof input.title === "string" && input.title.trim().length > 0 ? input.title : typeof input.brief === "string" && input.brief.trim().length > 0 ? input.brief : task.taskType) || "task";
-		if (task.correlationId) return `moltnet/${task.correlationId}/${slug}`;
-		return `feat/${(typeof input.scopeHint === "string" && input.scopeHint.trim().length > 0 ? slugifyBranchComponent(input.scopeHint) : "task") || "task"}-${slug}`;
-	}
-	return `task/${slugifyBranchComponent(task.taskType) || "task"}-${task.id.slice(0, 8)}`;
-}
-function slugifyBranchComponent(input) {
-	return input.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 60).replace(/-+$/g, "");
-}
-function prepareTaskWorkspace(task, requestedMountPath) {
-	const branch = resolveTaskWorktreeBranch(task);
-	if (!branch) return {
-		mountPath: requestedMountPath,
-		mode: "shared_mount",
-		branch: null,
-		cleanup: () => {}
-	};
-	const mainRepo = findMainWorktree();
-	const worktreeDir = join(mainRepo, ".worktrees", `task-${task.id}`);
-	removeExistingTaskWorktree(mainRepo, worktreeDir);
-	const relMount = relative(mainRepo, requestedMountPath);
-	const mountPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
-	const baseRef = resolveWorktreeBaseRef(mainRepo);
-	execFileSync("git", gitRefExists(mainRepo, `refs/heads/${branch}`) ? [
-		"-C",
-		mainRepo,
-		"worktree",
-		"add",
-		worktreeDir,
-		branch
-	] : [
-		"-C",
-		mainRepo,
-		"worktree",
-		"add",
-		"-b",
-		branch,
-		worktreeDir,
-		baseRef
-	], { stdio: "pipe" });
-	return {
-		mountPath,
-		mode: "dedicated_worktree",
-		branch,
-		cleanup: () => {
-			execFileSync("git", [
-				"-C",
-				mainRepo,
-				"worktree",
-				"remove",
-				"--force",
-				worktreeDir
-			], { stdio: "pipe" });
-		}
-	};
-}
-function removeExistingTaskWorktree(mainRepo, worktreeDir) {
-	if (!existsSync(worktreeDir)) return;
-	const list = execFileSync("git", [
-		"-C",
-		mainRepo,
-		"worktree",
-		"list",
-		"--porcelain"
-	], {
-		encoding: "utf8",
-		stdio: "pipe"
-	});
-	const marker = `worktree ${worktreeDir}\n`;
-	if (!list.includes(marker) && !list.endsWith(`worktree ${worktreeDir}`)) return;
-	execFileSync("git", [
-		"-C",
-		mainRepo,
-		"worktree",
-		"remove",
-		"--force",
-		worktreeDir
-	], { stdio: "pipe" });
-}
-function resolveWorktreeBaseRef(mainRepo) {
-	return gitRefExists(mainRepo, "refs/heads/main") ? "main" : "HEAD";
-}
-function gitRefExists(mainRepo, ref) {
-	try {
-		execFileSync("git", [
-			"-C",
-			mainRepo,
-			"show-ref",
-			"--verify",
-			"--quiet",
-			ref
-		], { stdio: "pipe" });
-		return true;
-	} catch {
-		return false;
-	}
-}
 function emptyUsage(provider, model) {
 	return {
 		inputTokens: 0,
@@ -15894,6 +15898,23 @@ function truncateForWire(value) {
 		};
 	}
 }
+function describeToolErrorMessage(result) { // Builds the human-readable `message` string for a failed tool call's `result`; the caller passes it to emitError("tool_call_error", ...).
+	if (typeof result === "string" && result.trim().length > 0) return result.trim(); // non-blank string result: use it as-is, trimmed
+	if (result && typeof result === "object") { // object result: look for a `content` array
+		const content = result.content;
+		if (Array.isArray(content)) {
+			for (const item of content) if (item && typeof item === "object" && typeof item.text === "string") { // scan entries in order; only those with a string `text` are considered
+				const text = item.text.trim();
+				if (text.length > 0) return text; // first non-blank `text` wins
+			}
+		}
+	}
+	try { // fallback when no usable text was found (blank string, no text entries, or any other value)
+		return JSON.stringify(truncateForWire(result)); // NOTE(review): JSON.stringify returns undefined (not a string) for an undefined input - confirm truncateForWire never yields one here
+	} catch { // truncateForWire or JSON.stringify threw
+		return "Tool call failed"; // generic message of last resort
+	}
+}
 //#endregion
 //#region src/index.ts
 /**
@@ -16170,4 +16191,4 @@ function moltnetExtension(pi) {
 	registerMoltnetReflectCommand(pi, state);
 }
 //#endregion
-export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildAgentSession, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, createSubagentTool, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, injectTaskContext, loadCredentials, resumeVm, toGuestPath };
+export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildAgentSession, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, createSubagentTool, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, injectTaskContext, loadCredentials, resolveTaskWorktreePath, resumeVm, toGuestPath };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@themoltnet/pi-extension",
-  "version": "0.16.2",
+  "version": "0.18.0",
   "type": "module",
   "description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
   "license": "MIT",
@@ -32,7 +32,7 @@
     "@opentelemetry/api": "^1.9.0",
     "@sinclair/typebox": "^0.34.0",
     "@themoltnet/sdk": "0.102.0",
-    "@themoltnet/agent-runtime": "0.15.1"
+    "@themoltnet/agent-runtime": "0.15.2"
   },
   "peerDependencies": {
     "@earendil-works/pi-coding-agent": ">=0.74.0",
@@ -51,7 +51,7 @@
     "@earendil-works/pi-coding-agent": "^0.74.0",
     "@opentelemetry/sdk-metrics": "^2.5.1",
     "@opentelemetry/sdk-trace-base": "^2.5.1",
-    "@types/node": "^20.11.0",
+    "@types/node": "^22.19.0",
     "typescript": "^5.3.3",
     "vite": "^8.0.0",
     "vite-plugin-dts": "^4.5.4",