npm - @themoltnet/pi-extension - Versions diffs - 0.17.0 → 0.18.1 - Mend

@themoltnet/pi-extension 0.17.0 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -19,6 +19,7 @@ import { TObject } from '@sinclair/typebox';
 import { ToolDefinition } from '@earendil-works/pi-coding-agent';
 import { TOptional } from '@sinclair/typebox';
 import { TRecord } from '@sinclair/typebox';
+import { TSchema } from '@sinclair/typebox';
 import { TString } from '@sinclair/typebox';
 import { TUnion } from '@sinclair/typebox';
 import { TUnknown } from '@sinclair/typebox';
@@ -43,6 +44,8 @@ export declare function buildAgentSession(args: BuildAgentSessionArgs): Promise<
 declare interface BuildAgentSessionArgs {
     /** Host directory mounted at /workspace inside the VM. */
     mountPath: string;
+    /** Host working directory where the agent session should start. */
+    cwdPath: string;
     /** pi auth directory (resolved from `PI_CODING_AGENT_DIR` or `~/.pi/agent`). */
     piAuthDir: string;
     /** Resolved pi model handle (provider + model id). */
@@ -133,6 +136,8 @@ export declare function createSubagentTool(args: CreateSubagentToolArgs): Subage
 export declare interface CreateSubagentToolArgs {
     /** Host directory mounted at /workspace inside the VM. */
     mountPath: string;
+    /** Host working directory the subagent should start in. Defaults to mountPath. */
+    cwdPath?: string;
     /** pi auth directory the parent resolved. */
     piAuthDir: string;
     /** Resolved pi model handle — subagents share it. */
@@ -188,6 +193,16 @@ export declare interface CreateSubagentToolArgs {
      * exercise the tool's logic without booting a VM.
      */
     buildAgentSession?: (args: BuildAgentSessionArgs) => Promise<AgentSession>;
+    /**
+     * Contract registry for resolving output_schema names to TypeBox
+     * schemas at call time. The subagent tool reads ONLY via `.get()`
+     * and `.list()` — the registry is immutable after construction.
+     *
+     * Production callers (executePiTask) create the registry with
+     * built-in contracts at session-setup; tests inject a registry
+     * with whatever stubs they need.
+     */
+    contractRegistry: SubagentContractRegistry;
 }
 /**
@@ -240,6 +255,17 @@ export declare interface ExecutePiTaskOptions {
      * across tasks.
      */
     checkpointPath?: string;
+    /**
+     * Lazy checkpoint resolver used by `createPiTaskExecutor` so snapshot
+     * creation can happen after the reporter has been opened and can surface
+     * setup failures as task messages.
+     */
+    resolveCheckpointPath?: () => Promise<string>;
+    /**
+     * Set when the caller already opened the reporter before handing control
+     * to `executePiTask`.
+     */
+    reporterAlreadyOpened?: boolean;
     /**
      * Optional callback invoked alongside every `reporter.record()` so
      * the daemon can mirror task messages into its local logger.
@@ -292,6 +318,13 @@ export declare interface ExecutePiTaskOptions {
      * file-backed Pi sessions for selected task classes.
      */
     makeExecutionPlan?: PiTaskExecutionPlanFactory;
+    /**
+     * Immutable subagent contract registry used to resolve `output_schema`
+     * names at subagent tool call time. Constructed by the daemon (or
+     * tests) from static built-in schemas — `execute-pi-task` never hardcodes
+     * contracts. See #1106.
+     */
+    subagentContractRegistry?: SubagentContractRegistry;
 }
 /**
@@ -527,6 +560,29 @@ export declare interface SandboxConfig {
 /** Extract snapshot-specific config for backwards compat with ensureSnapshot. */
 export declare type SnapshotConfig = NonNullable<SandboxConfig['snapshot']>;
+/**
+ * Lookup surface for subagent output contracts. Only read operations
+ * (`get` by name, `list` all) are declared here; no mutating members
+ * are part of this interface.
+ */
+declare interface SubagentContractRegistry {
+    /** Resolve a contract by name. Returns `null` for unknown names. */
+    get(name: string): SubagentOutputContract | null;
+    /** List all registered contracts. */
+    list(): SubagentOutputContract[];
+}
+/**
+ * A named output contract for a subagent: a stable `name`, a
+ * human-readable `description`, and the TypeBox `parametersSchema` the
+ * submitted payload is validated against. All fields are `readonly`.
+ */
+declare interface SubagentOutputContract {
+    /** Stable identifier the parent uses to reference this contract.
+     *  Lower-snake-case by convention (e.g. `judge_eval_variant_result`). */
+    readonly name: string;
+    /** Human-readable description shown in the subagent tool's help text
+     *  and in the inner session's submit-tool description. Useful when a
+     *  parent LLM has multiple contracts to choose from. */
+    readonly description: string;
+    /**
+     * TypeBox schema the subagent's submit-tool args MUST validate
+     * against. The args ARE the output payload (no `{ output: ... }`
+     * wrapping), so the LLM gets field-level guidance directly.
+     */
+    readonly parametersSchema: TSchema;
+}
 export declare interface SubagentToolHandle {
     /** ToolDefinition to register via `customTools` on the parent session. */
     readonly tool: ToolDefinition;
@@ -769,9 +825,10 @@ export declare interface VmCredentials {
     agentEnvRaw: string;
     /**
      * Pi OAuth/API-key auth blob. Null when neither `~/.pi/agent/auth.json`
-     * (or its `PI_AUTH_PATH` override) is present — in that case the daemon
-     * relies on Pi's env-var providers (`ANTHROPIC_API_KEY`, etc.) carried
-     * via `agentEnv` and the host environment instead. CI uses this path.
+     * (resolved via `PI_CODING_AGENT_DIR` when set) is present — in that
+     * case the daemon relies on Pi's env-var providers (`ANTHROPIC_API_KEY`,
+     * etc.) carried via `agentEnv` and the host environment instead. CI uses
+     * this path.
      */
     piAuthJson: string | null;
     agentEnv: Record<string, string | undefined>;

package/dist/index.js CHANGED Viewed

@@ -8133,7 +8133,8 @@ function findMainWorktree() {
 function loadCredentials(agentDir) {
 	const moltnetJson = readFileSync(path.join(agentDir, "moltnet.json"), "utf8");
 	const agentEnvRaw = readFileSync(path.join(agentDir, "env"), "utf8");
-	const piAuthPath = process.env.PI_AUTH_PATH ?? path.join(process.env.HOME ?? "", ".pi", "agent", "auth.json");
+	const piAgentDir = process.env.PI_CODING_AGENT_DIR ?? path.join(process.env.HOME ?? "", ".pi", "agent");
+	const piAuthPath = path.join(piAgentDir, "auth.json");
 	const piAuthJson = existsSync(piAuthPath) ? readFileSync(piAuthPath, "utf8") : null;
 	const gitconfigPath = path.join(agentDir, "gitconfig");
 	const gitconfig = existsSync(gitconfigPath) ? readFileSync(gitconfigPath, "utf8") : null;
@@ -8645,7 +8646,7 @@ async function buildAgentSession(args) {
 		spanAttributes: args.otelSpanAttrs
 	});
 	const resourceLoader = new DefaultResourceLoader({
-		cwd: args.mountPath,
+		cwd: args.cwdPath,
 		agentDir: args.piAuthDir,
 		extensionFactories: [piOtelExtension],
 		appendSystemPrompt: args.appendSystemPrompt,
@@ -8653,12 +8654,12 @@ async function buildAgentSession(args) {
 	});
 	await resourceLoader.reload();
 	const sessionManager = args.sessionPersistence ? await resolvePersistentSessionManager({
-		cwd: args.mountPath,
+		cwd: args.cwdPath,
 		sessionDir: args.sessionPersistence.sessionDir
-	}) : SessionManager.inMemory(args.mountPath);
+	}) : SessionManager.inMemory(args.cwdPath);
 	return (await createAgentSession({
 		agentDir: args.piAuthDir,
-		cwd: args.mountPath,
+		cwd: args.cwdPath,
 		model: args.modelHandle,
 		customTools: args.customTools,
 		sessionManager,
@@ -8670,6 +8671,61 @@ async function resolvePersistentSessionManager(args) {
 	return SessionManager.continueRecent(args.cwd, args.sessionDir);
 }
 //#endregion
+//#region ../agent-runtime/src/context-bindings.ts
+var PROMPT_SEPARATOR = "\n\n---\n\n";
+/**
+* Resolve `task.input.context[]` into delivered side-effects (skills
+* persisted via `deliver.skill`) and prompt fragments
+* (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
+* built prompt.
+*
+* Per-binding semantics (V1):
+*   - `skill`         → `deliver.skill({ slug, content })` once per ref.
+*                       Slug collisions on distinct contents are
+*                       refused loudly.
+*   - `prompt_prefix` → content appended to `systemPromptPrefix` with
+*                       the canonical `\n\n---\n\n` separator (in
+*                       declared order).
+*   - `user_inline`   → content appended to `userInlineSuffix` in
+*                       declared order, same separator.
+*
+* No fetching, no hashing — bytes are inlined in `ContextRef.content`,
+* and the task's `inputCid` already pins the entire input. The imposer
+* chose these bytes; the resolver just dispatches them.
+*
+* The function is pure with respect to its arguments: file writes are
+* confined to the injected `deliver` callback, which makes the
+* resolver trivial to test.
+*/
+async function resolveTaskContext(args) {
+	const promptParts = [];
+	const userParts = [];
+	const injected = [];
+	const usedSlugs = /* @__PURE__ */ new Map();
+	for (const ref of args.context) {
+		if (ref.binding === "skill") {
+			const prior = usedSlugs.get(ref.slug);
+			if (prior !== void 0) {
+				if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
+				injected.push(ref);
+				continue;
+			}
+			usedSlugs.set(ref.slug, ref.content);
+			await args.deliver.skill({
+				slug: ref.slug,
+				content: ref.content
+			});
+		} else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
+		else userParts.push(ref.content);
+		injected.push(ref);
+	}
+	return {
+		injected,
+		systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
+		userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
+	};
+}
+//#endregion
 //#region ../tasks/src/formats.ts
 /**
 * Register TypeBox string formats used across Task / TaskOutput / task-type
@@ -8841,10 +8897,10 @@ function validateRubricWeights(rubric) {
 * complementary places.
 *
 * Before this envelope existed, criteria were scattered: a vestigial
-* `criteriaCid` column nobody resolved, an `acceptanceCriteria: string[]`
-* field on `fulfill_brief.input` that was "interpreted by the claiming
-* agent," and inline `rubric` / `criteria[]` fields on judgment-task
-* inputs. None of those were machine-verifiable end-to-end.
+* `criteriaCid` column nobody resolved, free-form prose on
+* `fulfill_brief.input`, and inline `rubric` / `criteria[]` fields on
+* judgment-task inputs. None of those were machine-verifiable
+* end-to-end.
 *
 * This module defines a single, content-addressable envelope an imposer
 * attaches to any task type. It has four orthogonal sections — pick
@@ -9140,7 +9196,6 @@ var FULFILL_BRIEF_TYPE = "fulfill_brief";
 var FulfillBriefInput = Type$1.Object({
 	brief: Type$1.String({ minLength: 1 }),
 	title: Type$1.Optional(Type$1.String()),
-	acceptanceCriteria: Type$1.Optional(Type$1.Array(Type$1.String())),
 	successCriteria: Type$1.Optional(SuccessCriteria),
 	seedFiles: Type$1.Optional(Type$1.Array(Type$1.String())),
 	scopeHint: Type$1.Optional(Type$1.String())
@@ -9564,6 +9619,72 @@ async function onCreateJudgeEvalVariant(input, ctx) {
 	}];
 }
 //#endregion
+//#region ../tasks/src/task-types/pr-review.ts
+/** Task-type name for PR review tasks. */
+var PR_REVIEW_TYPE = "pr_review";
+/**
+* What is being reviewed: a required non-empty `title` and `summary`,
+* plus optional lists of non-empty `resourceUrls` and `inspectionHints`.
+* Unknown properties are rejected.
+*/
+var PrReviewSubject = Type$1.Object({
+	title: Type$1.String({ minLength: 1 }),
+	summary: Type$1.String({ minLength: 1 }),
+	resourceUrls: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 }))),
+	inspectionHints: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 })))
+}, {
+	$id: "PrReviewSubject",
+	additionalProperties: false
+});
+/**
+* Input of a `pr_review` task: the `subject`, an optional non-empty
+* `taskPrompt`, and a required `successCriteria` envelope.
+*/
+var PrReviewInput = Type$1.Object({
+	subject: PrReviewSubject,
+	taskPrompt: Type$1.Optional(Type$1.String({ minLength: 1 })),
+	successCriteria: SuccessCriteria
+}, {
+	$id: "PrReviewInput",
+	additionalProperties: false
+});
+/**
+* One per-criterion result: the criterion id, a binary `score`
+* (literal `0` or `1`), and a non-empty `rationale`.
+*/
+var PrReviewScore = Type$1.Object({
+	criterionId: Type$1.String({ minLength: 1 }),
+	score: Type$1.Union([Type$1.Literal(0), Type$1.Literal(1)]),
+	rationale: Type$1.String({ minLength: 1 })
+}, {
+	$id: "PrReviewScore",
+	additionalProperties: false
+});
+/**
+* Output of a `pr_review` task: at least one score, a `composite` in
+* the closed range [0, 1], and a non-empty `verdict`.
+*/
+var PrReviewOutput = Type$1.Object({
+	scores: Type$1.Array(PrReviewScore, { minItems: 1 }),
+	composite: Type$1.Number({
+		minimum: 0,
+		maximum: 1
+	}),
+	verdict: Type$1.String({ minLength: 1 })
+}, {
+	$id: "PrReviewOutput",
+	additionalProperties: false
+});
+function requireBooleanRubric(rubric) {
+	for (const criterion of rubric.criteria) if (criterion.scoring !== "boolean") return `pr_review requires boolean scoring for every rubric criterion; criterion "${criterion.id}" uses "${criterion.scoring}"`;
+	return null;
+}
+function validatePrReviewInput(input) {
+	const sc = input.successCriteria;
+	if (!sc) return "successCriteria is required for judgment tasks";
+	if (!sc.rubric) return "successCriteria.rubric is required for judgment tasks";
+	return validateRubricWeights(sc.rubric) ?? requireBooleanRubric(sc.rubric);
+}
+function validatePrReviewOutput(output, input) {
+	if (!input) return null;
+	const scores = output.scores;
+	const rubric = input.successCriteria.rubric;
+	if (!rubric) return null;
+	if (scores.length !== rubric.criteria.length) return `scores length ${scores.length} does not match rubric criteria length ${rubric.criteria.length}`;
+	let composite = 0;
+	for (let i = 0; i < rubric.criteria.length; i++) {
+		const criterion = rubric.criteria[i];
+		const score = scores[i];
+		if (score.criterionId !== criterion.id) return `scores[${i}] has criterionId "${score.criterionId}" but rubric expects "${criterion.id}" in that position`;
+		composite += criterion.weight * score.score;
+	}
+	const claimed = output.composite;
+	if (Math.abs(claimed - composite) > 1e-6) return `composite ${claimed} does not match weighted sum ${composite.toFixed(6)}`;
+	return null;
+}
+//#endregion
 //#region ../tasks/src/task-types/render-pack.ts
 /**
 * `render_pack` — turn a context pack into a signed rendered artefact.
@@ -9731,6 +9852,18 @@ var BUILT_IN_TASK_TYPES = {
 		validateInput: validateJudgmentInput,
 		validateInputAsync: validateAssessBriefInputAsync
 	},
+	[PR_REVIEW_TYPE]: {
+		name: PR_REVIEW_TYPE,
+		inputSchema: PrReviewInput,
+		outputSchema: PrReviewOutput,
+		outputKind: "judgment",
+		workspaceMode: "dedicated_worktree",
+		workspaceScope: "attempt",
+		sessionScope: "none",
+		requiresReferences: false,
+		validateInput: validatePrReviewInput,
+		validateOutput: validatePrReviewOutput
+	},
 	[CURATE_PACK_TYPE]: {
 		name: CURATE_PACK_TYPE,
 		inputSchema: CuratePackInput,
@@ -10086,133 +10219,6 @@ Type$1.Object({
 	additionalProperties: false
 });
 //#endregion
-//#region ../agent-runtime/src/subagent-output-contracts.ts
-var REGISTRY = /* @__PURE__ */ new Map();
-/**
-* Register a subagent output contract. Idempotent: re-registering the
-* same name with a different schema throws — contracts are meant to
-* be stable. Re-registering with the identical contract object (same
-* reference) is a no-op for HMR and test convenience.
-*
-* Typically called at module-init time alongside task-type
-* registration. See task-types/index.ts in @moltnet/tasks for the
-* conventional pattern.
-*/
-function registerSubagentOutputContract(contract) {
-	if (!contract.name || contract.name.trim().length === 0) throw new Error("subagent output contract name is required");
-	if (!/^[a-z][a-z0-9_]*$/.test(contract.name)) throw new Error(`subagent output contract name '${contract.name}' must be lower_snake_case (starts with a letter, then [a-z0-9_]+)`);
-	const existing = REGISTRY.get(contract.name);
-	if (existing && existing !== contract) {
-		if (existing.parametersSchema !== contract.parametersSchema) throw new Error(`subagent output contract '${contract.name}' is already registered with a different schema; refusing to override`);
-	}
-	REGISTRY.set(contract.name, contract);
-}
-/**
-* Resolve a subagent output contract by name. Returns `null` for
-* unknown names — callers (the subagent custom tool) decide whether
-* that's a tool error the parent LLM can recover from or a hard fail.
-*/
-function getSubagentOutputContract(name) {
-	return REGISTRY.get(name) ?? null;
-}
-/**
-* List all registered contracts. Useful for diagnostics and for the
-* subagent tool's parameter description so a parent LLM can see what
-* contracts are available without enumerating them in its prompt.
-*/
-function listSubagentOutputContracts() {
-	return [...REGISTRY.values()];
-}
-//#endregion
-//#region ../agent-runtime/src/built-in-contract-registrations.ts
-/**
-* Built-in subagent output contracts (#1087, #943).
-*
-* Why this is an exported function and not a module-init side
-* effect:
-*
-*   - The registry is process-global. Module-init registration
-*     fires exactly once per Node process (ESM modules are cached
-*     by URL). Tests that call `__resetSubagentOutputContractsForTests()`
-*     to start from an empty registry have no way to repopulate
-*     the built-ins without re-evaluating the module — which the
-*     cache prevents. PR #1101 review M4.
-*   - An explicit `registerBuiltInSubagentContracts()` lets the
-*     package index call it once at module load AND lets test
-*     setup hooks call it again after `__reset...`.
-*   - `registerSubagentOutputContract` is itself idempotent for
-*     identical re-registrations, so calling this function twice
-*     in the same process is safe.
-*
-* Adding a new built-in: extend the body of this function. Do not
-* call `registerSubagentOutputContract` from anywhere else in the
-* package — keeping all built-ins in one function makes the set
-* auditable.
-*/
-function registerBuiltInSubagentContracts() {
-	registerSubagentOutputContract({
-		name: "judge_eval_variant_result",
-		description: "Per-variant grading result produced by a subagent of judge_eval_variant: scores against the shared rubric, composite, and a 1-3 sentence verdict for a single variant.",
-		parametersSchema: JudgeEvalVariantResult
-	});
-}
-registerBuiltInSubagentContracts();
-//#endregion
-//#region ../agent-runtime/src/context-bindings.ts
-var PROMPT_SEPARATOR = "\n\n---\n\n";
-/**
-* Resolve `task.input.context[]` into delivered side-effects (skills
-* persisted via `deliver.skill`) and prompt fragments
-* (`systemPromptPrefix`, `userInlineSuffix`) the caller weaves into the
-* built prompt.
-*
-* Per-binding semantics (V1):
-*   - `skill`         → `deliver.skill({ slug, content })` once per ref.
-*                       Slug collisions on distinct contents are
-*                       refused loudly.
-*   - `prompt_prefix` → content appended to `systemPromptPrefix` with
-*                       the canonical `\n\n---\n\n` separator (in
-*                       declared order).
-*   - `user_inline`   → content appended to `userInlineSuffix` in
-*                       declared order, same separator.
-*
-* No fetching, no hashing — bytes are inlined in `ContextRef.content`,
-* and the task's `inputCid` already pins the entire input. The imposer
-* chose these bytes; the resolver just dispatches them.
-*
-* The function is pure with respect to its arguments: file writes are
-* confined to the injected `deliver` callback, which makes the
-* resolver trivial to test.
-*/
-async function resolveTaskContext(args) {
-	const promptParts = [];
-	const userParts = [];
-	const injected = [];
-	const usedSlugs = /* @__PURE__ */ new Map();
-	for (const ref of args.context) {
-		if (ref.binding === "skill") {
-			const prior = usedSlugs.get(ref.slug);
-			if (prior !== void 0) {
-				if (prior !== ref.content) throw new Error(`slug collision on '${ref.slug}': two skill entries share the same slug but have different content`);
-				injected.push(ref);
-				continue;
-			}
-			usedSlugs.set(ref.slug, ref.content);
-			await args.deliver.skill({
-				slug: ref.slug,
-				content: ref.content
-			});
-		} else if (ref.binding === "prompt_prefix") promptParts.push(ref.content);
-		else userParts.push(ref.content);
-		injected.push(ref);
-	}
-	return {
-		injected,
-		systemPromptPrefix: promptParts.join(PROMPT_SEPARATOR),
-		userInlineSuffix: userParts.join(PROMPT_SEPARATOR)
-	};
-}
-//#endregion
 //#region ../agent-runtime/src/output-tools.ts
 /**
 * Submit-output tool contract.
@@ -10305,6 +10311,20 @@ function buildFinalOutputBlock(opts) {
 	return lines.join("\n");
 }
 //#endregion
+//#region ../agent-runtime/src/prompts/rubric-common.ts
+function renderRubricCriteriaList(rubric) {
+	return rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
+}
+function renderRubricPreambleSection(rubric) {
+	if (!rubric.preamble) return null;
+	return [
+		"### Rubric preamble",
+		"",
+		rubric.preamble,
+		""
+	].join("\n");
+}
+//#endregion
 //#region ../agent-runtime/src/prompts/assess-brief.ts
 /**
 * Build the first user-message prompt for an `assess_brief` judge attempt.
@@ -10330,13 +10350,8 @@ function buildFinalOutputBlock(opts) {
 */
 function buildAssessBriefUserPrompt(input, ctx) {
 	const rubric = input.successCriteria.rubric;
-	const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
-	const preambleSection = rubric.preamble ? [
-		"### Rubric preamble",
-		"",
-		rubric.preamble,
-		""
-	].join("\n") : "";
+	const criteriaList = renderRubricCriteriaList(rubric);
+	const preambleSection = renderRubricPreambleSection(rubric) ?? "";
 	const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
 		"### Workspace",
 		"",
@@ -10617,13 +10632,7 @@ function buildCuratePackUserPrompt(input, ctx) {
 * is told to inspect them itself.
 */
 function buildFulfillBriefUserPrompt(input, ctx) {
-	const { brief, title, acceptanceCriteria, seedFiles, scopeHint } = input;
-	const criteriaSection = acceptanceCriteria?.length ? [
-		"### Acceptance criteria",
-		"",
-		...acceptanceCriteria.map((c) => `- ${c}`),
-		""
-	].join("\n") : "";
+	const { brief, title, seedFiles, scopeHint } = input;
 	const seedSection = seedFiles?.length ? [
 		"### Seed files",
 		"",
@@ -10671,7 +10680,6 @@ function buildFulfillBriefUserPrompt(input, ctx) {
 		"",
 		brief,
 		"",
-		criteriaSection,
 		seedSection,
 		correlationSection,
 		workspaceSection,
@@ -10811,13 +10819,8 @@ function buildJudgeEvalVariantUserPrompt(input, ctx) {
 function buildJudgePackUserPrompt(input, ctx) {
 	const { renderedPackId, sourcePackId, successCriteria } = input;
 	const rubric = successCriteria.rubric;
-	const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
-	const preambleSection = rubric.preamble ? [
-		"### Rubric preamble",
-		"",
-		rubric.preamble,
-		""
-	].join("\n") : null;
+	const criteriaList = renderRubricCriteriaList(rubric);
+	const preambleSection = renderRubricPreambleSection(rubric);
 	return [
 		"# Judge Pack Agent",
 		"",
@@ -10933,6 +10936,112 @@ function buildJudgePackUserPrompt(input, ctx) {
 	].filter((l) => l !== null).join("\n");
 }
 //#endregion
+//#region ../agent-runtime/src/prompts/pr-review.ts
+function buildPrReviewUserPrompt(input, ctx) {
+	const rubric = input.successCriteria.rubric;
+	const criteriaList = renderRubricCriteriaList(rubric);
+	const preambleSection = renderRubricPreambleSection(rubric);
+	const taskPromptSection = input.taskPrompt ? [
+		"## Task-specific instructions",
+		"",
+		input.taskPrompt,
+		""
+	].join("\n") : "";
+	const resourceSection = input.subject.resourceUrls && input.subject.resourceUrls.length > 0 ? [
+		"### Resources",
+		"",
+		...input.subject.resourceUrls.map((url) => `- ${url}`),
+		""
+	].join("\n") : "";
+	const hintsSection = input.subject.inspectionHints && input.subject.inspectionHints.length > 0 ? [
+		"### Inspection hints",
+		"",
+		...input.subject.inspectionHints.map((hint) => `- ${hint}`),
+		""
+	].join("\n") : "";
+	const workspaceSection = ctx.workspace?.mode === "dedicated_worktree" ? [
+		"### Workspace",
+		"",
+		"This review attempt is running inside a dedicated disposable git",
+		"worktree. Inspect and reason inside this workspace only.",
+		ctx.workspace.branch ? `The current review branch is \`${ctx.workspace.branch}\`.` : "The current checkout is disposable and will be cleaned up when the task ends.",
+		""
+	].join("\n") : "";
+	return [
+		"# Review Agent",
+		"",
+		"You are an independent judge. You did NOT produce the subject under review.",
+		"Assess it strictly against the rubric below and emit a structured judgment.",
+		"You may inspect the local workspace and the referenced resources, but do NOT modify anything.",
+		"",
+		`Your diary ID is: ${ctx.diaryId}`,
+		`This task's id is: ${ctx.taskId}`,
+		"",
+		"## Subject",
+		"",
+		`**Title:** ${input.subject.title}`,
+		"",
+		input.subject.summary,
+		"",
+		resourceSection,
+		hintsSection,
+		workspaceSection,
+		"### Execution contract",
+		"",
+		"Treat the provided subject, resources, inspection hints, and any",
+		"task-specific instructions as the full",
+		"review contract for this task.",
+		"",
+		"If the task-specific instructions or inspection hints require an outward action tied to the review",
+		"(for example publishing the judgment somewhere), perform that action as",
+		"part of the task before reporting structured output.",
+		"",
+		"## Review workflow",
+		"",
+		"1. Read the subject summary, resources, inspection hints, and any",
+		"   task-specific instructions before scoring.",
+		"2. Inspect the target artefact directly using the tools and resources the",
+		"   task makes available.",
+		"3. If you are in a dedicated disposable worktree and need the review target",
+		"   checked out locally, do that work inside this disposable workspace only.",
+		"4. Apply the rubric strictly. This task is about complexity and",
+		"   reviewability, not correctness or feature desirability.",
+		"5. Perform any required outward action before emitting the final",
+		"   structured output.",
+		"",
+		taskPromptSection,
+		preambleSection,
+		"## Criteria",
+		"",
+		criteriaList,
+		"",
+		"### Scoring rules",
+		"",
+		"- Every criterion uses binary scoring only.",
+		"- Score `1` when the subject clearly clears the criterion.",
+		"- Score `0` when it does not, or when the evidence is ambiguous.",
+		"- `rationale` is REQUIRED for every score. Keep it concrete and audit-friendly.",
+		"- Compute `composite = Σ(weight_i × score_i)` exactly; the runtime rejects mismatches.",
+		"",
+		"Write a signed diary entry (tags: `judgment`, `pr_review`) capturing the rationale before reporting structured output.",
+		"",
+		buildFinalOutputBlock({
+			taskType: "pr_review",
+			outputSchemaName: "PrReviewOutput",
+			shapeSketch: [
+				"{",
+				"  \"scores\": [",
+				"    { \"criterionId\": \"...\", \"score\": 0, \"rationale\": \"...\" }",
+				"  ],",
+				"  \"composite\": <sum-of-weighted-binary-scores>,",
+				"  \"verdict\": \"<1-3 sentence overall>\"",
+				"}"
+			].join("\n"),
+			extraNotes: ["`scores` MUST stay in the same order as the rubric criteria.", "`score` MUST be exactly `0` or `1` for every criterion."]
+		})
+	].filter(Boolean).join("\n");
+}
+//#endregion
 //#region ../agent-runtime/src/prompts/render-pack.ts
 /**
 * Build the first user-message prompt for a `render_pack` task. Almost mechanical:
@@ -11115,6 +11224,16 @@ function buildTaskUserPrompt(task, ctx) {
 				diaryId: ctx.diaryId,
 				taskId: ctx.taskId
 			});
+		case PR_REVIEW_TYPE:
+			if (!Value.Check(PrReviewInput, task.input)) {
+				const errors = [...Value.Errors(PrReviewInput, task.input)];
+				throw new Error(`pr_review input failed validation: ${JSON.stringify(errors.slice(0, 3))}`);
+			}
+			return buildPrReviewUserPrompt(task.input, {
+				diaryId: ctx.diaryId,
+				taskId: ctx.taskId,
+				workspace: ctx.workspace
+			});
 		case JUDGE_EVAL_VARIANT_TYPE:
 			if (!Value.Check(JudgeEvalVariantInput, task.input)) {
 				const errors = [...Value.Errors(JudgeEvalVariantInput, task.input)];
@@ -14838,6 +14957,7 @@ var DEFAULT_SUBAGENT_TIMEOUT_MS = 300 * 1e3;
 */
 function createSubagentTool(args) {
 	const buildSession = args.buildAgentSession ?? buildAgentSession;
+	const { contractRegistry } = args;
 	let callCount = 0;
 	return {
 		tool: defineTool({
@@ -14848,8 +14968,8 @@ function createSubagentTool(args) {
 			async execute(_id, params) {
 				if (!Value.Check(SubagentToolParameters, params)) return toolError(`subagent: invalid parameters: ${JSON.stringify([...Value.Errors(SubagentToolParameters, params)].slice(0, 3))}`);
 				const { task, output_schema } = params;
-				const contract = getSubagentOutputContract(output_schema);
-				if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${listSubagentOutputContracts().map((c) => c.name).join(", ")}]`);
+				const contract = contractRegistry.get(output_schema);
+				if (!contract) return toolError(`subagent: unknown output_schema "${output_schema}". Registered contracts: [${contractRegistry.list().map((c) => c.name).join(", ")}]`);
 				callCount += 1;
 				const callIndex = callCount;
 				let captured = null;
@@ -14879,6 +14999,7 @@ function createSubagentTool(args) {
 				});
 				const session = await buildSession({
 					mountPath: args.mountPath,
+					cwdPath: args.cwdPath ?? args.mountPath,
 					piAuthDir: args.piAuthDir,
 					modelHandle: args.modelHandle,
 					agentName: args.agentName,
@@ -15219,6 +15340,7 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
 	const branch = executionPlan?.worktreeBranch ?? null;
 	if (!branch) return {
 		mountPath: requestedMountPath,
+		cwdPath: requestedMountPath,
 		mode: "shared_mount",
 		branch: null,
 		cleanup: () => {}
@@ -15226,7 +15348,7 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
 	const mainRepo = findMainWorktree();
 	const worktreeDir = resolveTaskWorktreePath(mainRepo, executionPlan?.workspaceId ?? `task-${task.id}`);
 	const relMount = relative(mainRepo, requestedMountPath);
-	const mountPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
+	const cwdPath = relMount === "" || relMount.startsWith("..") ? worktreeDir : join(worktreeDir, relMount);
 	const keepWorkspace = executionPlan?.workspaceScope === "session" && executionPlan.sessionKey !== null;
 	if (keepWorkspace) ensureReusableTaskWorktree(mainRepo, worktreeDir, branch);
 	else {
@@ -15234,7 +15356,8 @@ function prepareTaskWorkspace(task, requestedMountPath, executionPlan) {
 		addTaskWorktree(mainRepo, worktreeDir, branch);
 	}
 	return {
-		mountPath,
+		mountPath: mainRepo,
+		cwdPath,
 		mode: "dedicated_worktree",
 		branch,
 		cleanup: keepWorkspace ? () => {} : () => {
@@ -15347,15 +15470,24 @@ var noopTurnEventHandler = () => {};
 function createPiTaskExecutor(opts) {
 	let cachedCheckpoint = opts.checkpointPath ?? null;
 	return async (claimedTask, reporter) => {
-		if (!cachedCheckpoint) cachedCheckpoint = await ensureSnapshot({
-			config: opts.sandboxConfig?.snapshot,
-			onProgress: opts.onSnapshotProgress ?? ((m) => {
-				process.stderr.write(`[snapshot] ${m}\n`);
-			})
+		const reporterWasOpened = !reporter.cancelSignal.aborted;
+		if (reporterWasOpened) await reporter.open({
+			taskId: claimedTask.task.id,
+			attemptN: claimedTask.attemptN
 		});
 		return executePiTask(claimedTask, reporter, {
 			...opts,
-			checkpointPath: cachedCheckpoint
+			checkpointPath: cachedCheckpoint ?? void 0,
+			resolveCheckpointPath: async () => {
+				if (!cachedCheckpoint) cachedCheckpoint = await ensureSnapshot({
+					config: opts.sandboxConfig?.snapshot,
+					onProgress: opts.onSnapshotProgress ?? ((m) => {
+						process.stderr.write(`[snapshot] ${m}\n`);
+					})
+				});
+				return cachedCheckpoint;
+			},
+			reporterAlreadyOpened: reporterWasOpened
 		});
 	};
 }
@@ -15371,8 +15503,9 @@ async function executePiTask(claimedTask, reporter, opts) {
 	const startTime = Date.now();
 	const requestedMountPath = opts.mountPath ?? process.cwd();
 	const executionPlan = opts.makeExecutionPlan?.(claimedTask) ?? null;
-	const workspace = prepareTaskWorkspace(task, requestedMountPath, executionPlan);
-	const mountPath = workspace.mountPath;
+	let workspace = null;
+	let mountPath = requestedMountPath;
+	let cwdPath = requestedMountPath;
 	if (reporter.cancelSignal.aborted) return {
 		taskId: task.id,
 		attemptN,
@@ -15387,33 +15520,8 @@ async function executePiTask(claimedTask, reporter, opts) {
 			retryable: false
 		}
 	};
-	const checkpointPath = opts.checkpointPath ?? await ensureSnapshot({
-		config: opts.sandboxConfig?.snapshot,
-		onProgress: opts.onSnapshotProgress ?? ((m) => {
-			process.stderr.write(`[snapshot] ${m}\n`);
-		})
-	});
-	const mainRepoForRepair = findMainWorktree();
-	try {
-		execFileSync("git", [
-			"-C",
-			mainRepoForRepair,
-			"worktree",
-			"repair",
-			"--relative-paths"
-		], { stdio: "pipe" });
-	} catch {}
+	let reporterOpen = opts.reporterAlreadyOpened ?? false;
 	let managed = null;
-	managed = await resumeVm({
-		checkpointPath,
-		agentName: opts.agentName,
-		mountPath,
-		extraAllowedHosts: opts.extraAllowedHosts,
-		sandboxConfig: opts.sandboxConfig
-	});
-	const diaryId = task.diaryId ?? "";
-	const taskTeamId = task.teamId ?? "";
-	let reporterOpen = false;
 	let session = null;
 	let subagentHandle = null;
 	const finalUsage = emptyUsage(opts.provider, opts.model);
@@ -15432,41 +15540,103 @@ async function executePiTask(claimedTask, reporter, opts) {
 			retryable: false
 		}
 	});
+	let onTurnEvent;
+	if (opts.makeOnTurnEvent) try {
+		onTurnEvent = opts.makeOnTurnEvent(claimedTask);
+	} catch (err) {
+		process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
+		onTurnEvent = noopTurnEventHandler;
+	}
+	else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
+	const emit = (kind, payload) => {
+		try {
+			onTurnEvent(kind, summarizePayloadForLog(kind, payload));
+		} catch (err) {
+			process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
+		}
+		return reporter.record({
+			kind,
+			payload
+		});
+	};
+	const emitError = async (phase, message, extra = {}) => {
+		await emit("error", {
+			phase,
+			message,
+			...extra
+		});
+	};
 	try {
-		const mainRepo = findMainWorktree();
-		activateAgentEnv(managed.credentials.agentEnv, mainRepo);
-		await reporter.open({
+		if (!opts.reporterAlreadyOpened) await reporter.open({
 			taskId: task.id,
 			attemptN
 		});
 		reporterOpen = true;
-		let onTurnEvent;
-		if (opts.makeOnTurnEvent) try {
-			onTurnEvent = opts.makeOnTurnEvent(claimedTask);
+		let checkpointPath;
+		try {
+			checkpointPath = opts.checkpointPath ?? (opts.resolveCheckpointPath ? await opts.resolveCheckpointPath() : await ensureSnapshot({
+				config: opts.sandboxConfig?.snapshot,
+				onProgress: opts.onSnapshotProgress ?? ((m) => {
+					process.stderr.write(`[snapshot] ${m}\n`);
+				})
+			}));
+		} catch (err) {
+			const message = err instanceof Error ? err.message : String(err);
+			await emitError("snapshot", message);
+			return makeFailedOutput("snapshot_failed", message);
+		}
+		try {
+			workspace = prepareTaskWorkspace(task, requestedMountPath, executionPlan);
+			mountPath = workspace.mountPath;
+			cwdPath = workspace.cwdPath;
 		} catch (err) {
-			process.stderr.write(`[emit] makeOnTurnEvent threw: ${err instanceof Error ? err.message : String(err)}\n`);
-			onTurnEvent = noopTurnEventHandler;
+			const message = err instanceof Error ? err.message : String(err);
+			await emitError("worktree_setup", message);
+			return makeFailedOutput("worktree_setup_failed", message);
 		}
-		else onTurnEvent = opts.onTurnEvent ?? noopTurnEventHandler;
-		const emit = (kind, payload) => {
+		try {
+			const mainRepoForRepair = findMainWorktree();
 			try {
-				onTurnEvent(kind, summarizePayloadForLog(kind, payload));
-			} catch (err) {
-				process.stderr.write(`[emit] onTurnEvent threw for kind="${kind}": ${err instanceof Error ? err.message : String(err)}\n`);
-			}
-			return reporter.record({
-				kind,
-				payload
+				execFileSync("git", [
+					"-C",
+					mainRepoForRepair,
+					"worktree",
+					"repair",
+					"--relative-paths"
+				], { stdio: "pipe" });
+			} catch {}
+		} catch (err) {
+			const message = err instanceof Error ? err.message : String(err);
+			await emitError("worktree_setup", message);
+			return makeFailedOutput("worktree_setup_failed", message);
+		}
+		try {
+			managed = await resumeVm({
+				checkpointPath,
+				agentName: opts.agentName,
+				mountPath,
+				extraAllowedHosts: opts.extraAllowedHosts,
+				sandboxConfig: opts.sandboxConfig
 			});
-		};
+		} catch (err) {
+			const message = err instanceof Error ? err.message : String(err);
+			await emitError("vm_resume", message);
+			return makeFailedOutput("vm_resume_failed", message);
+		}
+		const diaryId = task.diaryId ?? "";
+		const taskTeamId = task.teamId ?? "";
+		const mainRepo = findMainWorktree();
+		activateAgentEnv(managed.credentials.agentEnv, mainRepo);
+		const activeWorkspace = workspace;
+		if (!activeWorkspace) throw new Error("task workspace not prepared");
 		await emit("info", {
 			event: "execute_start",
 			taskType: task.taskType,
 			teamId: task.teamId,
 			provider: opts.provider,
 			model: opts.model,
-			workspaceMode: workspace.mode,
-			workspaceBranch: workspace.branch
+			workspaceMode: activeWorkspace.mode,
+			workspaceBranch: activeWorkspace.branch
 		});
 		let taskPrompt;
 		try {
@@ -15474,8 +15644,8 @@ async function executePiTask(claimedTask, reporter, opts) {
 				diaryId,
 				taskId: task.id,
 				workspace: {
-					mode: workspace.mode,
-					branch: workspace.branch
+					mode: activeWorkspace.mode,
+					branch: activeWorkspace.branch
 				},
 				extras: opts.promptExtras
 			});
@@ -15527,7 +15697,7 @@ async function executePiTask(claimedTask, reporter, opts) {
 				getTeamId: () => taskTeamId,
 				getSessionErrors: () => [],
 				clearSessionErrors: () => {},
-				getHostCwd: () => mountPath,
+				getHostCwd: () => cwdPath,
 				hostExecBaseEnv: new Set([...HOST_EXEC_DEFAULT_BASE_ENV, ...Object.keys(managed.credentials.agentEnv)]),
 				hostExecAutoApprove: opts.hostExecAutoApprove ?? opts.sandboxConfig?.hostExec?.autoApprove ?? false,
 				getTaskContext: () => ({
@@ -15555,6 +15725,7 @@ async function executePiTask(claimedTask, reporter, opts) {
 			if (taskTypeUsesSubagents(task.taskType)) {
 				subagentHandle = createSubagentTool({
 					mountPath,
+					cwdPath,
 					piAuthDir,
 					modelHandle,
 					agentName: opts.agentName,
@@ -15563,12 +15734,14 @@ async function executePiTask(claimedTask, reporter, opts) {
 					parentTaskId: task.id,
 					parentTaskType: task.taskType,
 					parentAttemptN: attemptN,
+					contractRegistry: opts.subagentContractRegistry,
 					parentCancelSignal: reporter.cancelSignal
 				});
 				parentSubagentTools.push(subagentHandle.tool);
 			}
 			session = await buildAgentSession({
 				mountPath,
+				cwdPath,
 				piAuthDir,
 				modelHandle,
 				agentName: opts.agentName,
@@ -15653,6 +15826,10 @@ async function executePiTask(claimedTask, reporter, opts) {
 					is_error: event.isError,
 					result: event.isError ? truncateForWire(event.result) : void 0
 				}));
+				if (event.isError) track(emitError("tool_call_error", describeToolErrorMessage(event.result), {
+					tool: event.toolName,
+					result: truncateForWire(event.result)
+				}));
 				if (maxBashTimeouts > 0 && event.toolName === "bash" && event.isError && isBashTimeoutResult(event.result)) {
 					bashTimeoutCount += 1;
 					if (bashTimeoutCount >= maxBashTimeouts) triggerCapAbort("max_bash_timeouts_exceeded", `Aborted after ${bashTimeoutCount} bash timeouts in this attempt (cap ${maxBashTimeouts}).`);
@@ -15808,7 +15985,7 @@ async function executePiTask(claimedTask, reporter, opts) {
 			}
 		}
 		if (managed) await managed.vm.close();
-		try {
+		if (workspace) try {
 			workspace.cleanup();
 		} catch (err) {
 			const detail = err instanceof Error ? err.message : String(err);
@@ -15919,6 +16096,23 @@ function truncateForWire(value) {
 		};
 	}
 }
+function describeToolErrorMessage(result) {
+	if (typeof result === "string" && result.trim().length > 0) return result.trim();
+	if (result && typeof result === "object") {
+		const content = result.content;
+		if (Array.isArray(content)) {
+			for (const item of content) if (item && typeof item === "object" && typeof item.text === "string") {
+				const text = item.text.trim();
+				if (text.length > 0) return text;
+			}
+		}
+	}
+	try {
+		return JSON.stringify(truncateForWire(result));
+	} catch {
+		return "Tool call failed";
+	}
+}
 //#endregion
 //#region src/index.ts
 /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@themoltnet/pi-extension",
-  "version": "0.17.0",
+  "version": "0.18.1",
   "type": "module",
   "description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
   "license": "MIT",
@@ -31,8 +31,8 @@
     "@earendil-works/gondolin": "^0.9.1",
     "@opentelemetry/api": "^1.9.0",
     "@sinclair/typebox": "^0.34.0",
-    "@themoltnet/sdk": "0.102.0",
-    "@themoltnet/agent-runtime": "0.15.1"
+    "@themoltnet/agent-runtime": "0.16.0",
+    "@themoltnet/sdk": "0.102.0"
   },
   "peerDependencies": {
     "@earendil-works/pi-coding-agent": ">=0.74.0",