npm - @themoltnet/pi-extension - Versions diffs - 0.10.0 → 0.11.0 - Mend

@themoltnet/pi-extension 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.js CHANGED Viewed

@@ -7,11 +7,10 @@ import { createHash } from "node:crypto";
 import crypto, { createHash as createHash$1 } from "crypto";
 import { readFile } from "node:fs/promises";
 import { homedir } from "node:os";
-import { Type, complete, getModel } from "@mariozechner/pi-ai";
+import { Type, getModel } from "@mariozechner/pi-ai";
 import { RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
 import { parseEnv } from "node:util";
-import { fileURLToPath } from "node:url";
-import { SpanStatusCode, context, trace } from "@opentelemetry/api";
+import { SpanStatusCode, context, metrics, trace } from "@opentelemetry/api";
 import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
 import { Value } from "@sinclair/typebox/value";
 //#region \0rolldown/runtime.js
@@ -3848,7 +3847,7 @@ var cidSymbol = Symbol.for("@ipld/js-cid/CID");
 * naturally prevents field delimiter collision.
 */
 /** SHA-256 multicodec code per multihash table */
-var SHA2_256_CODE$1 = 18;
+var SHA2_256_CODE = 18;
 /**
 * Build the canonical JSON input for content hashing.
 *
@@ -3880,7 +3879,7 @@ function computeCanonicalHash(entryType, title, content, tags) {
 * Example output: "bafkreig..."
 */
 function computeContentCid(entryType, title, content, tags) {
-	const digest = create(SHA2_256_CODE$1, computeCanonicalHash(entryType, title, content, tags));
+	const digest = create(SHA2_256_CODE, computeCanonicalHash(entryType, title, content, tags));
 	return CID.createV1(85, digest).toString(base32);
 }
 var { p: P, n: N, Gx, Gy, a: _a, d: _d } = {
@@ -7135,159 +7134,6 @@ var registerSandboxCommand = (pi, state) => {
 	});
 };
 //#endregion
-//#region src/moltnet/judge/assets.ts
-/** Default fidelity rubric — kept verbatim from the Go judge. */
-var DEFAULT_RUBRIC = `Evaluate the rendered content against the source entries on three axes:
-COVERAGE (0.0-1.0):
-- Identify each distinct topic/fact in the source entries
-- Check if each is represented in the rendered content
-- Score = (represented topics) / (total source topics)
-- A topic can be restructured or summarized but must be present
-GROUNDING (0.0-1.0):
-- Identify each distinct claim/fact in the rendered content
-- Check if each is traceable to a specific source entry
-- Score = (grounded claims) / (total rendered claims)
-- Restructured content is fine if the underlying fact comes from a source
-FAITHFULNESS (0.0-1.0):
-- For content that IS represented, check semantic accuracy
-- Is the meaning preserved? Any distortions, inversions, or misquotes?
-- Score = (accurate representations) / (total representations)
-- Summarization is fine; misrepresentation is not
-`;
-/** Judge system prompt — kept verbatim from the Go judge signature. */
-var JUDGE_SYSTEM_PROMPT = `You are a fidelity judge for rendered context packs. Your job is to evaluate
-whether a rendered markdown document faithfully represents its source entries.
-Score each axis independently and precisely. Be critical — the purpose is to
-catch content drift, hallucination, and cherry-picking.
-You will be given three inputs:
-1. \`source_entries\` — the original source entries from the context pack, in
-   markdown format.
-2. \`rendered_content\` — the agent-rendered markdown derived from the source
-   entries.
-3. \`rubric\` — the fidelity scoring rubric with criteria definitions.
-Return a JSON object matching the requested schema with these fields:
-- \`coverage\` (number, 0.0–1.0): fraction of source entries represented in
-  rendered content. 1.0 means all source entries are covered.
-- \`grounding\` (number, 0.0–1.0): fraction of rendered content traceable to
-  source entries. 1.0 means everything comes from sources.
-- \`faithfulness\` (number, 0.0–1.0): semantic accuracy of represented content.
-  1.0 means source content is accurately represented.
-- \`reasoning\` (string): detailed step-by-step analysis explaining each score.
-Respond with ONLY a single JSON object. No prose before or after.
-`;
-//#endregion
-//#region src/moltnet/judge/fidelity.ts
-/**
-* Pi-native port of the Go fidelity judge
-* (libs/dspy-adapters/fidelity/fidelity.go).
-*
-* Same inputs (source_entries, rendered_content, rubric), same outputs
-* (coverage, grounding, faithfulness, reasoning). Uses pi-ai `complete()`
-* instead of dspy-go; no process-global state.
-*/
-var JSON_FENCE_RE = /```(?:json)?\s*([\s\S]*?)```/i;
-function extractJson(text) {
-	const fenceMatch = text.match(JSON_FENCE_RE);
-	if (fenceMatch && fenceMatch[1]) return fenceMatch[1].trim();
-	const firstBrace = text.indexOf("{");
-	const lastBrace = text.lastIndexOf("}");
-	if (firstBrace >= 0 && lastBrace > firstBrace) return text.slice(firstBrace, lastBrace + 1);
-	return text.trim();
-}
-function clamp01(value) {
-	const n = typeof value === "number" ? value : Number(value);
-	if (!Number.isFinite(n)) return 0;
-	if (n < 0) return 0;
-	if (n > 1) return 1;
-	return n;
-}
-function coerceString(value) {
-	if (typeof value === "string") return value;
-	if (value === null || value === void 0) return "";
-	if (typeof value === "number" || typeof value === "boolean") return String(value);
-	try {
-		return JSON.stringify(value);
-	} catch {
-		return "";
-	}
-}
-function parseScores(raw) {
-	const jsonText = extractJson(raw);
-	let parsed;
-	try {
-		parsed = JSON.parse(jsonText);
-	} catch (err) {
-		throw new Error(`judge returned an invalid structured response: ${err.message}\n---raw---\n${raw}`);
-	}
-	const coverage = clamp01(parsed.coverage);
-	const grounding = clamp01(parsed.grounding);
-	const faithfulness = clamp01(parsed.faithfulness);
-	const reasoning = coerceString(parsed.reasoning);
-	return {
-		coverage,
-		grounding,
-		faithfulness,
-		composite: (coverage + grounding + faithfulness) / 3,
-		reasoning
-	};
-}
-function buildUserMessage(sourceEntries, renderedContent, rubric) {
-	return [
-		"## Rubric",
-		rubric,
-		"",
-		"## Source entries",
-		sourceEntries,
-		"",
-		"## Rendered content",
-		renderedContent,
-		"",
-		"Produce the JSON object now."
-	].join("\n");
-}
-/**
-* Run the fidelity judge via pi-ai `complete()`. Mirrors `fidelity.Run` in
-* libs/dspy-adapters/fidelity/fidelity.go.
-*/
-async function runFidelityJudge(req, options = {}) {
-	const rubric = req.rubric?.trim() ? req.rubric : DEFAULT_RUBRIC;
-	const userPrompt = buildUserMessage(req.sourceEntries, req.renderedContent, rubric);
-	const message = await complete(req.model, {
-		systemPrompt: JUDGE_SYSTEM_PROMPT,
-		messages: [{
-			role: "user",
-			content: userPrompt,
-			timestamp: Date.now()
-		}]
-	}, options.signal ? { signal: options.signal } : void 0);
-	if (message.stopReason === "error" || message.stopReason === "aborted") throw new Error(`judge failed: ${message.errorMessage ?? message.stopReason}`);
-	const textContent = message.content.filter((c) => c.type === "text" && typeof c.text === "string").map((c) => c.text).join("\n").trim();
-	if (!textContent) throw new Error("judge returned empty response");
-	return parseScores(textContent);
-}
-/**
-* Build a stable markdown blob of source entries for the judge prompt.
-* Mirrors `buildSourceEntriesFromPack` / `buildSourceEntriesMarkdown` in the
-* Go CLI so that local and proctored modes produce the same input shape.
-*/
-function buildSourceEntriesMarkdown(entries) {
-	const parts = [];
-	for (const entry of entries) {
-		const title = entry.title?.trim() || "Untitled";
-		parts.push(`## ${title}\n${entry.content}\n`);
-	}
-	return parts.join("\n");
-}
-//#endregion
 //#region src/moltnet/render-phase6.ts
 function slugToTitle(value) {
 	return value.split(/[:/_-]+/).filter(Boolean).map((part) => part[0]?.toUpperCase() + part.slice(1)).join(" ");
@@ -7434,6 +7280,21 @@ function ensureConnected(config) {
 	};
 }
 /**
+* Expand the `taskFilter` shorthand on the diary list/search tools into
+* the matching `task:*` provenance tags emitted by `moltnet_create_entry`
+* during a task. Returning an array (possibly empty) lets callers spread
+* it into a larger `tags` AND-filter without conditionals.
+*/
+function compileTaskFilterTags(filter) {
+	if (!filter) return [];
+	const tags = [];
+	if (filter.taskId) tags.push(`task:id:${filter.taskId}`);
+	if (filter.taskType) tags.push(`task:type:${filter.taskType}`);
+	if (filter.correlationId) tags.push(`task:correlation:${filter.correlationId}`);
+	if (typeof filter.attemptN === "number") tags.push(`task:attempt:${filter.attemptN}`);
+	return tags;
+}
+/**
 * Create all MoltNet tool definitions, ready to pass to `pi.registerTool()`.
 */
 function createMoltNetTools(config) {
@@ -7596,122 +7457,6 @@ function createMoltNetTools(config) {
 			};
 		}
 	});
-	const createJudgePackTask = defineTool({
-		name: "moltnet_judge_pack_task_create",
-		label: "Create Judge Pack Task",
-		description: "Create a judge_pack task for a rendered pack. Returns a taskId that moltnet_rendered_pack_judge can claim and execute. The rubric is required — pass the structured rubric JSON from @moltnet/tasks Rubric schema.",
-		parameters: Type.Object({
-			renderedPackId: Type.String({ description: "Rendered pack ID to judge" }),
-			sourcePackId: Type.String({ description: "Source pack ID. Fetch it from the rendered pack if unknown." }),
-			rubric: Type.Any({ description: "Structured rubric object (Rubric schema from @moltnet/tasks). Must have rubricId, version, criteria[]." }),
-			diaryId: Type.Optional(Type.String({ description: "Diary ID to impose the task on. Defaults to the connected diary." }))
-		}),
-		async execute(_id, params) {
-			const { agent, diaryId: connectedDiaryId, teamId: connectedTeamId } = ensureConnected(config);
-			const task = await agent.tasks.create({
-				taskType: "judge_pack",
-				input: {
-					renderedPackId: params.renderedPackId,
-					sourcePackId: params.sourcePackId,
-					rubric: params.rubric
-				},
-				diaryId: params.diaryId ?? connectedDiaryId,
-				teamId: connectedTeamId
-			});
-			return {
-				content: [{
-					type: "text",
-					text: JSON.stringify({
-						taskId: task.id,
-						task
-					}, null, 2)
-				}],
-				details: {}
-			};
-		}
-	});
-	const judgeRenderedPack = defineTool({
-		name: "moltnet_rendered_pack_judge",
-		label: "Judge MoltNet Rendered Pack",
-		description: "Claim a judge_pack task, run the fidelity judge locally, complete the task with structured scores, and set verifiedTaskId on the rendered pack. Create the task first with moltnet_judge_pack_task_create.",
-		parameters: Type.Object({
-			taskId: Type.String({ description: "judge_pack task ID from moltnet_judge_pack_task_create" }),
-			rubricOverride: Type.Optional(Type.String({ description: "Freeform rubric string override for the LLM judge prompt. When omitted the task rubric preamble (or built-in default) is used." }))
-		}),
-		async execute(_id, params, _signal, _onUpdate, ctx) {
-			const { agent } = ensureConnected(config);
-			const model = ctx?.model;
-			if (!model) throw new Error("No active model in pi session — cannot run the fidelity judge.");
-			const claimed = await agent.tasks.claim(params.taskId);
-			const input = claimed.task.input;
-			const rendered = await agent.packs.getRendered(input.renderedPackId);
-			if (!rendered.content?.trim()) throw new Error(`rendered pack ${input.renderedPackId} has empty content`);
-			const sourcePack = await agent.packs.get(input.sourcePackId, { expand: "entries" });
-			if (!sourcePack.entries || sourcePack.entries.length === 0) throw new Error(`source pack ${input.sourcePackId} has no entries`);
-			const sourceEntriesMd = buildSourceEntriesMarkdown(sourcePack.entries.map((entry) => ({
-				title: entry.entry.title,
-				content: entry.entry.content
-			})));
-			const rubric = params.rubricOverride?.trim() || input.rubric?.preamble?.trim() || DEFAULT_RUBRIC;
-			let scores;
-			try {
-				scores = await runFidelityJudge({
-					model,
-					sourceEntries: sourceEntriesMd,
-					renderedContent: rendered.content,
-					rubric
-				});
-			} catch (err) {
-				await agent.tasks.fail(params.taskId, claimed.attempt.attemptN, { error: {
-					code: "judge_failed",
-					message: err.message ?? String(err)
-				} }).catch(() => {});
-				throw new Error(`judge failed: ${err.message ?? String(err)}`);
-			}
-			const modelId = model.provider && model.id ? `${model.provider}:${model.id}` : model.id ?? "pi:unknown";
-			const output = {
-				scores: [
-					{
-						criterionId: "coverage",
-						score: scores.coverage
-					},
-					{
-						criterionId: "grounding",
-						score: scores.grounding
-					},
-					{
-						criterionId: "faithfulness",
-						score: scores.faithfulness
-					}
-				],
-				composite: scores.composite,
-				verdict: scores.reasoning,
-				judgeModel: modelId
-			};
-			const outputCid = await computeJsonCid(output);
-			const completed = await agent.tasks.complete(params.taskId, claimed.attempt.attemptN, {
-				output,
-				outputCid,
-				usage: {
-					inputTokens: 0,
-					outputTokens: 0
-				}
-			});
-			await agent.packs.updateRendered(input.renderedPackId, { verifiedTaskId: params.taskId });
-			return {
-				content: [{
-					type: "text",
-					text: JSON.stringify({
-						renderedPackId: input.renderedPackId,
-						taskId: params.taskId,
-						scores,
-						task: completed
-					}, null, 2)
-				}],
-				details: {}
-			};
-		}
-	});
 	const diaryTags = defineTool({
 		name: "moltnet_diary_tags",
 		label: "List MoltNet Diary Tags",
@@ -7747,12 +7492,32 @@ function createMoltNetTools(config) {
 	const listEntries = defineTool({
 		name: "moltnet_list_entries",
 		label: "List MoltNet Diary Entries",
-		description: "List entries from the MoltNet diary. When `entryIds` is provided, batch-fetches those specific entries (max 50) and returns full fields including entryType, contentSignature, and contentHash for signature checks. Otherwise returns recent entries with a content preview.",
+		description: "List entries from the MoltNet diary. When `entryIds` is provided, batch-fetches those specific entries (max 50) and returns full fields including entryType, contentSignature, and contentHash for signature checks. Otherwise returns recent entries with a content preview, filtered by any combination of tags (AND), excludeTags (NONE), entryType, and the taskFilter shorthand which expands into the right `task:*` tags.",
 		parameters: Type.Object({
 			limit: Type.Optional(Type.Number({ description: "Max entries to return (default 10)" })),
-			tag: Type.Optional(Type.String({ description: "Filter by tag (optional)" })),
+			tags: Type.Optional(Type.Array(Type.String({
+				minLength: 1,
+				maxLength: 50
+			}), {
+				description: "Tags filter — entry must have ALL listed tags (AND). Max 20.",
+				maxItems: 20
+			})),
+			excludeTags: Type.Optional(Type.Array(Type.String({
+				minLength: 1,
+				maxLength: 50
+			}), {
+				description: "Tags to exclude — entry must have NONE of these. Max 20.",
+				maxItems: 20
+			})),
+			entryType: Type.Optional(Type.String({ description: "Filter by entry type (procedural, semantic, episodic, reflection, identity, soul)." })),
+			taskFilter: Type.Optional(Type.Object({
+				taskId: Type.Optional(Type.String()),
+				taskType: Type.Optional(Type.String()),
+				correlationId: Type.Optional(Type.String()),
+				attemptN: Type.Optional(Type.Number())
+			}, { description: "Shorthand: any combination compiles to the matching task:* tags (task:id:<id>, task:type:<type>, task:correlation:<id>, task:attempt:<n>) and is merged into the tags filter." })),
 			entryIds: Type.Optional(Type.Array(Type.String(), {
-				description: "Batch-fetch specific entries by UUID (max 50). Overrides `limit` and `tag` for selection.",
+				description: "Batch-fetch specific entries by UUID (max 50). Overrides every other filter.",
 				maxItems: 50
 			}))
 		}),
@@ -7766,7 +7531,11 @@ function createMoltNetTools(config) {
 			if (batchMode) query.ids = params.entryIds;
 			else {
 				query.limit = params.limit ?? 10;
-				if (params.tag) query.tag = params.tag;
+				const expandedTags = compileTaskFilterTags(params.taskFilter);
+				const allTags = [...params.tags ?? [], ...expandedTags];
+				if (allTags.length) query.tags = allTags;
+				if (params.excludeTags?.length) query.excludeTags = params.excludeTags;
+				if (params.entryType) query.entryType = params.entryType;
 			}
 			const entries = await agent.entries.list(diaryId, query);
 			return {
@@ -7822,17 +7591,46 @@ function createMoltNetTools(config) {
 	const searchEntries = defineTool({
 		name: "moltnet_search_entries",
 		label: "Search MoltNet Diary Entries",
-		description: "Search diary entries by semantic query. Uses vector similarity to find relevant entries.",
+		description: "Hybrid (semantic + lexical) search over diary entries. Optional tags / excludeTags / entryTypes filters AND with the query; the taskFilter shorthand expands into task:* provenance tags so `taskFilter: { taskType: \"fulfill_brief\" }` returns only entries from fulfill_brief attempts. Filters apply server-side before ranking.",
 		parameters: Type.Object({
 			query: Type.String({ description: "Natural language search query" }),
-			limit: Type.Optional(Type.Number({ description: "Max results (default 5)" }))
+			limit: Type.Optional(Type.Number({ description: "Max results (default 5)" })),
+			tags: Type.Optional(Type.Array(Type.String({
+				minLength: 1,
+				maxLength: 50
+			}), {
+				description: "Entry must have ALL listed tags (AND). Max 20.",
+				maxItems: 20
+			})),
+			excludeTags: Type.Optional(Type.Array(Type.String({
+				minLength: 1,
+				maxLength: 50
+			}), {
+				description: "Entry must have NONE of these tags. Max 20.",
+				maxItems: 20
+			})),
+			entryTypes: Type.Optional(Type.Array(Type.String(), {
+				description: "Restrict to these entry types (procedural, semantic, episodic, reflection, identity, soul). Max 6.",
+				maxItems: 6
+			})),
+			taskFilter: Type.Optional(Type.Object({
+				taskId: Type.Optional(Type.String()),
+				taskType: Type.Optional(Type.String()),
+				correlationId: Type.Optional(Type.String()),
+				attemptN: Type.Optional(Type.Number())
+			}, { description: "Shorthand: any combination compiles to the matching task:* tags and is merged into the tags filter." }))
 		}),
 		async execute(_id, params) {
 			const { agent, diaryId } = ensureConnected(config);
+			const expandedTags = compileTaskFilterTags(params.taskFilter);
+			const allTags = [...params.tags ?? [], ...expandedTags];
 			const results = await agent.entries.search({
 				diaryId,
 				query: params.query,
-				limit: params.limit ?? 5
+				limit: params.limit ?? 5,
+				...allTags.length ? { tags: allTags } : {},
+				...params.excludeTags?.length ? { excludeTags: params.excludeTags } : {},
+				...params.entryTypes?.length ? { entryTypes: params.entryTypes } : {}
 			});
 			return {
 				content: [{
@@ -7852,7 +7650,7 @@ function createMoltNetTools(config) {
 	const createEntry = defineTool({
 		name: "moltnet_create_entry",
 		label: "Create MoltNet Diary Entry",
-		description: "Create a new diary entry to record decisions, findings, incidents, or reflections. During an active task, the entry is forced into the task diary and tagged with task:<id>, task_type:<type>, task_attempt:<n>, and correlation:<id> when set; an explicit diaryId mismatching the task diary is rejected.",
+		description: "Create a new diary entry to record decisions, findings, incidents, or reflections. During an active task, the entry is forced into the task diary and tagged with the task:* provenance namespace (task:id:<id>, task:type:<type>, task:attempt:<n>, plus task:correlation:<id> when set); an explicit diaryId mismatching the task diary is rejected.",
 		parameters: Type.Object({
 			title: Type.String({ description: "Entry title (concise, descriptive)" }),
 			content: Type.String({ description: "Entry content (markdown)" }),
@@ -7869,10 +7667,10 @@ function createMoltNetTools(config) {
 				if (params.diaryId && params.diaryId !== taskCtx.diaryId) throw new Error(`entries_create: diaryId "${params.diaryId}" does not match the active task diary "${taskCtx.diaryId}". Entries created during a task must land in the task diary.`);
 				targetDiaryId = taskCtx.diaryId;
 				autoTags = [
-					`task:${taskCtx.taskId}`,
-					`task_type:${taskCtx.taskType}`,
-					`task_attempt:${taskCtx.attemptN}`,
-					...taskCtx.correlationId ? [`correlation:${taskCtx.correlationId}`] : []
+					`task:id:${taskCtx.taskId}`,
+					`task:type:${taskCtx.taskType}`,
+					`task:attempt:${taskCtx.attemptN}`,
+					...taskCtx.correlationId ? [`task:correlation:${taskCtx.correlationId}`] : []
 				];
 			} else targetDiaryId = params.diaryId ?? envDiaryId;
 			const userTags = params.tags ?? [];
@@ -7973,8 +7771,6 @@ function createMoltNetTools(config) {
 		renderPack,
 		listRenderedPacks,
 		getRenderedPack,
-		createJudgePackTask,
-		judgeRenderedPack,
 		diaryTags,
 		listEntries,
 		getEntry,
@@ -8591,135 +8387,6 @@ function ensureRelativeWorktreePaths(gitconfig) {
 	return `${gitconfig}${gitconfig.endsWith("\n") ? "" : "\n"}[worktree]\n\tuseRelativePaths = true\n`;
 }
 //#endregion
-//#region src/moltnet/judge-recipe-cid.ts
-var require$1 = createRequire(import.meta.url);
-var SELF_PACKAGE_NAME = "@themoltnet/pi-extension";
-var PI_PACKAGE_NAME = "@mariozechner/pi-coding-agent";
-var SDK_PACKAGE_NAME = "@themoltnet/sdk";
-var CID_VERSION = 1;
-var RAW_CODEC = 85;
-var SHA2_256_CODE = 18;
-var BASE32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
-function findSelfPackageDir() {
-	const start = path.dirname(fileURLToPath(import.meta.url));
-	let dir = start;
-	while (true) {
-		const candidate = path.join(dir, "package.json");
-		if (existsSync(candidate)) {
-			if (JSON.parse(readFileSync(candidate, "utf8")).name === SELF_PACKAGE_NAME) return dir;
-		}
-		const parent = path.dirname(dir);
-		if (parent === dir) return start;
-		dir = parent;
-	}
-}
-var PACKAGE_DIR = findSelfPackageDir();
-function sha256Hex(value) {
-	return createHash("sha256").update(value, "utf8").digest("hex");
-}
-function encodeVarint(value) {
-	const bytes = [];
-	let current = value >>> 0;
-	while (current >= 128) {
-		bytes.push(current & 127 | 128);
-		current >>>= 7;
-	}
-	bytes.push(current);
-	return bytes;
-}
-function base32Lower(bytes) {
-	let bits = 0;
-	let value = 0;
-	let output = "";
-	for (const byte of bytes) {
-		value = value << 8 | byte;
-		bits += 8;
-		while (bits >= 5) {
-			output += BASE32_ALPHABET[value >>> bits - 5 & 31];
-			bits -= 5;
-		}
-	}
-	if (bits > 0) output += BASE32_ALPHABET[value << 5 - bits & 31];
-	return `b${output}`;
-}
-function stableStringify(value) {
-	if (value === null || typeof value !== "object") return JSON.stringify(value);
-	if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
-	return `{${Object.entries(value).sort(([left], [right]) => left.localeCompare(right)).map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(",")}}`;
-}
-function readPackageVersion(pkgPath, expectedName) {
-	if (!existsSync(pkgPath)) return null;
-	const parsed = JSON.parse(readFileSync(pkgPath, "utf8"));
-	if (expectedName && parsed.name !== expectedName) return null;
-	return typeof parsed.version === "string" ? parsed.version : null;
-}
-function resolveInstalledPackageVersion(packageName) {
-	const candidates = [];
-	try {
-		candidates.push(path.dirname(require$1.resolve(packageName)));
-	} catch {}
-	let dir = PACKAGE_DIR;
-	while (true) {
-		candidates.push(path.join(dir, "node_modules", packageName));
-		const parent = path.dirname(dir);
-		if (parent === dir) break;
-		dir = parent;
-	}
-	for (const start of candidates) {
-		let current = start;
-		while (true) {
-			const version = readPackageVersion(path.join(current, "package.json"), packageName);
-			if (version) return version;
-			const parent = path.dirname(current);
-			if (parent === current) break;
-			current = parent;
-		}
-	}
-	return null;
-}
-function resolvePiJudgeRecipeVersions() {
-	return {
-		pi: resolveInstalledPackageVersion(PI_PACKAGE_NAME),
-		piExtension: readPackageVersion(path.join(PACKAGE_DIR, "package.json"), SELF_PACKAGE_NAME),
-		sdk: resolveInstalledPackageVersion(SDK_PACKAGE_NAME)
-	};
-}
-function buildPiJudgeRecipeManifest(inputs) {
-	return {
-		kind: "pi-judge-recipe/v1",
-		versions: {
-			...resolvePiJudgeRecipeVersions(),
-			...inputs.overrides
-		},
-		assets: {
-			promptAsset: inputs.promptAsset ?? null,
-			rubricAsset: inputs.rubricAsset ?? null,
-			skillSourcePath: inputs.skillSourcePath ?? null
-		},
-		hashes: {
-			judgePromptSha256: sha256Hex(inputs.judgePrompt),
-			rubricSha256: sha256Hex(inputs.rubric),
-			skillFragmentSha256: inputs.skillFragment ? sha256Hex(inputs.skillFragment) : null,
-			implementationSha256: inputs.implementationSource ? sha256Hex(inputs.implementationSource) : null
-		}
-	};
-}
-function computePiJudgeRecipeCid(inputs) {
-	const manifest = buildPiJudgeRecipeManifest(inputs);
-	const manifestBytes = Buffer.from(stableStringify(manifest), "utf8");
-	const digestBytes = createHash("sha256").update(manifestBytes).digest();
-	return {
-		cid: base32Lower(Uint8Array.from([
-			...encodeVarint(CID_VERSION),
-			...encodeVarint(RAW_CODEC),
-			...encodeVarint(SHA2_256_CODE),
-			...encodeVarint(digestBytes.length),
-			...digestBytes
-		])),
-		manifest
-	};
-}
-//#endregion
 //#region src/otel/index.ts
 var TRACER_NAME = "@themoltnet/pi-extension/otel";
 function stripReservedAttrs(attrs) {
@@ -9321,6 +8988,14 @@ function validateTaskOutput(taskType, output) {
 	}];
 	return schemaErrors("output", entry.outputSchema, output);
 }
+/**
+* Resolve the TypeBox output schema registered for `taskType`. Returns
+* `null` for unknown task types — callers (e.g. submit-tool factories)
+* decide how to surface that.
+*/
+function getTaskOutputSchema(taskType) {
+	return getTaskTypeEntry(taskType)?.outputSchema ?? null;
+}
 //#endregion
 //#region ../tasks/src/wire.ts
 /**
@@ -9549,6 +9224,98 @@ Type$1.Object({
 	additionalProperties: false
 });
 //#endregion
+//#region ../agent-runtime/src/output-tools.ts
+/**
+* Submit-output tool contract.
+*
+* The runtime advertises a per-task-type "submit output" tool in every
+* prompt. The tool's name and schema must be the same wherever the
+* agent encounters it: in the system prompt the model reads, in the
+* executor that registers it, in any future executor that wires it
+* into a different coding-agent SDK.
+*
+* This module is the single source of truth for the (toolName,
+* description, parametersSchema) triple. It has no executor-specific
+* dependencies — `agent-runtime` is intentionally agnostic of the
+* concrete coding-agent runtime — so anything that wants to register
+* the tool (pi-extension today, a Codex-SDK adapter tomorrow, a local
+* MCP bridge if we ever go that route) can read the contract here and
+* wire it into its own tool API.
+*
+* Conventions captured here:
+*
+*   - Tool name shape: `submit_<task_type>_output` (e.g.
+*     `submit_fulfill_brief_output`). This is the string the model
+*     sees in the prompt's "preferred path" instruction.
+*   - Parameters schema: the task type's TypeBox `*Output` schema
+*     **directly**, NOT wrapped in `{ output: <schema> }`. Tool args
+*     ARE the payload, so the model gets field-level guidance at
+*     planning time.
+*   - Description text: shared across executors so the tool's
+*     advertised purpose is identical regardless of who registers it.
+*/
+/**
+* Build the submit-output contract for a task type. Returns `null` if
+* no output schema is registered for that type — callers (executors)
+* decide whether that's a hard error, a fallback to the parser-only
+* path, or anything else.
+*/
+function getSubmitOutputContract(taskType) {
+	const schema = getTaskOutputSchema(taskType);
+	if (!schema) return null;
+	return {
+		toolName: submitOutputToolName(taskType),
+		taskType,
+		description: `Submit the structured output for this ${taskType} task. Call exactly once when done. The arguments below ARE the output payload — pass each top-level field of the task type's output schema directly. The runtime validates the args against the schema; mismatches return a tool error you can recover from in the same session. On a valid call the runtime captures the payload and ends the session — you do not need to repeat the JSON in your final assistant message.`,
+		parametersSchema: schema
+	};
+}
+/**
+* Plain-string name builder. Exposed separately so the prompt builder
+* can advertise the tool name even when the schema lookup is deferred
+* to the executor (the prompt is built before any tool registration
+* happens).
+*/
+function submitOutputToolName(taskType) {
+	return `submit_${taskType}_output`;
+}
+//#endregion
+//#region ../agent-runtime/src/prompts/final-output.ts
+function buildFinalOutputBlock(opts) {
+	const { taskType, outputSchemaName, shapeSketch, extraNotes } = opts;
+	const submitTool = submitOutputToolName(taskType);
+	const lines = [
+		"## Final output (read this carefully)",
+		"",
+		`Your VERY LAST action in this conversation MUST report the structured`,
+		`output matching \`${outputSchemaName}\`. Two ways to do it, in order of`,
+		`preference:`,
+		"",
+		`1. **Preferred — call \`${submitTool}\` exactly once** with the payload.`,
+		`   The runtime captures the validated arguments and ends the session.`,
+		`   If the tool is registered, prefer this path.`,
+		`2. **Fallback** — if the submit tool is unavailable, your very last`,
+		`   assistant message MUST be a single JSON object matching`,
+		`   \`${outputSchemaName}\`. No prose before or after. No code fences.`,
+		`   No "ok" or "done". The runtime parses the last balanced top-level`,
+		`   JSON object as the output.`,
+		"",
+		`Failing to report structured output as the very last action means the`,
+		`attempt is marked failed even if the underlying work succeeded.`,
+		"",
+		`Output shape:`,
+		"",
+		"```json",
+		shapeSketch,
+		"```"
+	];
+	if (extraNotes?.length) {
+		lines.push("");
+		for (const note of extraNotes) lines.push(note);
+	}
+	return lines.join("\n");
+}
+//#endregion
 //#region ../agent-runtime/src/prompts/assess-brief.ts
 /**
 * Build the system prompt for an `assess_brief` judge attempt.
@@ -9606,6 +9373,20 @@ function buildAssessBriefPrompt(input, ctx) {
 		"   - `summary` set → use as orientation, not as ground truth.",
 		"Adapt your investigation to whatever the output actually contains. Score conservatively when the producer's output is opaque or thin.",
 		"",
+		"### Querying the producer's diary entries",
+		"",
+		`Beyond the explicit \`diaryEntryIds[]\` from step 3, the producer's`,
+		"attempts auto-tag every entry with the `task:*` provenance namespace.",
+		"You can pull the full set without enumerating ids by passing the",
+		"`taskFilter` shorthand to `moltnet_list_entries` or",
+		"`moltnet_search_entries`:",
+		"",
+		`- All entries from the producer task: \`taskFilter: { taskId: "${input.targetTaskId}" }\`.`,
+		"- Just the accepted attempt: add `attemptN: <acceptedAttemptN>`.",
+		"- The producer plus any prior chain (when a correlationId was set):",
+		"  read it from the task you fetched in step 1 and pass",
+		"  `taskFilter: { correlationId: \"<id>\" }`.",
+		"",
 		preambleSection,
 		"## Criteria",
 		"",
@@ -9617,12 +9398,23 @@ function buildAssessBriefPrompt(input, ctx) {
 		"- `boolean`: score exactly 0 or 1. `rationale` optional.",
 		"- `deterministic_signature_check`: run `moltnet entry verify` on every diary entry returned by step 3 above AND `git verify-commit` on every commit. Score 1 iff ALL signatures are valid; otherwise 0. Populate `evidence.commitsVerified`, `evidence.commitsTotal`, `evidence.signatureFailures`.",
 		"",
-		"### Final output",
+		"Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before reporting structured output.",
 		"",
-		"Emit a JSON object matching `AssessBriefOutput`:",
-		"  { \"scores\": [{criterionId, score, rationale?, evidence?}], \"composite\", \"verdict\", \"judgeModel\"? }",
-		"`composite` = Σ(weight_i × score_i) recomputed. The runtime will reject a mismatch.",
-		"Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before emitting the JSON."
+		buildFinalOutputBlock({
+			taskType: "assess_brief",
+			outputSchemaName: "AssessBriefOutput",
+			shapeSketch: [
+				"{",
+				"  \"scores\": [",
+				"    { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} }",
+				"  ],",
+				"  \"composite\": <sum>,",
+				"  \"verdict\": \"<1-3 sentence overall>\",",
+				"  \"judgeModel\": \"<provider:model>\"",
+				"}"
+			].join("\n"),
+			extraNotes: ["`composite` = Σ(weight_i × score_i) recomputed. The runtime rejects a mismatch."]
+		})
 	].filter(Boolean).join("\n");
 }
 //#endregion
@@ -9699,9 +9491,16 @@ function buildCuratePackPrompt(input, ctx) {
 		"## Tools available (not a recipe — use what the situation calls for)",
 		"",
 		"- `moltnet_diary_tags` — tag inventory with counts. Cheap reconnaissance",
-		"  when the prompt implies a scope but not a tag.",
+		"  when the prompt implies a scope but not a tag. Pass",
+		"  `prefix: \"task:\"` to enumerate task-provenance tags only",
+		"  (`task:type:*`, `task:correlation:*`, etc.).",
 		"- `moltnet_search_entries` — hybrid semantic + lexical search.",
-		"- `moltnet_list_entries` — tag-filtered listing.",
+		"  Filters AND with the query: pass `tags`, `excludeTags`,",
+		"  `entryTypes`, or the `taskFilter` shorthand to narrow before",
+		"  ranking. Example: `taskFilter: { taskType: \"fulfill_brief\" }`",
+		"  returns only entries from fulfill_brief attempts.",
+		"- `moltnet_list_entries` — multi-tag (AND) listing with optional",
+		"  `excludeTags`, `entryType`, and the same `taskFilter` shorthand.",
 		"- `moltnet_get_entry` — full entry read, for disambiguation.",
 		"- `moltnet_pack_create` — terminal call that persists the pack.",
 		"",
@@ -9747,31 +9546,30 @@ function buildCuratePackPrompt(input, ctx) {
 		"",
 		"## Hard constraints",
 		"",
-		"- Do NOT call `moltnet_pack_render` or `moltnet_rendered_pack_judge` —",
-		"  those belong to the next sessions.",
+		"- Do NOT call `moltnet_pack_render` — that belongs to the next session.",
 		"- Do NOT write diary entries unless curation surfaces a genuine",
 		"  incident worth recording. The curation reasoning lives in the task",
 		"  output, not in the diary.",
 		"- Respect hard include/exclude filters literally.",
 		"",
-		"## Final output",
-		"",
-		"Write to stdout a JSON object matching `CuratePackOutput`:",
-		"```",
-		"{",
-		"  \"packId\": \"<uuid>\",",
-		"  \"packCid\": \"<cid>\",",
-		"  \"entries\": [",
-		"    { \"entryId\": \"<uuid>\", \"rank\": 1, \"rationale\": \"<why>\" }",
-		"  ],",
-		"  \"recipeParams\": { \"recipe\": \"...\", \"prompt\": \"...\", ... },",
-		"  \"checkpoints\": [",
-		"    { \"phase\": \"recon\", \"candidateIds\": [...], \"droppedIds\": [...], \"notes\": \"...\" }",
-		"  ],",
-		"  \"summary\": \"<2-4 sentences: what you looked for, how you narrowed, what defines the final set>\"",
-		"}",
-		"```",
-		"The runtime parses this. Failing to emit it is a task failure."
+		buildFinalOutputBlock({
+			taskType: "curate_pack",
+			outputSchemaName: "CuratePackOutput",
+			shapeSketch: [
+				"{",
+				"  \"packId\": \"<uuid>\",",
+				"  \"packCid\": \"<cid>\",",
+				"  \"entries\": [",
+				"    { \"entryId\": \"<uuid>\", \"rank\": 1, \"rationale\": \"<why>\" }",
+				"  ],",
+				"  \"recipeParams\": { \"recipe\": \"...\", \"prompt\": \"...\", ... },",
+				"  \"checkpoints\": [",
+				"    { \"phase\": \"recon\", \"candidateIds\": [...], \"droppedIds\": [...], \"notes\": \"...\" }",
+				"  ],",
+				"  \"summary\": \"<2-4 sentences: what you looked for, how you narrowed, what defines the final set>\"",
+				"}"
+			].join("\n")
+		})
 	].filter((l) => l !== null).join("\n");
 }
 //#endregion
@@ -9829,11 +9627,19 @@ function buildFulfillBriefPrompt(input, ctx) {
 		"   `MoltNet-Diary: <id>` (per the runtime instructor).",
 		"6. Push the branch and open a PR.",
 		"",
-		"### Final output",
-		"",
-		"When done, write to stdout a JSON object with shape matching `FulfillBriefOutput`:",
-		"  { \"branch\", \"commits\": [{sha, message, diaryEntryId}], \"pullRequestUrl\", \"diaryEntryIds\", \"summary\" }",
-		"The runtime parses this as the structured task output. Failing to emit it is a failure."
+		buildFinalOutputBlock({
+			taskType: "fulfill_brief",
+			outputSchemaName: "FulfillBriefOutput",
+			shapeSketch: [
+				"{",
+				"  \"branch\": \"<branch-name>\",",
+				"  \"commits\": [{ \"sha\": \"...\", \"message\": \"...\", \"diaryEntryId\": \"...\" }],",
+				"  \"pullRequestUrl\": \"<url-or-null>\",",
+				"  \"diaryEntryIds\": [\"...\"],",
+				"  \"summary\": \"<1-3 sentence recap>\"",
+				"}"
+			].join("\n")
+		})
 	].filter(Boolean).join("\n");
 }
 //#endregion
@@ -9915,23 +9721,29 @@ function buildJudgePackPrompt(input, ctx) {
 		"  may leak guidance that biases judgment.",
 		"- Keep the session focused on scoring; no speculative exploration.",
 		"",
-		"## Final output",
-		"",
-		"Write to stdout a JSON object matching `JudgePackOutput`:",
-		"```",
-		"{",
-		"  \"scores\": [{\"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {...}}],",
-		"  \"composite\": <sum-of-weighted-scores>,",
-		"  \"verdict\": \"<1-3 sentence overall>\",",
-		"  \"judgeModel\": \"<provider:model>\",",
-		"  \"rendererBinaryCid\": \"<cid-string-only-if-available>\"",
-		"}",
-		"```",
-		"Omit `rendererBinaryCid` entirely when no binary CID is exposed by",
-		"`moltnet_rendered_pack_get`. Do NOT emit `null` — the field is optional",
-		"and absence is the correct representation when unavailable.",
 		`Write a signed diary entry (tags: \`judgment\`, \`judge_pack\`, \`rubric:${rubric.rubricId}\`) capturing the rationale before`,
-		"emitting the JSON."
+		"reporting structured output.",
+		"",
+		buildFinalOutputBlock({
+			taskType: "judge_pack",
+			outputSchemaName: "JudgePackOutput",
+			shapeSketch: [
+				"{",
+				"  \"scores\": [",
+				"    { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} }",
+				"  ],",
+				"  \"composite\": <sum-of-weighted-scores>,",
+				"  \"verdict\": \"<1-3 sentence overall>\",",
+				"  \"judgeModel\": \"<provider:model>\",",
+				"  \"rendererBinaryCid\": \"<cid-string-only-if-available>\"",
+				"}"
+			].join("\n"),
+			extraNotes: [
+				"Omit `rendererBinaryCid` entirely when no binary CID is exposed by",
+				"`moltnet_rendered_pack_get`. Do NOT emit `null` — the field is",
+				"optional and absence is the correct representation when unavailable."
+			]
+		})
 	].filter((l) => l !== null).join("\n");
 }
 //#endregion
@@ -9972,24 +9784,23 @@ function buildRenderPackPrompt(input, ctx) {
 		"## Constraints",
 		"",
 		"- Do NOT modify the source pack or its entries.",
-		"- Do NOT call `moltnet_rendered_pack_judge`.",
 		"- Do NOT write diary entries unless a genuine incident occurs",
 		"  (rendering failure, invariant violation).",
 		"",
-		"## Final output",
-		"",
-		"Write to stdout a JSON object matching `RenderPackOutput`:",
-		"```",
-		"{",
-		"  \"renderedPackId\": \"<uuid-or-null>\",",
-		"  \"renderedCid\": \"<cid>\",",
-		"  \"renderMethod\": \"<label>\",",
-		"  \"byteSize\": <int>,",
-		"  \"entriesRendered\": <int>,",
-		"  \"summary\": \"<1-3 sentence recap>\"",
-		"}",
-		"```",
-		"Failing to emit it is a task failure."
+		buildFinalOutputBlock({
+			taskType: "render_pack",
+			outputSchemaName: "RenderPackOutput",
+			shapeSketch: [
+				"{",
+				"  \"renderedPackId\": \"<uuid-or-null>\",",
+				"  \"renderedCid\": \"<cid>\",",
+				"  \"renderMethod\": \"<label>\",",
+				"  \"byteSize\": <int>,",
+				"  \"entriesRendered\": <int>,",
+				"  \"summary\": \"<1-3 sentence recap>\"",
+				"}"
+			].join("\n")
+		})
 	].join("\n");
 }
 //#endregion
@@ -12020,7 +11831,7 @@ var require_transport = /* @__PURE__ */ __commonJSMin(((exports, module) => {
 	var { existsSync: existsSync$1 } = __require("node:fs");
 	var getCallers = require_caller();
 	var { join: join$1, isAbsolute, sep } = __require("node:path");
-	var { fileURLToPath: fileURLToPath$1 } = __require("node:url");
+	var { fileURLToPath } = __require("node:url");
 	var sleep = require_atomic_sleep();
 	var onExit = require_on_exit_leak_free();
 	var ThreadStream = require_thread_stream();
@@ -12076,7 +11887,7 @@ var require_transport = /* @__PURE__ */ __commonJSMin(((exports, module) => {
 		if (!unquoted) return false;
 		let path = unquoted;
 		if (path.startsWith("file://")) try {
-			path = fileURLToPath$1(path);
+			path = fileURLToPath(path);
 		} catch {
 			return false;
 		}
@@ -13567,9 +13378,13 @@ function buildRuntimeInstructor(ctx) {
 		`- During this task, every diary entry MUST land in \`${ctx.diaryId}\``,
 		"  (the task diary). The MCP `moltnet_create_entry` tool enforces this",
 		"  and rejects mismatched explicit `diaryId` parameters.",
-		`- Provenance tags \`task:${ctx.taskId}\`, \`task_type:${ctx.taskType}\`,`,
-		`  and \`task_attempt:${ctx.attemptN}\`${ctx.correlationId ? `, plus \`correlation:${ctx.correlationId}\`` : ""} are auto-injected on every entry.`,
-		"  You may add additional tags; you cannot remove the auto-tags.",
+		`- Provenance tags \`task:id:${ctx.taskId}\`, \`task:type:${ctx.taskType}\`,`,
+		`  and \`task:attempt:${ctx.attemptN}\`${ctx.correlationId ? `, plus \`task:correlation:${ctx.correlationId}\`` : ""} are auto-injected on every entry.`,
+		"  These share the `task:` namespace so `moltnet_diary_tags` with",
+		"  `prefix: \"task:\"` lists every task-scoped tag, and the",
+		"  `taskFilter` shorthand on `moltnet_list_entries` /",
+		"  `moltnet_search_entries` expands into them. You may add additional",
+		"  tags but you cannot remove the auto-injected ones.",
 		"",
 		"## Accountable commits",
 		"",
@@ -13598,42 +13413,78 @@ function buildRuntimeInstructor(ctx) {
 }
 //#endregion
 //#region src/runtime/task-output.ts
-async function parseStructuredTaskOutput(assistantText, taskType) {
+var METER_NAME = "@themoltnet/pi-extension/task-output"; // Meter name handed to metrics.getMeter() below.
+var parseResultCounter = null; // Cached counter instance; stays null until getParseResultCounter() first runs.
+function getParseResultCounter() { // Returns the shared parse-result counter, creating it on first use.
+	if (parseResultCounter) return parseResultCounter; // Already created: reuse the cached instance.
+	parseResultCounter = metrics.getMeter(METER_NAME).createCounter("agent_runtime.task_output.parse_result", {
+		description: "Outcome of structured task-output capture, labelled by task_type, model, and code (success | output_missing | output_validation_failed | unknown_task_type | output_cid_compute_failed | captured_via_tool).",
+		unit: "1"
+	});
+	return parseResultCounter;
+}
+/**
+* Add 1 to the parse-result counter, labelled `task_type` (args.taskType),
+* `model` (args.model, or "unknown" when nullish) and `code` (args.code).
+* The executor also calls this directly to record `captured_via_tool`.
+*/
+function recordTaskOutputParseResult(args) {
+	getParseResultCounter().add(1, {
+		task_type: args.taskType,
+		model: args.model ?? "unknown", // Keep the label populated when no model was supplied.
+		code: args.code
+	});
+}
+async function parseStructuredTaskOutput(assistantText, taskType, opts = {}) {
+	const record = (code) => recordTaskOutputParseResult({
+		taskType,
+		model: opts.model,
+		code
+	});
 	const extracted = extractJsonObject(assistantText);
-	if (!extracted) return {
-		output: null,
-		outputCid: null,
-		error: {
-			code: "output_missing",
-			message: "Agent did not emit a parseable JSON object as its final message."
-		}
-	};
+	if (!extracted) {
+		record("output_missing");
+		return {
+			output: null,
+			outputCid: null,
+			error: {
+				code: "output_missing",
+				message: "Agent did not emit a parseable JSON object as its final message."
+			}
+		};
+	}
 	const errors = validateTaskOutput(taskType, extracted);
 	if (errors.length > 0) {
 		const details = errors.slice(0, 3).map((error) => `${error.field}: ${error.message}`);
 		const [firstError] = errors;
+		const code = firstError?.field === "taskType" ? "unknown_task_type" : "output_validation_failed";
+		record(code);
 		return {
 			output: null,
 			outputCid: null,
 			error: {
-				code: firstError?.field === "taskType" ? "unknown_task_type" : "output_validation_failed",
+				code,
 				message: `Output failed schema validation: ${details.join("; ")}`
 			}
 		};
 	}
 	try {
+		const outputCid = await computeJsonCid(extracted);
+		record("success");
 		return {
 			output: extracted,
-			outputCid: await computeJsonCid(extracted),
+			outputCid,
 			error: null
 		};
 	} catch (error) {
+		const message = error instanceof Error ? error.message : String(error);
+		record("output_cid_compute_failed");
 		return {
 			output: null,
 			outputCid: null,
 			error: {
 				code: "output_cid_compute_failed",
-				message: `Validated output could not be canonicalized: ${error instanceof Error ? error.message : String(error)}`
+				message: `Validated output could not be canonicalized: ${message}`
 			}
 		};
 	}
@@ -13689,6 +13540,99 @@ function extractJsonObject(text) {
 	return null;
 }
 //#endregion
+//#region src/runtime/submit-output-tool.ts
+/**
+* Thrown by `createSubmitOutputTool` when `getSubmitOutputContract(taskType)`
+* yields no contract. `resolveSubmitTools` catches exactly this class and
+* returns a null handle (the executor then uses the text parser); it rethrows
+* anything else. The offending task type is kept on `this.taskType`.
+*/
+var UnknownTaskTypeForSubmitToolError = class extends Error {
+	constructor(taskType) {
+		super(`createSubmitOutputTool: no output schema registered for task type "${taskType}"`);
+		this.taskType = taskType; // Exposed so handlers can see which type was requested.
+		this.name = "UnknownTaskTypeForSubmitToolError"; // Without this the instance would report the inherited name "Error".
+	}
+};
+function createSubmitOutputTool(taskType, opts = {}) { // Builds the submit tool for taskType plus accessors for what it captured.
+	const contract = getSubmitOutputContract(taskType);
+	if (!contract) throw new UnknownTaskTypeForSubmitToolError(taskType); // No contract for this task type: signal it with the dedicated error class.
+	const schema = contract.parametersSchema;
+	let captured = null; // Most recent payload that passed validation; null until one does.
+	let callCount = 0; // Counts accepted calls only (see the increment below).
+	return {
+		tool: defineTool({
+			name: contract.toolName,
+			label: `Submit ${taskType} output`,
+			description: contract.description,
+			parameters: schema,
+			async execute(_id, params) {
+				const errors = [...Value.Errors(schema, params)]; // Spread into an array so length and slice are available.
+				if (errors.length > 0) {
+					const detailMsg = errors.slice(0, 3).map((err) => `${err.path || "<root>"}: ${err.message}`).join("; "); // At most three errors; an empty path is shown as "<root>".
+					const details = {
+						captured: false,
+						callCount, // Accepted calls so far; this rejected call is not counted.
+						error: "output_validation_failed"
+					};
+					recordTaskOutputParseResult({
+						taskType,
+						model: opts.model,
+						code: "output_validation_failed"
+					});
+					return {
+						content: [{
+							type: "text",
+							text: `Output failed schema validation: ${detailMsg}. Re-call this tool with a corrected output.`
+						}],
+						details,
+						isError: true
+					};
+				}
+				captured = params; // A later valid call overwrites an earlier capture.
+				callCount += 1; // Only reached for payloads that passed validation.
+				return {
+					content: [{
+						type: "text",
+						text: "Output captured. The runtime now has the validated payload; no further action is needed for output reporting."
+					}],
+					details: {
+						captured: true,
+						callCount,
+						error: null
+					},
+					terminate: true // NOTE(review): presumably asks the agent runtime to end the session after this call — confirm against the tool contract.
+				};
+			}
+		}),
+		getCaptured: () => captured, // Read by the executor after the run to pick up the payload.
+		getCallCount: () => callCount
+	};
+}
+/**
+* Build the submit-tool wiring for one task attempt. Returns
+* `{ handle, tools }`: `handle` is the `createSubmitOutputTool` result, or
+* `null` when the task type has no output contract; `tools` is `[handle.tool]`
+* or `[]`, ready to spread into the session's `customTools`.
+*
+* Only `UnknownTaskTypeForSubmitToolError` is caught. Any other factory
+* error is rethrown so that unexpected breakage is not silently degraded
+* to parser-only behaviour.
+*/
+function resolveSubmitTools(taskType, opts = {}) {
+	let handle;
+	try {
+		handle = createSubmitOutputTool(taskType, opts);
+	} catch (err) {
+		if (err instanceof UnknownTaskTypeForSubmitToolError) handle = null; // Expected case: no submit tool for this task type.
+		else throw err; // Anything else is unexpected and must surface.
+	}
+	return {
+		handle,
+		tools: handle ? [handle.tool] : [] // Empty array keeps the caller's spread a no-op.
+	};
+}
+//#endregion
 //#region src/runtime/execute-pi-task.ts
 /**
 * executePiTask — run a single Task attempt using pi-coding-agent inside a
@@ -13834,6 +13778,8 @@ async function executePiTask(claimedTask, reporter, opts) {
 			createEditToolDefinition(mountPath, { operations: createGondolinEditOps(managed.vm, mountPath) }),
 			createBashToolDefinition(mountPath, { operations: createGondolinBashOps(managed.vm, mountPath) })
 		];
+		const { handle: submitToolHandle, tools: submitToolDefs } = resolveSubmitTools(task.taskType, { model: opts.model });
+		const submitTools = submitToolDefs;
 		try {
 			const moltnetAgent = await connect({ configDir: managed.agentDir });
 			const moltnetTools = createMoltNetTools({
@@ -13885,7 +13831,11 @@ async function executePiTask(claimedTask, reporter, opts) {
 				agentDir: piAuthDir,
 				cwd: mountPath,
 				model: modelHandle,
-				customTools: [...gondolinCustomTools, ...moltnetTools],
+				customTools: [
+					...gondolinCustomTools,
+					...moltnetTools,
+					...submitTools
+				],
 				sessionManager: SessionManager.inMemory(),
 				resourceLoader
 			})).session;
@@ -13962,14 +13912,43 @@ async function executePiTask(claimedTask, reporter, opts) {
 		let parsedOutputCid = null;
 		let parseError = null;
 		if (!runError && !llmAbort && !cancelled) {
-			const parsed = await parseStructuredTaskOutput(assistantText, task.taskType);
-			parsedOutput = parsed.output;
-			parsedOutputCid = parsed.outputCid;
-			parseError = parsed.error;
-			if (parseError) await emit("error", {
-				message: parseError.message,
-				phase: "output_validation"
-			});
+			const captured = submitToolHandle?.getCaptured() ?? null;
+			if (captured) try {
+				parsedOutput = captured;
+				parsedOutputCid = await computeJsonCid(captured);
+				recordTaskOutputParseResult({
+					taskType: task.taskType,
+					model: opts.model,
+					code: "captured_via_tool"
+				});
+			} catch (err) {
+				const message = err instanceof Error ? err.message : String(err);
+				parsedOutput = null;
+				parsedOutputCid = null;
+				parseError = {
+					code: "output_cid_compute_failed",
+					message: `Captured submit-tool output could not be canonicalized: ${message}`
+				};
+				recordTaskOutputParseResult({
+					taskType: task.taskType,
+					model: opts.model,
+					code: "output_cid_compute_failed"
+				});
+				await emit("error", {
+					message: parseError.message,
+					phase: "output_validation"
+				});
+			}
+			else {
+				const parsed = await parseStructuredTaskOutput(assistantText, task.taskType, { model: opts.model });
+				parsedOutput = parsed.output;
+				parsedOutputCid = parsed.outputCid;
+				parseError = parsed.error;
+				if (parseError) await emit("error", {
+					message: parseError.message,
+					phase: "output_validation"
+				});
+			}
 		}
 		if (cancelled) return {
 			taskId: task.id,
@@ -14365,4 +14344,4 @@ function moltnetExtension(pi) {
 	registerMoltnetReflectCommand(pi, state);
 }
 //#endregion
-export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildPiJudgeRecipeManifest, computePiJudgeRecipeCid, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resolvePiJudgeRecipeVersions, resumeVm, toGuestPath };
+export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resumeVm, toGuestPath };