npm - @themoltnet/pi-extension - Version diff - 0.10.0 → 0.12.0 - Mend

@themoltnet/pi-extension 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/index.js (changed)

@@ -7,11 +7,10 @@ import { createHash } from "node:crypto";
 import crypto, { createHash as createHash$1 } from "crypto";
 import { readFile } from "node:fs/promises";
 import { homedir } from "node:os";
-import { Type, complete, getModel } from "@mariozechner/pi-ai";
+import { Type, getModel } from "@mariozechner/pi-ai";
 import { RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
 import { parseEnv } from "node:util";
-import { fileURLToPath } from "node:url";
-import { SpanStatusCode, context, trace } from "@opentelemetry/api";
+import { SpanStatusCode, context, metrics, trace } from "@opentelemetry/api";
 import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
 import { Value } from "@sinclair/typebox/value";
 //#region \0rolldown/runtime.js
@@ -3848,7 +3847,7 @@ var cidSymbol = Symbol.for("@ipld/js-cid/CID");
 * naturally prevents field delimiter collision.
 */
 /** SHA-256 multicodec code per multihash table */
-var SHA2_256_CODE$1 = 18;
+var SHA2_256_CODE = 18;
 /**
 * Build the canonical JSON input for content hashing.
 *
@@ -3880,7 +3879,7 @@ function computeCanonicalHash(entryType, title, content, tags) {
 * Example output: "bafkreig..."
 */
 function computeContentCid(entryType, title, content, tags) {
-	const digest = create(SHA2_256_CODE$1, computeCanonicalHash(entryType, title, content, tags));
+	const digest = create(SHA2_256_CODE, computeCanonicalHash(entryType, title, content, tags));
 	return CID.createV1(85, digest).toString(base32);
 }
 var { p: P, n: N, Gx, Gy, a: _a, d: _d } = {
@@ -7135,159 +7134,6 @@ var registerSandboxCommand = (pi, state) => {
 	});
 };
 //#endregion
-//#region src/moltnet/judge/assets.ts
-/** Default fidelity rubric — kept verbatim from the Go judge. */
-var DEFAULT_RUBRIC = `Evaluate the rendered content against the source entries on three axes:
-COVERAGE (0.0-1.0):
-- Identify each distinct topic/fact in the source entries
-- Check if each is represented in the rendered content
-- Score = (represented topics) / (total source topics)
-- A topic can be restructured or summarized but must be present
-GROUNDING (0.0-1.0):
-- Identify each distinct claim/fact in the rendered content
-- Check if each is traceable to a specific source entry
-- Score = (grounded claims) / (total rendered claims)
-- Restructured content is fine if the underlying fact comes from a source
-FAITHFULNESS (0.0-1.0):
-- For content that IS represented, check semantic accuracy
-- Is the meaning preserved? Any distortions, inversions, or misquotes?
-- Score = (accurate representations) / (total representations)
-- Summarization is fine; misrepresentation is not
-`;
-/** Judge system prompt — kept verbatim from the Go judge signature. */
-var JUDGE_SYSTEM_PROMPT = `You are a fidelity judge for rendered context packs. Your job is to evaluate
-whether a rendered markdown document faithfully represents its source entries.
-Score each axis independently and precisely. Be critical — the purpose is to
-catch content drift, hallucination, and cherry-picking.
-You will be given three inputs:
-1. \`source_entries\` — the original source entries from the context pack, in
-   markdown format.
-2. \`rendered_content\` — the agent-rendered markdown derived from the source
-   entries.
-3. \`rubric\` — the fidelity scoring rubric with criteria definitions.
-Return a JSON object matching the requested schema with these fields:
-- \`coverage\` (number, 0.0–1.0): fraction of source entries represented in
-  rendered content. 1.0 means all source entries are covered.
-- \`grounding\` (number, 0.0–1.0): fraction of rendered content traceable to
-  source entries. 1.0 means everything comes from sources.
-- \`faithfulness\` (number, 0.0–1.0): semantic accuracy of represented content.
-  1.0 means source content is accurately represented.
-- \`reasoning\` (string): detailed step-by-step analysis explaining each score.
-Respond with ONLY a single JSON object. No prose before or after.
-`;
-//#endregion
-//#region src/moltnet/judge/fidelity.ts
-/**
-* Pi-native port of the Go fidelity judge
-* (libs/dspy-adapters/fidelity/fidelity.go).
-*
-* Same inputs (source_entries, rendered_content, rubric), same outputs
-* (coverage, grounding, faithfulness, reasoning). Uses pi-ai `complete()`
-* instead of dspy-go; no process-global state.
-*/
-var JSON_FENCE_RE = /```(?:json)?\s*([\s\S]*?)```/i;
-function extractJson(text) {
-	const fenceMatch = text.match(JSON_FENCE_RE);
-	if (fenceMatch && fenceMatch[1]) return fenceMatch[1].trim();
-	const firstBrace = text.indexOf("{");
-	const lastBrace = text.lastIndexOf("}");
-	if (firstBrace >= 0 && lastBrace > firstBrace) return text.slice(firstBrace, lastBrace + 1);
-	return text.trim();
-}
-function clamp01(value) {
-	const n = typeof value === "number" ? value : Number(value);
-	if (!Number.isFinite(n)) return 0;
-	if (n < 0) return 0;
-	if (n > 1) return 1;
-	return n;
-}
-function coerceString(value) {
-	if (typeof value === "string") return value;
-	if (value === null || value === void 0) return "";
-	if (typeof value === "number" || typeof value === "boolean") return String(value);
-	try {
-		return JSON.stringify(value);
-	} catch {
-		return "";
-	}
-}
-function parseScores(raw) {
-	const jsonText = extractJson(raw);
-	let parsed;
-	try {
-		parsed = JSON.parse(jsonText);
-	} catch (err) {
-		throw new Error(`judge returned an invalid structured response: ${err.message}\n---raw---\n${raw}`);
-	}
-	const coverage = clamp01(parsed.coverage);
-	const grounding = clamp01(parsed.grounding);
-	const faithfulness = clamp01(parsed.faithfulness);
-	const reasoning = coerceString(parsed.reasoning);
-	return {
-		coverage,
-		grounding,
-		faithfulness,
-		composite: (coverage + grounding + faithfulness) / 3,
-		reasoning
-	};
-}
-function buildUserMessage(sourceEntries, renderedContent, rubric) {
-	return [
-		"## Rubric",
-		rubric,
-		"",
-		"## Source entries",
-		sourceEntries,
-		"",
-		"## Rendered content",
-		renderedContent,
-		"",
-		"Produce the JSON object now."
-	].join("\n");
-}
-/**
-* Run the fidelity judge via pi-ai `complete()`. Mirrors `fidelity.Run` in
-* libs/dspy-adapters/fidelity/fidelity.go.
-*/
-async function runFidelityJudge(req, options = {}) {
-	const rubric = req.rubric?.trim() ? req.rubric : DEFAULT_RUBRIC;
-	const userPrompt = buildUserMessage(req.sourceEntries, req.renderedContent, rubric);
-	const message = await complete(req.model, {
-		systemPrompt: JUDGE_SYSTEM_PROMPT,
-		messages: [{
-			role: "user",
-			content: userPrompt,
-			timestamp: Date.now()
-		}]
-	}, options.signal ? { signal: options.signal } : void 0);
-	if (message.stopReason === "error" || message.stopReason === "aborted") throw new Error(`judge failed: ${message.errorMessage ?? message.stopReason}`);
-	const textContent = message.content.filter((c) => c.type === "text" && typeof c.text === "string").map((c) => c.text).join("\n").trim();
-	if (!textContent) throw new Error("judge returned empty response");
-	return parseScores(textContent);
-}
-/**
-* Build a stable markdown blob of source entries for the judge prompt.
-* Mirrors `buildSourceEntriesFromPack` / `buildSourceEntriesMarkdown` in the
-* Go CLI so that local and proctored modes produce the same input shape.
-*/
-function buildSourceEntriesMarkdown(entries) {
-	const parts = [];
-	for (const entry of entries) {
-		const title = entry.title?.trim() || "Untitled";
-		parts.push(`## ${title}\n${entry.content}\n`);
-	}
-	return parts.join("\n");
-}
-//#endregion
 //#region src/moltnet/render-phase6.ts
 function slugToTitle(value) {
 	return value.split(/[:/_-]+/).filter(Boolean).map((part) => part[0]?.toUpperCase() + part.slice(1)).join(" ");
@@ -7434,6 +7280,21 @@ function ensureConnected(config) {
 	};
 }
 /**
+* Expand the `taskFilter` shorthand on the diary list/search tools into
+* the matching `task:*` provenance tags emitted by `moltnet_create_entry`
+* during a task. Returning an array (possibly empty) lets callers spread
+* it into a larger `tags` AND-filter without conditionals.
+*/
+function compileTaskFilterTags(filter) {
+	if (!filter) return [];
+	const tags = [];
+	if (filter.taskId) tags.push(`task:id:${filter.taskId}`);
+	if (filter.taskType) tags.push(`task:type:${filter.taskType}`);
+	if (filter.correlationId) tags.push(`task:correlation:${filter.correlationId}`);
+	if (typeof filter.attemptN === "number") tags.push(`task:attempt:${filter.attemptN}`);
+	return tags;
+}
+/**
 * Create all MoltNet tool definitions, ready to pass to `pi.registerTool()`.
 */
 function createMoltNetTools(config) {
@@ -7596,122 +7457,6 @@ function createMoltNetTools(config) {
 			};
 		}
 	});
-	const createJudgePackTask = defineTool({
-		name: "moltnet_judge_pack_task_create",
-		label: "Create Judge Pack Task",
-		description: "Create a judge_pack task for a rendered pack. Returns a taskId that moltnet_rendered_pack_judge can claim and execute. The rubric is required — pass the structured rubric JSON from @moltnet/tasks Rubric schema.",
-		parameters: Type.Object({
-			renderedPackId: Type.String({ description: "Rendered pack ID to judge" }),
-			sourcePackId: Type.String({ description: "Source pack ID. Fetch it from the rendered pack if unknown." }),
-			rubric: Type.Any({ description: "Structured rubric object (Rubric schema from @moltnet/tasks). Must have rubricId, version, criteria[]." }),
-			diaryId: Type.Optional(Type.String({ description: "Diary ID to impose the task on. Defaults to the connected diary." }))
-		}),
-		async execute(_id, params) {
-			const { agent, diaryId: connectedDiaryId, teamId: connectedTeamId } = ensureConnected(config);
-			const task = await agent.tasks.create({
-				taskType: "judge_pack",
-				input: {
-					renderedPackId: params.renderedPackId,
-					sourcePackId: params.sourcePackId,
-					rubric: params.rubric
-				},
-				diaryId: params.diaryId ?? connectedDiaryId,
-				teamId: connectedTeamId
-			});
-			return {
-				content: [{
-					type: "text",
-					text: JSON.stringify({
-						taskId: task.id,
-						task
-					}, null, 2)
-				}],
-				details: {}
-			};
-		}
-	});
-	const judgeRenderedPack = defineTool({
-		name: "moltnet_rendered_pack_judge",
-		label: "Judge MoltNet Rendered Pack",
-		description: "Claim a judge_pack task, run the fidelity judge locally, complete the task with structured scores, and set verifiedTaskId on the rendered pack. Create the task first with moltnet_judge_pack_task_create.",
-		parameters: Type.Object({
-			taskId: Type.String({ description: "judge_pack task ID from moltnet_judge_pack_task_create" }),
-			rubricOverride: Type.Optional(Type.String({ description: "Freeform rubric string override for the LLM judge prompt. When omitted the task rubric preamble (or built-in default) is used." }))
-		}),
-		async execute(_id, params, _signal, _onUpdate, ctx) {
-			const { agent } = ensureConnected(config);
-			const model = ctx?.model;
-			if (!model) throw new Error("No active model in pi session — cannot run the fidelity judge.");
-			const claimed = await agent.tasks.claim(params.taskId);
-			const input = claimed.task.input;
-			const rendered = await agent.packs.getRendered(input.renderedPackId);
-			if (!rendered.content?.trim()) throw new Error(`rendered pack ${input.renderedPackId} has empty content`);
-			const sourcePack = await agent.packs.get(input.sourcePackId, { expand: "entries" });
-			if (!sourcePack.entries || sourcePack.entries.length === 0) throw new Error(`source pack ${input.sourcePackId} has no entries`);
-			const sourceEntriesMd = buildSourceEntriesMarkdown(sourcePack.entries.map((entry) => ({
-				title: entry.entry.title,
-				content: entry.entry.content
-			})));
-			const rubric = params.rubricOverride?.trim() || input.rubric?.preamble?.trim() || DEFAULT_RUBRIC;
-			let scores;
-			try {
-				scores = await runFidelityJudge({
-					model,
-					sourceEntries: sourceEntriesMd,
-					renderedContent: rendered.content,
-					rubric
-				});
-			} catch (err) {
-				await agent.tasks.fail(params.taskId, claimed.attempt.attemptN, { error: {
-					code: "judge_failed",
-					message: err.message ?? String(err)
-				} }).catch(() => {});
-				throw new Error(`judge failed: ${err.message ?? String(err)}`);
-			}
-			const modelId = model.provider && model.id ? `${model.provider}:${model.id}` : model.id ?? "pi:unknown";
-			const output = {
-				scores: [
-					{
-						criterionId: "coverage",
-						score: scores.coverage
-					},
-					{
-						criterionId: "grounding",
-						score: scores.grounding
-					},
-					{
-						criterionId: "faithfulness",
-						score: scores.faithfulness
-					}
-				],
-				composite: scores.composite,
-				verdict: scores.reasoning,
-				judgeModel: modelId
-			};
-			const outputCid = await computeJsonCid(output);
-			const completed = await agent.tasks.complete(params.taskId, claimed.attempt.attemptN, {
-				output,
-				outputCid,
-				usage: {
-					inputTokens: 0,
-					outputTokens: 0
-				}
-			});
-			await agent.packs.updateRendered(input.renderedPackId, { verifiedTaskId: params.taskId });
-			return {
-				content: [{
-					type: "text",
-					text: JSON.stringify({
-						renderedPackId: input.renderedPackId,
-						taskId: params.taskId,
-						scores,
-						task: completed
-					}, null, 2)
-				}],
-				details: {}
-			};
-		}
-	});
 	const diaryTags = defineTool({
 		name: "moltnet_diary_tags",
 		label: "List MoltNet Diary Tags",
@@ -7747,12 +7492,32 @@ function createMoltNetTools(config) {
 	const listEntries = defineTool({
 		name: "moltnet_list_entries",
 		label: "List MoltNet Diary Entries",
-		description: "List entries from the MoltNet diary. When `entryIds` is provided, batch-fetches those specific entries (max 50) and returns full fields including entryType, contentSignature, and contentHash for signature checks. Otherwise returns recent entries with a content preview.",
+		description: "List entries from the MoltNet diary. When `entryIds` is provided, batch-fetches those specific entries (max 50) and returns full fields including entryType, contentSignature, and contentHash for signature checks. Otherwise returns recent entries with a content preview, filtered by any combination of tags (AND), excludeTags (NONE), entryType, and the taskFilter shorthand which expands into the right `task:*` tags.",
 		parameters: Type.Object({
 			limit: Type.Optional(Type.Number({ description: "Max entries to return (default 10)" })),
-			tag: Type.Optional(Type.String({ description: "Filter by tag (optional)" })),
+			tags: Type.Optional(Type.Array(Type.String({
+				minLength: 1,
+				maxLength: 50
+			}), {
+				description: "Tags filter — entry must have ALL listed tags (AND). Max 20.",
+				maxItems: 20
+			})),
+			excludeTags: Type.Optional(Type.Array(Type.String({
+				minLength: 1,
+				maxLength: 50
+			}), {
+				description: "Tags to exclude — entry must have NONE of these. Max 20.",
+				maxItems: 20
+			})),
+			entryType: Type.Optional(Type.String({ description: "Filter by entry type (procedural, semantic, episodic, reflection, identity, soul)." })),
+			taskFilter: Type.Optional(Type.Object({
+				taskId: Type.Optional(Type.String()),
+				taskType: Type.Optional(Type.String()),
+				correlationId: Type.Optional(Type.String()),
+				attemptN: Type.Optional(Type.Number())
+			}, { description: "Shorthand: any combination compiles to the matching task:* tags (task:id:<id>, task:type:<type>, task:correlation:<id>, task:attempt:<n>) and is merged into the tags filter." })),
 			entryIds: Type.Optional(Type.Array(Type.String(), {
-				description: "Batch-fetch specific entries by UUID (max 50). Overrides `limit` and `tag` for selection.",
+				description: "Batch-fetch specific entries by UUID (max 50). Overrides every other filter.",
 				maxItems: 50
 			}))
 		}),
@@ -7766,7 +7531,11 @@ function createMoltNetTools(config) {
 			if (batchMode) query.ids = params.entryIds;
 			else {
 				query.limit = params.limit ?? 10;
-				if (params.tag) query.tag = params.tag;
+				const expandedTags = compileTaskFilterTags(params.taskFilter);
+				const allTags = [...params.tags ?? [], ...expandedTags];
+				if (allTags.length) query.tags = allTags;
+				if (params.excludeTags?.length) query.excludeTags = params.excludeTags;
+				if (params.entryType) query.entryType = params.entryType;
 			}
 			const entries = await agent.entries.list(diaryId, query);
 			return {
@@ -7822,17 +7591,46 @@ function createMoltNetTools(config) {
 	const searchEntries = defineTool({
 		name: "moltnet_search_entries",
 		label: "Search MoltNet Diary Entries",
-		description: "Search diary entries by semantic query. Uses vector similarity to find relevant entries.",
+		description: "Hybrid (semantic + lexical) search over diary entries. Optional tags / excludeTags / entryTypes filters AND with the query; the taskFilter shorthand expands into task:* provenance tags so `taskFilter: { taskType: \"fulfill_brief\" }` returns only entries from fulfill_brief attempts. Filters apply server-side before ranking.",
 		parameters: Type.Object({
 			query: Type.String({ description: "Natural language search query" }),
-			limit: Type.Optional(Type.Number({ description: "Max results (default 5)" }))
+			limit: Type.Optional(Type.Number({ description: "Max results (default 5)" })),
+			tags: Type.Optional(Type.Array(Type.String({
+				minLength: 1,
+				maxLength: 50
+			}), {
+				description: "Entry must have ALL listed tags (AND). Max 20.",
+				maxItems: 20
+			})),
+			excludeTags: Type.Optional(Type.Array(Type.String({
+				minLength: 1,
+				maxLength: 50
+			}), {
+				description: "Entry must have NONE of these tags. Max 20.",
+				maxItems: 20
+			})),
+			entryTypes: Type.Optional(Type.Array(Type.String(), {
+				description: "Restrict to these entry types (procedural, semantic, episodic, reflection, identity, soul). Max 6.",
+				maxItems: 6
+			})),
+			taskFilter: Type.Optional(Type.Object({
+				taskId: Type.Optional(Type.String()),
+				taskType: Type.Optional(Type.String()),
+				correlationId: Type.Optional(Type.String()),
+				attemptN: Type.Optional(Type.Number())
+			}, { description: "Shorthand: any combination compiles to the matching task:* tags and is merged into the tags filter." }))
 		}),
 		async execute(_id, params) {
 			const { agent, diaryId } = ensureConnected(config);
+			const expandedTags = compileTaskFilterTags(params.taskFilter);
+			const allTags = [...params.tags ?? [], ...expandedTags];
 			const results = await agent.entries.search({
 				diaryId,
 				query: params.query,
-				limit: params.limit ?? 5
+				limit: params.limit ?? 5,
+				...allTags.length ? { tags: allTags } : {},
+				...params.excludeTags?.length ? { excludeTags: params.excludeTags } : {},
+				...params.entryTypes?.length ? { entryTypes: params.entryTypes } : {}
 			});
 			return {
 				content: [{
@@ -7852,7 +7650,7 @@ function createMoltNetTools(config) {
 	const createEntry = defineTool({
 		name: "moltnet_create_entry",
 		label: "Create MoltNet Diary Entry",
-		description: "Create a new diary entry to record decisions, findings, incidents, or reflections. During an active task, the entry is forced into the task diary and tagged with task:<id>, task_type:<type>, task_attempt:<n>, and correlation:<id> when set; an explicit diaryId mismatching the task diary is rejected.",
+		description: "Create a new diary entry to record decisions, findings, incidents, or reflections. During an active task, the entry is forced into the task diary and tagged with the task:* provenance namespace (task:id:<id>, task:type:<type>, task:attempt:<n>, plus task:correlation:<id> when set); an explicit diaryId mismatching the task diary is rejected.",
 		parameters: Type.Object({
 			title: Type.String({ description: "Entry title (concise, descriptive)" }),
 			content: Type.String({ description: "Entry content (markdown)" }),
@@ -7869,10 +7667,10 @@ function createMoltNetTools(config) {
 				if (params.diaryId && params.diaryId !== taskCtx.diaryId) throw new Error(`entries_create: diaryId "${params.diaryId}" does not match the active task diary "${taskCtx.diaryId}". Entries created during a task must land in the task diary.`);
 				targetDiaryId = taskCtx.diaryId;
 				autoTags = [
-					`task:${taskCtx.taskId}`,
-					`task_type:${taskCtx.taskType}`,
-					`task_attempt:${taskCtx.attemptN}`,
-					...taskCtx.correlationId ? [`correlation:${taskCtx.correlationId}`] : []
+					`task:id:${taskCtx.taskId}`,
+					`task:type:${taskCtx.taskType}`,
+					`task:attempt:${taskCtx.attemptN}`,
+					...taskCtx.correlationId ? [`task:correlation:${taskCtx.correlationId}`] : []
 				];
 			} else targetDiaryId = params.diaryId ?? envDiaryId;
 			const userTags = params.tags ?? [];
@@ -7973,8 +7771,6 @@ function createMoltNetTools(config) {
 		renderPack,
 		listRenderedPacks,
 		getRenderedPack,
-		createJudgePackTask,
-		judgeRenderedPack,
 		diaryTags,
 		listEntries,
 		getEntry,
@@ -8591,135 +8387,6 @@ function ensureRelativeWorktreePaths(gitconfig) {
 	return `${gitconfig}${gitconfig.endsWith("\n") ? "" : "\n"}[worktree]\n\tuseRelativePaths = true\n`;
 }
 //#endregion
-//#region src/moltnet/judge-recipe-cid.ts
-var require$1 = createRequire(import.meta.url);
-var SELF_PACKAGE_NAME = "@themoltnet/pi-extension";
-var PI_PACKAGE_NAME = "@mariozechner/pi-coding-agent";
-var SDK_PACKAGE_NAME = "@themoltnet/sdk";
-var CID_VERSION = 1;
-var RAW_CODEC = 85;
-var SHA2_256_CODE = 18;
-var BASE32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
-function findSelfPackageDir() {
-	const start = path.dirname(fileURLToPath(import.meta.url));
-	let dir = start;
-	while (true) {
-		const candidate = path.join(dir, "package.json");
-		if (existsSync(candidate)) {
-			if (JSON.parse(readFileSync(candidate, "utf8")).name === SELF_PACKAGE_NAME) return dir;
-		}
-		const parent = path.dirname(dir);
-		if (parent === dir) return start;
-		dir = parent;
-	}
-}
-var PACKAGE_DIR = findSelfPackageDir();
-function sha256Hex(value) {
-	return createHash("sha256").update(value, "utf8").digest("hex");
-}
-function encodeVarint(value) {
-	const bytes = [];
-	let current = value >>> 0;
-	while (current >= 128) {
-		bytes.push(current & 127 | 128);
-		current >>>= 7;
-	}
-	bytes.push(current);
-	return bytes;
-}
-function base32Lower(bytes) {
-	let bits = 0;
-	let value = 0;
-	let output = "";
-	for (const byte of bytes) {
-		value = value << 8 | byte;
-		bits += 8;
-		while (bits >= 5) {
-			output += BASE32_ALPHABET[value >>> bits - 5 & 31];
-			bits -= 5;
-		}
-	}
-	if (bits > 0) output += BASE32_ALPHABET[value << 5 - bits & 31];
-	return `b${output}`;
-}
-function stableStringify(value) {
-	if (value === null || typeof value !== "object") return JSON.stringify(value);
-	if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
-	return `{${Object.entries(value).sort(([left], [right]) => left.localeCompare(right)).map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(",")}}`;
-}
-function readPackageVersion(pkgPath, expectedName) {
-	if (!existsSync(pkgPath)) return null;
-	const parsed = JSON.parse(readFileSync(pkgPath, "utf8"));
-	if (expectedName && parsed.name !== expectedName) return null;
-	return typeof parsed.version === "string" ? parsed.version : null;
-}
-function resolveInstalledPackageVersion(packageName) {
-	const candidates = [];
-	try {
-		candidates.push(path.dirname(require$1.resolve(packageName)));
-	} catch {}
-	let dir = PACKAGE_DIR;
-	while (true) {
-		candidates.push(path.join(dir, "node_modules", packageName));
-		const parent = path.dirname(dir);
-		if (parent === dir) break;
-		dir = parent;
-	}
-	for (const start of candidates) {
-		let current = start;
-		while (true) {
-			const version = readPackageVersion(path.join(current, "package.json"), packageName);
-			if (version) return version;
-			const parent = path.dirname(current);
-			if (parent === current) break;
-			current = parent;
-		}
-	}
-	return null;
-}
-function resolvePiJudgeRecipeVersions() {
-	return {
-		pi: resolveInstalledPackageVersion(PI_PACKAGE_NAME),
-		piExtension: readPackageVersion(path.join(PACKAGE_DIR, "package.json"), SELF_PACKAGE_NAME),
-		sdk: resolveInstalledPackageVersion(SDK_PACKAGE_NAME)
-	};
-}
-function buildPiJudgeRecipeManifest(inputs) {
-	return {
-		kind: "pi-judge-recipe/v1",
-		versions: {
-			...resolvePiJudgeRecipeVersions(),
-			...inputs.overrides
-		},
-		assets: {
-			promptAsset: inputs.promptAsset ?? null,
-			rubricAsset: inputs.rubricAsset ?? null,
-			skillSourcePath: inputs.skillSourcePath ?? null
-		},
-		hashes: {
-			judgePromptSha256: sha256Hex(inputs.judgePrompt),
-			rubricSha256: sha256Hex(inputs.rubric),
-			skillFragmentSha256: inputs.skillFragment ? sha256Hex(inputs.skillFragment) : null,
-			implementationSha256: inputs.implementationSource ? sha256Hex(inputs.implementationSource) : null
-		}
-	};
-}
-function computePiJudgeRecipeCid(inputs) {
-	const manifest = buildPiJudgeRecipeManifest(inputs);
-	const manifestBytes = Buffer.from(stableStringify(manifest), "utf8");
-	const digestBytes = createHash("sha256").update(manifestBytes).digest();
-	return {
-		cid: base32Lower(Uint8Array.from([
-			...encodeVarint(CID_VERSION),
-			...encodeVarint(RAW_CODEC),
-			...encodeVarint(SHA2_256_CODE),
-			...encodeVarint(digestBytes.length),
-			...digestBytes
-		])),
-		manifest
-	};
-}
-//#endregion
 //#region src/otel/index.ts
 var TRACER_NAME = "@themoltnet/pi-extension/otel";
 function stripReservedAttrs(attrs) {
@@ -8891,7 +8558,13 @@ if (!FormatRegistry.Has("date-time")) FormatRegistry.Set("date-time", (v) => !Nu
 /**
 * How a judge must score a single criterion.
 *
-* - `llm_judged`: 0..1 continuous, `rationale` required.
+* - `llm_score`: 0..1 continuous, `rationale` required. Smooths failures
+*   into the gradient — use `llm_checklist` instead for properties where
+*   a single failure is a real failure (grounding, faithfulness).
+* - `llm_checklist`: judge enumerates per-claim assertions with
+*   `{passed, evidence}`. The criterion's numeric `score` is derived:
+*   `1` iff every assertion passes, else `0`. Per-claim evidence is the
+*   dataset for cluster-analysis of failure modes. See #999.
 * - `boolean`: 0 or 1, `rationale` optional.
 * - `deterministic_signature_check`: judge runs a signature check;
 *   result is 0 or 1. No LLM discretion.
@@ -8899,11 +8572,31 @@ if (!FormatRegistry.Has("date-time")) FormatRegistry.Set("date-time", (v) => !Nu
 *   appears in the rendered output; 0 or 1.
 */
 var RubricScoringMode = Type$1.Union([
-	Type$1.Literal("llm_judged"),
+	Type$1.Literal("llm_score"),
+	Type$1.Literal("llm_checklist"),
 	Type$1.Literal("boolean"),
 	Type$1.Literal("deterministic_signature_check"),
 	Type$1.Literal("deterministic_coverage_check")
 ], { $id: "RubricScoringMode" });
+/**
+* One binary check produced by an `llm_checklist`-mode criterion.
+*
+* `evidence` is REQUIRED for both PASS and FAIL — agentskills.io grading
+* principle: \"Don't give the benefit of the doubt.\" A PASS without
+* concrete evidence (a quoted span, an entry id, a source location)
+* cannot be audited. A FAIL without evidence cannot be clustered into
+* structural fixes. The same shape is reused by `judge-eval-variant`
+* (#943) so tooling, dashboards, and analysis stay uniform.
+*/
+var AssertionResult = Type$1.Object({
+	id: Type$1.String({ minLength: 1 }),
+	text: Type$1.String({ minLength: 1 }),
+	passed: Type$1.Boolean(),
+	evidence: Type$1.String({ minLength: 1 })
+}, {
+	$id: "AssertionResult",
+	additionalProperties: false
+});
 var RubricCriterion = Type$1.Object({
 	id: Type$1.String({ minLength: 1 }),
 	description: Type$1.String({ minLength: 1 }),
@@ -8963,44 +8656,165 @@ unrelated subsystems and the test coverage on the auth path is
 unchanged" is.
 `.trim();
 //#endregion
+//#region ../tasks/src/success-criteria.ts
+/**
+* SuccessCriteria — imposer-stated acceptance criteria, evaluated in two
+* complementary places.
+*
+* Before this envelope existed, criteria were scattered: a vestigial
+* `criteriaCid` column nobody resolved, an `acceptanceCriteria: string[]`
+* field on `fulfill_brief.input` that was "interpreted by the claiming
+* agent," and inline `rubric` / `criteria[]` fields on judgment-task
+* inputs. None of those were machine-verifiable end-to-end.
+*
+* This module defines a single, content-addressable envelope an imposer
+* attaches to any task type. It has four orthogonal sections — pick
+* whichever apply per task type:
+*
+*   - `gates`        Deterministic structural checks (CID/schema match)
+*   - `assertions`   Declarative claims about output JSON
+*   - `rubric`       Weighted-criteria scoring instrument, reused
+*                    verbatim from `./rubric.ts`.
+*   - `sideEffects`  Required process side-effects (e.g. diary entry)
+*
+* ## Two roles, two task types
+*
+* **Producer self-assessment** (fulfillment tasks: `fulfill_brief`,
+* `curate_pack`, `render_pack`). The producer **LLM** evaluates the
+* criteria against its own output and emits a `VerificationRecord`
+* inside `output.verification`. The daemon is pure passthrough — it
+* does not run `evaluateAssertions`, does not inspect the verification
+* record. The REST API is dumb storage; it never re-runs assertions and
+* never runs LLMs. The cross-field rule
+* `requireVerificationWhenCriteriaPresent` enforces "verification
+* required iff successCriteria present" at task-output validation time
+* (server-side schema check). Self-assessment is a truthful self-rating,
+* NOT enforcement — `verification.passed=false` does not block /complete
+* and does not affect `acceptedAttemptN`. See
+* `docs/agent-runtime.md` for the full producer/judge flow.
+*
+* **Binding evaluation** (judgment tasks: `assess_brief`, `judge_pack`).
+* A separate task whose job IS the application of `successCriteria` to
+* someone else's output. Different agent (enforced at claim time), same
+* envelope. The judge's verdict is binding: this is the *gate* in the
+* MoltNet model. The rubric inside `successCriteria.rubric` IS the job
+* spec for the judge.
+*
+* The clean chain: producer task with `successCriteria` → producer
+* self-assesses honestly → imposer (or automation) creates a downstream
+* judgment task that references the same `successCriteria` (or a
+* stricter rubric) → judgment task delivers the binding verdict.
+*
+* Storage: SuccessCriteria lives inline at `task.input.successCriteria`,
+* pinned via the task's `inputCid`. No separate column or hash. When
+* #881 lands, the `rubric` field can graduate to `{ rubricCid }` lookup
+* without changing this envelope, and producer + judge tasks can pin
+* the SAME rubric across the chain for end-to-end auditability.
+*/
+var SchemaCheckSpec = Type$1.Object({ schemaCid: Type$1.String({ minLength: 1 }) }, { additionalProperties: false });
+var CidEqualsSpec = Type$1.Object({
+	path: Type$1.String({ minLength: 1 }),
+	expected: Type$1.String({ minLength: 1 })
+}, { additionalProperties: false });
+var Gate = Type$1.Union([Type$1.Object({
+	id: Type$1.String({ minLength: 1 }),
+	kind: Type$1.Literal("schema-check"),
+	spec: SchemaCheckSpec,
+	required: Type$1.Boolean()
+}, { additionalProperties: false }), Type$1.Object({
+	id: Type$1.String({ minLength: 1 }),
+	kind: Type$1.Literal("cid-equals"),
+	spec: CidEqualsSpec,
+	required: Type$1.Boolean()
+}, { additionalProperties: false })], { $id: "Gate" });
+var AssertionOp = Type$1.Union([
+	Type$1.Literal("exists"),
+	Type$1.Literal("equals"),
+	Type$1.Literal("matches"),
+	Type$1.Literal("in-range"),
+	Type$1.Literal("min-length")
+], { $id: "AssertionOp" });
+var Assertion = Type$1.Object({
+	id: Type$1.String({ minLength: 1 }),
+	path: Type$1.String({ minLength: 1 }),
+	op: AssertionOp,
+	value: Type$1.Optional(Type$1.Unknown())
+}, {
+	$id: "Assertion",
+	additionalProperties: false
+});
+var SideEffectsSpec = Type$1.Object({
+	diaryEntryRequired: Type$1.Optional(Type$1.Boolean()),
+	diaryEntryTags: Type$1.Optional(Type$1.Array(Type$1.String({ minLength: 1 }))),
+	referencedEntries: Type$1.Optional(Type$1.Integer({ minimum: 0 }))
+}, {
+	$id: "SideEffectsSpec",
+	additionalProperties: false
+});
+var SuccessCriteria = Type$1.Object({
+	version: Type$1.Literal(1),
+	gates: Type$1.Optional(Type$1.Array(Gate)),
+	assertions: Type$1.Optional(Type$1.Array(Assertion)),
+	rubric: Type$1.Optional(Rubric),
+	minComposite: Type$1.Optional(Type$1.Number({
+		minimum: 0,
+		maximum: 1
+	})),
+	sideEffects: Type$1.Optional(SideEffectsSpec)
+}, {
+	$id: "SuccessCriteria",
+	additionalProperties: false
+});
+var VerificationResultStatus = Type$1.Union([
+	Type$1.Literal("pass"),
+	Type$1.Literal("fail"),
+	Type$1.Literal("skip")
+], { $id: "VerificationResultStatus" });
+var VerificationResultKind = Type$1.Union([
+	Type$1.Literal("gate"),
+	Type$1.Literal("assertion"),
+	Type$1.Literal("rubric"),
+	Type$1.Literal("sideEffect")
+], { $id: "VerificationResultKind" });
+var VerificationResult = Type$1.Object({
+	id: Type$1.String({ minLength: 1 }),
+	kind: VerificationResultKind,
+	status: VerificationResultStatus,
+	detail: Type$1.Optional(Type$1.String())
+}, {
+	$id: "VerificationResult",
+	additionalProperties: false
+});
+var VerificationRecord = Type$1.Object({
+	inputCid: Type$1.String({ minLength: 1 }),
+	results: Type$1.Array(VerificationResult),
+	passed: Type$1.Boolean()
+}, {
+	$id: "VerificationRecord",
+	additionalProperties: false
+});
+//#endregion
 //#region ../tasks/src/task-types/assess-brief.ts
 /**
 * `assess_brief` — independently evaluate a fulfilled brief.
 *
 * output_kind: judgment
-* criteria: required (rubric lives as a diary entry with tag='rubric';
-*   the Task's `criteria_cid` points at that entry)
+* criteria: required (`successCriteria.rubric` — same envelope as
+*   `judge_pack`)
 * references: required (must reference the target `fulfill_brief` task)
 *
 * The assessor is a different agent from the producer (enforced by the
 * server / runtime at claim time — not in the wire schema).
+*
+* The rubric in `successCriteria` IS the job spec — the assessor applies
+* it to the target task's output and emits per-criterion scores. Other
+* sections (`assertions`, `gates`, `sideEffects`) MAY be present and are
+* evaluated against the *assessor's output*.
 */
 var ASSESS_BRIEF_TYPE = "assess_brief";
-/**
-* One criterion lifted from the rubric. Denormalized into the input so the
-* assessor prompt can be built without a second fetch; the `criteria_cid`
-* on the Task row remains authoritative for verification.
-*/
-var AssessBriefCriterion = Type$1.Object({
-	id: Type$1.String({ minLength: 1 }),
-	description: Type$1.String({ minLength: 1 }),
-	weight: Type$1.Number({
-		minimum: 0,
-		maximum: 1
-	}),
-	scoring: Type$1.Union([
-		Type$1.Literal("llm_judged"),
-		Type$1.Literal("boolean"),
-		Type$1.Literal("deterministic_signature_check")
-	])
-}, {
-	$id: "AssessBriefCriterion",
-	additionalProperties: false
-});
 var AssessBriefInput = Type$1.Object({
 	targetTaskId: Type$1.String({ format: "uuid" }),
-	criteria: Type$1.Array(AssessBriefCriterion, { minItems: 1 }),
-	rubricPreamble: Type$1.Optional(Type$1.String())
+	successCriteria: SuccessCriteria
 }, {
 	$id: "AssessBriefInput",
 	additionalProperties: false
@@ -9069,7 +8883,8 @@ var CuratePackInput = Type$1.Object({
 		prefix: Type$1.Optional(Type$1.String())
 	}, { additionalProperties: false })),
 	tokenBudget: Type$1.Optional(Type$1.Number({ minimum: 500 })),
-	recipe: Type$1.Optional(Type$1.Union([Type$1.Literal("topic-focused-v1"), Type$1.Literal("scope-inventory-v1")]))
+	recipe: Type$1.Optional(Type$1.Union([Type$1.Literal("topic-focused-v1"), Type$1.Literal("scope-inventory-v1")])),
+	successCriteria: Type$1.Optional(SuccessCriteria)
 }, {
 	$id: "CuratePackInput",
 	additionalProperties: false
@@ -9094,7 +8909,8 @@ var CuratePackOutput = Type$1.Object({
 		droppedIds: Type$1.Optional(Type$1.Array(Type$1.String({ format: "uuid" }))),
 		notes: Type$1.String({ minLength: 1 })
 	}, { additionalProperties: false }))),
-	summary: Type$1.String({ minLength: 1 })
+	summary: Type$1.String({ minLength: 1 }),
+	verification: Type$1.Optional(VerificationRecord)
 }, {
 	$id: "CuratePackOutput",
 	additionalProperties: false
@@ -9113,6 +8929,7 @@ var FulfillBriefInput = Type$1.Object({
 	brief: Type$1.String({ minLength: 1 }),
 	title: Type$1.Optional(Type$1.String()),
 	acceptanceCriteria: Type$1.Optional(Type$1.Array(Type$1.String())),
+	successCriteria: Type$1.Optional(SuccessCriteria),
 	seedFiles: Type$1.Optional(Type$1.Array(Type$1.String())),
 	scopeHint: Type$1.Optional(Type$1.String())
 }, {
@@ -9132,7 +8949,8 @@ var FulfillBriefOutput = Type$1.Object({
 	}, { additionalProperties: false })),
 	pullRequestUrl: Type$1.Union([Type$1.String(), Type$1.Null()]),
 	diaryEntryIds: Type$1.Array(Type$1.String({ format: "uuid" })),
-	summary: Type$1.String({ minLength: 1 })
+	summary: Type$1.String({ minLength: 1 }),
+	verification: Type$1.Optional(VerificationRecord)
 }, {
 	$id: "FulfillBriefOutput",
 	additionalProperties: false
@@ -9143,19 +8961,18 @@ var FulfillBriefOutput = Type$1.Object({
 * `judge_pack` — independently score a rendered pack against a rubric.
 *
 * output_kind: judgment
-* criteria: required (embedded `rubric` — see Phase 1 design in #852
-*   amendment and Phase 2 issue #881)
+* criteria: required (`successCriteria.rubric` — see #852 amendment and
+*   Phase 2 issue #881)
 * references: required (must reference the `render_pack` task it judges,
 *   role='judged_work')
 *
 * Step 3 of the three-session attribution loop (#875). Mirrors
 * `assess_brief` in shape, but over a rendered context pack.
 *
-* Phase 1 rubric storage: the rubric body is inlined in `input.rubric`.
-* Integrity is pinned via the task's `input_cid`. Phase 2 (#881) will
-* replace the inline body with a `rubric_cid` referencing a `rubrics`
-* table row; the denormalized `criteria[]` projection stays for prompt
-* building without a fetch.
+* Phase 1 rubric storage: the rubric body lives at
+* `input.successCriteria.rubric` and is pinned via the task's `inputCid`.
+* Phase 2 (#881) will replace the inline body with a `rubricCid`
+* referencing a stored `rubrics` row; the envelope stays the same.
 *
 * The judge MUST be a different agent from the renderer. Enforced at
 * claim time by the runtime, not in the wire schema.
@@ -9164,7 +8981,7 @@ var JUDGE_PACK_TYPE = "judge_pack";
 var JudgePackInput = Type$1.Object({
 	renderedPackId: Type$1.String({ format: "uuid" }),
 	sourcePackId: Type$1.String({ format: "uuid" }),
-	rubric: Rubric
+	successCriteria: SuccessCriteria
 }, {
 	$id: "JudgePackInput",
 	additionalProperties: false
@@ -9177,6 +8994,7 @@ var JudgePackScore = Type$1.Object({
 		maximum: 1
 	}),
 	rationale: Type$1.Optional(Type$1.String()),
+	assertions: Type$1.Optional(Type$1.Array(AssertionResult, { minItems: 1 })),
 	evidence: Type$1.Optional(Type$1.Record(Type$1.String(), Type$1.Unknown()))
 }, {
 	$id: "JudgePackScore",
@@ -9195,6 +9013,39 @@ var JudgePackOutput = Type$1.Object({
 	$id: "JudgePackOutput",
 	additionalProperties: false
 });
+/**
+* Cross-field validator for JudgePackOutput. Run after the TypeBox
+* schema check passes. Enforces invariants the schema can't express:
+*
+* 1. If a `JudgePackScore` carries an `assertions` array (i.e. the
+*    judge ran the criterion in `llm_checklist` mode), its numeric
+*    `score` MUST equal `1` if every `assertions[i].passed` is true,
+*    else `0`. The prompt instructs the judge to derive `score` from
+*    the array, but the LLM can drift — without this check, the
+*    runtime accepts inconsistent payloads and propagates them into
+*    composite scores and judge attestations (#999 P1).
+*
+* 2. If `score` is exactly `1` AND `assertions` is present, every
+*    assertion must have `passed: true`. Catches the failure mode in
+*    the issue: "score: 1 with a failing assertion accepted."
+*
+* Cross-rubric checks (e.g. "did the judge populate `assertions` for
+* every criterion the rubric marked `llm_checklist`?") require the
+* input rubric and live in a separate, runtime-side validator. This
+* one is rubric-agnostic on purpose — it catches within-score
+* inconsistency without needing the original task input.
+*/
+function validateJudgePackOutput(output) {
+	const scores = output.scores;
+	for (let i = 0; i < scores.length; i++) {
+		const s = scores[i];
+		if (!s.assertions) continue;
+		const allPassed = s.assertions.every((a) => a.passed);
+		const expected = allPassed ? 1 : 0;
+		if (s.score !== expected) return `scores[${i}] (criterionId="${s.criterionId}"): assertions ${allPassed ? "all pass" : "have at least one fail"} but score=${s.score}. Score must be derived: 1 iff every assertion passes, else 0 (#999 llm_checklist rule).`;
+	}
+	return null;
+}
 //#endregion
 //#region ../tasks/src/task-types/render-pack.ts
 /**
@@ -9216,7 +9067,8 @@ var RENDER_PACK_TYPE = "render_pack";
 var RenderPackInput = Type$1.Object({
 	packId: Type$1.String({ format: "uuid" }),
 	persist: Type$1.Optional(Type$1.Boolean()),
-	pinned: Type$1.Optional(Type$1.Boolean())
+	pinned: Type$1.Optional(Type$1.Boolean()),
+	successCriteria: Type$1.Optional(SuccessCriteria)
 }, {
 	$id: "RenderPackInput",
 	additionalProperties: false
@@ -9227,7 +9079,8 @@ var RenderPackOutput = Type$1.Object({
 	renderMethod: Type$1.String({ minLength: 1 }),
 	byteSize: Type$1.Number({ minimum: 0 }),
 	entriesRendered: Type$1.Number({ minimum: 0 }),
-	summary: Type$1.String({ minLength: 1 })
+	summary: Type$1.String({ minLength: 1 }),
+	verification: Type$1.Optional(VerificationRecord)
 }, {
 	$id: "RenderPackOutput",
 	additionalProperties: false
@@ -9235,6 +9088,33 @@ var RenderPackOutput = Type$1.Object({
 //#endregion
 //#region ../tasks/src/task-types/index.ts
 /**
+* Validate that a judgment-task input carries a rubric inside its
+* `successCriteria` envelope, and that the rubric's weights sum to 1.
+* Used for `assess_brief` and `judge_pack`.
+*/
+function validateJudgmentInput(input) {
+	const sc = input.successCriteria;
+	if (!sc) return "successCriteria is required for judgment tasks";
+	if (!sc.rubric) return "successCriteria.rubric is required for judgment tasks";
+	return validateRubricWeights(sc.rubric);
+}
+/**
+* Cross-field rule: when `input.successCriteria` is set, the producer's
+* output MUST carry a `verification` block (the LLM's self-assessment).
+* When it is unset, the output MUST NOT carry one (avoid garbage data).
+*
+* Used by all three fulfillment task types. Judgment task outputs do
+* NOT use this — their entire output IS a structured judgment, so a
+* separate self-assessment field would be circular.
+*/
+function requireVerificationWhenCriteriaPresent(output, input) {
+	const hasCriteria = input !== void 0 && input !== null && input.successCriteria !== void 0;
+	const hasVerification = output.verification !== void 0;
+	if (hasCriteria && !hasVerification) return "output.verification is required because input.successCriteria is set; the producer LLM must self-assess against the criteria";
+	if (!hasCriteria && hasVerification) return "output.verification was supplied but input.successCriteria is unset; omit verification when there are no criteria to assess against";
+	return null;
+}
+/**
 * Client-side task-type registry. Mirrors the server-owned DB registry
 * (PR 2). PR 0 shipped the two brief types; this PR adds the three
 * pack-pipeline types for the three-session attribution loop (#875).
@@ -9249,41 +9129,41 @@ var BUILT_IN_TASK_TYPES = {
 		inputSchema: FulfillBriefInput,
 		outputSchema: FulfillBriefOutput,
 		outputKind: "artifact",
-		requiresCriteria: false,
-		requiresReferences: false
+		requiresReferences: false,
+		validateOutput: requireVerificationWhenCriteriaPresent
 	},
 	[ASSESS_BRIEF_TYPE]: {
 		name: ASSESS_BRIEF_TYPE,
 		inputSchema: AssessBriefInput,
 		outputSchema: AssessBriefOutput,
 		outputKind: "judgment",
-		requiresCriteria: true,
-		requiresReferences: true
+		requiresReferences: true,
+		validateInput: validateJudgmentInput
 	},
 	[CURATE_PACK_TYPE]: {
 		name: CURATE_PACK_TYPE,
 		inputSchema: CuratePackInput,
 		outputSchema: CuratePackOutput,
 		outputKind: "artifact",
-		requiresCriteria: false,
-		requiresReferences: false
+		requiresReferences: false,
+		validateOutput: requireVerificationWhenCriteriaPresent
 	},
 	[RENDER_PACK_TYPE]: {
 		name: RENDER_PACK_TYPE,
 		inputSchema: RenderPackInput,
 		outputSchema: RenderPackOutput,
 		outputKind: "artifact",
-		requiresCriteria: false,
-		requiresReferences: false
+		requiresReferences: false,
+		validateOutput: requireVerificationWhenCriteriaPresent
 	},
 	[JUDGE_PACK_TYPE]: {
 		name: JUDGE_PACK_TYPE,
 		inputSchema: JudgePackInput,
 		outputSchema: JudgePackOutput,
 		outputKind: "judgment",
-		requiresCriteria: false,
 		requiresReferences: true,
-		validateInput: (input) => validateRubricWeights(input.rubric)
+		validateInput: validateJudgmentInput,
+		validateOutput: validateJudgePackOutput
 	}
 };
 //#endregion
@@ -9313,13 +9193,30 @@ function schemaErrors(prefix, schema, value) {
 		message: error.message
 	}));
 }
-function validateTaskOutput(taskType, output) {
+function validateTaskOutput(taskType, output, input) {
 	const entry = getTaskTypeEntry(taskType);
 	if (!entry) return [{
 		field: "taskType",
 		message: `Unknown task type: ${taskType}`
 	}];
-	return schemaErrors("output", entry.outputSchema, output);
+	const errors = schemaErrors("output", entry.outputSchema, output);
+	if (errors.length > 0) return errors;
+	if (entry.validateOutput) {
+		const validationError = entry.validateOutput(output, input);
+		if (validationError) return [{
+			field: "output",
+			message: validationError
+		}];
+	}
+	return [];
+}
+/**
+* Resolve the TypeBox output schema registered for `taskType`. Returns
+* `null` for unknown task types — callers (e.g. submit-tool factories)
+* decide how to surface that.
+*/
+function getTaskOutputSchema(taskType) {
+	return getTaskTypeEntry(taskType)?.outputSchema ?? null;
 }
 //#endregion
 //#region ../tasks/src/wire.ts
@@ -9451,7 +9348,6 @@ Type$1.Object({
 	input: Type$1.Record(Type$1.String(), Type$1.Unknown()),
 	inputSchemaCid: Cid,
 	inputCid: Cid,
-	criteriaCid: Type$1.Union([Cid, Type$1.Null()]),
 	references: Type$1.Array(TaskRef),
 	correlationId: Type$1.Union([Uuid, Type$1.Null()]),
 	imposedByAgentId: Type$1.Union([Uuid, Type$1.Null()]),
@@ -9549,6 +9445,98 @@ Type$1.Object({
 	additionalProperties: false
 });
 //#endregion
+//#region ../agent-runtime/src/output-tools.ts
+/**
+* Submit-output tool contract.
+*
+* The runtime advertises a per-task-type "submit output" tool in every
+* prompt. The tool's name and schema must be the same wherever the
+* agent encounters it: in the system prompt the model reads, in the
+* executor that registers it, in any future executor that wires it
+* into a different coding-agent SDK.
+*
+* This module is the single source of truth for the (toolName,
+* description, parametersSchema) triple. It has no executor-specific
+* dependencies — `agent-runtime` is intentionally agnostic of the
+* concrete coding-agent runtime — so anything that wants to register
+* the tool (pi-extension today, a Codex-SDK adapter tomorrow, a local
+* MCP bridge if we ever go that route) can read the contract here and
+* wire it into its own tool API.
+*
+* Conventions captured here:
+*
+*   - Tool name shape: `submit_<task_type>_output` (e.g.
+*     `submit_fulfill_brief_output`). This is the string the model
+*     sees in the prompt's "preferred path" instruction.
+*   - Parameters schema: the task type's TypeBox `*Output` schema
+*     **directly**, NOT wrapped in `{ output: <schema> }`. Tool args
+*     ARE the payload, so the model gets field-level guidance at
+*     planning time.
+*   - Description text: shared across executors so the tool's
+*     advertised purpose is identical regardless of who registers it.
+*/
+/**
+* Build the submit-output contract for a task type. Returns `null` if
+* no output schema is registered for that type — callers (executors)
+* decide whether that's a hard error, a fallback to the parser-only
+* path, or anything else.
+*/
+function getSubmitOutputContract(taskType) {
+	const schema = getTaskOutputSchema(taskType);
+	if (!schema) return null;
+	return {
+		toolName: submitOutputToolName(taskType),
+		taskType,
+		description: `Submit the structured output for this ${taskType} task. Call exactly once when done. The arguments below ARE the output payload — pass each top-level field of the task type's output schema directly. The runtime validates the args against the schema; mismatches return a tool error you can recover from in the same session. On a valid call the runtime captures the payload and ends the session — you do not need to repeat the JSON in your final assistant message.`,
+		parametersSchema: schema
+	};
+}
+/**
+* Plain-string name builder. Exposed separately so the prompt builder
+* can advertise the tool name even when the schema lookup is deferred
+* to the executor (the prompt is built before any tool registration
+* happens).
+*/
+function submitOutputToolName(taskType) {
+	return `submit_${taskType}_output`;
+}
+//#endregion
+//#region ../agent-runtime/src/prompts/final-output.ts
+function buildFinalOutputBlock(opts) {
+	const { taskType, outputSchemaName, shapeSketch, extraNotes } = opts;
+	const submitTool = submitOutputToolName(taskType);
+	const lines = [
+		"## Final output (read this carefully)",
+		"",
+		`Your VERY LAST action in this conversation MUST report the structured`,
+		`output matching \`${outputSchemaName}\`. Two ways to do it, in order of`,
+		`preference:`,
+		"",
+		`1. **Preferred — call \`${submitTool}\` exactly once** with the payload.`,
+		`   The runtime captures the validated arguments and ends the session.`,
+		`   If the tool is registered, prefer this path.`,
+		`2. **Fallback** — if the submit tool is unavailable, your very last`,
+		`   assistant message MUST be a single JSON object matching`,
+		`   \`${outputSchemaName}\`. No prose before or after. No code fences.`,
+		`   No "ok" or "done". The runtime parses the last balanced top-level`,
+		`   JSON object as the output.`,
+		"",
+		`Failing to report structured output as the very last action means the`,
+		`attempt is marked failed even if the underlying work succeeded.`,
+		"",
+		`Output shape:`,
+		"",
+		"```json",
+		shapeSketch,
+		"```"
+	];
+	if (extraNotes?.length) {
+		lines.push("");
+		for (const note of extraNotes) lines.push(note);
+	}
+	return lines.join("\n");
+}
+//#endregion
 //#region ../agent-runtime/src/prompts/assess-brief.ts
 /**
 * Build the system prompt for an `assess_brief` judge attempt.
@@ -9573,11 +9561,12 @@ Type$1.Object({
 * anything) work without any code path here.
 */
 function buildAssessBriefPrompt(input, ctx) {
-	const criteriaList = input.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
-	const preambleSection = input.rubricPreamble ? [
+	const rubric = input.successCriteria.rubric;
+	const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
+	const preambleSection = rubric.preamble ? [
 		"### Rubric preamble",
 		"",
-		input.rubricPreamble,
+		rubric.preamble,
 		""
 	].join("\n") : "";
 	return [
@@ -9606,6 +9595,20 @@ function buildAssessBriefPrompt(input, ctx) {
 		"   - `summary` set → use as orientation, not as ground truth.",
 		"Adapt your investigation to whatever the output actually contains. Score conservatively when the producer's output is opaque or thin.",
 		"",
+		"### Querying the producer's diary entries",
+		"",
+		`Beyond the explicit \`diaryEntryIds[]\` from step 3, the producer's`,
+		"attempts auto-tag every entry with the `task:*` provenance namespace.",
+		"You can pull the full set without enumerating ids by passing the",
+		"`taskFilter` shorthand to `moltnet_list_entries` or",
+		"`moltnet_search_entries`:",
+		"",
+		`- All entries from the producer task: \`taskFilter: { taskId: "${input.targetTaskId}" }\`.`,
+		"- Just the accepted attempt: add `attemptN: <acceptedAttemptN>`.",
+		"- The producer plus any prior chain (when a correlationId was set):",
+		"  read it from the task you fetched in step 1 and pass",
+		"  `taskFilter: { correlationId: \"<id>\" }`.",
+		"",
 		preambleSection,
 		"## Criteria",
 		"",
@@ -9613,19 +9616,63 @@ function buildAssessBriefPrompt(input, ctx) {
 		"",
 		"### Scoring rules",
 		"",
-		"- `llm_judged`: score 0..1 continuous. `rationale` REQUIRED (2–4 sentences).",
+		"- `llm_score`: score 0..1 continuous. `rationale` REQUIRED (2–4 sentences).",
 		"- `boolean`: score exactly 0 or 1. `rationale` optional.",
 		"- `deterministic_signature_check`: run `moltnet entry verify` on every diary entry returned by step 3 above AND `git verify-commit` on every commit. Score 1 iff ALL signatures are valid; otherwise 0. Populate `evidence.commitsVerified`, `evidence.commitsTotal`, `evidence.signatureFailures`.",
 		"",
-		"### Final output",
+		"Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before reporting structured output.",
 		"",
-		"Emit a JSON object matching `AssessBriefOutput`:",
-		"  { \"scores\": [{criterionId, score, rationale?, evidence?}], \"composite\", \"verdict\", \"judgeModel\"? }",
-		"`composite` = Σ(weight_i × score_i) recomputed. The runtime will reject a mismatch.",
-		"Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before emitting the JSON."
+		buildFinalOutputBlock({
+			taskType: "assess_brief",
+			outputSchemaName: "AssessBriefOutput",
+			shapeSketch: [
+				"{",
+				"  \"scores\": [",
+				"    { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} }",
+				"  ],",
+				"  \"composite\": <sum>,",
+				"  \"verdict\": \"<1-3 sentence overall>\",",
+				"  \"judgeModel\": \"<provider:model>\"",
+				"}"
+			].join("\n"),
+			extraNotes: ["`composite` = Σ(weight_i × score_i) recomputed. The runtime rejects a mismatch."]
+		})
 	].filter(Boolean).join("\n");
 }
 //#endregion
+//#region ../agent-runtime/src/prompts/self-verification.ts
+function buildSelfVerificationBlock(taskId) {
+	return [
+		"## Self-verification",
+		"",
+		`Call \`moltnet_get_task\` with task id \`${taskId}\` and read \`input.successCriteria\`.`,
+		"",
+		"- If `input.successCriteria` is **absent**, omit `verification` from your",
+		"  final output entirely.",
+		"- If `input.successCriteria` is **present**, you MUST include a",
+		"  `verification` block in your final output. Evaluate every applicable",
+		"  item — `gates`, `assertions`, `rubric` criteria, `sideEffects` — against",
+		"  your produced work and emit one result per id. Be honest: a `fail` with",
+		"  a one-line reason is more useful than a false `pass`. Use `skip` (with a",
+		"  `detail`) when you genuinely could not determine a result. Compute",
+		"  `passed = results.every(r => r.status !== 'fail')`.",
+		"",
+		"Verification shape:",
+		"",
+		"```json",
+		"{",
+		"  \"inputCid\": \"<the inputCid you saw on the task>\",",
+		"  \"results\": [",
+		"    { \"id\": \"<criterion id>\", \"kind\": \"assertion|gate|rubric|sideEffect\",",
+		"      \"status\": \"pass|fail|skip\", \"detail\": \"<optional one-liner>\" }",
+		"  ],",
+		"  \"passed\": <boolean>",
+		"}",
+		"```",
+		""
+	].join("\n");
+}
+//#endregion
 //#region ../agent-runtime/src/prompts/curate-pack.ts
 /**
 * Build the system prompt for a `curate_pack` task.
@@ -9699,9 +9746,16 @@ function buildCuratePackPrompt(input, ctx) {
 		"## Tools available (not a recipe — use what the situation calls for)",
 		"",
 		"- `moltnet_diary_tags` — tag inventory with counts. Cheap reconnaissance",
-		"  when the prompt implies a scope but not a tag.",
+		"  when the prompt implies a scope but not a tag. Pass",
+		"  `prefix: \"task:\"` to enumerate task-provenance tags only",
+		"  (`task:type:*`, `task:correlation:*`, etc.).",
 		"- `moltnet_search_entries` — hybrid semantic + lexical search.",
-		"- `moltnet_list_entries` — tag-filtered listing.",
+		"  Filters AND with the query: pass `tags`, `excludeTags`,",
+		"  `entryTypes`, or the `taskFilter` shorthand to narrow before",
+		"  ranking. Example: `taskFilter: { taskType: \"fulfill_brief\" }`",
+		"  returns only entries from fulfill_brief attempts.",
+		"- `moltnet_list_entries` — multi-tag (AND) listing with optional",
+		"  `excludeTags`, `entryType`, and the same `taskFilter` shorthand.",
 		"- `moltnet_get_entry` — full entry read, for disambiguation.",
 		"- `moltnet_pack_create` — terminal call that persists the pack.",
 		"",
@@ -9747,31 +9801,32 @@ function buildCuratePackPrompt(input, ctx) {
 		"",
 		"## Hard constraints",
 		"",
-		"- Do NOT call `moltnet_pack_render` or `moltnet_rendered_pack_judge` —",
-		"  those belong to the next sessions.",
+		"- Do NOT call `moltnet_pack_render` — that belongs to the next session.",
 		"- Do NOT write diary entries unless curation surfaces a genuine",
 		"  incident worth recording. The curation reasoning lives in the task",
 		"  output, not in the diary.",
 		"- Respect hard include/exclude filters literally.",
 		"",
-		"## Final output",
-		"",
-		"Write to stdout a JSON object matching `CuratePackOutput`:",
-		"```",
-		"{",
-		"  \"packId\": \"<uuid>\",",
-		"  \"packCid\": \"<cid>\",",
-		"  \"entries\": [",
-		"    { \"entryId\": \"<uuid>\", \"rank\": 1, \"rationale\": \"<why>\" }",
-		"  ],",
-		"  \"recipeParams\": { \"recipe\": \"...\", \"prompt\": \"...\", ... },",
-		"  \"checkpoints\": [",
-		"    { \"phase\": \"recon\", \"candidateIds\": [...], \"droppedIds\": [...], \"notes\": \"...\" }",
-		"  ],",
-		"  \"summary\": \"<2-4 sentences: what you looked for, how you narrowed, what defines the final set>\"",
-		"}",
-		"```",
-		"The runtime parses this. Failing to emit it is a task failure."
+		buildSelfVerificationBlock(ctx.taskId),
+		buildFinalOutputBlock({
+			taskType: "curate_pack",
+			outputSchemaName: "CuratePackOutput",
+			shapeSketch: [
+				"{",
+				"  \"packId\": \"<uuid>\",",
+				"  \"packCid\": \"<cid>\",",
+				"  \"entries\": [",
+				"    { \"entryId\": \"<uuid>\", \"rank\": 1, \"rationale\": \"<why>\" }",
+				"  ],",
+				"  \"recipeParams\": { \"recipe\": \"...\", \"prompt\": \"...\", ... },",
+				"  \"checkpoints\": [",
+				"    { \"phase\": \"recon\", \"candidateIds\": [...], \"droppedIds\": [...], \"notes\": \"...\" }",
+				"  ],",
+				"  \"summary\": \"<2-4 sentences: what you looked for, how you narrowed, what defines the final set>\",",
+				"  \"verification\": <required iff input.successCriteria; see Self-verification>",
+				"}"
+			].join("\n")
+		})
 	].filter((l) => l !== null).join("\n");
 }
 //#endregion
@@ -9829,17 +9884,28 @@ function buildFulfillBriefPrompt(input, ctx) {
 		"   `MoltNet-Diary: <id>` (per the runtime instructor).",
 		"6. Push the branch and open a PR.",
 		"",
-		"### Final output",
-		"",
-		"When done, write to stdout a JSON object with shape matching `FulfillBriefOutput`:",
-		"  { \"branch\", \"commits\": [{sha, message, diaryEntryId}], \"pullRequestUrl\", \"diaryEntryIds\", \"summary\" }",
-		"The runtime parses this as the structured task output. Failing to emit it is a failure."
+		buildSelfVerificationBlock(ctx.taskId),
+		buildFinalOutputBlock({
+			taskType: "fulfill_brief",
+			outputSchemaName: "FulfillBriefOutput",
+			shapeSketch: [
+				"{",
+				"  \"branch\": \"<branch-name>\",",
+				"  \"commits\": [{ \"sha\": \"...\", \"message\": \"...\", \"diaryEntryId\": \"...\" }],",
+				"  \"pullRequestUrl\": \"<url-or-null>\",",
+				"  \"diaryEntryIds\": [\"...\"],",
+				"  \"summary\": \"<1-3 sentence recap>\",",
+				"  \"verification\": <required iff input.successCriteria; see Self-verification>",
+				"}"
+			].join("\n")
+		})
 	].filter(Boolean).join("\n");
 }
 //#endregion
 //#region ../agent-runtime/src/prompts/judge-pack.ts
 function buildJudgePackPrompt(input, ctx) {
-	const { renderedPackId, sourcePackId, rubric } = input;
+	const { renderedPackId, sourcePackId, successCriteria } = input;
+	const rubric = successCriteria.rubric;
 	const criteriaList = rubric.criteria.map((c, i) => `${i + 1}. **${c.id}** (weight ${c.weight}, scoring: \`${c.scoring}\`) — ${c.description}`).join("\n");
 	const preambleSection = rubric.preamble ? [
 		"### Rubric preamble",
@@ -9869,7 +9935,7 @@ function buildJudgePackPrompt(input, ctx) {
 		"",
 		"1. Call `moltnet_rendered_pack_get` for the rendered pack. Keep the",
 		"   `content` string — you will score it.",
-		"2. Call `moltnet_pack_get` with `expand: \"entries\"` for the source",
+		"2. Call `moltnet_pack_get` with `expandEntries: true` for the source",
 		"   pack. Keep the source entries for grounding / coverage checks.",
 		"3. For each criterion, score according to its `scoring` mode (see",
 		"   Scoring rules below). Produce rationales where required.",
@@ -9882,9 +9948,23 @@ function buildJudgePackPrompt(input, ctx) {
 		"",
 		"### Scoring rules",
 		"",
-		"- `llm_judged`: score 0..1 continuous. `rationale` REQUIRED (2–4",
+		"- `llm_score`: score 0..1 continuous. `rationale` REQUIRED (2–4",
 		"  sentences pointing at specific evidence in the rendered content or",
-		"  the source entries).",
+		"  the source entries). NOTE: this mode smooths individual failures",
+		"  into the gradient. Prefer `llm_checklist` for grounding,",
+		"  faithfulness, or any property where one failure is a real failure.",
+		"- `llm_checklist`: enumerate per-claim binary assertions instead of",
+		"  picking a continuous score. For each assertion, return",
+		"  `{ id, text, passed: bool, evidence: string }`. `evidence` is",
+		"  REQUIRED for both PASS and FAIL — for PASS, quote the supporting",
+		"  span (rendered or source) or cite the source entry id; for FAIL,",
+		"  quote the offending claim verbatim and explain why it fails.",
+		"  Don't give the benefit of the doubt: if a claim looks supported but",
+		"  you cannot point at the supporting source span, mark it FAIL with",
+		"  evidence = \"no supporting span found\". Set the criterion `score`",
+		"  to `1` iff every assertion passes, else `0` — the runtime checks",
+		"  this matches the assertions array. Populate `assertions` on the",
+		"  score object; leave `evidence` (the structured record) empty.",
 		"- `boolean`: score exactly 0 or 1. `rationale` optional.",
 		"- `deterministic_signature_check`: batch-fetch ALL referenced source",
 		"  entries in a single call — `moltnet_list_entries` with `entryIds` set",
@@ -9915,23 +9995,36 @@ function buildJudgePackPrompt(input, ctx) {
 		"  may leak guidance that biases judgment.",
 		"- Keep the session focused on scoring; no speculative exploration.",
 		"",
-		"## Final output",
-		"",
-		"Write to stdout a JSON object matching `JudgePackOutput`:",
-		"```",
-		"{",
-		"  \"scores\": [{\"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {...}}],",
-		"  \"composite\": <sum-of-weighted-scores>,",
-		"  \"verdict\": \"<1-3 sentence overall>\",",
-		"  \"judgeModel\": \"<provider:model>\",",
-		"  \"rendererBinaryCid\": \"<cid-string-only-if-available>\"",
-		"}",
-		"```",
-		"Omit `rendererBinaryCid` entirely when no binary CID is exposed by",
-		"`moltnet_rendered_pack_get`. Do NOT emit `null` — the field is optional",
-		"and absence is the correct representation when unavailable.",
 		`Write a signed diary entry (tags: \`judgment\`, \`judge_pack\`, \`rubric:${rubric.rubricId}\`) capturing the rationale before`,
-		"emitting the JSON."
+		"reporting structured output.",
+		"",
+		buildFinalOutputBlock({
+			taskType: "judge_pack",
+			outputSchemaName: "JudgePackOutput",
+			shapeSketch: [
+				"{",
+				"  \"scores\": [",
+				"    { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} },",
+				"    {",
+				"      \"criterionId\": \"<llm_checklist criterion>\",",
+				"      \"score\": 0,                          // 1 iff every assertion passed",
+				"      \"assertions\": [",
+				"        { \"id\": \"claim-1\", \"text\": \"...\", \"passed\": false, \"evidence\": \"...\" }",
+				"      ]",
+				"    }",
+				"  ],",
+				"  \"composite\": <sum-of-weighted-scores>,",
+				"  \"verdict\": \"<1-3 sentence overall>\",",
+				"  \"judgeModel\": \"<provider:model>\",",
+				"  \"rendererBinaryCid\": \"<cid-string-only-if-available>\"",
+				"}"
+			].join("\n"),
+			extraNotes: [
+				"Omit `rendererBinaryCid` entirely when no binary CID is exposed by",
+				"`moltnet_rendered_pack_get`. Do NOT emit `null` — the field is",
+				"optional and absence is the correct representation when unavailable."
+			]
+		})
 	].filter((l) => l !== null).join("\n");
 }
 //#endregion
@@ -9960,7 +10053,7 @@ function buildRenderPackPrompt(input, ctx) {
 		"",
 		"## Workflow",
 		"",
-		"1. Call `moltnet_pack_get` with `expand: \"entries\"` to inspect the",
+		"1. Call `moltnet_pack_get` with `expandEntries: true` to inspect the",
 		"   source entries. Read it — you need the entry count for your output.",
 		"2. Call `moltnet_pack_render` with:",
 		`   - \`packId\`: \`${packId}\``,
@@ -9972,24 +10065,25 @@ function buildRenderPackPrompt(input, ctx) {
 		"## Constraints",
 		"",
 		"- Do NOT modify the source pack or its entries.",
-		"- Do NOT call `moltnet_rendered_pack_judge`.",
 		"- Do NOT write diary entries unless a genuine incident occurs",
 		"  (rendering failure, invariant violation).",
 		"",
-		"## Final output",
-		"",
-		"Write to stdout a JSON object matching `RenderPackOutput`:",
-		"```",
-		"{",
-		"  \"renderedPackId\": \"<uuid-or-null>\",",
-		"  \"renderedCid\": \"<cid>\",",
-		"  \"renderMethod\": \"<label>\",",
-		"  \"byteSize\": <int>,",
-		"  \"entriesRendered\": <int>,",
-		"  \"summary\": \"<1-3 sentence recap>\"",
-		"}",
-		"```",
-		"Failing to emit it is a task failure."
+		buildSelfVerificationBlock(ctx.taskId),
+		buildFinalOutputBlock({
+			taskType: "render_pack",
+			outputSchemaName: "RenderPackOutput",
+			shapeSketch: [
+				"{",
+				"  \"renderedPackId\": \"<uuid-or-null>\",",
+				"  \"renderedCid\": \"<cid>\",",
+				"  \"renderMethod\": \"<label>\",",
+				"  \"byteSize\": <int>,",
+				"  \"entriesRendered\": <int>,",
+				"  \"summary\": \"<1-3 sentence recap>\",",
+				"  \"verification\": <required iff input.successCriteria; see Self-verification>",
+				"}"
+			].join("\n")
+		})
 	].join("\n");
 }
 //#endregion
@@ -12020,7 +12114,7 @@ var require_transport = /* @__PURE__ */ __commonJSMin(((exports, module) => {
 	var { existsSync: existsSync$1 } = __require("node:fs");
 	var getCallers = require_caller();
 	var { join: join$1, isAbsolute, sep } = __require("node:path");
-	var { fileURLToPath: fileURLToPath$1 } = __require("node:url");
+	var { fileURLToPath } = __require("node:url");
 	var sleep = require_atomic_sleep();
 	var onExit = require_on_exit_leak_free();
 	var ThreadStream = require_thread_stream();
@@ -12076,7 +12170,7 @@ var require_transport = /* @__PURE__ */ __commonJSMin(((exports, module) => {
 		if (!unquoted) return false;
 		let path = unquoted;
 		if (path.startsWith("file://")) try {
-			path = fileURLToPath$1(path);
+			path = fileURLToPath(path);
 		} catch {
 			return false;
 		}
@@ -13567,9 +13661,13 @@ function buildRuntimeInstructor(ctx) {
 		`- During this task, every diary entry MUST land in \`${ctx.diaryId}\``,
 		"  (the task diary). The MCP `moltnet_create_entry` tool enforces this",
 		"  and rejects mismatched explicit `diaryId` parameters.",
-		`- Provenance tags \`task:${ctx.taskId}\`, \`task_type:${ctx.taskType}\`,`,
-		`  and \`task_attempt:${ctx.attemptN}\`${ctx.correlationId ? `, plus \`correlation:${ctx.correlationId}\`` : ""} are auto-injected on every entry.`,
-		"  You may add additional tags; you cannot remove the auto-tags.",
+		`- Provenance tags \`task:id:${ctx.taskId}\`, \`task:type:${ctx.taskType}\`,`,
+		`  and \`task:attempt:${ctx.attemptN}\`${ctx.correlationId ? `, plus \`task:correlation:${ctx.correlationId}\`` : ""} are auto-injected on every entry.`,
+		"  These share the `task:` namespace so `moltnet_diary_tags` with",
+		"  `prefix: \"task:\"` lists every task-scoped tag, and the",
+		"  `taskFilter` shorthand on `moltnet_list_entries` /",
+		"  `moltnet_search_entries` expands into them. You may add additional",
+		"  tags but you cannot remove the auto-injected ones.",
 		"",
 		"## Accountable commits",
 		"",
@@ -13598,42 +13696,78 @@ function buildRuntimeInstructor(ctx) {
 }
 //#endregion
 //#region src/runtime/task-output.ts
-async function parseStructuredTaskOutput(assistantText, taskType) {
+/** Meter name handed to `metrics.getMeter()` when the parse-result counter is created. */
+var METER_NAME = "@themoltnet/pi-extension/task-output";
+/** Cached counter instance; stays `null` until `getParseResultCounter()` runs for the first time. */
+var parseResultCounter = null;
+/**
+* Return the `agent_runtime.task_output.parse_result` counter, creating it on
+* the first call and handing back the cached instance on every later call.
+*
+* @returns the counter obtained from `metrics.getMeter(METER_NAME).createCounter(...)`.
+*/
+function getParseResultCounter() {
+	// Already built by an earlier call: reuse it rather than creating a second counter.
+	if (parseResultCounter) return parseResultCounter;
+	parseResultCounter = metrics.getMeter(METER_NAME).createCounter("agent_runtime.task_output.parse_result", {
+		description: "Outcome of structured task-output capture, labelled by task_type, model, and code (success | output_missing | output_validation_failed | unknown_task_type | output_cid_compute_failed | captured_via_tool).",
+		unit: "1"
+	});
+	return parseResultCounter;
+}
+/**
+* Record one parse-result observation. Exposed so the executor can also
+* record the `captured_via_tool` outcome from the submit-tool path
+* without bouncing through the parser. Labels: `task_type`, `model`, `code`.
+*
+* Each call adds exactly 1 to the counter returned by
+* `getParseResultCounter()`.
+*
+* @param args.taskType - value written to the `task_type` label.
+* @param args.model - value written to the `model` label; `"unknown"` is
+*   substituted when it is `null` or `undefined`.
+* @param args.code - outcome code written to the `code` label.
+*/
+function recordTaskOutputParseResult(args) {
+	getParseResultCounter().add(1, {
+		task_type: args.taskType,
+		model: args.model ?? "unknown",
+		code: args.code
+	});
+}
+async function parseStructuredTaskOutput(assistantText, taskType, opts = {}) {
+	const record = (code) => recordTaskOutputParseResult({
+		taskType,
+		model: opts.model,
+		code
+	});
 	const extracted = extractJsonObject(assistantText);
-	if (!extracted) return {
-		output: null,
-		outputCid: null,
-		error: {
-			code: "output_missing",
-			message: "Agent did not emit a parseable JSON object as its final message."
-		}
-	};
+	if (!extracted) {
+		record("output_missing");
+		return {
+			output: null,
+			outputCid: null,
+			error: {
+				code: "output_missing",
+				message: "Agent did not emit a parseable JSON object as its final message."
+			}
+		};
+	}
 	const errors = validateTaskOutput(taskType, extracted);
 	if (errors.length > 0) {
 		const details = errors.slice(0, 3).map((error) => `${error.field}: ${error.message}`);
 		const [firstError] = errors;
+		const code = firstError?.field === "taskType" ? "unknown_task_type" : "output_validation_failed";
+		record(code);
 		return {
 			output: null,
 			outputCid: null,
 			error: {
-				code: firstError?.field === "taskType" ? "unknown_task_type" : "output_validation_failed",
+				code,
 				message: `Output failed schema validation: ${details.join("; ")}`
 			}
 		};
 	}
 	try {
+		const outputCid = await computeJsonCid(extracted);
+		record("success");
 		return {
 			output: extracted,
-			outputCid: await computeJsonCid(extracted),
+			outputCid,
 			error: null
 		};
 	} catch (error) {
+		const message = error instanceof Error ? error.message : String(error);
+		record("output_cid_compute_failed");
 		return {
 			output: null,
 			outputCid: null,
 			error: {
 				code: "output_cid_compute_failed",
-				message: `Validated output could not be canonicalized: ${error instanceof Error ? error.message : String(error)}`
+				message: `Validated output could not be canonicalized: ${message}`
 			}
 		};
 	}
@@ -13689,6 +13823,99 @@ function extractJsonObject(text) {
 	return null;
 }
 //#endregion
+//#region src/runtime/submit-output-tool.ts
+/**
+* Sentinel thrown when the requested task type has no registered output
+* schema. The executor recognises this specific error class and falls
+* back to the parser path; any other error from `createSubmitOutputTool`
+* is unexpected and must propagate.
+*
+* The offending task type is kept on the instance as `taskType`, and
+* `name` is set to `"UnknownTaskTypeForSubmitToolError"`.
+*/
+var UnknownTaskTypeForSubmitToolError = class extends Error {
+	/** @param taskType - task type for which no submit-output contract was found; embedded in the message. */
+	constructor(taskType) {
+		super(`createSubmitOutputTool: no output schema registered for task type "${taskType}"`);
+		this.taskType = taskType;
+		this.name = "UnknownTaskTypeForSubmitToolError";
+	}
+};
+/**
+* Create the submit-output tool for one task type, together with accessors
+* for whatever the tool captured.
+*
+* The tool's `execute` validates the submitted params with
+* `validateTaskOutput(taskType, params)`. A failing payload is answered with
+* an `isError` result listing at most the first three validation errors and
+* is not stored. A passing payload is stored, replacing any payload stored
+* by an earlier successful call.
+*
+* NOTE(review): `callCount` is only incremented on the success path, so a
+* rejected call reports the number of previously accepted submissions.
+* Confirm this is the intended meaning of "call count".
+*
+* @param taskType - task type whose contract is looked up via `getSubmitOutputContract`.
+* @param opts - optional settings; `opts.model` is forwarded as the `model`
+*   label when a validation failure is recorded.
+* @returns `{ tool, getCaptured, getCallCount }`: the tool definition, the last
+*   accepted payload (or `null`), and the current `callCount`.
+* @throws UnknownTaskTypeForSubmitToolError when no contract exists for `taskType`.
+*/
+function createSubmitOutputTool(taskType, opts = {}) {
+	const contract = getSubmitOutputContract(taskType);
+	if (!contract) throw new UnknownTaskTypeForSubmitToolError(taskType);
+	const schema = contract.parametersSchema;
+	// State shared between `execute` and the accessors returned below.
+	let captured = null;
+	let callCount = 0;
+	return {
+		tool: defineTool({
+			name: contract.toolName,
+			label: `Submit ${taskType} output`,
+			description: contract.description,
+			parameters: schema,
+			async execute(_id, params) {
+				const errors = validateTaskOutput(taskType, params);
+				if (errors.length > 0) {
+					// Only the first three errors are surfaced in the reply text.
+					const detailMsg = errors.slice(0, 3).map((err) => `${err.field}: ${err.message}`).join("; ");
+					const details = {
+						captured: false,
+						callCount,
+						error: "output_validation_failed"
+					};
+					// Rejected submissions are counted in the parse-result metric too.
+					recordTaskOutputParseResult({
+						taskType,
+						model: opts.model,
+						code: "output_validation_failed"
+					});
+					return {
+						content: [{
+							type: "text",
+							text: `Output failed validation: ${detailMsg}. Re-call this tool with a corrected output.`
+						}],
+						details,
+						isError: true
+					};
+				}
+				// Valid payload: keep it (overwriting any earlier capture) and bump the count.
+				captured = params;
+				callCount += 1;
+				return {
+					content: [{
+						type: "text",
+						text: "Output captured. The runtime now has the validated payload; no further action is needed for output reporting."
+					}],
+					details: {
+						captured: true,
+						callCount,
+						error: null
+					},
+					// NOTE(review): presumably asks the tool host to end the agent turn; confirm against the host's tool-result contract.
+					terminate: true
+				};
+			}
+		}),
+		getCaptured: () => captured,
+		getCallCount: () => callCount
+	};
+}
+/**
+* Build the submit-tool wiring for one task attempt. Returns a handle
+* (or `null` if no submit-tool should be registered) plus the
+* `customTools`-shaped array ready to spread into the session config.
+*
+* The catch is **narrowed** to `UnknownTaskTypeForSubmitToolError` —
+* exporters/dependency-API drift would otherwise be silently degraded
+* to parser-only behaviour, which reintroduces the failure mode this
+* change is fixing. Any other error from the factory propagates.
+*
+* @param taskType - task type forwarded to `createSubmitOutputTool`.
+* @param opts - options forwarded unchanged to `createSubmitOutputTool`.
+* @returns `{ handle, tools }`, where `tools` is `[handle.tool]` when a handle
+*   exists and an empty array when `handle` is `null`.
+*/
+function resolveSubmitTools(taskType, opts = {}) {
+	let handle;
+	try {
+		handle = createSubmitOutputTool(taskType, opts);
+	} catch (err) {
+		// An unknown task type is the one expected failure: continue without a submit tool.
+		if (err instanceof UnknownTaskTypeForSubmitToolError) handle = null;
+		else throw err;
+	}
+	return {
+		handle,
+		tools: handle ? [handle.tool] : []
+	};
+}
+//#endregion
 //#region src/runtime/execute-pi-task.ts
 /**
 * executePiTask — run a single Task attempt using pi-coding-agent inside a
@@ -13834,6 +14061,8 @@ async function executePiTask(claimedTask, reporter, opts) {
 			createEditToolDefinition(mountPath, { operations: createGondolinEditOps(managed.vm, mountPath) }),
 			createBashToolDefinition(mountPath, { operations: createGondolinBashOps(managed.vm, mountPath) })
 		];
+		const { handle: submitToolHandle, tools: submitToolDefs } = resolveSubmitTools(task.taskType, { model: opts.model });
+		const submitTools = submitToolDefs;
 		try {
 			const moltnetAgent = await connect({ configDir: managed.agentDir });
 			const moltnetTools = createMoltNetTools({
@@ -13885,7 +14114,11 @@ async function executePiTask(claimedTask, reporter, opts) {
 				agentDir: piAuthDir,
 				cwd: mountPath,
 				model: modelHandle,
-				customTools: [...gondolinCustomTools, ...moltnetTools],
+				customTools: [
+					...gondolinCustomTools,
+					...moltnetTools,
+					...submitTools
+				],
 				sessionManager: SessionManager.inMemory(),
 				resourceLoader
 			})).session;
@@ -13962,14 +14195,43 @@ async function executePiTask(claimedTask, reporter, opts) {
 		let parsedOutputCid = null;
 		let parseError = null;
 		if (!runError && !llmAbort && !cancelled) {
-			const parsed = await parseStructuredTaskOutput(assistantText, task.taskType);
-			parsedOutput = parsed.output;
-			parsedOutputCid = parsed.outputCid;
-			parseError = parsed.error;
-			if (parseError) await emit("error", {
-				message: parseError.message,
-				phase: "output_validation"
-			});
+			const captured = submitToolHandle?.getCaptured() ?? null;
+			if (captured) try {
+				parsedOutput = captured;
+				parsedOutputCid = await computeJsonCid(captured);
+				recordTaskOutputParseResult({
+					taskType: task.taskType,
+					model: opts.model,
+					code: "captured_via_tool"
+				});
+			} catch (err) {
+				const message = err instanceof Error ? err.message : String(err);
+				parsedOutput = null;
+				parsedOutputCid = null;
+				parseError = {
+					code: "output_cid_compute_failed",
+					message: `Captured submit-tool output could not be canonicalized: ${message}`
+				};
+				recordTaskOutputParseResult({
+					taskType: task.taskType,
+					model: opts.model,
+					code: "output_cid_compute_failed"
+				});
+				await emit("error", {
+					message: parseError.message,
+					phase: "output_validation"
+				});
+			}
+			else {
+				const parsed = await parseStructuredTaskOutput(assistantText, task.taskType, { model: opts.model });
+				parsedOutput = parsed.output;
+				parsedOutputCid = parsed.outputCid;
+				parseError = parsed.error;
+				if (parseError) await emit("error", {
+					message: parseError.message,
+					phase: "output_validation"
+				});
+			}
 		}
 		if (cancelled) return {
 			taskId: task.id,
@@ -14365,4 +14627,4 @@ function moltnetExtension(pi) {
 	registerMoltnetReflectCommand(pi, state);
 }
 //#endregion
-export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildPiJudgeRecipeManifest, computePiJudgeRecipeCid, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resolvePiJudgeRecipeVersions, resumeVm, toGuestPath };
+export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resumeVm, toGuestPath };