npm - @infinitedusky/indusk-mcp - Versions diffs - 1.14.10 → 1.15.0 - Mend

@infinitedusky/indusk-mcp 1.14.10 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/hooks/check-gates.js +121 -0
package/hooks/gate-reminder.js +111 -0
package/hooks/validate-impl-structure.js +344 -4
package/lessons/community/community-tests-first-within-each-phase.md +11 -0
package/package.json +1 -1
package/skills/planner.md +34 -2
package/skills/retrospective.md +21 -0
package/skills/verify.md +13 -0
package/skills/work.md +56 -0

package/hooks/check-gates.js CHANGED Viewed

@@ -304,5 +304,126 @@ for (const item of newlyChecked) {
 	}
 }
+// ------------------------------------------------------------------
+// Trajectory enforcement: if advancing past Phase N (checking an
+// implementation item in Phase N+1 or later), every trajectory row
+// with `Passes at: Phase K` where K <= N must be in state `passing`,
+// `skipped`, or `blocked`. Any other state (planned/writable/written,
+// empty, or unrecognized) fails the phase close — the whole point of
+// the tests-first-planning system is that deferral is structurally
+// impossible.
+//
+// Skipped if the impl has no `## Test Trajectory` section (grandfathered).
+// ------------------------------------------------------------------
+const hasTrajectorySection = /^##\s+Test Trajectory\b/m.test(newFullContent);
+if (hasTrajectorySection) {
+	// Phases that contain a newly checked implementation item.
+	const advancingPhases = new Set();
+	for (const item of newlyChecked) {
+		if (item.gate === "implementation") advancingPhases.add(item.phase);
+	}
+	if (advancingPhases.size > 0) {
+		const trajectory = parseTrajectoryFromBody(newFullContent);
+		// Row states that allow a phase to close.
+		const closableStates = new Set(["passing", "skipped", "blocked"]);
+		// Several advancing phases share the same earlier phases; visiting
+		// each closing phase only once keeps duplicate rows out of the report.
+		const inspectedPhases = new Set();
+		const allBlockers = [];
+		for (const advancingPhase of advancingPhases) {
+			// Closing phases = every phase strictly before advancingPhase
+			for (let closingPhase = 1; closingPhase < advancingPhase; closingPhase++) {
+				if (inspectedPhases.has(closingPhase)) continue;
+				inspectedPhases.add(closingPhase);
+				for (const row of trajectory.rows) {
+					if (row.passesAt === closingPhase && !closableStates.has(row.state)) {
+						allBlockers.push({ phase: closingPhase, row });
+					}
+				}
+			}
+		}
+		if (allBlockers.length > 0) {
+			const msg = allBlockers
+				.map(
+					(b) =>
+						`  [${b.row.id}] ${b.row.asserts} — state: ${b.row.state} (Phase ${b.phase} cannot close until this row is 'passing', 'skipped', or 'blocked')`,
+				)
+				.join("\n");
+			process.stderr.write(
+				`Trajectory blocks phase advance (policy: ${gatePolicy}):\n${msg}\n\nEvery trajectory row with 'Passes at: Phase N' must be 'passing', 'skipped', or 'blocked' before advancing past Phase N. See .indusk/planning/tests-first-planning/adr.md Section 6.\n`,
+			);
+			// Exit code 2 signals the hook failure to the caller.
+			process.exit(2);
+		}
+	}
+}
 // All checks passed
 process.exit(0);
+// ------------------------------------------------------------------
+// Trajectory parser (pure JS, mirrors parser.ts — simplified to read
+// just id, passesAt, and state which is all this hook needs).
+// ------------------------------------------------------------------
+/**
+ * Extract the rows of the `## Test Trajectory` table from an impl document.
+ *
+ * @param {string} implContent - Full impl text, optionally starting with `---` frontmatter.
+ * @returns {{rows: Array<{id: string, asserts: string, passesAt: number, state: string}>}}
+ *   Parsed rows; `passesAt` is NaN when the cell is not of the form "Phase N",
+ *   and `state` is lower-cased. `rows` is empty when no valid table is found.
+ */
+function parseTrajectoryFromBody(implContent) {
+	// Drop a leading frontmatter block so only the Markdown body is scanned.
+	const frontmatter = /^---\n[\s\S]*?\n---\n/.exec(implContent);
+	const markdown = frontmatter === null ? implContent : implContent.slice(frontmatter[0].length);
+	// Collect the lines after the trajectory heading, stopping at the next
+	// level 1-3 heading (this includes `### Deferred Verification`).
+	const sectionLines = [];
+	let insideSection = false;
+	for (const text of markdown.split("\n")) {
+		if (/^##\s+Test Trajectory\b/.test(text)) {
+			insideSection = true;
+		} else if (insideSection) {
+			if (/^#{1,3}\s+/.test(text)) break;
+			sectionLines.push(text);
+		}
+	}
+	const tableLines = sectionLines.filter((text) => text.trim().startsWith("|"));
+	if (tableLines.length < 2) return { rows: [] };
+	const [headerLine, separatorLine, ...dataLines] = tableLines;
+	const headerCells = parseRowCells(headerLine);
+	const separatorCells = parseRowCells(separatorLine);
+	if (separatorCells.some((cell) => !/^:?-+:?$/.test(cell))) return { rows: [] };
+	// Known column labels map to camelCase keys; anything else keeps its lower-cased label.
+	const knownColumns = new Map([
+		["id", "id"],
+		["passes at", "passesAt"],
+		["state", "state"],
+		["writable at", "writableAt"],
+		["asserts", "asserts"],
+	]);
+	const keys = headerCells.map((cell) => {
+		const label = cell.toLowerCase().trim();
+		return knownColumns.has(label) ? knownColumns.get(label) : label;
+	});
+	const rows = [];
+	for (const dataLine of dataLines) {
+		const cells = parseRowCells(dataLine);
+		// Rows whose cell count differs from the header are ignored.
+		if (cells.length !== keys.length) continue;
+		const record = {};
+		keys.forEach((key, index) => {
+			record[key] = cells[index];
+		});
+		if (!record.id) continue;
+		const phaseMatch = /^\s*Phase\s+(\d+)\s*$/i.exec(record.passesAt || "");
+		rows.push({
+			id: record.id.trim(),
+			asserts: (record.asserts || "").trim(),
+			passesAt: phaseMatch === null ? Number.NaN : Number.parseInt(phaseMatch[1], 10),
+			state: (record.state || "").toLowerCase().trim(),
+		});
+	}
+	return { rows };
+}
+/**
+ * Split one Markdown table row into trimmed cell strings.
+ *
+ * A pipe preceded by a backslash (`\|`) is cell content, not a delimiter, and
+ * is returned unescaped. Splitting on it would change the cell count and make
+ * callers that compare the count against the header drop the row.
+ *
+ * @param {string} line - A single line of text.
+ * @returns {string[]} The cells, or an empty array when the line is not
+ *   wrapped in an opening and a closing delimiter pipe.
+ */
+function parseRowCells(line) {
+	const trimmed = line.trim();
+	if (!trimmed.startsWith("|") || !trimmed.endsWith("|")) return [];
+	// A trailing `\|` is an escaped pipe, so the row has no closing delimiter.
+	if (trimmed.endsWith("\\|")) return [];
+	return trimmed
+		.slice(1, -1)
+		.split(/(?<!\\)\|/)
+		.map((c) => c.replace(/\\\|/g, "|").trim());
+}

package/hooks/gate-reminder.js CHANGED Viewed

@@ -103,9 +103,120 @@ for (const phase of phases) {
 			console.error(
 				`Phase ${phase.number} (${phase.name}) is fully complete. Call advance_plan to validate gates before starting Phase ${nextPhase.number}.`,
 			);
+			// Add trajectory nudges if applicable
+			const trajectory = parseTrajectoryRows(content);
+			if (trajectory.rows.length > 0) {
+				const startNudge = writableAtNudge(trajectory, nextPhase.number);
+				if (startNudge) console.error(`\n${startNudge}`);
+			}
 			process.exit(0);
 		}
 	}
 }
+// Extra nudge for a phase that is mid-execution (some items checked, but not
+// all): list its trajectory rows that would still block the phase close.
+const trajectory = parseTrajectoryRows(content);
+if (trajectory.rows.length > 0) {
+	// Row states that do not block a phase close.
+	const closableStates = ["passing", "skipped", "blocked"];
+	for (const phase of phases) {
+		const checkedCount = phase.items.filter((item) => item.checked).length;
+		// Only partially completed phases qualify.
+		if (checkedCount === 0 || checkedCount === phase.items.length) continue;
+		const pending = [];
+		for (const row of trajectory.rows) {
+			if (row.passesAt !== phase.number) continue;
+			if (closableStates.includes(row.state)) continue;
+			pending.push(`  [${row.id}] ${row.asserts} — state: ${row.state}`);
+		}
+		if (pending.length === 0) continue;
+		console.error(
+			`Phase ${phase.number} trajectory rows still not passing (will block phase close):\n${pending.join("\n")}`,
+		);
+		break; // one nudge per hook invocation
+	}
+}
 process.exit(0);
+// ------------------------------------------------------------------
+// Trajectory parsing (minimal — just id, passesAt, state)
+// ------------------------------------------------------------------
+/**
+ * Extract the rows of the `## Test Trajectory` table from an impl document.
+ *
+ * @param {string} implContent - Full impl text, optionally starting with `---` frontmatter.
+ * @returns {{rows: Array<{id: string, asserts: string, writableAt: number, passesAt: number, state: string}>}}
+ *   Parsed rows; phase fields are NaN when the cell is not of the form
+ *   "Phase N", and `state` is lower-cased. `rows` is empty when no valid
+ *   table is found.
+ */
+function parseTrajectoryRows(implContent) {
+	// Drop a leading frontmatter block so only the Markdown body is scanned.
+	const frontmatter = /^---\n[\s\S]*?\n---\n/.exec(implContent);
+	const markdown = frontmatter === null ? implContent : implContent.slice(frontmatter[0].length);
+	// Collect the lines after the trajectory heading, stopping at the next
+	// level 1-3 heading (this includes `### Deferred Verification`).
+	const sectionLines = [];
+	let insideSection = false;
+	for (const text of markdown.split("\n")) {
+		if (/^##\s+Test Trajectory\b/.test(text)) {
+			insideSection = true;
+		} else if (insideSection) {
+			if (/^#{1,3}\s+/.test(text)) break;
+			sectionLines.push(text);
+		}
+	}
+	const tableLines = sectionLines.filter((text) => text.trim().startsWith("|"));
+	if (tableLines.length < 2) return { rows: [] };
+	const [headerLine, separatorLine, ...dataLines] = tableLines;
+	const headerCells = parseRow(headerLine);
+	const separatorCells = parseRow(separatorLine);
+	if (separatorCells.some((cell) => !/^:?-+:?$/.test(cell))) return { rows: [] };
+	// Known column labels map to camelCase keys; anything else keeps its lower-cased label.
+	const knownColumns = new Map([
+		["id", "id"],
+		["writable at", "writableAt"],
+		["passes at", "passesAt"],
+		["state", "state"],
+		["asserts", "asserts"],
+	]);
+	const keys = headerCells.map((cell) => {
+		const label = cell.toLowerCase().trim();
+		return knownColumns.has(label) ? knownColumns.get(label) : label;
+	});
+	// "Phase N" -> N; any other cell text (or a missing cell) -> NaN.
+	const phaseNumber = (cell) => {
+		const found = /^\s*Phase\s+(\d+)\s*$/i.exec(cell || "");
+		return found === null ? Number.NaN : Number.parseInt(found[1], 10);
+	};
+	const rows = [];
+	for (const dataLine of dataLines) {
+		const cells = parseRow(dataLine);
+		// Rows whose cell count differs from the header are ignored.
+		if (cells.length !== keys.length) continue;
+		const record = {};
+		keys.forEach((key, index) => {
+			record[key] = cells[index];
+		});
+		if (!record.id) continue;
+		rows.push({
+			id: record.id.trim(),
+			asserts: (record.asserts || "").trim(),
+			writableAt: phaseNumber(record.writableAt),
+			passesAt: phaseNumber(record.passesAt),
+			state: (record.state || "").toLowerCase().trim(),
+		});
+	}
+	return { rows };
+}
+/**
+ * Split one Markdown table row into trimmed cell strings.
+ *
+ * A pipe preceded by a backslash (`\|`) is cell content, not a delimiter, and
+ * is returned unescaped. Splitting on it would change the cell count and make
+ * callers that compare the count against the header drop the row.
+ *
+ * @param {string} line - A single line of text.
+ * @returns {string[]} The cells, or an empty array when the line is not
+ *   wrapped in an opening and a closing delimiter pipe.
+ */
+function parseRow(line) {
+	const t = line.trim();
+	if (!t.startsWith("|") || !t.endsWith("|")) return [];
+	// A trailing `\|` is an escaped pipe, so the row has no closing delimiter.
+	if (t.endsWith("\\|")) return [];
+	return t
+		.slice(1, -1)
+		.split(/(?<!\\)\|/)
+		.map((c) => c.replace(/\\\|/g, "|").trim());
+}
+/**
+ * Build the reminder listing tests that should be authored when a phase opens.
+ *
+ * @param {{rows: Array<{id: string, asserts: string, writableAt: number, state: string}>}} trajectory
+ *   Parsed trajectory table.
+ * @param {number} phase - Number of the phase that is about to start.
+ * @returns {string|null} The reminder text, or null when no row with
+ *   `writableAt` equal to `phase` is in state `planned`, `writable`, or empty.
+ */
+function writableAtNudge(trajectory, phase) {
+	const unwrittenStates = ["planned", "writable", ""];
+	const bullets = [];
+	for (const row of trajectory.rows) {
+		if (row.writableAt !== phase) continue;
+		if (!unwrittenStates.includes(row.state)) continue;
+		bullets.push(`  [${row.id}] ${row.asserts}`);
+	}
+	if (bullets.length === 0) return null;
+	return `Phase ${phase} opens with these tests to author (commit as failing before implementation work):\n${bullets.join("\n")}`;
+}

package/hooks/validate-impl-structure.js CHANGED Viewed

@@ -34,7 +34,7 @@ if (!filePath.endsWith("/impl.md") && !filePath.endsWith("\\impl.md")) {
 /**
  * Find the project root by walking up from a starting directory looking for
- * a .indusk/ or .claude/ directory. Falls back to event.cwd if none found.
+ * a .indusk/ or .claude/ directory. Falls back to startDir if none found.
  * Mirrors the pattern used in check-catchup.js.
  */
 function findProjectRoot(startDir) {
@@ -48,6 +48,25 @@ function findProjectRoot(startDir) {
 	return startDir;
 }
+/**
+ * Pick the project root for the file being edited.
+ *
+ * The walk starts from the edited file's own directory first, because that
+ * directory chain contains `.indusk/` even when `event.cwd` points somewhere
+ * unrelated (observed from the Claude Code VS Code extension on impl edits).
+ * If that walk does not land on a directory holding `.indusk` or `.claude`,
+ * the search restarts from `eventCwd`, or from `process.cwd()` when
+ * `eventCwd` is null or undefined.
+ *
+ * @param {string} filePath - Path of the file being edited; may be empty.
+ * @param {string} [eventCwd] - Working directory reported by the hook event.
+ * @returns {string} The directory chosen as project root.
+ */
+function resolveProjectRoot(filePath, eventCwd) {
+	const markerDirs = [".indusk", ".claude"];
+	if (filePath) {
+		const candidate = findProjectRoot(resolve(filePath, ".."));
+		// findProjectRoot falls back to its start directory, so confirm a marker exists.
+		const hasMarker = markerDirs.some((name) => existsSync(`${candidate}/${name}`));
+		if (hasMarker) return candidate;
+	}
+	const startDir = eventCwd ?? process.cwd();
+	return findProjectRoot(startDir);
+}
 /**
  * Whether the OTel gate should fire for this project. Reads .indusk/config.json
  * and checks otel.role. Returns true if the config is missing, if otel.role is
@@ -66,7 +85,7 @@ function shouldEmitOtelGate(projectRoot) {
 	}
 }
-const projectRoot = findProjectRoot(event.cwd ?? process.cwd());
+const projectRoot = resolveProjectRoot(filePath, event.cwd);
 const otelGateEnabled = shouldEmitOtelGate(projectRoot);
 // Check for skip-gates escape hatch
@@ -257,8 +276,7 @@ for (const phase of phases) {
 		const optOuts = [];
 		if (requirements.verification && phase.hasVerification && phase.verificationIsOptOut)
 			optOuts.push("Verification");
-		if (requirements.otel && phase.hasOtel && phase.otelIsOptOut)
-			optOuts.push("OTel");
+		if (requirements.otel && phase.hasOtel && phase.otelIsOptOut) optOuts.push("OTel");
 		if (requirements.context && phase.hasContext && phase.contextIsOptOut) optOuts.push("Context");
 		if (requirements.document && phase.hasDocument && phase.documentIsOptOut)
 			optOuts.push("Document");
@@ -274,6 +292,36 @@ for (const phase of phases) {
 	}
 }
+// ------------------------------------------------------------------
+// Trajectory validation (tests-first-planning, Phase 1)
+//
+// Four additive rules run when either:
+//   (a) frontmatter includes `trajectory: required`, OR
+//   (b) the body contains a `## Test Trajectory` section
+//
+// Otherwise this section is skipped — grandfathered impls pass through.
+//
+// Rules:
+//   1. trajectory-presence: `## Test Trajectory` section is present
+//   2. cross-reference-integrity: phase Verification test-ID references exist in trajectory
+//   3. temporal-coherence: every row has Writable at ≤ Passes at
+//   4. deferred-completeness: every Deferred Verification row has reason, would require, mitigation
+// ------------------------------------------------------------------
+// Anchor the key to the start of a line so that text which merely contains
+// "trajectory: required" (another key such as `no_trajectory: required`, or a
+// title value) does not opt the impl in.
+const trajectoryRequiredFrontmatter = /^[ \t]*trajectory:[ \t]*required\b/m.test(frontmatter);
+const hasTrajectoryHeading = /^##\s+Test Trajectory\b/m.test(body);
+const trajectoryValidationEnabled = trajectoryRequiredFrontmatter || hasTrajectoryHeading;
+if (trajectoryValidationEnabled) {
+	const trajectoryErrors = validateTrajectory(body);
+	if (trajectoryErrors.length > 0) {
+		process.stderr.write(
+			`Test Trajectory validation failed (policy: ${gatePolicy}):\n${trajectoryErrors.map((e) => `  [${e.rule}] ${e.message}`).join("\n")}\n\nSee .indusk/planning/tests-first-planning/adr.md Sections 3-6 for the Test Trajectory shape and validator rules.\n`,
+		);
+		// Exit code 2 signals the hook failure to the caller.
+		process.exit(2);
+	}
+}
 if (errors.length > 0) {
 	const msg = errors.join("\n");
 	const reqNames = Object.entries(requirements)
@@ -292,3 +340,295 @@ if (errors.length > 0) {
 }
 process.exit(0);
+// ------------------------------------------------------------------
+// Trajectory validation helpers (pure JS, mirrors
+// apps/indusk-mcp/src/lib/trajectory/validator.ts and parser.ts)
+// ------------------------------------------------------------------
+/**
+ * Run the trajectory rules against an impl body.
+ *
+ * @param {string} implBody - Impl Markdown body.
+ * @returns {Array<{rule: string, message: string, line?: number}>} All rule
+ *   violations. When the `## Test Trajectory` heading is absent, only the
+ *   presence error is returned and the other rules are not evaluated.
+ */
+function validateTrajectory(implBody) {
+	// Rule 1: trajectory presence
+	const hasSection = /^##\s+Test Trajectory\b/m.test(implBody);
+	if (!hasSection) {
+		return [
+			{
+				rule: "trajectory-presence",
+				message:
+					"Impl is missing the `## Test Trajectory` section. Every impl using the new shape must declare its tests at the top as a table with columns: ID | Asserts | Writable at | Passes at | State.",
+			},
+		];
+	}
+	// Rules 2-4 work on the parsed table and deferred block.
+	const parsed = parseTrajectoryFromBody(implBody);
+	return [
+		...validateCrossReferenceIntegrity(implBody, parsed),
+		...validateTemporalCoherence(parsed),
+		...validateDeferredCompleteness(parsed),
+	];
+}
+/**
+ * Split the `## Test Trajectory` section into its table and its
+ * `### Deferred Verification` block, and parse both.
+ *
+ * @param {string} implBody - Impl Markdown body.
+ * @returns {{rows: Array<object>, deferred: Array<object>}} Parsed table rows
+ *   and parsed deferred entries.
+ */
+function parseTrajectoryFromBody(implBody) {
+	const tableLines = [];
+	const deferredLines = [];
+	// Bucket receiving the current line; null until the section heading is seen.
+	let target = null;
+	for (const text of implBody.split("\n")) {
+		if (/^##\s+Test Trajectory\b/.test(text)) {
+			target = tableLines;
+			continue;
+		}
+		if (target === null) continue;
+		if (/^###\s+Deferred Verification\b/.test(text)) {
+			target = deferredLines;
+			continue;
+		}
+		// Any other level 1-3 heading ends the section.
+		if (/^#{1,3}\s+/.test(text)) break;
+		target.push(text);
+	}
+	return {
+		rows: parseTrajectoryTable(tableLines),
+		deferred: parseDeferredBlock(deferredLines),
+	};
+}
+/**
+ * Split one Markdown table row into trimmed cell strings.
+ *
+ * A pipe preceded by a backslash (`\|`) is cell content, not a delimiter, and
+ * is returned unescaped. Splitting on it would change the cell count and make
+ * callers that compare the count against the header drop the row.
+ *
+ * @param {string} line - A single line of text.
+ * @returns {string[]} The cells, or an empty array when the line is not
+ *   wrapped in an opening and a closing delimiter pipe.
+ */
+function parseTableRow(line) {
+	const trimmed = line.trim();
+	if (!trimmed.startsWith("|") || !trimmed.endsWith("|")) return [];
+	// A trailing `\|` is an escaped pipe, so the row has no closing delimiter.
+	if (trimmed.endsWith("\\|")) return [];
+	return trimmed
+		.slice(1, -1)
+		.split(/(?<!\\)\|/)
+		.map((cell) => cell.replace(/\\\|/g, "|").trim());
+}
+/**
+ * Map a table header label to the key used for parsed rows.
+ *
+ * The label is lower-cased, inner whitespace runs are collapsed to one space,
+ * and the ends are trimmed. Known labels map to camelCase keys; any other
+ * label is returned in its normalized form.
+ *
+ * @param {string} header - Raw header cell text.
+ * @returns {string} Always a string key.
+ */
+function normalizeHeader(header) {
+	const normalized = header.toLowerCase().replace(/\s+/g, " ").trim();
+	// A Map has no inherited entries, so labels such as "constructor" or
+	// "__proto__" cannot resolve to Object.prototype members the way a
+	// plain-object lookup would.
+	const aliases = new Map([
+		["id", "id"],
+		["asserts", "asserts"],
+		["writable at", "writableAt"],
+		["passes at", "passesAt"],
+		["state", "state"],
+		["kind", "kind"],
+		["scope", "scope"],
+	]);
+	return aliases.get(normalized) || normalized;
+}
+/**
+ * Read the phase number out of a "Phase N" cell (case-insensitive,
+ * surrounding whitespace allowed).
+ *
+ * @param {string} cell - Cell text.
+ * @returns {number} N as an integer, or NaN when the cell has any other form.
+ */
+function parsePhaseRef(cell) {
+	const found = /^\s*Phase\s+(\d+)\s*$/i.exec(cell);
+	if (found === null) return Number.NaN;
+	return Number.parseInt(found[1], 10);
+}
+/**
+ * Parse the pipe-table lines of the trajectory section into row records.
+ *
+ * @param {string[]} lines - Lines of the section; non-table lines are ignored.
+ * @returns {Array<{id: string, asserts: string, writableAt: number, passesAt: number}>}
+ *   One record per data row that has the header's cell count and non-empty
+ *   `id` and `asserts` cells. Empty when there is no header and separator row.
+ */
+function parseTrajectoryTable(lines) {
+	const tableLines = lines.filter((text) => text.trim().startsWith("|"));
+	if (tableLines.length < 2) return [];
+	const [headerLine, separatorLine, ...dataLines] = tableLines;
+	const headerCells = parseTableRow(headerLine);
+	const separatorCells = parseTableRow(separatorLine);
+	// The second row must consist of `---` style cells (optional alignment colons).
+	if (separatorCells.some((cell) => !/^:?-+:?$/.test(cell))) return [];
+	const keys = headerCells.map((cell) => normalizeHeader(cell));
+	const parsed = [];
+	for (const dataLine of dataLines) {
+		const cells = parseTableRow(dataLine);
+		if (cells.length !== keys.length) continue;
+		const record = {};
+		keys.forEach((key, index) => {
+			record[key] = cells[index];
+		});
+		if (!record.id || !record.asserts) continue;
+		parsed.push({
+			id: record.id.trim(),
+			asserts: record.asserts.trim(),
+			writableAt: parsePhaseRef(record.writableAt || ""),
+			passesAt: parsePhaseRef(record.passesAt || ""),
+		});
+	}
+	return parsed;
+}
+/**
+ * Parse the bullet list under `### Deferred Verification`.
+ *
+ * An entry starts at a `- **name**` bullet. Its fields come either inline
+ * after an em dash (`— reason: … — would require: … — mitigation: …`) or from
+ * indented `- reason:` / `- would require:` / `- mitigation:` sub-bullets.
+ *
+ * @param {string[]} lines - Lines of the deferred block.
+ * @returns {Array<{name: string, reason: string, wouldRequire: string, mitigation: string}>}
+ *   One record per entry; fields that were not given are empty strings.
+ */
+function parseDeferredBlock(lines) {
+	const fieldByLabel = {
+		reason: "reason",
+		"would require": "wouldRequire",
+		mitigation: "mitigation",
+	};
+	const entries = [];
+	// Entry that sub-bullets currently attach to; null before the first name bullet.
+	let open = null;
+	for (const rawLine of lines) {
+		const text = rawLine.replace(/\s+$/, "");
+		const heading = /^-\s+\*\*(.+?)\*\*\s*(?:—\s*(.*))?$/.exec(text);
+		if (heading !== null) {
+			open = { name: heading[1].trim(), reason: "", wouldRequire: "", mitigation: "" };
+			entries.push(open);
+			const inline = heading[2];
+			if (inline) {
+				// Inline fields are separated by em dashes; mitigation runs to the end of the line.
+				const reasonPart = /reason:\s*([^—]+?)(?:\s*—|$)/i.exec(inline);
+				const requirePart = /would require:\s*([^—]+?)(?:\s*—|$)/i.exec(inline);
+				const mitigationPart = /mitigation:\s*(.+)$/i.exec(inline);
+				if (reasonPart !== null) open.reason = reasonPart[1].trim();
+				if (requirePart !== null) open.wouldRequire = requirePart[1].trim();
+				if (mitigationPart !== null) open.mitigation = mitigationPart[1].trim();
+			}
+			continue;
+		}
+		if (open === null) continue;
+		const field = /^\s+-\s+(reason|would require|mitigation):\s*(.*)$/i.exec(text);
+		if (field !== null) {
+			open[fieldByLabel[field[1].toLowerCase()]] = field[2].trim();
+		}
+	}
+	return entries;
+}
+/**
+ * Rule "cross-reference-integrity": check each phase's Verification block
+ * against the trajectory table.
+ *
+ * For every `#### Phase N Verification` block that has at least one checkbox
+ * item, the items must either mention a test ID (`T<digits>`) or carry a
+ * "(no tests flip at this phase — reason: …)" declaration. Mentioned IDs must
+ * exist in the trajectory table, and the declared reason must be one of
+ * schema-only, delete, refactor, infra.
+ *
+ * @param {string} implBody - Impl Markdown body.
+ * @param {{rows: Array<{id: string}>}} trajectory - Parsed trajectory.
+ * @returns {Array<{rule: string, message: string, line?: number}>} Violations;
+ *   `line` (1-based index into the body's lines) is set on per-item errors only.
+ */
+function validateCrossReferenceIntegrity(implBody, trajectory) {
+	const errors = [];
+	const knownIds = new Set(trajectory.rows.map((r) => r.id));
+	const allowed = new Set(["schema-only", "delete", "refactor", "infra"]);
+	const noTestsRegex = /\(no tests flip at this phase\s*[—–-]+\s*reason:\s*([a-z-]+)\s*\)/i;
+	const testIdPattern = /\bT\d+\b/g;
+	const lines = implBody.split("\n");
+	// Scanner state for the Verification block currently being read.
+	let currentPhase = null;
+	let inVerification = false;
+	let foundRef = false;
+	let foundDecl = false;
+	let itemCount = 0;
+	// Report a Verification block that had items but neither a test ID nor a
+	// "no tests flip" declaration. It is a no-op outside a Verification block.
+	const flushPhase = () => {
+		if (currentPhase !== null && inVerification && itemCount > 0 && !foundRef && !foundDecl) {
+			errors.push({
+				rule: "cross-reference-integrity",
+				message: `Phase ${currentPhase} Verification has no test ID references and no "(no tests flip at this phase — reason: {schema-only|delete|refactor|infra})" declaration.`,
+			});
+		}
+	};
+	for (let i = 0; i < lines.length; i++) {
+		const line = lines[i];
+		// A `### Phase N` heading closes the previous phase and resets the state.
+		const phaseMatch = line.match(/^###\s+Phase\s+(\d+)\b/);
+		if (phaseMatch) {
+			flushPhase();
+			currentPhase = Number.parseInt(phaseMatch[1], 10);
+			inVerification = false;
+			foundRef = false;
+			foundDecl = false;
+			itemCount = 0;
+			continue;
+		}
+		// `#### Phase N Verification` opens the block whose items are inspected.
+		const verMatch = line.match(/^####\s+Phase\s+(\d+)\s+Verification\b/);
+		if (verMatch && currentPhase !== null) {
+			flushPhase();
+			inVerification = true;
+			foundRef = false;
+			foundDecl = false;
+			itemCount = 0;
+			continue;
+		}
+		// One of the other gate headings ends the Verification block.
+		if (
+			inVerification &&
+			/^####\s+Phase\s+\d+\s+(OTel|Context|Document|Forward Intelligence)\b/.test(line)
+		) {
+			flushPhase();
+			inVerification = false;
+			continue;
+		}
+		if (inVerification) {
+			// Only checkbox items (`- [ ]`, `- [x]`, `- [X]`) are considered.
+			const item = line.match(/^-\s+\[[ xX]\]\s+(.*)/);
+			if (item) {
+				itemCount++;
+				const text = item[1];
+				const noTests = text.match(noTestsRegex);
+				if (noTests) {
+					foundDecl = true;
+					const reason = noTests[1].toLowerCase();
+					if (!allowed.has(reason)) {
+						errors.push({
+							rule: "cross-reference-integrity",
+							line: i + 1,
+							message: `Phase ${currentPhase} Verification: "(no tests flip at this phase — reason: ${reason})" uses disallowed reason. Allowed: schema-only, delete, refactor, infra.`,
+						});
+					}
+					// A declaration item is not scanned for test IDs.
+					continue;
+				}
+				const ids = text.match(testIdPattern);
+				if (ids) {
+					foundRef = true;
+					for (const id of ids) {
+						if (!knownIds.has(id)) {
+							errors.push({
+								rule: "cross-reference-integrity",
+								line: i + 1,
+								message: `Phase ${currentPhase} Verification references test ID \`${id}\` but no such row exists in the Test Trajectory table.`,
+							});
+						}
+					}
+				}
+			}
+		}
+	}
+	// Close the last block when the body ends inside a Verification section.
+	flushPhase();
+	return errors;
+}
+/**
+ * Rule "temporal-coherence": every row needs numeric "Writable at" and
+ * "Passes at" phases, and the writable phase must not come after the
+ * passing phase.
+ *
+ * @param {{rows: Array<{id: string, writableAt: number, passesAt: number}>}} trajectory
+ *   Parsed trajectory.
+ * @returns {Array<{rule: string, message: string}>} At most one violation per row.
+ */
+function validateTemporalCoherence(trajectory) {
+	const rule = "temporal-coherence";
+	return trajectory.rows.flatMap((row) => {
+		const { id, writableAt, passesAt } = row;
+		// An unparseable phase is reported on its own; the comparison is skipped.
+		if (!Number.isFinite(writableAt)) {
+			return [
+				{
+					rule,
+					message: `Trajectory row \`${id}\` has invalid "Writable at" — expected "Phase N" where N is a number.`,
+				},
+			];
+		}
+		if (!Number.isFinite(passesAt)) {
+			return [
+				{
+					rule,
+					message: `Trajectory row \`${id}\` has invalid "Passes at" — expected "Phase N" where N is a number.`,
+				},
+			];
+		}
+		if (writableAt <= passesAt) return [];
+		return [
+			{
+				rule,
+				message: `Trajectory row \`${id}\` has "Writable at" Phase ${writableAt} > "Passes at" Phase ${passesAt}. A test cannot pass before its dependencies exist.`,
+			},
+		];
+	});
+}
+/**
+ * Rule "deferred-completeness": every Deferred Verification entry must have
+ * non-empty reason, would require, and mitigation fields.
+ *
+ * @param {{deferred: Array<{name: string, reason: string, wouldRequire: string, mitigation: string}>}} trajectory
+ *   Parsed trajectory.
+ * @returns {Array<{rule: string, message: string}>} One violation per
+ *   incomplete entry, naming the missing fields.
+ */
+function validateDeferredCompleteness(trajectory) {
+	// [property on the entry, label used in the message], in reporting order.
+	const requiredFields = [
+		["reason", "reason"],
+		["wouldRequire", "would require"],
+		["mitigation", "mitigation"],
+	];
+	const problems = [];
+	for (const entry of trajectory.deferred) {
+		const absent = requiredFields
+			.filter(([property]) => !entry[property])
+			.map(([, label]) => label);
+		if (absent.length === 0) continue;
+		problems.push({
+			rule: "deferred-completeness",
+			message: `Deferred Verification row "${entry.name}" is missing: ${absent.join(", ")}. Every deferred row requires all three fields — reason, would require, mitigation.`,
+		});
+	}
+	return problems;
+}

package/lessons/community/community-tests-first-within-each-phase.md ADDED Viewed

@@ -0,0 +1,11 @@
+# Tests first within each phase
+Every impl document opens with a Test Trajectory table listing every test the plan commits to, with `Writable at` and `Passes at` columns.
+At the start of a phase, commit any test whose `Writable at` equals this phase — as failing. Close the phase only when every test whose `Passes at` equals this phase is passing. If a test isn't writable yet, that's fine — but its `Writable at` must name a later phase, and the reason must be structural (the test's dependencies don't exist yet), not aspirational ("we'll get to it").
+If a plan has items that are genuinely not testable — LLM quality, UX judgment, paid external integrations — put them in Deferred Verification with `reason:` (why not testable), `would require:` (what would unlock a proper test), and `mitigation:` (compensating control — alert, scheduled review, downstream plan, canary). If you cannot name a mitigation, that is itself a signal: reshape the plan so the capability becomes testable, or scope it out. Untestability is a declaration, not an omission.
+The test suite's pass count across phases is the plan's progress bar. Read it to know where you are.
+The `check-gates` hook blocks phase advancement when any `Passes at: Phase N` trajectory row is still in `planned`, `writable`, or `written` state. This is structural enforcement — deferral is impossible by construction. See `.indusk/planning/tests-first-planning/adr.md` for the full design.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@infinitedusky/indusk-mcp",
-	"version": "1.14.10",
+	"version": "1.15.0",
 	"description": "InDusk development system — skills, MCP tools, and CLI for structured AI-assisted development",
 	"type": "module",
 	"files": [

package/skills/planner.md CHANGED Viewed

@@ -102,11 +102,21 @@ Workflow templates are in `templates/workflows/` in the package. They describe w
 6. **If ADR is accepted** (or brief is accepted for bugfix/refactor), write the impl. Break into phased checklists with concrete tasks. For refactor workflows, include a `## Boundary Map` section. For multi-phase impls of any type, consider adding a boundary map.
+   **Author the Test Trajectory first.** Every new impl opens with a `## Test Trajectory` table (after `## Boundary Map`, before `## Checklist`) that enumerates the tests the plan commits to. Columns: `ID | Asserts | Writable at | Passes at | State` (plus optional `Kind`, `Scope`). Walk the ADR's Decision section — for each decision, ask "what test would prove this works?" and add a row. Then walk each planned phase and ask "what becomes writable at this phase, and what flips to passing?" Every phase's Verification block references test IDs from the trajectory rather than restating the checks.
+   **Trajectory sizing:** 3–5 tests for a bugfix or small feature, 10–25 for a multi-phase infrastructure plan. Prefer one high-level property test over five example tests where possible. If your trajectory has more rows than lines of new code, the plan is over-specified — consolidate. If it has fewer than one row per phase, you probably have untested phases — add rows or declare `(no tests flip at this phase — reason: {schema-only|delete|refactor|infra})` in the phase's Verification.
+   **Declare untestable items explicitly.** If a plan includes something that genuinely cannot be tested (LLM quality, paid external integrations, UX judgment), add a `### Deferred Verification` subsection below the trajectory table. Every deferred row requires three fields: `reason:` (why not testable here), `would require:` (what would unlock a proper test), and `mitigation:` (compensating control — alert, scheduled review, downstream plan, canary). Missing any field is a write-time error. If you can't name a mitigation, that's a signal: either reshape the plan so the capability becomes testable, or scope it out.
+   **Set `trajectory: required` in the impl frontmatter.** This opts the impl into trajectory validation by `validate-impl-structure.js`. Omitting it means the hook skips trajectory rules unless the body already contains a `## Test Trajectory` section (grandfathering for legacy impls); every NEW impl should set it.
+   See [`apps/indusk-docs/src/guide/test-trajectory.md`](../../indusk-docs/src/guide/test-trajectory.md) for the full user-facing guide (published in the `tests-first-planning` plan's Phase 5) and [`apps/indusk-docs/src/reference/trajectory/parser.md`](../../indusk-docs/src/reference/trajectory/parser.md) for the parser/validator API reference. The design rationale lives in `.indusk/planning/tests-first-planning/adr.md`.
    **Gate policy applies when writing impls.** Set `gate_policy` in the impl frontmatter (`strict`, `ask`, or `auto`). The `validate-impl-structure` hook enforces this at write time:
    - **`strict` / `ask`**: Every gate section (Verification, Context, Document) must have a real item — `(none needed)` and `skip-reason:` are blocked at write time. Opt-outs only happen during `/work` execution.
    - **`auto`**: Gate sections can be pre-filled with `(none needed)` or `skip-reason:` at write time.
-   Default is `ask`. See the work skill "Gate Override Policy" for full details on what each mode enforces at execution time.
+   Default is `ask`. See the work skill "Gate Override Policy" for full details on what each mode enforces at execution time. Trajectory enforcement (the four trajectory rules) applies regardless of `gate_policy` — the rules are structural, not policy-dependent.
    **OTel gate is conditional on `otel.role`.** Read `.indusk/config.json` for the project's `otel.role` field (or use the `shouldEmitOtelGate(projectRoot)` helper from `apps/indusk-mcp/src/lib/config.ts`). The OTel gate fires for projects whose `otel.role` is unset or `"service"` — these are user-facing apps that produce telemetry you want to collect. **Do NOT write `#### Phase N OTel` sections** for projects whose `otel.role` is `"library"`, `"tool"`, or `"none"` — these are libraries, CLIs, or scripts that should never emit telemetry and writing OTel gates for them is friction without value. The `validate-impl-structure` and `check-gates` hooks apply the same rule. The other gates (verify, context, document) always apply regardless of `otel.role`.
@@ -252,6 +262,8 @@ Include code snippets in checklist items when the syntax matters — function si
 title: "{Title}"
 date: {YYYY-MM-DD}
 status: draft | approved | in-progress | completed | abandoned
+trajectory: required
+gate_policy: ask
 ---
 # {Title}
@@ -274,6 +286,22 @@ For multi-phase impls, include a boundary map showing what each phase produces a
 | Phase 1 | {exports, types, modules created} | {inputs, dependencies used} |
 | Phase 2 | {what this phase adds} | {what it needs from Phase 1} |
+## Test Trajectory
+| ID | Asserts | Writable at | Passes at | State |
+|----|---------|-------------|-----------|-------|
+| T1 | {one-line assertion — what the test claims is true} | Phase 1 | Phase 1 | planned |
+| T2 | {another assertion} | Phase 1 | Phase 2 | planned |
+{Optional subsection — include ONLY if this plan has items that are genuinely untestable within its scope. Each row requires all three fields: reason, would require, mitigation.}
+### Deferred Verification
+- **{short name of the untestable item}**
+  - reason: {why this cannot be tested in this plan}
+  - would require: {what would unlock a proper test — a new environment, a future plan, production data}
+  - mitigation: {compensating control — telemetry alert, scheduled review, downstream plan, canary procedure, feedback signal}
 ## Checklist
 ### Phase 1: {Name}
 - [ ] {Task — include code snippets when syntax matters}
@@ -288,7 +316,11 @@ For multi-phase impls, include a boundary map showing what each phase produces a
 - [ ] {Instrumentation check — are new code paths observable? See the OTel skill for patterns. Example items: "New endpoints have manual spans with `otel.category` and domain attributes", "Errors recorded with `recordException` + `setStatus(ERROR)` + trace-correlated log". Ask: "did this phase add endpoints, business logic, state transitions, or error paths?" If not, this section can be opted out per gate policy.}
 #### Phase 1 Verification
-- [ ] {Verification step — prove this phase works. Must be a specific runnable command with expected output, not "verify it works." See the verify skill for guidance on what checks a phase needs based on what changed. Can include trace verification if OTel was added.}
+- [ ] T1 passes (`{runnable command, e.g. pnpm test}`)
+- [ ] T2 flips to `written` state (skipped until Phase 2)
+{If a phase has no tests flipping at it, declare it explicitly — NOT silently:}
+{- [ ] (no tests flip at this phase — reason: {schema-only | delete | refactor | infra})}
 #### Phase 1 Context
 - [ ] {Concrete CLAUDE.md edit this phase produces — e.g., "Add to Architecture: ...", "Add to Conventions: ...", "Update Current State: ...". Ask: "what does this phase change about how the project works?" If nothing, omit this section.}

package/skills/retrospective.md CHANGED Viewed

@@ -66,6 +66,27 @@ Review the test files created or modified during this plan.
 Flag gaps but don't necessarily fix them all now — add them as items to a follow-up plan if they're significant.
+#### Step 4a: Test Trajectory Audit
+If the impl used a `## Test Trajectory` section (frontmatter `trajectory: required`), run the trajectory audit:
+```ts
+// From apps/indusk-mcp/src/lib/trajectory/audit.ts
+import { auditPlanAtClose } from "./audit.js";
+const result = auditPlanAtClose(implBody);
+// result.deferred: MitigationClassification[] — one per Deferred Verification row
+// result.blocked: BlockedRowFinding[] — rows ending in `blocked` state
+```
+For each finding, act on it:
+- **Blocked rows** — these ended the plan unresolved. For each: either (a) fix the test and update State to `passing` as a retroactive phase-close, (b) move the row's `Passes at` to a later plan with a link, or (c) promote to Deferred Verification with a real mitigation. Do not leave blocked rows unresolved — they're a debt flag.
+- **Deferred rows with vague mitigations** (`warning` non-null) — the mitigation text was too short or unclassifiable. Propose a more concrete commitment: a specific OTel metric name, a named review owner with cadence, a linked plan ID, a documented canary procedure. Update the impl.md's Deferred Verification row before archiving.
+- **Deferred rows classified as `downstream-plan`** — verify the referenced plan exists and is either `accepted` or `in-progress`. If it's `draft` or missing, either accept the referenced plan now or pick a different mitigation.
+- **Deferred rows classified as `telemetry-alert`** — verify the named metric actually exists in the codebase (grep for it). If the metric hasn't been wired up, the mitigation is aspirational — either wire it up now or change the mitigation.
+Capture findings as a `retrospective-audit-{plan-slug}` episode in Graphiti (use `mcp__indusk__graph_capture` to dual-write to the semantic log). Include the classification, the warning (if any), and what was done. This is the signal the eval agent uses to detect mitigation drift over time.
 ### Step 5: Quality Audit
 Review mistakes made during this plan's implementation.

package/skills/verify.md CHANGED Viewed

@@ -42,6 +42,19 @@ When unsure, run the check. False negatives (missing a real error) are worse tha
 When the work skill is executing an impl and reaches verification items, run checks in this order (fastest first):
+### Test ID references
+If the impl has a `## Test Trajectory` section and a Verification item says "T3 passes (`...`)", resolve the ID to a runnable command:
+1. Read the Trajectory table, find the row with matching ID
+2. The item's parenthetical usually contains the command directly — use that
+3. If the parenthetical is missing or generic (`pnpm test`), derive a filter from the row's `Asserts` column:
+   - Extract backtick-quoted code identifiers (highest priority)
+   - Fall back to the longest camelCase/kebab-case identifier
+   - Use as `-t "{keyword}"` filter with the project's test runner
+Use the `resolveTestIdCommand(trajectory, id)` helper from [`apps/indusk-mcp/src/lib/trajectory/audit.ts`](/reference/trajectory/parser) for mechanical resolution. A phase cannot close (via the `check-gates` hook) until every `Passes at: Phase N` row has reached `State: passing` (or has been explicitly marked `skipped` or `blocked`) — so verify MUST run those tests, not just the command the author typed.
 ### Check Order
 1. **Type check** — `tsc --noEmit` or `pnpm turbo typecheck --filter={app}` if wired

package/skills/work.md CHANGED Viewed

@@ -62,6 +62,62 @@ Implementation plans live in `.indusk/planning/{plan-name}/impl.md` as checklist
    A phase is not complete until all five are done. **Enforced by hooks:** if you try to check off a Phase N+1 implementation item while Phase N has unchecked gates, the edit will be blocked with a message listing what's missing. Complete the gates first.
+## Test Trajectory — Phase Responsibilities
+If the impl has a `## Test Trajectory` table (frontmatter `trajectory: required`), the work skill takes on two additional responsibilities at phase boundaries.
+### At phase start — author writable-at-phase tests
+Before starting implementation items for Phase N:
+1. Read the Test Trajectory. Collect every row with `Writable at: Phase N` whose `State` is `planned` or `writable`.
+2. For each such row: create the test file (or add the test case to an existing file) implementing the `Asserts` description. Commit it as failing. If the test cannot yet run against a compiled symbol, use `.skip()` with a comment naming the unlock phase.
+3. Update each row's `State` to `written` in the trajectory table.
+These tests are the contract for the phase. They fail when the phase begins; they pass when it ends.
+### At phase close — verify passes-at-phase tests
+Before advancing past Phase N (i.e., before checking the first implementation item in Phase N+1):
+1. Collect every row with `Passes at: Phase N`.
+2. Run the tests. For each row whose test now passes, update its `State` to `passing` in the trajectory table.
+3. If a test is explicitly skipped (approval test awaiting first run, platform-specific test), update to `skipped` with an inline comment on the reason.
+4. If a test regressed or its dependencies changed unexpectedly, update to `blocked` — then resolve it (fix the test, or move its `Passes at` to a later phase with a reason).
+5. The `check-gates` hook rejects the phase transition if any row with `Passes at: Phase N` (or any earlier phase) is still in `planned`, `writable`, or `written` state. This is structural enforcement of "deferral is impossible."
+### State lifecycle
+```
+planned → writable → written → passing
+                              ↘ skipped (with reason)
+                              ↘ blocked (needs investigation)
+```
+| State | Meaning |
+|-------|---------|
+| `planned` | Row exists in the trajectory, no file yet |
+| `writable` | Dependencies exist; test can now be authored |
+| `written` | Test file exists and runs (fails or is `.skip()`) |
+| `passing` | Test runs and passes |
+| `skipped` | Intentionally `.skip()` with a documented reason |
+| `blocked` | Was writable/written, now regressed or changed; needs investigation |
+### Library helpers
+The `apps/indusk-mcp/src/lib/trajectory/state-ops.ts` module provides:
+- `getRowsWritableAt(trajectory, phase)` — rows to author at phase start
+- `getRowsBlockingPhaseClose(trajectory, phase)` — rows preventing phase close
+- `updateRowState(body, id, newState)` — rewrite the State cell in impl.md body
+- `getPhaseStartNudge(body, phase)` / `getPhaseCloseNudge(body, phase)` — human-readable reminder text
+Call these via `tsx` or through the InDusk MCP (once wired) rather than re-parsing the table by hand.
+### Deferred Verification audit
+The retrospective skill audits Deferred Verification rows at plan close — checking that each row's `mitigation:` field was actually wired up (telemetry configured, review scheduled, downstream plan linked). The work skill only maintains the Trajectory; the retrospective skill validates completeness at the end.
 ## Gate Override Policy
 Gates exist to prevent skipping important work. But sometimes a gate genuinely doesn't apply. The override policy controls what happens when the agent wants to skip a gate item.