@infinitedusky/indusk-mcp 1.14.10 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -304,5 +304,126 @@ for (const item of newlyChecked) {
304
304
  }
305
305
  }
306
306
 
307
// ------------------------------------------------------------------
// Trajectory enforcement: if advancing past Phase N (checking an
// implementation item in Phase N+1 or later), every trajectory row
// with `Passes at: Phase K` where K <= N must be in state `passing`,
// `skipped`, or `blocked`. Planned/writable/written states fail the
// phase close — the whole point of the tests-first-planning system is
// that deferral is structurally impossible.
//
// Skipped if the impl has no `## Test Trajectory` section (grandfathered).
// ------------------------------------------------------------------

const hasTrajectorySection = /^##\s+Test Trajectory\b/m.test(newFullContent);
if (hasTrajectorySection) {
  // Only the highest newly-advanced phase matters: every phase before it is
  // closing. Scanning once up to that phase reports each blocker exactly
  // once, even when items in several phases were checked in the same edit.
  let highestAdvancingPhase = 0;
  for (const item of newlyChecked) {
    if (item.gate === "implementation" && item.phase > highestAdvancingPhase) {
      highestAdvancingPhase = item.phase;
    }
  }

  if (highestAdvancingPhase > 1) {
    const trajectory = parseTrajectoryFromBody(newFullContent);
    // States that allow a phase to close.
    const settledStates = new Set(["passing", "skipped", "blocked"]);
    const allBlockers = [];
    for (let closingPhase = 1; closingPhase < highestAdvancingPhase; closingPhase++) {
      for (const row of trajectory.rows) {
        if (row.passesAt === closingPhase && !settledStates.has(row.state)) {
          allBlockers.push({ phase: closingPhase, row });
        }
      }
    }

    if (allBlockers.length > 0) {
      const msg = allBlockers
        .map(
          (b) =>
            ` [${b.row.id}] ${b.row.asserts} — state: ${b.row.state} (Phase ${b.phase} cannot close until this row is 'passing', 'skipped', or 'blocked')`,
        )
        .join("\n");
      process.stderr.write(
        `Trajectory blocks phase advance (policy: ${gatePolicy}):\n${msg}\n\nEvery trajectory row with 'Passes at: Phase N' must be 'passing', 'skipped', or 'blocked' before advancing past Phase N. See .indusk/planning/tests-first-planning/adr.md Section 6.\n`,
      );
      // Exit code 2 signals the hook runner that the edit is rejected.
      process.exit(2);
    }
  }
}
358
+
307
359
  // All checks passed
308
360
  process.exit(0);
361
+
362
+ // ------------------------------------------------------------------
363
+ // Trajectory parser (pure JS, mirrors parser.ts — simplified to read
364
+ // just id, passesAt, and state which is all this hook needs).
365
+ // ------------------------------------------------------------------
366
+
367
/**
 * Extract the rows of the `## Test Trajectory` table from an impl document.
 *
 * Leading YAML frontmatter (`---` … `---`) is stripped first. The table region
 * runs from the `## Test Trajectory` heading up to the next h1–h3 heading
 * (which includes `### Deferred Verification`).
 *
 * @param {string} implContent Full impl.md text, frontmatter included.
 * @returns {{ rows: Array<{ id: string, asserts: string, passesAt: number, state: string }> }}
 *   One entry per data row whose cell count matches the header and whose ID
 *   is non-empty. `passesAt` is NaN when the cell is not of the form "Phase N".
 */
function parseTrajectoryFromBody(implContent) {
  const fmMatch = implContent.match(/^---\n[\s\S]*?\n---\n/);
  const body = fmMatch ? implContent.slice(fmMatch[0].length) : implContent;

  let inTrajectory = false;
  const tableLines = [];
  for (const line of body.split("\n")) {
    if (/^##\s+Test Trajectory\b/.test(line)) {
      inTrajectory = true;
      continue;
    }
    if (!inTrajectory) continue;
    // Any h1–h3 heading closes the table region; `### Deferred Verification`
    // is one of them, so it needs no separate check.
    if (/^#{1,3}\s+/.test(line)) break;
    tableLines.push(line);
  }

  const pipeLines = tableLines.filter((l) => l.trim().startsWith("|"));
  if (pipeLines.length < 2) return { rows: [] };
  const header = parseRowCells(pipeLines[0]);
  const sep = parseRowCells(pipeLines[1]);
  // The second pipe line must be a Markdown separator row (`---`, `:--`, `--:`, `:-:`).
  if (!sep.every((c) => /^:?-+:?$/.test(c))) return { rows: [] };

  const headerKeys = new Map([
    ["id", "id"],
    ["passes at", "passesAt"],
    ["state", "state"],
    ["writable at", "writableAt"],
    ["asserts", "asserts"],
  ]);
  const keys = header.map((h) => {
    const n = h.toLowerCase().trim();
    return headerKeys.get(n) ?? n;
  });

  const rows = [];
  for (let i = 2; i < pipeLines.length; i++) {
    const cells = parseRowCells(pipeLines[i]);
    if (cells.length !== keys.length) continue;
    const rec = {};
    for (let j = 0; j < keys.length; j++) rec[keys[j]] = cells[j];
    if (!rec.id) continue;
    const passesMatch = (rec.passesAt || "").match(/^\s*Phase\s+(\d+)\s*$/i);
    rows.push({
      id: rec.id.trim(),
      asserts: (rec.asserts || "").trim(),
      passesAt: passesMatch ? Number.parseInt(passesMatch[1], 10) : Number.NaN,
      state: (rec.state || "").toLowerCase().trim(),
    });
  }
  return { rows };
}

/**
 * Split one Markdown table line into trimmed cell strings.
 *
 * Cells are separated by unescaped pipes only. A backslash-escaped pipe
 * (`\|`) stays inside its cell and is returned as a literal `|`, so a row
 * whose text mentions a pipe keeps the right cell count instead of being
 * dropped by the caller.
 *
 * @param {string} line A single line of text.
 * @returns {string[]} The cells, or [] when the line is not wrapped in pipes.
 */
function parseRowCells(line) {
  const trimmed = line.trim();
  if (!trimmed.startsWith("|") || !trimmed.endsWith("|")) return [];
  return trimmed
    .slice(1, -1)
    .split(/(?<!\\)\|/)
    .map((c) => c.replace(/\\\|/g, "|").trim());
}
@@ -103,9 +103,120 @@ for (const phase of phases) {
103
103
  console.error(
104
104
  `Phase ${phase.number} (${phase.name}) is fully complete. Call advance_plan to validate gates before starting Phase ${nextPhase.number}.`,
105
105
  );
106
+ // Add trajectory nudges if applicable
107
+ const trajectory = parseTrajectoryRows(content);
108
+ if (trajectory.rows.length > 0) {
109
+ const startNudge = writableAtNudge(trajectory, nextPhase.number);
110
+ if (startNudge) console.error(`\n${startNudge}`);
111
+ }
106
112
  process.exit(0);
107
113
  }
108
114
  }
109
115
  }
110
116
 
117
// Extra nudge for a phase that is mid-execution (at least one item checked,
// at least one still open): list the trajectory rows due at that phase which
// are not yet in a state that lets the phase close.
const trajectory = parseTrajectoryRows(content);
if (trajectory.rows.length > 0) {
  const settledStates = ["passing", "skipped", "blocked"];
  for (const phase of phases) {
    const checkedCount = phase.items.filter((i) => i.checked).length;
    const midExecution = checkedCount > 0 && checkedCount < phase.items.length;
    if (!midExecution) continue;

    const pending = [];
    for (const row of trajectory.rows) {
      if (row.passesAt !== phase.number) continue;
      if (settledStates.includes(row.state)) continue;
      pending.push(row);
    }
    if (pending.length === 0) continue;

    const lines = pending.map((r) => ` [${r.id}] ${r.asserts} — state: ${r.state}`);
    console.error(
      `Phase ${phase.number} trajectory rows still not passing (will block phase close):\n${lines.join("\n")}`,
    );
    break; // one nudge per hook invocation
  }
}
141
+
111
142
  process.exit(0);
143
+
144
+ // ------------------------------------------------------------------
145
+ // Trajectory parsing (minimal — just id, passesAt, state)
146
+ // ------------------------------------------------------------------
147
+
148
/**
 * Extract the rows of the `## Test Trajectory` table from an impl document.
 *
 * Leading YAML frontmatter is stripped first. The table region runs from the
 * `## Test Trajectory` heading up to the next h1–h3 heading (which includes
 * `### Deferred Verification`).
 *
 * @param {string} implContent Full impl.md text, frontmatter included.
 * @returns {{ rows: Array<{ id: string, asserts: string, writableAt: number, passesAt: number, state: string }> }}
 *   One entry per data row whose cell count matches the header and whose ID
 *   is non-empty. Phase fields are NaN when the cell is not "Phase N".
 */
function parseTrajectoryRows(implContent) {
  const fmMatch = implContent.match(/^---\n[\s\S]*?\n---\n/);
  const body = fmMatch ? implContent.slice(fmMatch[0].length) : implContent;

  let inTrajectory = false;
  const tableLines = [];
  for (const line of body.split("\n")) {
    if (/^##\s+Test Trajectory\b/.test(line)) {
      inTrajectory = true;
      continue;
    }
    if (!inTrajectory) continue;
    // Any h1–h3 heading closes the table region; `### Deferred Verification`
    // is one of them, so it needs no separate check.
    if (/^#{1,3}\s+/.test(line)) break;
    tableLines.push(line);
  }

  const pipeLines = tableLines.filter((l) => l.trim().startsWith("|"));
  if (pipeLines.length < 2) return { rows: [] };
  const header = parseRow(pipeLines[0]);
  const sep = parseRow(pipeLines[1]);
  // The second pipe line must be a Markdown separator row.
  if (!sep.every((c) => /^:?-+:?$/.test(c))) return { rows: [] };

  const headerKeys = new Map([
    ["id", "id"],
    ["writable at", "writableAt"],
    ["passes at", "passesAt"],
    ["state", "state"],
    ["asserts", "asserts"],
  ]);
  const keys = header.map((h) => {
    const n = h.toLowerCase().trim();
    return headerKeys.get(n) ?? n;
  });

  // "Phase N" (case-insensitive, surrounding whitespace allowed) → N, else NaN.
  const toPhase = (cell) => {
    const m = (cell || "").match(/^\s*Phase\s+(\d+)\s*$/i);
    return m ? Number.parseInt(m[1], 10) : Number.NaN;
  };

  const rows = [];
  for (let i = 2; i < pipeLines.length; i++) {
    const cells = parseRow(pipeLines[i]);
    if (cells.length !== keys.length) continue;
    const rec = {};
    for (let j = 0; j < keys.length; j++) rec[keys[j]] = cells[j];
    if (!rec.id) continue;
    rows.push({
      id: rec.id.trim(),
      asserts: (rec.asserts || "").trim(),
      writableAt: toPhase(rec.writableAt),
      passesAt: toPhase(rec.passesAt),
      state: (rec.state || "").toLowerCase().trim(),
    });
  }
  return { rows };
}

/**
 * Split one Markdown table line into trimmed cell strings.
 *
 * Cells are separated by unescaped pipes only. A backslash-escaped pipe
 * (`\|`) stays inside its cell and is returned as a literal `|`, so rows
 * that mention a pipe are not dropped for having the wrong cell count.
 *
 * @param {string} line A single line of text.
 * @returns {string[]} The cells, or [] when the line is not wrapped in pipes.
 */
function parseRow(line) {
  const t = line.trim();
  if (!t.startsWith("|") || !t.endsWith("|")) return [];
  return t
    .slice(1, -1)
    .split(/(?<!\\)\|/)
    .map((c) => c.replace(/\\\|/g, "|").trim());
}
213
+
214
/**
 * Build the "tests to author" reminder shown when a phase opens.
 *
 * @param {{ rows: Array<{ id: string, asserts: string, writableAt: number, state: string }> }} trajectory
 * @param {number} phase Phase number that is about to start.
 * @returns {string | null} The reminder text, or null when no row with
 *   `writableAt === phase` is in state "planned", "writable", or "".
 */
function writableAtNudge(trajectory, phase) {
  const unauthoredStates = ["planned", "writable", ""];
  const bullets = [];
  for (const r of trajectory.rows) {
    if (r.writableAt !== phase) continue;
    if (!unauthoredStates.includes(r.state)) continue;
    bullets.push(` [${r.id}] ${r.asserts}`);
  }
  if (bullets.length === 0) return null;
  return `Phase ${phase} opens with these tests to author (commit as failing before implementation work):\n${bullets.join("\n")}`;
}
@@ -34,7 +34,7 @@ if (!filePath.endsWith("/impl.md") && !filePath.endsWith("\\impl.md")) {
34
34
 
35
35
  /**
36
36
  * Find the project root by walking up from a starting directory looking for
37
- * a .indusk/ or .claude/ directory. Falls back to event.cwd if none found.
37
+ * a .indusk/ or .claude/ directory. Falls back to startDir if none found.
38
38
  * Mirrors the pattern used in check-catchup.js.
39
39
  */
40
40
  function findProjectRoot(startDir) {
@@ -48,6 +48,25 @@ function findProjectRoot(startDir) {
48
48
  return startDir;
49
49
  }
50
50
 
51
/**
 * Determine the project root for the file being edited.
 *
 * The file's own directory chain is tried first: the edited file lives
 * inside the project, so walking up from it finds `.indusk/` (or `.claude/`)
 * even when `event.cwd` points somewhere unrelated (observed from the
 * Claude Code VS Code extension on impl edits). If that walk does not land
 * on a directory containing one of those markers, the search restarts from
 * `eventCwd`, and from `process.cwd()` when `eventCwd` is null/undefined.
 *
 * @param {string} filePath Path of the file being edited (may be empty).
 * @param {string | null | undefined} eventCwd Working directory reported by the hook event.
 * @returns {string} Whatever `findProjectRoot` returns for the chosen start directory.
 */
function resolveProjectRoot(filePath, eventCwd) {
  const hasProjectMarker = (dir) => existsSync(`${dir}/.indusk`) || existsSync(`${dir}/.claude`);

  if (filePath) {
    // resolve(filePath, "..") yields the directory that contains the file.
    const candidate = findProjectRoot(resolve(filePath, ".."));
    // findProjectRoot returns its start directory when nothing is found,
    // so confirm a marker is really there.
    if (hasProjectMarker(candidate)) return candidate;
  }

  const fallbackStart = eventCwd ?? process.cwd();
  return findProjectRoot(fallbackStart);
}
69
+
51
70
  /**
52
71
  * Whether the OTel gate should fire for this project. Reads .indusk/config.json
53
72
  * and checks otel.role. Returns true if the config is missing, if otel.role is
@@ -66,7 +85,7 @@ function shouldEmitOtelGate(projectRoot) {
66
85
  }
67
86
  }
68
87
 
69
- const projectRoot = findProjectRoot(event.cwd ?? process.cwd());
88
+ const projectRoot = resolveProjectRoot(filePath, event.cwd);
70
89
  const otelGateEnabled = shouldEmitOtelGate(projectRoot);
71
90
 
72
91
  // Check for skip-gates escape hatch
@@ -257,8 +276,7 @@ for (const phase of phases) {
257
276
  const optOuts = [];
258
277
  if (requirements.verification && phase.hasVerification && phase.verificationIsOptOut)
259
278
  optOuts.push("Verification");
260
- if (requirements.otel && phase.hasOtel && phase.otelIsOptOut)
261
- optOuts.push("OTel");
279
+ if (requirements.otel && phase.hasOtel && phase.otelIsOptOut) optOuts.push("OTel");
262
280
  if (requirements.context && phase.hasContext && phase.contextIsOptOut) optOuts.push("Context");
263
281
  if (requirements.document && phase.hasDocument && phase.documentIsOptOut)
264
282
  optOuts.push("Document");
@@ -274,6 +292,36 @@ for (const phase of phases) {
274
292
  }
275
293
  }
276
294
 
295
// ------------------------------------------------------------------
// Trajectory validation (tests-first-planning, Phase 1)
//
// The four additive rules below run when at least one of these holds:
//   (a) the frontmatter includes `trajectory: required`
//   (b) the body contains a `## Test Trajectory` section
//
// With neither, this section is skipped — grandfathered impls pass through.
//
// Rules:
//   1. trajectory-presence: `## Test Trajectory` section is present
//   2. cross-reference-integrity: phase Verification test-ID references exist in trajectory
//   3. temporal-coherence: every row has Writable at ≤ Passes at
//   4. deferred-completeness: every Deferred Verification row has reason, would require, mitigation
// ------------------------------------------------------------------

const trajectoryRequiredFrontmatter = /trajectory:\s*required/.test(frontmatter);
const hasTrajectoryHeading = /^##\s+Test Trajectory\b/m.test(body);
const trajectoryValidationEnabled = trajectoryRequiredFrontmatter || hasTrajectoryHeading;

if (trajectoryValidationEnabled) {
  const trajectoryErrors = validateTrajectory(body);
  if (trajectoryErrors.length > 0) {
    const details = trajectoryErrors.map((e) => ` [${e.rule}] ${e.message}`).join("\n");
    process.stderr.write(
      `Test Trajectory validation failed (policy: ${gatePolicy}):\n${details}\n\nSee .indusk/planning/tests-first-planning/adr.md Sections 3-6 for the Test Trajectory shape and validator rules.\n`,
    );
    process.exit(2);
  }
}
324
+
277
325
  if (errors.length > 0) {
278
326
  const msg = errors.join("\n");
279
327
  const reqNames = Object.entries(requirements)
@@ -292,3 +340,295 @@ if (errors.length > 0) {
292
340
  }
293
341
 
294
342
  process.exit(0);
343
+
344
+ // ------------------------------------------------------------------
345
+ // Trajectory validation helpers (pure JS, mirrors
346
+ // apps/indusk-mcp/src/lib/trajectory/validator.ts and parser.ts)
347
+ // ------------------------------------------------------------------
348
+
349
/**
 * Run every trajectory rule against an impl body.
 *
 * When the `## Test Trajectory` heading is absent, only the
 * trajectory-presence error is returned. Otherwise the cross-reference,
 * temporal-coherence, and deferred-completeness errors are returned
 * together, in that order.
 *
 * @param {string} implBody Impl text.
 * @returns {Array<{ rule: string, message: string, line?: number }>}
 */
function validateTrajectory(implBody) {
  const sectionPresent = /^##\s+Test Trajectory\b/m.test(implBody);
  if (!sectionPresent) {
    // Rule 1: trajectory presence — nothing else can be checked without the section.
    return [
      {
        rule: "trajectory-presence",
        message:
          "Impl is missing the `## Test Trajectory` section. Every impl using the new shape must declare its tests at the top as a table with columns: ID | Asserts | Writable at | Passes at | State.",
      },
    ];
  }

  const trajectory = parseTrajectoryFromBody(implBody);
  return [
    ...validateCrossReferenceIntegrity(implBody, trajectory),
    ...validateTemporalCoherence(trajectory),
    ...validateDeferredCompleteness(trajectory),
  ];
}
368
+
369
/**
 * Split the `## Test Trajectory` section into its table lines and its
 * `### Deferred Verification` lines, then parse each part.
 *
 * Collection starts after the `## Test Trajectory` heading and stops at the
 * next h1–h3 heading other than `### Deferred Verification`.
 *
 * @param {string} implBody Impl text.
 * @returns {{ rows: Array<object>, deferred: Array<object> }}
 *   `rows` from parseTrajectoryTable, `deferred` from parseDeferredBlock.
 */
function parseTrajectoryFromBody(implBody) {
  const trajectoryHeading = /^##\s+Test Trajectory\b/;
  const deferredHeading = /^###\s+Deferred Verification\b/;
  const sectionHeading = /^#{1,3}\s+/;

  const tableLines = [];
  const deferredLines = [];
  // Bucket currently receiving lines; null until the trajectory heading is seen.
  let bucket = null;

  for (const line of implBody.split("\n")) {
    if (trajectoryHeading.test(line)) {
      bucket = tableLines;
      continue;
    }
    if (bucket === null) continue;
    if (deferredHeading.test(line)) {
      bucket = deferredLines;
      continue;
    }
    if (sectionHeading.test(line)) break;
    bucket.push(line);
  }

  return {
    rows: parseTrajectoryTable(tableLines),
    deferred: parseDeferredBlock(deferredLines),
  };
}
403
+
404
/**
 * Split one Markdown table line into trimmed cell strings.
 *
 * Cells are separated by unescaped pipes only. A backslash-escaped pipe
 * (`\|`) stays inside its cell and is returned as a literal `|`, so a row
 * whose text mentions a pipe keeps the expected cell count.
 *
 * @param {string} line A single line of text.
 * @returns {string[]} The cells, or [] when the line does not both start
 *   and end with `|` after trimming.
 */
function parseTableRow(line) {
  const trimmed = line.trim();
  if (!trimmed.startsWith("|") || !trimmed.endsWith("|")) return [];
  return trimmed
    .slice(1, -1)
    .split(/(?<!\\)\|/)
    .map((cell) => cell.replace(/\\\|/g, "|").trim());
}
412
+
413
/**
 * Map a table header cell to the record key used by the parser.
 *
 * The header is lower-cased, inner whitespace runs are collapsed to one
 * space, and the result is trimmed. Known headers map to camelCase keys;
 * anything else is returned in its normalized form.
 *
 * @param {string} header Raw header cell text.
 * @returns {string} The record key — always a string.
 */
function normalizeHeader(header) {
  const normalized = header.toLowerCase().replace(/\s+/g, " ").trim();
  // A Map avoids hitting Object.prototype members: with a plain object,
  // a header such as "constructor" would return a function, not a string.
  const aliases = new Map([
    ["id", "id"],
    ["asserts", "asserts"],
    ["writable at", "writableAt"],
    ["passes at", "passesAt"],
    ["state", "state"],
    ["kind", "kind"],
    ["scope", "scope"],
  ]);
  return aliases.get(normalized) ?? normalized;
}
426
+
427
/**
 * Read the phase number out of a "Phase N" cell.
 *
 * @param {string} cell Cell text; matching is case-insensitive and ignores
 *   surrounding whitespace.
 * @returns {number} N as an integer, or NaN when the cell is not exactly "Phase N".
 */
function parsePhaseRef(cell) {
  const found = cell.match(/^\s*Phase\s+(\d+)\s*$/i);
  if (found === null) return Number.NaN;
  return Number.parseInt(found[1], 10);
}
431
+
432
/**
 * Parse the lines of the trajectory section into row records.
 *
 * Only lines that start with `|` (after trimming) are considered. The first
 * is the header, the second must be a separator row, and the rest are data
 * rows. Data rows with a cell count different from the header, or with an
 * empty ID or Asserts cell, are left out.
 *
 * @param {string[]} lines Lines collected from the trajectory section.
 * @returns {Array<{ id: string, asserts: string, writableAt: number, passesAt: number }>}
 */
function parseTrajectoryTable(lines) {
  const [headerLine, separatorLine, ...dataLines] = lines.filter((l) =>
    l.trim().startsWith("|"),
  );
  if (separatorLine === undefined) return [];

  const headerCells = parseTableRow(headerLine);
  const separatorCells = parseTableRow(separatorLine);
  const isSeparatorCell = (c) => /^:?-+:?$/.test(c);
  if (!separatorCells.every(isSeparatorCell)) return [];

  const keys = headerCells.map(normalizeHeader);
  const rows = [];
  for (const dataLine of dataLines) {
    const cells = parseTableRow(dataLine);
    if (cells.length !== keys.length) continue;

    const rec = {};
    keys.forEach((key, j) => {
      rec[key] = cells[j];
    });
    if (!rec.id || !rec.asserts) continue;

    rows.push({
      id: rec.id.trim(),
      asserts: rec.asserts.trim(),
      writableAt: parsePhaseRef(rec.writableAt || ""),
      passesAt: parsePhaseRef(rec.passesAt || ""),
    });
  }
  return rows;
}
456
+
457
/**
 * Parse the `### Deferred Verification` bullet list.
 *
 * An entry starts with a top-level bullet whose text opens with a bold name
 * (`- **name**`). Its three fields may follow inline after an em dash
 * (`— reason: … — would require: … — mitigation: …`) or as indented
 * sub-bullets (`  - reason: …`). Lines that fit neither shape are ignored.
 *
 * @param {string[]} lines Lines of the Deferred Verification subsection.
 * @returns {Array<{ name: string, reason: string, wouldRequire: string, mitigation: string }>}
 *   One record per entry; a field that was not given is "".
 */
function parseDeferredBlock(lines) {
  const entryPattern = /^-\s+\*\*(.+?)\*\*\s*(?:—\s*(.*))?$/;
  const subBulletPattern = /^\s+-\s+(reason|would require|mitigation):\s*(.*)$/i;
  const inlinePatterns = [
    ["reason", /reason:\s*([^—]+?)(?:\s*—|$)/i],
    ["wouldRequire", /would require:\s*([^—]+?)(?:\s*—|$)/i],
    ["mitigation", /mitigation:\s*(.+)$/i],
  ];
  const fieldForLabel = {
    reason: "reason",
    "would require": "wouldRequire",
    mitigation: "mitigation",
  };

  const entries = [];
  // Entry currently being filled; it already sits in `entries`.
  let open = null;

  for (const rawLine of lines) {
    const line = rawLine.replace(/\s+$/, "");

    const entryMatch = line.match(entryPattern);
    if (entryMatch) {
      open = { name: entryMatch[1].trim(), reason: "", wouldRequire: "", mitigation: "" };
      entries.push(open);
      const inlineText = entryMatch[2];
      if (inlineText) {
        for (const [field, pattern] of inlinePatterns) {
          const hit = inlineText.match(pattern);
          if (hit) open[field] = hit[1].trim();
        }
      }
      continue;
    }

    if (open === null) continue;
    const subMatch = line.match(subBulletPattern);
    if (!subMatch) continue;
    open[fieldForLabel[subMatch[1].toLowerCase()]] = subMatch[2].trim();
  }

  return entries;
}
501
+
502
/**
 * Rule 2 (cross-reference-integrity): check each phase's Verification block
 * against the trajectory table.
 *
 * Within a `#### Phase N Verification` block, every checklist item is read:
 *   - a "(no tests flip at this phase — reason: X)" item must use an allowed reason;
 *   - every `T<digits>` token must be the ID of a trajectory row.
 * A Verification block that has items but neither a test ID nor such a
 * declaration is reported as well.
 *
 * @param {string} implBody Impl text.
 * @param {{ rows: Array<{ id: string }> }} trajectory Parsed trajectory.
 * @returns {Array<{ rule: string, message: string, line?: number }>}
 *   `line` is the 1-based line number within `implBody`, present on per-item errors.
 */
function validateCrossReferenceIntegrity(implBody, trajectory) {
  const RULE = "cross-reference-integrity";
  const errors = [];
  const knownIds = new Set(trajectory.rows.map((r) => r.id));
  const allowed = new Set(["schema-only", "delete", "refactor", "infra"]);
  const noTestsRegex = /\(no tests flip at this phase\s*[—–-]+\s*reason:\s*([a-z-]+)\s*\)/i;
  const otherGateHeading = /^####\s+Phase\s+\d+\s+(OTel|Context|Document|Forward Intelligence)\b/;

  let currentPhase = null;
  let inVerification = false;
  let sawReference = false;
  let sawDeclaration = false;
  let itemCount = 0;

  const resetCounters = () => {
    sawReference = false;
    sawDeclaration = false;
    itemCount = 0;
  };

  // Report the Verification block being left if it had items but named no test.
  const reportSilentVerification = () => {
    if (currentPhase === null || !inVerification) return;
    if (itemCount === 0 || sawReference || sawDeclaration) return;
    errors.push({
      rule: RULE,
      message: `Phase ${currentPhase} Verification has no test ID references and no "(no tests flip at this phase — reason: {schema-only|delete|refactor|infra})" declaration.`,
    });
  };

  implBody.split("\n").forEach((line, index) => {
    const phaseHeading = line.match(/^###\s+Phase\s+(\d+)\b/);
    if (phaseHeading) {
      reportSilentVerification();
      currentPhase = Number.parseInt(phaseHeading[1], 10);
      inVerification = false;
      resetCounters();
      return;
    }

    // A Verification heading only counts once a phase heading has been seen.
    if (currentPhase !== null && /^####\s+Phase\s+(\d+)\s+Verification\b/.test(line)) {
      reportSilentVerification();
      inVerification = true;
      resetCounters();
      return;
    }

    if (!inVerification) return;

    if (otherGateHeading.test(line)) {
      reportSilentVerification();
      inVerification = false;
      return;
    }

    const checklistItem = line.match(/^-\s+\[[ xX]\]\s+(.*)/);
    if (!checklistItem) return;
    itemCount += 1;
    const text = checklistItem[1];

    const declaration = text.match(noTestsRegex);
    if (declaration) {
      sawDeclaration = true;
      const reason = declaration[1].toLowerCase();
      if (!allowed.has(reason)) {
        errors.push({
          rule: RULE,
          line: index + 1,
          message: `Phase ${currentPhase} Verification: "(no tests flip at this phase — reason: ${reason})" uses disallowed reason. Allowed: schema-only, delete, refactor, infra.`,
        });
      }
      return;
    }

    const ids = text.match(/\bT\d+\b/g);
    if (!ids) return;
    sawReference = true;
    for (const id of ids) {
      if (knownIds.has(id)) continue;
      errors.push({
        rule: RULE,
        line: index + 1,
        message: `Phase ${currentPhase} Verification references test ID \`${id}\` but no such row exists in the Test Trajectory table.`,
      });
    }
  });

  reportSilentVerification();
  return errors;
}
591
+
592
/**
 * Rule 3 (temporal-coherence): each trajectory row needs numeric
 * "Writable at" and "Passes at" phases, with Writable at ≤ Passes at.
 *
 * At most one error is produced per row; "Writable at" is checked first.
 *
 * @param {{ rows: Array<{ id: string, writableAt: number, passesAt: number }> }} trajectory
 * @returns {Array<{ rule: string, message: string }>}
 */
function validateTemporalCoherence(trajectory) {
  // Returns the problem description for one row, or null when the row is fine.
  const describeProblem = (row) => {
    if (!Number.isFinite(row.writableAt)) {
      return `Trajectory row \`${row.id}\` has invalid "Writable at" — expected "Phase N" where N is a number.`;
    }
    if (!Number.isFinite(row.passesAt)) {
      return `Trajectory row \`${row.id}\` has invalid "Passes at" — expected "Phase N" where N is a number.`;
    }
    if (row.writableAt > row.passesAt) {
      return `Trajectory row \`${row.id}\` has "Writable at" Phase ${row.writableAt} > "Passes at" Phase ${row.passesAt}. A test cannot pass before its dependencies exist.`;
    }
    return null;
  };

  return trajectory.rows
    .map(describeProblem)
    .filter((message) => message !== null)
    .map((message) => ({ rule: "temporal-coherence", message }));
}
618
+
619
/**
 * Rule 4 (deferred-completeness): every Deferred Verification entry must
 * carry a non-empty reason, would-require, and mitigation.
 *
 * @param {{ deferred: Array<{ name: string, reason: string, wouldRequire: string, mitigation: string }> }} trajectory
 * @returns {Array<{ rule: string, message: string }>} One error per incomplete
 *   entry, naming the missing fields in the order reason, would require, mitigation.
 */
function validateDeferredCompleteness(trajectory) {
  // [record property, label used in the error message]
  const requiredFields = [
    ["reason", "reason"],
    ["wouldRequire", "would require"],
    ["mitigation", "mitigation"],
  ];

  const errors = [];
  for (const row of trajectory.deferred) {
    const missing = requiredFields.filter(([prop]) => !row[prop]).map(([, label]) => label);
    if (missing.length === 0) continue;
    errors.push({
      rule: "deferred-completeness",
      message: `Deferred Verification row "${row.name}" is missing: ${missing.join(", ")}. Every deferred row requires all three fields — reason, would require, mitigation.`,
    });
  }
  return errors;
}
@@ -0,0 +1,11 @@
1
+ # Tests first within each phase
2
+
3
+ Every impl document opens with a Test Trajectory table listing every test the plan commits to, with `Writable at` and `Passes at` columns.
4
+
5
+ At the start of a phase, commit any test whose `Writable at` equals this phase — as failing. Close the phase only when every test whose `Passes at` equals this phase is passing. If a test isn't writable yet, that's fine — but its `Writable at` must name a later phase, and the reason must be structural (the test's dependencies don't exist yet), not aspirational ("we'll get to it").
6
+
7
+ If a plan has items that are genuinely not testable — LLM quality, UX judgment, paid external integrations — put them in Deferred Verification with `reason:` (why not testable), `would require:` (what would unlock a proper test), and `mitigation:` (compensating control — alert, scheduled review, downstream plan, canary). If you cannot name a mitigation, that is itself a signal: reshape the plan so the capability becomes testable, or scope it out. Untestability is a declaration, not an omission.
8
+
9
+ The test suite's pass count across phases is the plan's progress bar. Read it to know where you are.
10
+
11
+ The `check-gates` hook blocks phase advancement when any `Passes at: Phase N` trajectory row is still in `planned`, `writable`, or `written` state. This is structural enforcement — deferral is impossible by construction. See `.indusk/planning/tests-first-planning/adr.md` for the full design.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@infinitedusky/indusk-mcp",
3
- "version": "1.14.10",
3
+ "version": "1.15.0",
4
4
  "description": "InDusk development system — skills, MCP tools, and CLI for structured AI-assisted development",
5
5
  "type": "module",
6
6
  "files": [
package/skills/planner.md CHANGED
@@ -102,11 +102,21 @@ Workflow templates are in `templates/workflows/` in the package. They describe w
102
102
 
103
103
  6. **If ADR is accepted** (or brief is accepted for bugfix/refactor), write the impl. Break into phased checklists with concrete tasks. For refactor workflows, include a `## Boundary Map` section. For multi-phase impls of any type, consider adding a boundary map.
104
104
 
105
+ **Author the Test Trajectory first.** Every new impl opens with a `## Test Trajectory` table (after `## Boundary Map`, before `## Checklist`) that enumerates the tests the plan commits to. Columns: `ID | Asserts | Writable at | Passes at | State` (plus optional `Kind`, `Scope`). Walk the ADR's Decision section — for each decision, ask "what test would prove this works?" and add a row. Then walk each planned phase and ask "what becomes writable at this phase, and what flips to passing?" Every phase's Verification block references test IDs from the trajectory rather than restating the checks.
106
+
107
+ **Trajectory sizing:** 3–5 tests for a bugfix or small feature, 10–25 for a multi-phase infrastructure plan. Prefer one high-level property test over five example tests where possible. If your trajectory has more rows than lines of new code, the plan is over-specified — consolidate. If it has fewer than one row per phase, you probably have untested phases — add rows or declare `(no tests flip at this phase — reason: {schema-only|delete|refactor|infra})` in the phase's Verification.
108
+
109
+ **Declare untestable items explicitly.** If a plan includes something that genuinely cannot be tested (LLM quality, paid external integrations, UX judgment), add a `### Deferred Verification` subsection below the trajectory table. Every deferred row requires three fields: `reason:` (why not testable here), `would require:` (what would unlock a proper test), and `mitigation:` (compensating control — alert, scheduled review, downstream plan, canary). Missing any field is a write-time error. If you can't name a mitigation, that's a signal: either reshape the plan so the capability becomes testable, or scope it out.
110
+
111
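+ **Set `trajectory: required` in the impl frontmatter.** This opts the impl into trajectory validation by `validate-impl-structure.js` even before the `## Test Trajectory` section exists, so a missing section is reported as an error. The hook also validates any impl whose body already contains a `## Test Trajectory` heading; only impls with neither the flag nor the heading skip the trajectory rules (grandfathering for legacy impls). Every NEW impl should set it.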
112
+
113
+ See [`apps/indusk-docs/src/guide/test-trajectory.md`](../../indusk-docs/src/guide/test-trajectory.md) for the full user-facing guide (published in the `tests-first-planning` plan's Phase 5) and [`apps/indusk-docs/src/reference/trajectory/parser.md`](../../indusk-docs/src/reference/trajectory/parser.md) for the parser/validator API reference. The design rationale lives in `.indusk/planning/tests-first-planning/adr.md`.
114
+
105
115
  **Gate policy applies when writing impls.** Set `gate_policy` in the impl frontmatter (`strict`, `ask`, or `auto`). The `validate-impl-structure` hook enforces this at write time:
106
116
  - **`strict` / `ask`**: Every gate section (Verification, Context, Document) must have a real item — `(none needed)` and `skip-reason:` are blocked at write time. Opt-outs only happen during `/work` execution.
107
117
  - **`auto`**: Gate sections can be pre-filled with `(none needed)` or `skip-reason:` at write time.
108
118
 
109
- Default is `ask`. See the work skill "Gate Override Policy" for full details on what each mode enforces at execution time.
119
+ Default is `ask`. See the work skill "Gate Override Policy" for full details on what each mode enforces at execution time. Trajectory enforcement (the four trajectory rules) applies regardless of `gate_policy` — the rules are structural, not policy-dependent.
110
120
 
111
121
  **OTel gate is conditional on `otel.role`.** Read `.indusk/config.json` for the project's `otel.role` field (or use the `shouldEmitOtelGate(projectRoot)` helper from `apps/indusk-mcp/src/lib/config.ts`). The OTel gate fires for projects whose `otel.role` is unset or `"service"` — these are user-facing apps that produce telemetry you want to collect. **Do NOT write `#### Phase N OTel` sections** for projects whose `otel.role` is `"library"`, `"tool"`, or `"none"` — these are libraries, CLIs, or scripts that should never emit telemetry and writing OTel gates for them is friction without value. The `validate-impl-structure` and `check-gates` hooks apply the same rule. The other gates (verify, context, document) always apply regardless of `otel.role`.
112
122
 
@@ -252,6 +262,8 @@ Include code snippets in checklist items when the syntax matters — function si
252
262
  title: "{Title}"
253
263
  date: {YYYY-MM-DD}
254
264
  status: draft | approved | in-progress | completed | abandoned
265
+ trajectory: required
266
+ gate_policy: ask
255
267
  ---
256
268
 
257
269
  # {Title}
@@ -274,6 +286,22 @@ For multi-phase impls, include a boundary map showing what each phase produces a
274
286
  | Phase 1 | {exports, types, modules created} | {inputs, dependencies used} |
275
287
  | Phase 2 | {what this phase adds} | {what it needs from Phase 1} |
276
288
 
289
+ ## Test Trajectory
290
+
291
+ | ID | Asserts | Writable at | Passes at | State |
292
+ |----|---------|-------------|-----------|-------|
293
+ | T1 | {one-line assertion — what the test claims is true} | Phase 1 | Phase 1 | planned |
294
+ | T2 | {another assertion} | Phase 1 | Phase 2 | planned |
295
+
296
+ {Optional subsection — include ONLY if this plan has items that are genuinely untestable within its scope. Each row requires all three fields: reason, would require, mitigation.}
297
+
298
+ ### Deferred Verification
299
+
300
+ - **{short name of the untestable item}**
301
+ - reason: {why this cannot be tested in this plan}
302
+ - would require: {what would unlock a proper test — a new environment, a future plan, production data}
303
+ - mitigation: {compensating control — telemetry alert, scheduled review, downstream plan, canary procedure, feedback signal}
304
+
277
305
  ## Checklist
278
306
  ### Phase 1: {Name}
279
307
  - [ ] {Task — include code snippets when syntax matters}
@@ -288,7 +316,11 @@ For multi-phase impls, include a boundary map showing what each phase produces a
288
316
  - [ ] {Instrumentation check — are new code paths observable? See the OTel skill for patterns. Example items: "New endpoints have manual spans with `otel.category` and domain attributes", "Errors recorded with `recordException` + `setStatus(ERROR)` + trace-correlated log". Ask: "did this phase add endpoints, business logic, state transitions, or error paths?" If not, this section can be opted out per gate policy.}
289
317
 
290
318
  #### Phase 1 Verification
291
- - [ ] {Verification step — prove this phase works. Must be a specific runnable command with expected output, not "verify it works." See the verify skill for guidance on what checks a phase needs based on what changed. Can include trace verification if OTel was added.}
319
+ - [ ] T1 passes (`{runnable command, e.g. pnpm test}`)
320
+ - [ ] T2 flips to `written` state (failing or `.skip()`-ed until Phase 2, where it must pass)
321
+
322
+ {If a phase has no tests flipping at it, declare it explicitly — NOT silently:}
323
+ {- [ ] (no tests flip at this phase — reason: {schema-only | delete | refactor | infra})}
292
324
 
293
325
  #### Phase 1 Context
294
326
  - [ ] {Concrete CLAUDE.md edit this phase produces — e.g., "Add to Architecture: ...", "Add to Conventions: ...", "Update Current State: ...". Ask: "what does this phase change about how the project works?" If nothing, omit this section.}
@@ -66,6 +66,27 @@ Review the test files created or modified during this plan.
66
66
 
67
67
  Flag gaps but don't necessarily fix them all now — add them as items to a follow-up plan if they're significant.
68
68
 
69
+ #### Step 4a: Test Trajectory Audit
70
+
71
+ If the impl has a `## Test Trajectory` section (frontmatter `trajectory: required`), run the trajectory audit:
72
+
73
+ ```ts
74
+ // From apps/indusk-mcp/src/lib/trajectory/audit.ts
75
+ import { auditPlanAtClose } from "./audit.js";
76
+ const result = auditPlanAtClose(implBody);
77
+ // result.deferred: MitigationClassification[] — one per Deferred Verification row
78
+ // result.blocked: BlockedRowFinding[] — rows ending in `blocked` state
79
+ ```
80
+
81
+ For each finding, act on it:
82
+
83
+ - **Blocked rows** — these ended the plan unresolved. For each: either (a) fix the test and update State to `passing` as a retroactive phase-close, (b) move the row's `Passes at` to a later plan with a link, or (c) promote to Deferred Verification with a real mitigation. Do not leave blocked rows unresolved — they're a debt flag.
84
+ - **Deferred rows with vague mitigations** (`warning` non-null) — the mitigation text was too short or unclassifiable. Propose a more concrete commitment: a specific OTel metric name, a named review owner with cadence, a linked plan ID, a documented canary procedure. Update the impl.md's Deferred Verification row before archiving.
85
+ - **Deferred rows classified as `downstream-plan`** — verify the referenced plan exists and is either `accepted` or `in-progress`. If it's `draft` or missing, either accept the referenced plan now or pick a different mitigation.
86
+ - **Deferred rows classified as `telemetry-alert`** — verify the named metric actually exists in the codebase (grep for it). If the metric hasn't been wired up, the mitigation is aspirational — either wire it up now or change the mitigation.
87
+
88
+ Capture findings as a `retrospective-audit-{plan-slug}` episode in Graphiti (use `mcp__indusk__graph_capture` to dual-write to the semantic log). Include the classification, the warning (if any), and what was done. This is the signal the eval agent uses to detect mitigation drift over time.
89
+
69
90
  ### Step 5: Quality Audit
70
91
 
71
92
  Review mistakes made during this plan's implementation.
package/skills/verify.md CHANGED
@@ -42,6 +42,19 @@ When unsure, run the check. False negatives (missing a real error) are worse tha
42
42
 
43
43
  When the work skill is executing an impl and reaches verification items, run checks in this order (fastest first):
44
44
 
45
+ ### Test ID references
46
+
47
+ If the impl has a `## Test Trajectory` section and a Verification item says "T3 passes (`...`)", resolve the ID to a runnable command:
48
+
49
+ 1. Read the Trajectory table, find the row with matching ID
50
+ 2. The item's parenthetical usually contains the command directly — use that
51
+ 3. If the parenthetical is missing or generic (`pnpm test`), derive a filter from the row's `Asserts` column:
52
+    - Extract backtick-quoted code identifiers (highest priority)
53
+    - Fall back to the longest camelCase/kebab-case identifier
54
+    - Use as `-t "{keyword}"` filter with the project's test runner
55
+
56
+ Use the `resolveTestIdCommand(trajectory, id)` helper from [`apps/indusk-mcp/src/lib/trajectory/audit.ts`](/reference/trajectory/parser) for mechanical resolution. A phase cannot close (via the `check-gates` hook) while any `Passes at: Phase N` row is still in `planned`, `writable`, or `written` state — only `passing`, `skipped`, or `blocked` rows let the transition through — so verify MUST run the tests those rows name, not just the command the author typed.
57
+
45
58
  ### Check Order
46
59
 
47
60
  1. **Type check** — `tsc --noEmit` or `pnpm turbo typecheck --filter={app}` if wired
package/skills/work.md CHANGED
@@ -62,6 +62,62 @@ Implementation plans live in `.indusk/planning/{plan-name}/impl.md` as checklist
62
62
 
63
63
  A phase is not complete until all five are done. **Enforced by hooks:** if you try to check off a Phase N+1 implementation item while Phase N has unchecked gates, the edit will be blocked with a message listing what's missing. Complete the gates first.
64
64
 
65
+ ## Test Trajectory — Phase Responsibilities
66
+
67
+ If the impl has a `## Test Trajectory` table (frontmatter `trajectory: required`), the work skill takes on two additional responsibilities at phase boundaries.
68
+
69
+ ### At phase start — author writable-at-phase tests
70
+
71
+ Before starting implementation items for Phase N:
72
+
73
+ 1. Read the Test Trajectory. Collect every row with `Writable at: Phase N` whose `State` is `planned` or `writable`.
74
+ 2. For each such row: create the test file (or add the test case to an existing file) implementing the `Asserts` description. Commit it as failing. If the test cannot yet run against a compiled symbol, use `.skip()` with a comment naming the unlock phase.
75
+ 3. Update each row's `State` to `written` in the trajectory table.
76
+
77
+ These tests are the contract for the phase. They fail when the phase begins; they pass when it ends.
78
+
79
+ ### At phase close — verify passes-at-phase tests
80
+
81
+ Before advancing past Phase N (i.e., before checking the first implementation item in Phase N+1):
82
+
83
+ 1. Collect every row with `Passes at: Phase N`.
84
+ 2. Run the tests. For each row whose test now passes, update its `State` to `passing` in the trajectory table.
85
+ 3. If a test is explicitly skipped (approval test awaiting first run, platform-specific test), update to `skipped` with an inline comment on the reason.
86
+ 4. If a test regressed or its dependencies changed unexpectedly, update to `blocked` — then resolve it (fix the test, or move its `Passes at` to a later phase with a reason).
87
+ 5. The `check-gates` hook rejects the phase transition if any `Passes at: Phase N` row is still in `planned`, `writable`, or `written` state. This is structural enforcement of "deferral is impossible."
88
+
89
+ ### State lifecycle
90
+
91
+ ```
92
+ planned → writable → written → passing
93
+                              ↘ skipped (with reason)
94
+                              ↘ blocked (needs investigation)
95
+ ```
96
+
97
+ | State | Meaning |
98
+ |-------|---------|
99
+ | `planned` | Row exists in the trajectory, no file yet |
100
+ | `writable` | Dependencies exist; test can now be authored |
101
+ | `written` | Test file exists and runs (fails or is `.skip()`) |
102
+ | `passing` | Test runs and passes |
103
+ | `skipped` | Intentionally `.skip()` with a documented reason |
104
+ | `blocked` | Was writable/written, now regressed or changed; needs investigation |
105
+
106
+ ### Library helpers
107
+
108
+ The `apps/indusk-mcp/src/lib/trajectory/state-ops.ts` module provides:
109
+
110
+ - `getRowsWritableAt(trajectory, phase)` — rows to author at phase start
111
+ - `getRowsBlockingPhaseClose(trajectory, phase)` — rows preventing phase close
112
+ - `updateRowState(body, id, newState)` — rewrite the State cell in impl.md body
113
+ - `getPhaseStartNudge(body, phase)` / `getPhaseCloseNudge(body, phase)` — human-readable reminder text
114
+
115
+ Call these via `tsx` or through the InDusk MCP (once wired) rather than re-parsing the table by hand.
116
+
117
+ ### Deferred Verification audit
118
+
119
+ The retrospective skill audits Deferred Verification rows at plan close — checking that each row's `mitigation:` field was actually wired up (telemetry configured, review scheduled, downstream plan linked). The work skill only maintains the Trajectory; the retrospective skill validates completeness at the end.
120
+
65
121
  ## Gate Override Policy
66
122
 
67
123
  Gates exist to prevent skipping important work. But sometimes a gate genuinely doesn't apply. The override policy controls what happens when the agent wants to skip a gate item.