npm - @infinitedusky/indusk-mcp - Versions diffs - 1.24.4 → 1.25.0 - Mend

@infinitedusky/indusk-mcp 1.24.4 → 1.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/lib/eval/findings.js +4 -7
package/dist/lib/eval/persistent-evaluator.js +6 -2
package/dist/lib/eval/scorecard-extractor.d.ts +20 -0
package/dist/lib/eval/scorecard-extractor.js +20 -0
package/dist/lib/trajectory/validator.d.ts +24 -11
package/dist/lib/trajectory/validator.js +20 -15
package/hooks/validate-impl-structure.js +12 -7
package/package.json +1 -1

package/dist/lib/eval/findings.js CHANGED Viewed

@@ -6,6 +6,7 @@
  */
 import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
 import { dirname, join } from "node:path";
+import { getScorecardQuestions } from "./scorecard-extractor.js";
 function getFindingsPath(projectRoot) {
     return join(projectRoot, ".indusk", "eval", "findings.json");
 }
@@ -47,13 +48,9 @@ export function ingestScorecard(projectRoot, scorecard) {
     const findings = readFindings(projectRoot);
     let added = 0;
     // Defensive: the model occasionally returns a scorecard with a missing,
-    // null, or non-array `questions` field (it invents its own schema). The
-    // outer wrapper has already written the (wrong-shape) scorecard to
-    // results.log by this point — if we throw here, a misleading `error: true`
-    // entry lands right after, falsely implying the scorecard was lost.
-    // Tolerate the malformed shape silently; downstream consumers can still
-    // see the raw scorecard in results.log.
-    const questions = Array.isArray(scorecard.questions) ? scorecard.questions : [];
+    // null, or non-array `questions` field (it invents its own schema). See
+    // `scorecard-extractor.ts` getScorecardQuestions for the central guard.
+    const questions = getScorecardQuestions(scorecard);
     for (const q of questions) {
         if (q.answer === "yes")
             continue; // no finding for passing questions

package/dist/lib/eval/persistent-evaluator.js CHANGED Viewed

@@ -16,7 +16,7 @@ import { EvalLogWriter } from "./log-writer.js";
 import { initEvalOtel, initEvalOtelLogs, logEvalContent, shutdownEvalOtel, withSpan, } from "./otel.js";
 import { buildEvaluatorPrompt } from "./prompt-builder.js";
 import { V1_RUBRIC } from "./rubric.js";
-import { extractScorecardJson, formatParseError } from "./scorecard-extractor.js";
+import { extractScorecardJson, formatParseError, getScorecardQuestions, } from "./scorecard-extractor.js";
 function getSessionPath(projectRoot) {
     return join(projectRoot, ".indusk", "eval", "evaluator-session.json");
 }
@@ -263,7 +263,11 @@ Output ONLY the JSON scorecard as before — no commentary.`;
                 rootSpan.setAttribute("scorecard.output_tokens", scorecard.usage.outputTokens);
             }
             const answerCounts = { yes: 0, no: 0, partial: 0 };
-            for (const q of scorecard.questions ?? []) {
+            // Use the central guard from scorecard-extractor — `?? []` here was
+            // the bug: it only catches null/undefined, not non-array shapes like
+            // `{}` (which the model has been observed to return — e.g. on Numero
+            // 2026-04-19 19:54 with `questions: { conventions: {...} }` keyed by id).
+            for (const q of getScorecardQuestions(scorecard)) {
                 if (q.answer in answerCounts)
                     answerCounts[q.answer]++;
             }

package/dist/lib/eval/scorecard-extractor.d.ts CHANGED Viewed

@@ -26,6 +26,26 @@
  * This function only locates the JSON; it doesn't validate it.
  */
 export declare function extractScorecardJson(text: string): string | null;
+/**
+ * Defensive accessor for `scorecard.questions`. Returns the array if the
+ * field is array-shaped; returns `[]` for any other shape (missing, null,
+ * boolean, number, object-keyed-by-id, etc.). The model occasionally invents
+ * its own scorecard schema and puts non-arrays here — the wrapper must not
+ * crash when that happens.
+ *
+ * Use this everywhere the wrapper iterates `scorecard.questions`. Never
+ * iterate the field directly (with `?? []` or otherwise) — `?? []` only
+ * catches null/undefined; it lets through falsy-but-not-nullish values
+ * (`false`, `0`, `""`) and truthy non-array objects such as `{}`.
+ *
+ * Surfaced bugs this prevents:
+ *   - `for (const q of scorecard.questions)` when `questions` is missing
+ *   - `for (const q of scorecard.questions ?? [])` when `questions` is `{}`
+ *     (e.g., model returned `questions: { conventions: {...} }` keyed by id)
+ */
+export declare function getScorecardQuestions<T>(scorecard: {
+    questions?: unknown;
+}): T[];
 /**
  * Build an error message for the case where scorecard parsing failed.
  * Includes the underlying error and a snippet of the raw stdout so post-

package/dist/lib/eval/scorecard-extractor.js CHANGED Viewed

@@ -118,6 +118,26 @@ function findFirstBalancedJsonObject(text) {
     // Walked to end of string without closing the outermost brace.
     return null;
 }
+/**
+ * Defensive accessor for `scorecard.questions`. Returns the array if the
+ * field is array-shaped; returns `[]` for any other shape (missing, null,
+ * boolean, number, object-keyed-by-id, etc.). The model occasionally invents
+ * its own scorecard schema and puts non-arrays here — the wrapper must not
+ * crash when that happens.
+ *
+ * Use this everywhere the wrapper iterates `scorecard.questions`. Never
+ * iterate the field directly (with `?? []` or otherwise) — `?? []` only
+ * catches null/undefined; it lets through falsy-but-not-nullish values
+ * (`false`, `0`, `""`) and truthy non-array objects such as `{}`.
+ *
+ * Surfaced bugs this prevents:
+ *   - `for (const q of scorecard.questions)` when `questions` is missing
+ *   - `for (const q of scorecard.questions ?? [])` when `questions` is `{}`
+ *     (e.g., model returned `questions: { conventions: {...} }` keyed by id)
+ */
+export function getScorecardQuestions(scorecard) {
+    return Array.isArray(scorecard.questions) ? scorecard.questions : [];
+}
 /**
  * Build an error message for the case where scorecard parsing failed.
  * Includes the underlying error and a snippet of the raw stdout so post-

package/dist/lib/trajectory/validator.d.ts CHANGED Viewed

@@ -14,6 +14,15 @@ export interface ValidateTrajectoryOptions {
      * check at apps/indusk-mcp/hooks/validate-impl-structure.js.
      */
     rationaleRequired?: boolean;
+    /**
+     * The phase number that counts as "writable today against the current stack."
+     * Trajectory rows whose `Writable at` is ≤ baseline are exempt from the
+     * rationale-completeness rule. Defaults to 0 (the original behavior:
+     * Phase 0 rows are exempt). Plans where Phase 1 IS the enabling work
+     * (refactors, schema migrations, scaffolding) set this to 1 so rows
+     * authored at Phase 1 don't require justification entries.
+     */
+    rationaleBaseline?: number;
 }
 /**
  * Rule 1: Every impl document must have a `## Test Trajectory` section.
@@ -39,23 +48,27 @@ export declare function validateTemporalCoherence(trajectory: Trajectory): Valid
 export declare function validateDeferredCompleteness(trajectory: Trajectory): ValidationError[];
 /**
  * Rule 5: When the impl frontmatter sets `rationale: required`, every
- * trajectory row whose `Writable at` is later than Phase 0 (the pre-plan
- * baseline) must have an entry in the `### Trajectory Rationale` subsection.
+ * trajectory row whose `Writable at` is later than the configured baseline
+ * (default Phase 0) must have an entry in the `### Trajectory Rationale`
+ * subsection.
  *
- * Phase 0 rows do NOT need a rationale — Phase 0 means "writable today
- * against the current stack, before any plan implementation," which is
- * the default and needs no justification. We only require rationale when
- * a test will be authored after some plan code lands (Writable at: Phase 1+).
+ * The baseline names the phase that counts as "writable today against the
+ * current stack" for this plan. Default 0 — Phase 0 rows are exempt because
+ * they're writable before any plan code lands. Plans where Phase 1 IS the
+ * enabling work (refactors, schema migrations, scaffolding) can declare
+ * `rationale_baseline: 1` in frontmatter so Phase 1 rows are exempt too.
  *
- * If no row needs a rationale (every row is Phase 0), the subsection itself
- * is optional. If any row is Phase 1+, the subsection must exist and contain
- * an entry for every Phase 1+ row. Stale entries (entries for IDs not in the
- * trajectory) are always flagged.
+ * If no row needs a rationale (every row is ≤ baseline), the subsection
+ * itself is optional. If any row is later than baseline, the subsection
+ * must exist and contain an entry for every such row. Stale entries
+ * (entries for IDs not in the trajectory) are always flagged.
  *
  * Mirrors `validateRationaleCompleteness` in
  * `.claude/hooks/validate-impl-structure.js`.
  */
-export declare function validateRationaleCompleteness(body: string, trajectory: Trajectory): ValidationError[];
+export declare function validateRationaleCompleteness(body: string, trajectory: Trajectory, options?: {
+    baseline?: number;
+}): ValidationError[];
 /**
  * Run all trajectory validation rules against an impl body. The body is the
  * markdown content after the frontmatter — pass the output of `gray-matter`

package/dist/lib/trajectory/validator.js CHANGED Viewed

@@ -185,31 +185,34 @@ export function validateDeferredCompleteness(trajectory) {
 }
 /**
  * Rule 5: When the impl frontmatter sets `rationale: required`, every
- * trajectory row whose `Writable at` is later than Phase 0 (the pre-plan
- * baseline) must have an entry in the `### Trajectory Rationale` subsection.
+ * trajectory row whose `Writable at` is later than the configured baseline
+ * (default Phase 0) must have an entry in the `### Trajectory Rationale`
+ * subsection.
  *
- * Phase 0 rows do NOT need a rationale — Phase 0 means "writable today
- * against the current stack, before any plan implementation," which is
- * the default and needs no justification. We only require rationale when
- * a test will be authored after some plan code lands (Writable at: Phase 1+).
+ * The baseline names the phase that counts as "writable today against the
+ * current stack" for this plan. Default 0 — Phase 0 rows are exempt because
+ * they're writable before any plan code lands. Plans where Phase 1 IS the
+ * enabling work (refactors, schema migrations, scaffolding) can declare
+ * `rationale_baseline: 1` in frontmatter so Phase 1 rows are exempt too.
  *
- * If no row needs a rationale (every row is Phase 0), the subsection itself
- * is optional. If any row is Phase 1+, the subsection must exist and contain
- * an entry for every Phase 1+ row. Stale entries (entries for IDs not in the
- * trajectory) are always flagged.
+ * If no row needs a rationale (every row is ≤ baseline), the subsection
+ * itself is optional. If any row is later than baseline, the subsection
+ * must exist and contain an entry for every such row. Stale entries
+ * (entries for IDs not in the trajectory) are always flagged.
  *
  * Mirrors `validateRationaleCompleteness` in
  * `.claude/hooks/validate-impl-structure.js`.
  */
-export function validateRationaleCompleteness(body, trajectory) {
+export function validateRationaleCompleteness(body, trajectory, options = {}) {
     const errors = [];
-    const rowsNeedingRationale = trajectory.rows.filter((r) => Number.isFinite(r.writableAt) && r.writableAt > 0);
+    const baseline = Number.isFinite(options.baseline) ? Number(options.baseline) : 0;
+    const rowsNeedingRationale = trajectory.rows.filter((r) => Number.isFinite(r.writableAt) && r.writableAt > baseline);
     const hasSubsection = /^###\s+Trajectory Rationale\b/m.test(body);
     const rationaleIds = hasSubsection ? parseRationaleBlock(body) : new Set();
     if (rowsNeedingRationale.length > 0 && !hasSubsection) {
         errors.push({
             rule: "rationale-completeness",
-            message: `\`rationale: required\` is set and ${rowsNeedingRationale.length} trajectory row(s) have \`Writable at\` later than Phase 0, but the impl is missing the \`### Trajectory Rationale\` subsection. Phase 0 rows don't need rationale; rows where authoring waits on plan code do — add an entry for ${rowsNeedingRationale.map((r) => r.id).join(", ")}.`,
+            message: `\`rationale: required\` is set and ${rowsNeedingRationale.length} trajectory row(s) have \`Writable at\` later than Phase ${baseline}, but the impl is missing the \`### Trajectory Rationale\` subsection. Rows at or below the baseline don't need rationale; rows where authoring waits on later plan code do — add an entry for ${rowsNeedingRationale.map((r) => r.id).join(", ")}.`,
         });
         // Even without the subsection, fall through to also check for stale entries
         // (there are none in this case, but the structure is symmetric).
@@ -222,7 +225,7 @@ export function validateRationaleCompleteness(body, trajectory) {
     if (missing.length > 0 && hasSubsection) {
         errors.push({
             rule: "rationale-completeness",
-            message: `Trajectory rows with \`Writable at\` later than Phase 0 missing from \`### Trajectory Rationale\`: ${missing.join(", ")}. Every row whose authoring waits on plan code needs a \`- **TN** \`Writable at: Phase N\` — {reason}\` entry. Phase 0 rows (writable today against the current stack) do not need rationale.`,
+            message: `Trajectory rows with \`Writable at\` later than Phase ${baseline} missing from \`### Trajectory Rationale\`: ${missing.join(", ")}. Every row whose authoring waits on later plan code needs a \`- **TN** \`Writable at: Phase N\` — {reason}\` entry. Rows at or below the baseline (Phase ${baseline}) do not need rationale.`,
         });
     }
     const knownIds = new Set(trajectory.rows.map((r) => r.id));
@@ -278,7 +281,9 @@ export function validateTrajectory(body, options = {}) {
         ...validateDeferredCompleteness(trajectory),
     ];
     if (options.rationaleRequired) {
-        errors.push(...validateRationaleCompleteness(body, trajectory));
+        errors.push(...validateRationaleCompleteness(body, trajectory, {
+            baseline: options.rationaleBaseline,
+        }));
     }
     return errors;
 }

package/hooks/validate-impl-structure.js CHANGED Viewed

@@ -312,9 +312,13 @@ const trajectoryRequiredFrontmatter = /trajectory:\s*required/.test(frontmatter)
 const hasTrajectoryHeading = /^##\s+Test Trajectory\b/m.test(body);
 const trajectoryValidationEnabled = trajectoryRequiredFrontmatter || hasTrajectoryHeading;
 const rationaleRequiredFrontmatter = /rationale:\s*required/.test(frontmatter);
+const rationaleBaselineMatch = frontmatter.match(/rationale_baseline:\s*(\d+)/);
+const rationaleBaseline = rationaleBaselineMatch
+	? Number.parseInt(rationaleBaselineMatch[1], 10)
+	: 0;
 if (trajectoryValidationEnabled) {
-	const trajectoryErrors = validateTrajectory(body, rationaleRequiredFrontmatter);
+	const trajectoryErrors = validateTrajectory(body, rationaleRequiredFrontmatter, rationaleBaseline);
 	if (trajectoryErrors.length > 0) {
 		process.stderr.write(
 			`Test Trajectory validation failed (policy: ${gatePolicy}):\n${trajectoryErrors.map((e) => `  [${e.rule}] ${e.message}`).join("\n")}\n\nSee .indusk/planning/tests-first-planning/adr.md Sections 3-6 for the Test Trajectory shape and validator rules.\n`,
@@ -347,7 +351,7 @@ process.exit(0);
 // apps/indusk-mcp/src/lib/trajectory/validator.ts and parser.ts)
 // ------------------------------------------------------------------
-function validateTrajectory(implBody, rationaleRequired) {
+function validateTrajectory(implBody, rationaleRequired, rationaleBaseline = 0) {
 	const errors = [];
 	// Rule 1: trajectory presence
@@ -365,7 +369,7 @@ function validateTrajectory(implBody, rationaleRequired) {
 	errors.push(...validateTemporalCoherence(trajectory));
 	errors.push(...validateDeferredCompleteness(trajectory));
 	if (rationaleRequired) {
-		errors.push(...validateRationaleCompleteness(implBody, trajectory));
+		errors.push(...validateRationaleCompleteness(implBody, trajectory, rationaleBaseline));
 	}
 	return errors;
 }
@@ -646,11 +650,12 @@ function validateDeferredCompleteness(trajectory) {
 // Read the entries together: shared weak excuses signal over-sequencing.
 // ------------------------------------------------------------------
-function validateRationaleCompleteness(implBody, trajectory) {
+function validateRationaleCompleteness(implBody, trajectory, baseline = 0) {
 	const errors = [];
+	const baselineNum = Number.isFinite(baseline) ? Number(baseline) : 0;
 	const rowsNeedingRationale = trajectory.rows.filter(
-		(r) => Number.isFinite(r.writableAt) && r.writableAt > 0,
+		(r) => Number.isFinite(r.writableAt) && r.writableAt > baselineNum,
 	);
 	const hasSubsection = /^###\s+Trajectory Rationale\b/m.test(implBody);
 	const rationaleIds = hasSubsection ? parseRationaleBlock(implBody) : new Set();
@@ -658,7 +663,7 @@ function validateRationaleCompleteness(implBody, trajectory) {
 	if (rowsNeedingRationale.length > 0 && !hasSubsection) {
 		errors.push({
 			rule: "rationale-completeness",
-			message: `\`rationale: required\` is set and ${rowsNeedingRationale.length} trajectory row(s) have \`Writable at\` later than Phase 0, but the impl is missing the \`### Trajectory Rationale\` subsection. Phase 0 rows don't need rationale; rows where authoring waits on plan code do — add an entry for ${rowsNeedingRationale.map((r) => r.id).join(", ")}.`,
+			message: `\`rationale: required\` is set and ${rowsNeedingRationale.length} trajectory row(s) have \`Writable at\` later than Phase ${baselineNum}, but the impl is missing the \`### Trajectory Rationale\` subsection. Rows at or below the baseline don't need rationale; rows where authoring waits on later plan code do — add an entry for ${rowsNeedingRationale.map((r) => r.id).join(", ")}.`,
 		});
 	}
@@ -670,7 +675,7 @@ function validateRationaleCompleteness(implBody, trajectory) {
 	if (missing.length > 0 && hasSubsection) {
 		errors.push({
 			rule: "rationale-completeness",
-			message: `Trajectory rows with \`Writable at\` later than Phase 0 missing from \`### Trajectory Rationale\`: ${missing.join(", ")}. Every row whose authoring waits on plan code needs a \`- **TN** \`Writable at: Phase N\` — {reason}\` entry. Phase 0 rows (writable today against the current stack) do not need rationale.`,
+			message: `Trajectory rows with \`Writable at\` later than Phase ${baselineNum} missing from \`### Trajectory Rationale\`: ${missing.join(", ")}. Every row whose authoring waits on later plan code needs a \`- **TN** \`Writable at: Phase N\` — {reason}\` entry. Rows at or below the baseline (Phase ${baselineNum}) do not need rationale.`,
 		});
 	}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@infinitedusky/indusk-mcp",
-	"version": "1.24.4",
+	"version": "1.25.0",
 	"description": "InDusk development system — skills, MCP tools, and CLI for structured AI-assisted development",
 	"type": "module",
 	"files": [