npm - @nathapp/nax - Versions diffs - 0.68.2 → 0.68.4 - Mend

@nathapp/nax 0.68.2 → 0.68.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/dist/nax.js +310 -34
package/package.json +1 -1

package/dist/nax.js CHANGED Viewed

@@ -17203,7 +17203,8 @@ var init_schemas_review = __esm(() => {
       requote: true,
       maxRequotes: 5
     }),
-    excludePatterns: exports_external.array(exports_external.string()).optional()
+    excludePatterns: exports_external.array(exports_external.string()).optional(),
+    demandInspectionTrail: exports_external.boolean().default(true)
   });
   AdversarialReviewConfigSchema = exports_external.object({
     model: ConfiguredModelSchema.default("balanced"),
@@ -17214,6 +17215,7 @@ var init_schemas_review = __esm(() => {
     parallel: exports_external.boolean().default(false),
     maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2),
     acRegroundOnDrop: exports_external.boolean().default(true),
+    demandInspectionTrail: exports_external.boolean().default(true),
     substantiation: exports_external.object({
       requote: exports_external.boolean().default(true),
       maxRequotes: exports_external.number().int().min(0).default(5)
@@ -17428,6 +17430,7 @@ var init_schemas3 = __esm(() => {
         resetRefOnRerun: false,
         rules: [],
         timeoutMs: 600000,
+        demandInspectionTrail: true,
         substantiation: {
           requote: true,
           maxRequotes: 5
@@ -17451,6 +17454,7 @@ var init_schemas3 = __esm(() => {
         parallel: false,
         maxConcurrentSessions: 2,
         acRegroundOnDrop: true,
+        demandInspectionTrail: true,
         substantiation: {
           requote: true,
           maxRequotes: 5
@@ -19178,7 +19182,7 @@ GOOD (write ACs like these):
 When a spec is provided, these rules govern acceptance criteria generation:
-1. **Preserve spec ACs.** Every acceptance criterion stated in the spec must appear in \`acceptanceCriteria\`, verbatim or lightly rephrased for testability. Never silently drop a spec AC.
+1. **Preserve spec ACs.** Every acceptance criterion stated in the spec must appear in \`acceptanceCriteria\`. Never silently drop a spec AC. ACs the spec tags \`[verbatim]\` (typically executable greps, file-existence checks, regex/count assertions, or architectural invariants) MUST be copied **character-for-character** into an \`acceptanceCriteria\` entry \u2014 preserve every backtick-quoted command, file path, regex, and count exactly; do not paraphrase, retag, split, or move them into a description. Untagged ACs may be lightly rephrased for testability, but must retain the same assertion and concrete identifiers.
 2. **Do not invent spec ACs.** If you identify useful behavioral edge cases or negative paths that the spec did not explicitly list, place them in \`suggestedCriteria\` (a string array on the same story object) \u2014 never in \`acceptanceCriteria\`. These go through a separate hardening pass.
 3. **Respect story scope.** Each story's criteria must only cover what the spec says for that story. Do not assign criteria that belong to a different story's scope (wrong feature area, wrong file, wrong dependency chain).
 4. **\`suggestedCriteria\` format.** Each element must be a plain behavioral assertion \u2014 an observable output, return value, state change, or error condition that a test can assert. Never include implementation details (imports, internal structure), design suggestions, or vague descriptions.
@@ -27489,6 +27493,75 @@ function isStalled(prd) {
   return remaining.every((s) => s.status === "blocked" || s.status === "failed" || s.status === "paused" || s.status === "regression-failed" || s.dependencies.some((dep) => blockedIds.has(dep)));
 }
+// src/prd/verbatim-fidelity.ts
+function normalizeWs(text) { // Collapse each whitespace run to one space and trim both ends.
+  return text.replace(/\s+/g, " ").trim();
+}
+function stripBackticks(text) { // Remove every backtick character from text.
+  return text.replace(/`/g, "");
+}
+function canonical(text) { // Comparison form of text: backticks removed first, then whitespace normalized.
+  return normalizeWs(stripBackticks(text));
+}
+function leadingTagGroup(line) { // Leading run of [tag] markers captured by LEADING_TAG_GROUP, or null when the line has none.
+  return line.match(LEADING_TAG_GROUP)?.[1] ?? null;
+}
+function isVerbatimBullet(line) { // True when the line's leading tag group contains [verbatim] (case-insensitive).
+  const tags = leadingTagGroup(line);
+  return tags !== null && /\[verbatim\]/i.test(tags);
+}
+function isContinuation(line) { // True when line can extend the preceding bullet's text.
+  if (line.trim().length === 0) // a blank line ends the block
+    return false;
+  if (HEADING.test(line)) // so does a heading
+    return false;
+  if (LIST_ITEM_START.test(line)) // and so does the start of another list item
+    return false;
+  return true;
+}
+function stripTagPrefix(block) { // Drop the LEADING_TAG_GROUP match (optional list marker plus the [tag] run) from the front of block.
+  return block.replace(LEADING_TAG_GROUP, "");
+}
+function extractVerbatimAcs(specContent) { // Collect each [verbatim]-tagged line plus its continuation lines as one space-joined string.
+  const lines = specContent.split(`
+`); // NOTE(review): a non-string specContent throws on .split — confirm every caller passes a string
+  const blocks = [];
+  for (let i = 0;i < lines.length; i++) {
+    if (!isVerbatimBullet(lines[i]))
+      continue;
+    const parts = [lines[i].trim()]; // the tagged line itself, tags still attached
+    let j = i + 1;
+    while (j < lines.length && isContinuation(lines[j])) { // absorb lines until a blank line, heading, or new list item
+      parts.push(lines[j].trim());
+      j += 1;
+    }
+    blocks.push(parts.join(" "));
+    i = j - 1; // the loop's i++ then resumes at the first line after this block
+  }
+  return blocks;
+}
+function prdAcPayloads(prd) { // Flatten every story's acceptanceCriteria into one list of canonical() strings; a missing userStories or acceptanceCriteria counts as empty.
+  return (prd.userStories ?? []).flatMap((story) => (story.acceptanceCriteria ?? []).map(canonical));
+}
+function findMissingVerbatimAcs(specContent, prd) { // Return the [verbatim] spec blocks whose canonical payload is not a substring of any canonical PRD acceptance criterion.
+  const prdAcs = prdAcPayloads(prd);
+  const missing = [];
+  for (const block of extractVerbatimAcs(specContent)) {
+    const payload = canonical(stripTagPrefix(block)); // compare without the tag prefix, backticks, or whitespace differences
+    if (payload.length === 0) // tags with no text after them: nothing to look for
+      continue;
+    if (!prdAcs.some((ac) => ac.includes(payload)))
+      missing.push(block); // report the block as extracted, tags included
+  }
+  return missing;
+}
+var LEADING_TAG_GROUP, LIST_ITEM_START, HEADING; // declared here, assigned inside the __esm callback below
+var init_verbatim_fidelity = __esm(() => { // presumably the bundler's lazy module initializer — __esm is defined outside this view
+  LEADING_TAG_GROUP = /^\s*(?:[-*]|\d+\.)?\s*((?:\[[a-z][a-z-]*\]\s*)+)/i; // optional list marker, then one or more [tag] groups (group 1)
+  LIST_ITEM_START = /^\s*(?:[-*]|\d+\.)\s/; // "-", "*" or "N." marker followed by whitespace
+  HEADING = /^\s*#/; // first non-space character is #
+});
 // src/prd/validate.ts
 function validateStoryId(id) {
   if (!id || id.length === 0) {
@@ -27893,6 +27966,7 @@ function markStoryPaused(prd, storyId) {
 var PRD_MAX_FILE_SIZE;
 var init_prd = __esm(() => {
   init_json_file();
+  init_verbatim_fidelity();
   init_schema2();
   PRD_MAX_FILE_SIZE = 5 * 1024 * 1024;
 });
@@ -30897,6 +30971,7 @@ Flag issues only when you have confirmed:
 Do NOT flag: style issues, naming conventions, import ordering, file length, or anything lint handles.`, SEMANTIC_OUTPUT_SCHEMA = `Respond with JSON only \u2014 no explanation text before or after:
 {
   "passed": boolean,
+  "inspectedFiles": ["relative/path/you/actually/opened.ts"],
   "findings": [
     {
       "severity": "error" | "warning" | "info" | "unverifiable",
@@ -30920,7 +30995,8 @@ Notes:
 - \`acIndex\` is required when severity is "error" (1-based, into the Acceptance Criteria list above).
 - \`acQuote\` is optional advisory metadata for human auditors \u2014 not validated.
 - Omit both for "warning", "info", "unverifiable".
-If all ACs are correctly implemented, respond with { "passed": true, "findings": [] }.`, ReviewPromptBuilder;
+- \`inspectedFiles\` must list the relative paths you actually opened while reviewing. A \`passed:true\` verdict with an empty or absent \`inspectedFiles\` is invalid \u2014 walk each AC against the real files before passing.
+If all ACs are correctly implemented after inspecting the code, respond with { "passed": true, "inspectedFiles": ["..."], "findings": [] }.`, ReviewPromptBuilder;
 var init_review_builder = __esm(() => {
   SEMANTIC_ROLE = "You are a semantic code reviewer with access to the repository files. " + "Your job is to walk each acceptance criterion (AC) and judge whether the production code fulfills it \u2014 fully, partially, or not at all. " + "Test coverage gaps and convention/lint issues are out of scope \u2014 adversarial review and lint/typecheck handle those.";
   ReviewPromptBuilder = class ReviewPromptBuilder {
@@ -30973,6 +31049,16 @@ Respond with a condensed summary:
 - Keep \`verifiedBy\` for every finding. If \`verifiedBy.observed\` is long, abbreviate it to one line \u2014 never drop the field.
 Output ONLY a complete, valid JSON object. It must start with { and end with }.
 Schema: {"passed": boolean, "findings": [{"severity": string, "category": string, "file": string, "line": number, "issue": string, "suggestion": string, "verifiedBy": {"command": string, "file": string, "line": number, "observed": string}}]}`;
+    }
+    static demandInspection() { // Follow-up prompt for a passed:true review with no findings and no inspectedFiles: asks the reviewer to open the changed files and re-issue the JSON verdict.
+      return `Your previous review returned \`passed:true\` with no findings and an empty (or absent) \`inspectedFiles\` list. That means you did not open any of the changed files \u2014 a verdict reached without reading the code is not valid.
+Use your file-reading tools NOW to open the changed files and walk each acceptance criterion against the real implementation. Then re-issue your verdict as the same JSON object.
+Rules:
+- Populate \`inspectedFiles\` with the relative paths you actually opened.
+- If, after genuinely inspecting the code, every AC is satisfied, you may still return \`passed:true\` \u2014 but \`inspectedFiles\` must list the files you read.
+- Return ONLY the JSON object \u2014 no markdown fences, no explanation.`;
     }
     static requoteVerbatim(opts) {
       const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
@@ -31147,6 +31233,13 @@ What new exported units lack corresponding test files?
 - New public functions that only appear in implementation, not in tests
 - Acceptance criteria that touch a code path with no test coverage
+**Placeholder / tautological tests (blocking).** A test that exists but verifies nothing is worse than a missing test \u2014 it manufactures a false green. Treat the following as a **\`severity:"error"\`, \`category:"test-gap"\`** finding whenever the fake test is the only coverage for an acceptance criterion:
+- Bodies that always pass: \`expect(true).toBe(true)\`, \`expect(x).toBe(x)\`, \`expect(1).toBe(1)\`, an empty test body, or \`assert(true)\`.
+- Tests skipped/disabled (\`it.skip\`, \`test.todo\`, \`xit\`, commented-out assertions) that an AC depends on.
+- Assertions that never exercise the implementation (e.g. asserting on a literal, not on a value the production code produced).
+For each such finding: set \`acIndex\` to the AC the fake test purports to cover, \`acQuote\` to a verbatim substring of that AC, and \`verifiedBy.observed\` to the placeholder line itself (e.g. \`expect(true).toBe(true)\`). Do **not** downgrade these to \`warning\` \u2014 a green suite built on placeholder assertions is a failing implementation with hidden evidence.
 ### 5. Convention Breaks
 What pattern exists elsewhere that this code does not follow?
 - Logger calls missing \`storyId\` as first key in data object
@@ -31166,6 +31259,7 @@ Respond with ONLY a JSON object \u2014 no preamble, no explanation outside the J
 \`\`\`json
 {
   "passed": true | false,
+  "inspectedFiles": ["relative/path/you/actually/opened.ts"],
   "findings": [
     {
       "severity": "error" | "warning" | "info" | "unverifiable",
@@ -31187,6 +31281,8 @@ Respond with ONLY a JSON object \u2014 no preamble, no explanation outside the J
 }
 \`\`\`
+**No rubber-stamping:** \`inspectedFiles\` must list the relative paths you actually opened with your tools while reviewing. A \`passed:true\` verdict with an empty or absent \`inspectedFiles\` is invalid \u2014 it means you never looked at the code. Fetch the diff and open the changed files before forming any verdict.
 Severity guide:
 - \`"error"\`: confident this will cause real failure or regression
 - \`"warning"\`: fragile or incomplete but may ship without immediate breakage
@@ -31216,6 +31312,8 @@ Worked example:
 **Convention / coding-standard violations almost always belong as \`"info"\`** unless an AC specifically names the convention or the symbol it concerns.
+**Exception \u2014 the trap does NOT apply to \`category:"test-gap"\` findings.** A fake/placeholder/missing test is the *absence* of verification for an AC's behaviour; it cannot name a symbol that is present in the (worthless) test file, because the defect is precisely that the symbol-under-test is never exercised. A \`test-gap\` finding is grounded by the AC whose behaviour goes unverified \u2014 cite that AC's \`acIndex\` and a verbatim \`acQuote\` substring from it, and keep severity \`"error"\`. The symbol-naming requirement is waived for this category.
 **Scope constraints are not Acceptance Criteria:**
 The story description may contain a "Scope" section with "In:" and "Out:" bullets. These are implementation guidelines, not ACs. A finding about code changed outside the stated scope (e.g., a file listed under "Out:") cannot cite a scope constraint as its \`acQuote\`/\`acIndex\` because scope text is not in the numbered AC list. Emit scope-violation findings as \`"warning"\` \u2014 never \`"error"\`. Never use \`acIndex: 0\`; \`acIndex\` is 1-based (first AC bullet = 1).
@@ -31304,6 +31402,16 @@ Rules:
 - observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
 - If after reading the file you cannot find anything that proves the claim, set observed to "".
 - Do not return a full review. Do not include markdown fences or explanation.`;
+    }
+    static demandInspection() { // Follow-up prompt for a passed:true review with no findings and no inspectedFiles: asks the reviewer to fetch the diff, open the changed files, and re-issue the JSON verdict.
+      return `Your previous review returned \`passed:true\` with no findings and an empty (or absent) \`inspectedFiles\` list. That means you did not open any of the changed files \u2014 a verdict reached without inspecting the code is not valid.
+Use your git and file-reading tools NOW to fetch the diff and open the changed files for this story. Then re-issue your verdict as the same JSON object.
+Rules:
+- Populate \`inspectedFiles\` with the relative paths you actually opened.
+- If, after genuinely inspecting the code, there is truly nothing to flag, you may still return \`passed:true\` \u2014 but \`inspectedFiles\` must list the files you read.
+- Return ONLY the JSON object \u2014 no markdown fences, no explanation.`;
     }
     static DROP_CODE_MESSAGES_QUOTE = {
       missing_ac_quote: "no `acQuote` field was provided \u2014 every blocking finding must cite an AC",
@@ -31683,7 +31791,7 @@ var init_acceptance_builder = __esm(() => {
 });
 // src/review/ac-quote-validator.ts
-function normalizeWs(s) {
+function normalizeWs2(s) { // Collapse each whitespace run to one space and trim both ends; same body as normalizeWs in src/prd/verbatim-fidelity.ts.
   return s.replace(/\s+/g, " ").trim();
 }
 function stripMarkdownInline(s) {
@@ -31718,11 +31826,14 @@ function validateAcQuote(finding, acceptanceCriteria) {
   if (typeof acIndex !== "number" || acIndex < 1 || acIndex > acceptanceCriteria.length) {
     return { valid: false, code: "ac_index_out_of_range" };
   }
-  const acText = normalizeWs(stripMarkdownInline(acceptanceCriteria[acIndex - 1]));
-  const normalizedQuote = normalizeWs(stripMarkdownInline(acQuote));
+  const acText = normalizeWs2(stripMarkdownInline(acceptanceCriteria[acIndex - 1]));
+  const normalizedQuote = normalizeWs2(stripMarkdownInline(acQuote));
   if (!acText.toLowerCase().includes(normalizedQuote.toLowerCase())) {
     return { valid: false, code: "ac_quote_not_substring" };
   }
+  if (finding.category === "test-gap") {
+    return { valid: true };
+  }
   const keywords = extractLocusKeywords(finding);
   if (keywords.length === 0) {
     return { valid: false, code: "ac_quote_does_not_constrain_locus" };
@@ -32041,6 +32152,10 @@ var init_semantic_evidence = __esm(() => {
 });
 // src/review/finding-filters.ts
+function hasInspectionTrail(raw) { // True when raw.inspectedFiles is an array holding at least one non-blank string.
+  const files = raw?.inspectedFiles; // tolerates a null/undefined raw
+  return Array.isArray(files) && files.some((f) => typeof f === "string" && f.trim().length > 0);
+}
 async function substantiateAdversarialFindings(opts) {
   const { findings, workdir, storyId, blockingThreshold } = opts;
   return Promise.all(findings.map(async (finding) => {
@@ -32072,9 +32187,9 @@ function parseRequoteResponse(output) {
   const parsed = tryParseLLMJson(output);
   if (!isRecord(parsed))
     return null;
-  const canonical = extractCanonical(parsed);
-  if (canonical)
-    return canonical;
+  const canonical2 = extractCanonical(parsed);
+  if (canonical2)
+    return canonical2;
   const findings = parsed.findings;
   if (!Array.isArray(findings) || findings.length !== 1)
     return null;
@@ -32268,6 +32383,23 @@ async function performAdversarialReground(turn, firstParsed, drops, ctx) {
     output: withRepromptMarker(turn.output, { dropCount, outcome: "still-dropped", costUsd })
   };
 }
+async function maybeRepromptForInspection(turn, parsed, rawObject, ctx) { // Re-prompt the adversarial reviewer once when it passed with zero findings and no inspection trail; resolves to null when no re-prompt is needed.
+  if (ctx.input.adversarialConfig.demandInspectionTrail === false) // only an explicit false disables the guard
+    return null;
+  if (!parsed.passed || parsed.findings.length !== 0) // guard targets empty passes only
+    return null;
+  if (hasInspectionTrail(rawObject))
+    return null;
+  const secondTurn = await ctx.send(AdversarialReviewPromptBuilder.demandInspection());
+  const costUsd = (turn.estimatedCostUsd ?? 0) + (secondTurn.estimatedCostUsd ?? 0); // both turns are charged
+  const secondParsed = validateAdversarialShape(tryParseLLMJson(secondTurn.output)); // NOTE(review): shape check only — the re-issued verdict's inspectedFiles is not re-checked
+  getSafeLogger()?.warn("review", "Adversarial reviewer returned empty pass with no inspection trail \u2014 re-prompted", {
+    storyId: ctx.input.story.id,
+    event: "review.adversarial.inspection_trail.reprompted",
+    recovered: secondParsed !== null
+  });
+  return secondParsed ? { ...turn, output: secondTurn.output, estimatedCostUsd: costUsd } : { ...turn, estimatedCostUsd: costUsd }; // adopt the second output only when it validates; otherwise keep the first output with the summed cost
+}
 var FAIL_OPEN, ADVERSARIAL_REQUOTE_RECOVERED_EVENT = "review.adversarial.finding.requote_recovered", ADVERSARIAL_REQUOTE_FAILED_EVENT = "review.adversarial.finding.requote_failed", DEFAULT_MAX_REQUOTES = 5, adversarialParseRetry = (input) => makeParseRetryStrategy({
   validate: (parsed) => validateAdversarialShape(parsed) !== null,
   reviewerKind: "adversarial",
@@ -32305,11 +32437,15 @@ var init_adversarial_review = __esm(() => {
     retry: (input) => adversarialParseRetry(input),
     async hopBody(initialPrompt, ctx) {
       const turn = await ctx.sendWithParseRetry(initialPrompt);
-      const parsed = validateAdversarialShape(tryParseLLMJson(turn.output));
+      const rawObject = tryParseLLMJson(turn.output);
+      const parsed = validateAdversarialShape(rawObject);
       if (!parsed)
         return turn;
       if (ctx.input.mode !== "ref")
         return turn;
+      const inspectionGuard = await maybeRepromptForInspection(turn, parsed, rawObject, ctx);
+      if (inspectionGuard)
+        return inspectionGuard;
       const regroundEnabled = ctx.input.adversarialConfig.acRegroundOnDrop !== false;
       if (regroundEnabled) {
         const firstShape = { passed: parsed.passed, findings: parsed.findings };
@@ -33945,6 +34081,18 @@ var init_adapters = __esm(() => {
   init_typecheck();
 });
+// src/operations/verbatim-warn.ts
+function warnOnDroppedVerbatimAcs(prd, specContent, featureName) { // Log a "plan" warning when findMissingVerbatimAcs reports [verbatim] spec ACs absent from prd; returns nothing and leaves prd untouched.
+  const missing = findMissingVerbatimAcs(specContent, prd);
+  if (missing.length > 0) {
+    getSafeLogger()?.warn("plan", "[verbatim] spec acceptance criteria dropped from PRD \u2014 run spec-review --prd before executing", { featureName, missingCount: missing.length, missing });
+  }
+}
+var init_verbatim_warn = __esm(() => { // presumably the bundler's lazy module initializer — __esm is defined outside this view
+  init_logger2(); // initializers for the modules this file calls into
+  init_prd();
+});
 // src/operations/plan.ts
 var planInteractiveOp;
 var init_plan = __esm(() => {
@@ -33952,6 +34100,7 @@ var init_plan = __esm(() => {
   init_config();
   init_schema2();
   init_prompts();
+  init_verbatim_warn();
   planInteractiveOp = {
     kind: "run",
     name: "plan-interactive",
@@ -33991,9 +34140,10 @@ ${outputFormat}`, overridable: false }
     parse(output, input, _ctx) {
       return validatePlanOutput(output, input.featureName, input.branchName);
     },
-    verify: async (parsed, _input, _ctx) => {
+    verify: async (parsed, input, _ctx) => {
       if (!parsed.userStories || parsed.userStories.length === 0)
         return null;
+      warnOnDroppedVerbatimAcs(parsed, input.specContent, input.featureName);
       return parsed;
     },
     recover: async (input, ctx) => {
@@ -34044,13 +34194,39 @@ function validateRefinedPrd(prd) {
     validateRefinedStory(story);
   return prd;
 }
-var NEGATIVE_PATH_TOKENS, planRefineOp;
+async function readMissingVerbatimAcs(input) { // Read the draft PRD at input.outputPath and return its missing [verbatim] ACs; resolves to [] when nothing could be read or checked.
+  const content = await _planRefineDeps.readFile(input.outputPath);
+  if (!content) // null from a failed read, or an empty file
+    return [];
+  try {
+    const prd = validatePlanOutput(content, input.featureName, input.branchName);
+    return findMissingVerbatimAcs(input.specContent, prd);
+  } catch { // NOTE(review): also swallows errors thrown by findMissingVerbatimAcs, which the debug message below would misreport as an unparseable draft
+    getSafeLogger()?.debug("plan", "Skipped [verbatim] self-heal \u2014 draft PRD not yet parseable", {
+      featureName: input.featureName
+    });
+    return [];
+  }
+}
+var _planRefineDeps, NEGATIVE_PATH_TOKENS, planRefineOp;
 var init_plan_refine = __esm(() => {
   init_retry();
   init_config();
   init_errors();
+  init_logger2();
+  init_prd();
   init_schema2();
   init_prompts();
+  init_verbatim_warn();
+  _planRefineDeps = {
+    readFile: async (path3) => {
+      try {
+        return await Bun.file(path3).text();
+      } catch {
+        return null;
+      }
+    }
+  };
   NEGATIVE_PATH_TOKENS = [
     "error",
     "fail",
@@ -34114,19 +34290,30 @@ ${outputFormat}`,
       };
     },
     async hopBody(initialPrompt, ctx) {
+      const builder = new PlanPromptBuilder;
       const turn1 = await ctx.sendWithParseRetry(initialPrompt);
-      const refinePrompt = new PlanPromptBuilder().buildRefineContinuation(ctx.input.outputPath);
-      const turn2 = await ctx.send(refinePrompt);
-      return {
-        ...turn2,
-        estimatedCostUsd: (turn1.estimatedCostUsd ?? 0) + (turn2.estimatedCostUsd ?? 0)
-      };
+      const turn2 = await ctx.send(builder.buildRefineContinuation(ctx.input.outputPath));
+      let totalCost = (turn1.estimatedCostUsd ?? 0) + (turn2.estimatedCostUsd ?? 0);
+      let last = turn2;
+      const missing = await readMissingVerbatimAcs(ctx.input);
+      if (missing.length > 0) {
+        getSafeLogger()?.info("plan", "Refine dropped [verbatim] spec ACs \u2014 issuing one repair turn", {
+          featureName: ctx.input.featureName,
+          missingCount: missing.length
+        });
+        const turn3 = await ctx.send(builder.buildVerbatimRepair(missing, ctx.input.outputPath));
+        totalCost += turn3.estimatedCostUsd ?? 0;
+        last = turn3;
+      }
+      return { ...last, estimatedCostUsd: totalCost };
     },
     parse(output, input) {
       return validatePlanOutput(output, input.featureName, input.branchName);
     },
-    verify: async (parsed, _input, _ctx) => {
-      return validateRefinedPrd(parsed);
+    verify: async (parsed, input, _ctx) => {
+      const validated = validateRefinedPrd(parsed);
+      warnOnDroppedVerbatimAcs(validated, input.specContent, input.featureName);
+      return validated;
     },
     recover: async (input, ctx) => {
       const content = await ctx.readFile(input.outputPath);
@@ -35260,6 +35447,17 @@ function parseRefinementResponse(response, criteria) {
     return fallbackCriteria(criteria);
   }
 }
+function refinementWouldFallback(response) { // True when response holds no usable JSON array: empty/blank text, unparseable JSON, or a parsed value that is not an array.
+  if (!response || !response.trim())
+    return true;
+  try {
+    const fromFence = extractJsonFromMarkdown(response);
+    const cleaned = stripTrailingCommas(fromFence !== response ? fromFence : response); // NOTE(review): both branches yield the same value as fromFence
+    return !Array.isArray(JSON.parse(cleaned));
+  } catch { // JSON.parse (or a helper) threw: treat as no usable JSON
+    return true;
+  }
+}
 function fallbackCriteria(criteria, storyId = "") {
   return criteria.map((c) => ({
     original: c,
@@ -35275,6 +35473,7 @@ var acceptanceRefineOp;
 var init_acceptance_refine = __esm(() => {
   init_refinement();
   init_config();
+  init_logger2();
   init_prompts();
   acceptanceRefineOp = {
     kind: "complete",
@@ -35297,6 +35496,9 @@ var init_acceptance_refine = __esm(() => {
       };
     },
     parse(output, input, _ctx) {
+      if (refinementWouldFallback(output)) {
+        getSafeLogger()?.warn("acceptance", "AC refinement returned no usable JSON \u2014 falling back to unrefined criteria", { storyId: input.storyId, criteriaCount: input.criteria.length, responseBytes: output?.length ?? 0 });
+      }
       const items = parseRefinementResponse(output, input.criteria);
       return items.map((item) => ({ ...item, storyId: item.storyId || input.storyId }));
     }
@@ -35448,6 +35650,25 @@ function evaluateRepromptTrigger2(shape, input) {
     return { shouldReprompt: false };
   return { shouldReprompt: true, acDropped: dropped };
 }
+async function maybeRepromptForInspection2(turn, parsed, rawObject, ctx) { // Re-prompt the semantic reviewer once when it passed with zero findings and no inspection trail; resolves to null when no re-prompt is needed.
+  if (ctx.input.mode !== "ref") // guard applies to "ref" mode only
+    return null;
+  if (ctx.input.semanticConfig.demandInspectionTrail === false) // only an explicit false disables the guard
+    return null;
+  if (!parsed.passed || parsed.findings.length !== 0) // guard targets empty passes only
+    return null;
+  if (hasInspectionTrail(rawObject))
+    return null;
+  const secondTurn = await ctx.send(ReviewPromptBuilder.demandInspection());
+  const costUsd = (turn.estimatedCostUsd ?? 0) + (secondTurn.estimatedCostUsd ?? 0); // both turns are charged
+  const secondParsed = validateLLMShape(tryParseLLMJson(secondTurn.output)); // NOTE(review): shape check only — the re-issued verdict's inspectedFiles is not re-checked
+  getSafeLogger()?.warn("review", "Semantic reviewer returned empty pass with no inspection trail \u2014 re-prompted", {
+    storyId: ctx.input.story.id,
+    event: "review.semantic.inspection_trail.reprompted",
+    recovered: secondParsed !== null
+  });
+  return secondParsed ? { ...turn, output: secondTurn.output, estimatedCostUsd: costUsd } : { ...turn, estimatedCostUsd: costUsd }; // adopt the second output only when it validates; otherwise keep the first output with the summed cost
+}
 async function performSemanticReground(turn, firstParsed, drops, ctx) {
   const threshold = ctx.input.blockingThreshold ?? "error";
   const acceptanceCriteria = ctx.input.story.acceptanceCriteria;
@@ -35568,9 +35789,13 @@ async function requoteBlockingFindings(findings, ctx) {
 }
 var FAIL_OPEN2, SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requote_recovered", SEMANTIC_REQUOTE_FAILED_EVENT = "review.semantic.finding.requote_failed", DEFAULT_MAX_REQUOTES2 = 5, semanticReviewHopBody = async (initialPrompt, ctx) => {
   const turn = await ctx.sendWithParseRetry(initialPrompt);
-  const parsed = validateLLMShape(tryParseLLMJson(turn.output));
+  const rawObject = tryParseLLMJson(turn.output);
+  const parsed = validateLLMShape(rawObject);
   if (!parsed)
     return turn;
+  const inspectionGuard = await maybeRepromptForInspection2(turn, parsed, rawObject, ctx);
+  if (inspectionGuard)
+    return inspectionGuard;
   const requoted = await requoteBlockingFindings(parsed.findings, ctx);
   if (requoted.changed) {
     const passed = !requoted.findings.some((finding) => isBlockingSeverity(finding.severity, ctx.input.blockingThreshold ?? "error"));
@@ -38925,6 +39150,7 @@ var init_operations = __esm(() => {
   init_call();
   init_plan();
   init_plan_refine();
+  init_verbatim_warn();
   init_decompose2();
   init_build_hop_callback();
   init_classify_route();
@@ -40851,10 +41077,18 @@ ${exceptions.join(`
 `)}`;
 }
+function implementerOwnsTests(story) { // True when story.routing.testStrategy is in SINGLE_SESSION_TEST_OWNING_STRATEGIES; a missing strategy is looked up as "".
+  return SINGLE_SESSION_TEST_OWNING_STRATEGIES.has(story.routing?.testStrategy ?? "");
+}
+function testEditHeadline(story, prohibition) { // Headline for the prompt: the single-session permit text when the implementer owns the tests, otherwise the caller-supplied prohibition.
+  return implementerOwnsTests(story) ? SINGLE_SESSION_PERMIT_HEADLINE : prohibition;
+}
 function exceptionCountWord(story) { // "four" when the story's testStrategy is in THREE_SESSION_STRATEGIES, otherwise "three".
   return THREE_SESSION_STRATEGIES.has(story.routing?.testStrategy ?? "") ? "four" : "three";
 }
 function escapeHatchFor(story) { // Guidance text for this story: the single-session test-edit policy, or the result of buildEscapeHatch.
+  if (implementerOwnsTests(story)) // single-session strategies get the test-edit policy instead of the escape hatch
+    return SINGLE_SESSION_TEST_EDIT_POLICY;
   const isTdd = THREE_SESSION_STRATEGIES.has(story.routing?.testStrategy ?? "");
   return buildEscapeHatch({ includeMockHandoff: isTdd }); // mock handoff is included only for three-session strategies
 }
@@ -40926,7 +41160,7 @@ ${errors3}
 2. Only fix findings that are actually valid problems
 3. Do NOT add keys, functions, or imports that already exist \u2014 check first
-Do NOT change test files or test behavior \u2014 see the ${exceptionCountWord(story)} narrow exceptions appended below.
+${testEditHeadline(story, `Do NOT change test files or test behavior \u2014 see the ${exceptionCountWord(story)} narrow exceptions appended below.`)}
 Do NOT add new features \u2014 only fix valid issues.
 Commit your fixes when done.${scopeConstraint}${noTestIsolationBlock(story)}${escapeHatchFor(story)}`;
 }
@@ -40980,6 +41214,7 @@ Commit your fixes when done.${scopeConstraint}${noTestIsolationBlock(story)}${es
 }
 function mechanicalRectification(checks3, story, scopeConstraint, opts) {
   const errors3 = formatCheckErrors(checks3, opts);
+  const scopeDirective = implementerOwnsTests(story) ? `Fix all errors listed above that are within this story's scope. ${SINGLE_SESSION_PERMIT_HEADLINE}` : `Fix all errors listed above that are within this story's scope \u2014 see the ${exceptionCountWord(story)} narrow exceptions appended below for sibling-story spillover. Do NOT change test files or test behavior except via those exceptions.`;
   return `You are fixing lint/typecheck errors from a code review.
 Story: ${story.title} (${story.id})
@@ -40988,7 +41223,7 @@ The following quality checks failed after implementation:
 ${errors3}
-Fix all errors listed above that are within this story's scope \u2014 see the ${exceptionCountWord(story)} narrow exceptions appended below for sibling-story spillover. Do NOT change test files or test behavior except via those exceptions.
+${scopeDirective}
 Do NOT add new features \u2014 only fix the quality check errors.
 After fixing, re-run the failing check(s) to verify they pass, then commit your changes.${scopeConstraint}${noTestIsolationBlock(story)}${escapeHatchFor(story)}`;
 }
@@ -41059,11 +41294,29 @@ REASON: <one paragraph: which mock is wrong vs which dispatch the new code uses,
 Rules:
 - Do NOT make any edits yourself; the test-writer will fulfill.
 - Do NOT also emit \`UNRESOLVED:\` in the same turn \u2014 this declaration IS the handoff.
-- FILES must list real test files. Each path must exist and be a test file.`, THREE_SESSION_STRATEGIES, CONTRADICTION_ESCAPE_HATCH, MAX_STRUCTURED_FINDINGS = 10, RAW_WITH_FINDINGS_LIMIT = 1000, RAW_FALLBACK_LIMIT = 4000;
+- FILES must list real test files. Each path must exist and be a test file.`, THREE_SESSION_STRATEGIES, SINGLE_SESSION_TEST_OWNING_STRATEGIES, SINGLE_SESSION_PERMIT_HEADLINE = "You authored these tests in the same session as the implementation, so you MAY edit test files \u2014 but ONLY to resolve a genuine contradiction between a test and this story's acceptance criteria (or between two acceptance criteria). NEVER weaken, delete, loosen, or skip a test merely to make it pass. See the test-edit guidance appended below.", SINGLE_SESSION_TEST_EDIT_POLICY = `
+## Test-edit guidance (single-session implementer)
+You wrote both the tests and the implementation for this story in one session, so no
+separate test-writer owns the test contract. You therefore MAY edit test files during
+rectification \u2014 subject to these limits:
+- Edit a test ONLY to resolve a genuine contradiction between the test and an acceptance
+  criterion, a contradiction between two acceptance criteria, or a clear defect in a test
+  you authored (wrong arity/type, impossible setup, or asserting behavior the ACs do not require).
+- NEVER weaken, delete, loosen, or \`skip\` a test simply because the implementation fails it.
+  A failing test usually means the SOURCE is wrong \u2014 fix the source first.
+- The semantic and adversarial reviewers still gate correctness; gaming a test to pass will be caught.
+If two findings or two acceptance criteria contradict each other and you cannot satisfy
+both even after adjusting tests, do not guess. Emit:
+UNRESOLVED: <which findings/ACs conflicted and why they cannot both be satisfied>`, CONTRADICTION_ESCAPE_HATCH, MAX_STRUCTURED_FINDINGS = 10, RAW_WITH_FINDINGS_LIMIT = 1000, RAW_FALLBACK_LIMIT = 4000;
 var init_rectifier_builder_helpers = __esm(() => {
   init_review();
   init_sections2();
   THREE_SESSION_STRATEGIES = new Set(["three-session-tdd", "three-session-tdd-lite"]);
+  SINGLE_SESSION_TEST_OWNING_STRATEGIES = new Set(["tdd-simple", "test-after"]);
   CONTRADICTION_ESCAPE_HATCH = buildEscapeHatch({ includeMockHandoff: false });
 });
@@ -41149,11 +41402,13 @@ class RectifierPromptBuilder {
     const parts = [];
     const attemptWord = maxAttempts === 1 ? "1 attempt" : `${maxAttempts} attempts`;
     const exCount = story ? exceptionCountWord(story) : "three";
+    const prohibition = `Do NOT change test files or test behavior \u2014 see the ${exCount} narrow exceptions appended below.`;
+    const testDirective = story ? testEditHeadline(story, prohibition) : prohibition;
     parts.push(`Review failed after your implementation. Fix the following issues (${attemptWord} available before escalation):
 `);
     parts.push(renderPrioritizedFailures(failedChecks));
     parts.push(`
-Fix in priority order. After fixing each priority, re-run the failing check(s) at that level to verify they pass before moving on. Do NOT change test files or test behavior \u2014 see the ${exCount} narrow exceptions appended below. Commit your changes when all checks pass.`);
+Fix in priority order. After fixing each priority, re-run the failing check(s) at that level to verify they pass before moving on. ${testDirective} Commit your changes when all checks pass.`);
     parts.push(story ? escapeHatchFor(story) : CONTRADICTION_ESCAPE_HATCH);
     const guardrails = buildBehavioralGuardrailsSection("implementer", guardrailLevel ?? "lite");
     if (guardrails) {
@@ -41416,7 +41671,7 @@ ${testCommands}
 6. Ensure ALL tests pass before completing.
 **IMPORTANT:**
-- Do NOT modify test files \u2014 see the ${exceptionCountWord(story)} narrow exceptions appended below if you believe a test has a lint error, a PRD-contract mismatch, or belongs to a sibling story.
+- ${testEditHeadline(story, `Do NOT modify test files \u2014 see the ${exceptionCountWord(story)} narrow exceptions appended below if you believe a test has a lint error, a PRD-contract mismatch, or belongs to a sibling story.`)}
 - Do NOT loosen assertions to mask implementation bugs.
 - Focus on fixing the source code to meet the test requirements.
 - When running tests, run ONLY the failing test files shown above${cmd ? ` \u2014 NEVER run \`${cmd}\` without a file filter` : " \u2014 never run the full test suite without a file filter"}.
@@ -41516,7 +41771,7 @@ ${errors3}${reasoningSection}${historySection}
 2. Only fix findings that are actually valid problems
 3. Do NOT add keys, functions, or imports that already exist \u2014 check first
-Do NOT change test files or test behavior \u2014 see the ${exceptionCountWord(story)} narrow exceptions appended below.
+${testEditHeadline(story, `Do NOT change test files or test behavior \u2014 see the ${exceptionCountWord(story)} narrow exceptions appended below.`)}
 Do NOT add new features \u2014 only fix valid issues.
 Commit your fixes when done.${scopeConstraint}${escapeHatchFor(story)}`;
   }
@@ -41610,7 +41865,7 @@ Tests are failing. Fix the source so all tests pass \u2014 not just the ones lis
 4. Do not declare done until step 3 shows 0 failures.
 **IMPORTANT:**
-- Do NOT modify test files \u2014 see the ${exceptionCountWord(opts.story)} narrow exceptions appended below if you believe a test has a lint error, a PRD-contract mismatch, or belongs to a sibling story.
+- ${testEditHeadline(opts.story, `Do NOT modify test files \u2014 see the ${exceptionCountWord(opts.story)} narrow exceptions appended below if you believe a test has a lint error, a PRD-contract mismatch, or belongs to a sibling story.`)}
 - Do NOT loosen assertions to mask implementation bugs.
 - Focus on fixing the source code to meet the test requirements.`);
     parts.push(escapeHatchFor(opts.story));
@@ -41798,6 +42053,9 @@ Output ONLY the JSON object. Do not include markdown fences or explanation.`;
 Review the draft with a strict self-audit mindset. Re-read the codebase context and compare the PRD against it. Focus only on the issues below, then rewrite the PRD if needed.
+#### spec-ac-preservation
+Enumerate every acceptance criterion the spec states. Confirm each one appears in some story's acceptanceCriteria \u2014 never drop a spec AC during this audit. If an AC looks unsupported by the current codebase, keep it: the story may be adding that capability. Any AC the spec tags \`[verbatim]\` MUST appear character-for-character in an acceptanceCriteria entry \u2014 preserve every backtick-quoted command, file path, regex, and count exactly. If a \`[verbatim]\` AC is missing or altered, restore it verbatim.
 #### ac-testable
 For each acceptance criterion, ask whether the assertion is observable through a return value, exception, log output, file content, or state change. If any AC is not directly testable, rewrite it so it is observable.
@@ -41810,7 +42068,7 @@ Check whether any sentence in any description contradicts an acceptance criterio
 #### codebase-fit
 For each story, verify:
 1. Proposed files, helpers, tests, dependencies, and implementation notes match the codebase context. Remove invented helpers, files, call sites, or dependencies unless the change clearly requires creating them.
-2. Each acceptance criterion's semantic meaning matches the spec's actual interface and data flow. Criteria that assert incorrect parameter semantics, wrong data flow, or behavior that contradicts the spec must be corrected or removed. Cross-check each AC against the spec's interface definitions, pseudocode, and design notes.
+2. Each acceptance criterion's semantic meaning matches the spec's actual interface and data flow. Criteria that assert incorrect parameter semantics, wrong data flow, or behavior that contradicts the spec must be corrected. Never delete an AC that restates a spec AC \u2014 correct its wording to match the spec instead. Cross-check each AC against the spec's interface definitions, pseudocode, and design notes.
 #### contextfiles-spec-alignment
 For each story, compare contextFiles against files the spec explicitly lists as context (e.g., in "Context Files" sections). Ensure the most critical spec-recommended files are included, up to the 5-file limit. If a spec-recommended file is absent, add it (removing the least critical one if already at 5). Files the story will CREATE must not appear here.
@@ -41828,6 +42086,23 @@ If a story changes existing behavior, extracts a shared helper, extends an exist
 Check each story's title, description, scope, contextFiles, and acceptance criteria for internal consistency. If the story says a file or command is in scope anywhere else, do not list it as out of scope. If the title or acceptance criteria clearly include CLI, output, tests, or helper extraction work, the Scope section must reflect that accurately.
 Write the revised PRD to this file path: ${outputFilePath}
+Do not output the PRD in chat. After writing the file, reply with a brief text confirmation only.`;
+  }
+  buildVerbatimRepair(missingAcs, outputFilePath) {
+    const list = missingAcs.map((ac) => `- ${ac}`).join(`
+`);
+    return `Your revised PRD dropped or altered acceptance criteria the spec marked \`[verbatim]\`. These are load-bearing executable checks (greps, file-existence checks, regex/count assertions, or architectural invariants) and MUST survive character-for-character \u2014 paraphrasing destroys the verification mechanism.
+The following \`[verbatim]\` spec acceptance criteria are missing or altered in the PRD:
+${list}
+For each one:
+- Add it to the \`acceptanceCriteria\` array of the single most relevant user story.
+- Preserve every backtick-quoted command, file path, regex, and count exactly as written in the spec. Do not paraphrase, retag, split, or move them into a description.
+- Do not remove or weaken any acceptance criteria that are already correct.
+Write the corrected PRD to this file path: ${outputFilePath}
 Do not output the PRD in chat. After writing the file, reply with a brief text confirmation only.`;
   }
   build(specContent, codebaseContext, outputFilePath, packages, packageDetails, projectProfile, proposers) {
@@ -58108,7 +58383,7 @@ var package_default;
 var init_package = __esm(() => {
   package_default = {
     name: "@nathapp/nax",
-    version: "0.68.2",
+    version: "0.68.4",
     description: "AI Coding Agent Orchestrator \u2014 loops until done",
     type: "module",
     bin: {
@@ -58203,8 +58478,8 @@ var init_version = __esm(() => {
   NAX_VERSION = package_default.version;
   NAX_COMMIT = (() => {
     try {
-      if (/^[0-9a-f]{6,10}$/.test("27a81a5e"))
-        return "27a81a5e";
+      if (/^[0-9a-f]{6,10}$/.test("197c6530"))
+        return "197c6530";
     } catch {}
     try {
       const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
@@ -60854,7 +61129,7 @@ function extractQuoteTriples(reason) {
   }
   return triples;
 }
-function normalizeWs2(s) {
+function normalizeWs3(s) {
   return s.replace(/\s+/g, " ").trim();
 }
 async function verifyQuoteTriple(triple, workdir, deps = _quoteIntegrityDeps) {
@@ -60868,7 +61143,7 @@ async function verifyQuoteTriple(triple, workdir, deps = _quoteIntegrityDeps) {
   const end = Math.min(lines.length, triple.line + CONTEXT_LINES);
   const window2 = lines.slice(start, end).join(`
 `);
-  return normalizeWs2(window2).toLowerCase().includes(normalizeWs2(triple.quote).toLowerCase());
+  return normalizeWs3(window2).toLowerCase().includes(normalizeWs3(triple.quote).toLowerCase());
 }
 async function verifyEscalationQuotes(reason, workdir, storyId, deps = _quoteIntegrityDeps) {
   const triples = extractQuoteTriples(reason);
@@ -95189,6 +95464,7 @@ class DebatePlanStrategy {
       });
       if (debateResult.outcome !== "failed" && debateResult.output) {
         const prd2 = validatePlanOutput(debateResult.output, ctx.options.feature, ctx.branchName);
+        warnOnDroppedVerbatimAcs(prd2, ctx.specContent, ctx.options.feature);
         const withProject2 = { ...prd2, project: ctx.projectName };
         return _debatePlanDeps.writeOrRecoverPrd(ctx, withProject2);
       }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@nathapp/nax",
-  "version": "0.68.2",
+  "version": "0.68.4",
   "description": "AI Coding Agent Orchestrator — loops until done",
   "type": "module",
   "bin": {