@nathapp/nax 0.68.2 → 0.68.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/nax.js +274 -27
  2. package/package.json +1 -1
package/dist/nax.js CHANGED
@@ -17203,7 +17203,8 @@ var init_schemas_review = __esm(() => {
17203
17203
  requote: true,
17204
17204
  maxRequotes: 5
17205
17205
  }),
17206
- excludePatterns: exports_external.array(exports_external.string()).optional()
17206
+ excludePatterns: exports_external.array(exports_external.string()).optional(),
17207
+ demandInspectionTrail: exports_external.boolean().default(true)
17207
17208
  });
17208
17209
  AdversarialReviewConfigSchema = exports_external.object({
17209
17210
  model: ConfiguredModelSchema.default("balanced"),
@@ -17214,6 +17215,7 @@ var init_schemas_review = __esm(() => {
17214
17215
  parallel: exports_external.boolean().default(false),
17215
17216
  maxConcurrentSessions: exports_external.number().int().min(1).max(4).default(2),
17216
17217
  acRegroundOnDrop: exports_external.boolean().default(true),
17218
+ demandInspectionTrail: exports_external.boolean().default(true),
17217
17219
  substantiation: exports_external.object({
17218
17220
  requote: exports_external.boolean().default(true),
17219
17221
  maxRequotes: exports_external.number().int().min(0).default(5)
@@ -17428,6 +17430,7 @@ var init_schemas3 = __esm(() => {
17428
17430
  resetRefOnRerun: false,
17429
17431
  rules: [],
17430
17432
  timeoutMs: 600000,
17433
+ demandInspectionTrail: true,
17431
17434
  substantiation: {
17432
17435
  requote: true,
17433
17436
  maxRequotes: 5
@@ -17451,6 +17454,7 @@ var init_schemas3 = __esm(() => {
17451
17454
  parallel: false,
17452
17455
  maxConcurrentSessions: 2,
17453
17456
  acRegroundOnDrop: true,
17457
+ demandInspectionTrail: true,
17454
17458
  substantiation: {
17455
17459
  requote: true,
17456
17460
  maxRequotes: 5
@@ -19178,7 +19182,7 @@ GOOD (write ACs like these):
19178
19182
 
19179
19183
  When a spec is provided, these rules govern acceptance criteria generation:
19180
19184
 
19181
- 1. **Preserve spec ACs.** Every acceptance criterion stated in the spec must appear in \`acceptanceCriteria\`, verbatim or lightly rephrased for testability. Never silently drop a spec AC.
19185
+ 1. **Preserve spec ACs.** Every acceptance criterion stated in the spec must appear in \`acceptanceCriteria\`. Never silently drop a spec AC. ACs the spec tags \`[verbatim]\` (typically executable greps, file-existence checks, regex/count assertions, or architectural invariants) MUST be copied **character-for-character** into an \`acceptanceCriteria\` entry \u2014 preserve every backtick-quoted command, file path, regex, and count exactly; do not paraphrase, retag, split, or move them into a description. Untagged ACs may be lightly rephrased for testability, but must retain the same assertion and concrete identifiers.
19182
19186
  2. **Do not invent spec ACs.** If you identify useful behavioral edge cases or negative paths that the spec did not explicitly list, place them in \`suggestedCriteria\` (a string array on the same story object) \u2014 never in \`acceptanceCriteria\`. These go through a separate hardening pass.
19183
19187
  3. **Respect story scope.** Each story's criteria must only cover what the spec says for that story. Do not assign criteria that belong to a different story's scope (wrong feature area, wrong file, wrong dependency chain).
19184
19188
  4. **\`suggestedCriteria\` format.** Each element must be a plain behavioral assertion \u2014 an observable output, return value, state change, or error condition that a test can assert. Never include implementation details (imports, internal structure), design suggestions, or vague descriptions.
@@ -27489,6 +27493,75 @@ function isStalled(prd) {
27489
27493
  return remaining.every((s) => s.status === "blocked" || s.status === "failed" || s.status === "paused" || s.status === "regression-failed" || s.dependencies.some((dep) => blockedIds.has(dep)));
27490
27494
  }
27491
27495
 
27496
// src/prd/verbatim-fidelity.ts

/**
 * Collapse every run of whitespace into a single space and trim both ends.
 * @param {string} text
 * @returns {string}
 */
function normalizeWs(text) {
  return text.replace(/\s+/g, " ").trim();
}

/**
 * Remove every backtick so inline-code formatting does not affect comparison.
 * @param {string} text
 * @returns {string}
 */
function stripBackticks(text) {
  return text.replace(/`/g, "");
}

/**
 * Comparison form of an AC: backticks removed, whitespace normalized.
 * @param {string} text
 * @returns {string}
 */
function canonical(text) {
  return normalizeWs(stripBackticks(text));
}

/**
 * Return the run of leading `[tag]` markers on a line (after an optional list
 * marker), or null when the line does not start with any tag.
 * @param {string} line
 * @returns {string | null}
 */
function leadingTagGroup(line) {
  return line.match(LEADING_TAG_GROUP)?.[1] ?? null;
}

/**
 * True when the leading tag group of the line contains `[verbatim]`
 * (case-insensitive).
 * @param {string} line
 * @returns {boolean}
 */
function isVerbatimBullet(line) {
  const tags = leadingTagGroup(line);
  return tags !== null && /\[verbatim\]/i.test(tags);
}

/**
 * True when the line continues the previous bullet: it is not blank, not a
 * heading, and does not start a new list item.
 * @param {string} line
 * @returns {boolean}
 */
function isContinuation(line) {
  if (line.trim().length === 0)
    return false;
  if (HEADING.test(line))
    return false;
  if (LIST_ITEM_START.test(line))
    return false;
  return true;
}

/**
 * Drop the optional list marker and the leading tag group from a block.
 * @param {string} block
 * @returns {string}
 */
function stripTagPrefix(block) {
  return block.replace(LEADING_TAG_GROUP, "");
}

/**
 * Collect every `[verbatim]`-tagged bullet in the spec, joining each bullet
 * with its continuation lines by single spaces. The returned blocks keep
 * their original list marker and tags.
 *
 * @param {string} specContent - Raw spec text. Anything that is not a
 *   non-empty string yields an empty result instead of throwing, so callers
 *   that only emit diagnostics cannot crash when no spec is available.
 * @returns {string[]}
 */
function extractVerbatimAcs(specContent) {
  if (typeof specContent !== "string" || specContent.length === 0)
    return [];
  const lines = specContent.split("\n");
  const blocks = [];
  for (let i = 0; i < lines.length; i++) {
    if (!isVerbatimBullet(lines[i]))
      continue;
    const parts = [lines[i].trim()];
    let j = i + 1;
    while (j < lines.length && isContinuation(lines[j])) {
      parts.push(lines[j].trim());
      j += 1;
    }
    blocks.push(parts.join(" "));
    // Resume scanning at the first line that was not consumed as continuation.
    i = j - 1;
  }
  return blocks;
}

/**
 * Canonical form of every acceptance criterion across all user stories.
 * Missing stories / criteria arrays and non-string entries are skipped rather
 * than allowed to throw inside `canonical`.
 *
 * @param {{ userStories?: Array<{ acceptanceCriteria?: unknown[] }> } | null | undefined} prd
 * @returns {string[]}
 */
function prdAcPayloads(prd) {
  return (prd?.userStories ?? []).flatMap((story) => (story?.acceptanceCriteria ?? []).filter((ac) => typeof ac === "string").map((ac) => canonical(ac)));
}

/**
 * Return the `[verbatim]` spec blocks whose payload (tags stripped, canonical
 * form) is not contained in any PRD acceptance criterion.
 *
 * @param {string} specContent - Raw spec text.
 * @param {object} prd - Parsed PRD with `userStories[].acceptanceCriteria`.
 * @returns {string[]} The original spec blocks that are missing from the PRD.
 */
function findMissingVerbatimAcs(specContent, prd) {
  const prdAcs = prdAcPayloads(prd);
  const missing = [];
  for (const block of extractVerbatimAcs(specContent)) {
    const payload = canonical(stripTagPrefix(block));
    // A bullet that consists of tags only has nothing to compare.
    if (payload.length === 0)
      continue;
    if (!prdAcs.some((ac) => ac.includes(payload)))
      missing.push(block);
  }
  return missing;
}
var LEADING_TAG_GROUP, LIST_ITEM_START, HEADING;
var init_verbatim_fidelity = __esm(() => {
  // Optional list marker, then one or more `[tag]` groups (captured).
  LEADING_TAG_GROUP = /^\s*(?:[-*]|\d+\.)?\s*((?:\[[a-z][a-z-]*\]\s*)+)/i;
  // `-`, `*` or `N.` followed by whitespace.
  LIST_ITEM_START = /^\s*(?:[-*]|\d+\.)\s/;
  HEADING = /^\s*#/;
});
27564
+
27492
27565
  // src/prd/validate.ts
27493
27566
  function validateStoryId(id) {
27494
27567
  if (!id || id.length === 0) {
@@ -27893,6 +27966,7 @@ function markStoryPaused(prd, storyId) {
27893
27966
  var PRD_MAX_FILE_SIZE;
27894
27967
  var init_prd = __esm(() => {
27895
27968
  init_json_file();
27969
+ init_verbatim_fidelity();
27896
27970
  init_schema2();
27897
27971
  PRD_MAX_FILE_SIZE = 5 * 1024 * 1024;
27898
27972
  });
@@ -30897,6 +30971,7 @@ Flag issues only when you have confirmed:
30897
30971
  Do NOT flag: style issues, naming conventions, import ordering, file length, or anything lint handles.`, SEMANTIC_OUTPUT_SCHEMA = `Respond with JSON only \u2014 no explanation text before or after:
30898
30972
  {
30899
30973
  "passed": boolean,
30974
+ "inspectedFiles": ["relative/path/you/actually/opened.ts"],
30900
30975
  "findings": [
30901
30976
  {
30902
30977
  "severity": "error" | "warning" | "info" | "unverifiable",
@@ -30920,7 +30995,8 @@ Notes:
30920
30995
  - \`acIndex\` is required when severity is "error" (1-based, into the Acceptance Criteria list above).
30921
30996
  - \`acQuote\` is optional advisory metadata for human auditors \u2014 not validated.
30922
30997
  - Omit both for "warning", "info", "unverifiable".
30923
- If all ACs are correctly implemented, respond with { "passed": true, "findings": [] }.`, ReviewPromptBuilder;
30998
+ - \`inspectedFiles\` must list the relative paths you actually opened while reviewing. A \`passed:true\` verdict with an empty or absent \`inspectedFiles\` is invalid \u2014 walk each AC against the real files before passing.
30999
+ If all ACs are correctly implemented after inspecting the code, respond with { "passed": true, "inspectedFiles": ["..."], "findings": [] }.`, ReviewPromptBuilder;
30924
31000
  var init_review_builder = __esm(() => {
30925
31001
  SEMANTIC_ROLE = "You are a semantic code reviewer with access to the repository files. " + "Your job is to walk each acceptance criterion (AC) and judge whether the production code fulfills it \u2014 fully, partially, or not at all. " + "Test coverage gaps and convention/lint issues are out of scope \u2014 adversarial review and lint/typecheck handle those.";
30926
31002
  ReviewPromptBuilder = class ReviewPromptBuilder {
@@ -30973,6 +31049,16 @@ Respond with a condensed summary:
30973
31049
  - Keep \`verifiedBy\` for every finding. If \`verifiedBy.observed\` is long, abbreviate it to one line \u2014 never drop the field.
30974
31050
  Output ONLY a complete, valid JSON object. It must start with { and end with }.
30975
31051
  Schema: {"passed": boolean, "findings": [{"severity": string, "category": string, "file": string, "line": number, "issue": string, "suggestion": string, "verifiedBy": {"command": string, "file": string, "line": number, "observed": string}}]}`;
31052
+ }
31053
/**
 * Build the follow-up prompt for the semantic reviewer. It tells the reviewer
 * that a passed:true verdict with no findings and no inspectedFiles is not
 * valid, asks it to open the changed files and walk each acceptance
 * criterion, and to re-issue the verdict as the same JSON object.
 *
 * @returns the prompt text (a constant string; takes no input).
 */
+ static demandInspection() {
31054
+ return `Your previous review returned \`passed:true\` with no findings and an empty (or absent) \`inspectedFiles\` list. That means you did not open any of the changed files \u2014 a verdict reached without reading the code is not valid.
31055
+
31056
+ Use your file-reading tools NOW to open the changed files and walk each acceptance criterion against the real implementation. Then re-issue your verdict as the same JSON object.
31057
+
31058
+ Rules:
31059
+ - Populate \`inspectedFiles\` with the relative paths you actually opened.
31060
+ - If, after genuinely inspecting the code, every AC is satisfied, you may still return \`passed:true\` \u2014 but \`inspectedFiles\` must list the files you read.
31061
+ - Return ONLY the JSON object \u2014 no markdown fences, no explanation.`;
30976
31062
  }
30977
31063
  static requoteVerbatim(opts) {
30978
31064
  const file3 = opts.finding.verifiedBy?.file ?? opts.finding.file;
@@ -31147,6 +31233,13 @@ What new exported units lack corresponding test files?
31147
31233
  - New public functions that only appear in implementation, not in tests
31148
31234
  - Acceptance criteria that touch a code path with no test coverage
31149
31235
 
31236
+ **Placeholder / tautological tests (blocking).** A test that exists but verifies nothing is worse than a missing test \u2014 it manufactures a false green. Treat the following as a **\`severity:"error"\`, \`category:"test-gap"\`** finding whenever the fake test is the only coverage for an acceptance criterion:
31237
+ - Bodies that always pass: \`expect(true).toBe(true)\`, \`expect(x).toBe(x)\`, \`expect(1).toBe(1)\`, an empty test body, or \`assert(true)\`.
31238
+ - Tests skipped/disabled (\`it.skip\`, \`test.todo\`, \`xit\`, commented-out assertions) that an AC depends on.
31239
+ - Assertions that never exercise the implementation (e.g. asserting on a literal, not on a value the production code produced).
31240
+
31241
+ For each such finding: set \`acIndex\` to the AC the fake test purports to cover, \`acQuote\` to a verbatim substring of that AC, and \`verifiedBy.observed\` to the placeholder line itself (e.g. \`expect(true).toBe(true)\`). Do **not** downgrade these to \`warning\` \u2014 a green suite built on placeholder assertions is a failing implementation with hidden evidence.
31242
+
31150
31243
  ### 5. Convention Breaks
31151
31244
  What pattern exists elsewhere that this code does not follow?
31152
31245
  - Logger calls missing \`storyId\` as first key in data object
@@ -31166,6 +31259,7 @@ Respond with ONLY a JSON object \u2014 no preamble, no explanation outside the J
31166
31259
  \`\`\`json
31167
31260
  {
31168
31261
  "passed": true | false,
31262
+ "inspectedFiles": ["relative/path/you/actually/opened.ts"],
31169
31263
  "findings": [
31170
31264
  {
31171
31265
  "severity": "error" | "warning" | "info" | "unverifiable",
@@ -31187,6 +31281,8 @@ Respond with ONLY a JSON object \u2014 no preamble, no explanation outside the J
31187
31281
  }
31188
31282
  \`\`\`
31189
31283
 
31284
+ **No rubber-stamping:** \`inspectedFiles\` must list the relative paths you actually opened with your tools while reviewing. A \`passed:true\` verdict with an empty or absent \`inspectedFiles\` is invalid \u2014 it means you never looked at the code. Fetch the diff and open the changed files before forming any verdict.
31285
+
31190
31286
  Severity guide:
31191
31287
  - \`"error"\`: confident this will cause real failure or regression
31192
31288
  - \`"warning"\`: fragile or incomplete but may ship without immediate breakage
@@ -31216,6 +31312,8 @@ Worked example:
31216
31312
 
31217
31313
  **Convention / coding-standard violations almost always belong as \`"info"\`** unless an AC specifically names the convention or the symbol it concerns.
31218
31314
 
31315
+ **Exception \u2014 the trap does NOT apply to \`category:"test-gap"\` findings.** A fake/placeholder/missing test is the *absence* of verification for an AC's behaviour; it cannot name a symbol that is present in the (worthless) test file, because the defect is precisely that the symbol-under-test is never exercised. A \`test-gap\` finding is grounded by the AC whose behaviour goes unverified \u2014 cite that AC's \`acIndex\` and a verbatim \`acQuote\` substring from it, and keep severity \`"error"\`. The symbol-naming requirement is waived for this category.
31316
+
31219
31317
  **Scope constraints are not Acceptance Criteria:**
31220
31318
  The story description may contain a "Scope" section with "In:" and "Out:" bullets. These are implementation guidelines, not ACs. A finding about code changed outside the stated scope (e.g., a file listed under "Out:") cannot cite a scope constraint as its \`acQuote\`/\`acIndex\` because scope text is not in the numbered AC list. Emit scope-violation findings as \`"warning"\` \u2014 never \`"error"\`. Never use \`acIndex: 0\`; \`acIndex\` is 1-based (first AC bullet = 1).
31221
31319
 
@@ -31304,6 +31402,16 @@ Rules:
31304
31402
  - observed must be a 1-3 line excerpt that proves the claim, taken from at or near line ${line}.
31305
31403
  - If after reading the file you cannot find anything that proves the claim, set observed to "".
31306
31404
  - Do not return a full review. Do not include markdown fences or explanation.`;
31405
+ }
31406
/**
 * Build the follow-up prompt for the adversarial reviewer. It tells the
 * reviewer that a passed:true verdict with no findings and no inspectedFiles
 * is not valid, asks it to fetch the diff and open the changed files, and to
 * re-issue the verdict as the same JSON object.
 *
 * @returns the prompt text (a constant string; takes no input).
 */
+ static demandInspection() {
31407
+ return `Your previous review returned \`passed:true\` with no findings and an empty (or absent) \`inspectedFiles\` list. That means you did not open any of the changed files \u2014 a verdict reached without inspecting the code is not valid.
31408
+
31409
+ Use your git and file-reading tools NOW to fetch the diff and open the changed files for this story. Then re-issue your verdict as the same JSON object.
31410
+
31411
+ Rules:
31412
+ - Populate \`inspectedFiles\` with the relative paths you actually opened.
31413
+ - If, after genuinely inspecting the code, there is truly nothing to flag, you may still return \`passed:true\` \u2014 but \`inspectedFiles\` must list the files you read.
31414
+ - Return ONLY the JSON object \u2014 no markdown fences, no explanation.`;
31307
31415
  }
31308
31416
  static DROP_CODE_MESSAGES_QUOTE = {
31309
31417
  missing_ac_quote: "no `acQuote` field was provided \u2014 every blocking finding must cite an AC",
@@ -31683,7 +31791,7 @@ var init_acceptance_builder = __esm(() => {
31683
31791
  });
31684
31792
 
31685
31793
  // src/review/ac-quote-validator.ts
31686
- function normalizeWs(s) {
31794
+ function normalizeWs2(s) {
31687
31795
  return s.replace(/\s+/g, " ").trim();
31688
31796
  }
31689
31797
  function stripMarkdownInline(s) {
@@ -31718,11 +31826,14 @@ function validateAcQuote(finding, acceptanceCriteria) {
31718
31826
  if (typeof acIndex !== "number" || acIndex < 1 || acIndex > acceptanceCriteria.length) {
31719
31827
  return { valid: false, code: "ac_index_out_of_range" };
31720
31828
  }
31721
- const acText = normalizeWs(stripMarkdownInline(acceptanceCriteria[acIndex - 1]));
31722
- const normalizedQuote = normalizeWs(stripMarkdownInline(acQuote));
31829
+ const acText = normalizeWs2(stripMarkdownInline(acceptanceCriteria[acIndex - 1]));
31830
+ const normalizedQuote = normalizeWs2(stripMarkdownInline(acQuote));
31723
31831
  if (!acText.toLowerCase().includes(normalizedQuote.toLowerCase())) {
31724
31832
  return { valid: false, code: "ac_quote_not_substring" };
31725
31833
  }
31834
+ if (finding.category === "test-gap") {
31835
+ return { valid: true };
31836
+ }
31726
31837
  const keywords = extractLocusKeywords(finding);
31727
31838
  if (keywords.length === 0) {
31728
31839
  return { valid: false, code: "ac_quote_does_not_constrain_locus" };
@@ -32041,6 +32152,10 @@ var init_semantic_evidence = __esm(() => {
32041
32152
  });
32042
32153
 
32043
32154
  // src/review/finding-filters.ts
32155
/**
 * Report whether a parsed reviewer response carries a usable inspection trail.
 *
 * @param raw - Parsed reviewer JSON; may be null/undefined or lack the field.
 * @returns true only when raw.inspectedFiles is an array that contains at
 *   least one string which is non-empty after trimming.
 */
+ function hasInspectionTrail(raw) {
32156
+ const files = raw?.inspectedFiles;
32157
+ return Array.isArray(files) && files.some((f) => typeof f === "string" && f.trim().length > 0);
32158
+ }
32044
32159
  async function substantiateAdversarialFindings(opts) {
32045
32160
  const { findings, workdir, storyId, blockingThreshold } = opts;
32046
32161
  return Promise.all(findings.map(async (finding) => {
@@ -32072,9 +32187,9 @@ function parseRequoteResponse(output) {
32072
32187
  const parsed = tryParseLLMJson(output);
32073
32188
  if (!isRecord(parsed))
32074
32189
  return null;
32075
- const canonical = extractCanonical(parsed);
32076
- if (canonical)
32077
- return canonical;
32190
+ const canonical2 = extractCanonical(parsed);
32191
+ if (canonical2)
32192
+ return canonical2;
32078
32193
  const findings = parsed.findings;
32079
32194
  if (!Array.isArray(findings) || findings.length !== 1)
32080
32195
  return null;
@@ -32268,6 +32383,23 @@ async function performAdversarialReground(turn, firstParsed, drops, ctx) {
32268
32383
  output: withRepromptMarker(turn.output, { dropCount, outcome: "still-dropped", costUsd })
32269
32384
  };
32270
32385
  }
32386
/**
 * Re-prompt the adversarial reviewer once when it passed with no findings and
 * no inspection trail.
 *
 * @param turn - First reviewer turn; its fields are spread into the result.
 * @param parsed - Validated shape of the first response (passed, findings).
 * @param rawObject - Unvalidated parsed JSON, read only for inspectedFiles.
 * @param ctx - Hop context providing input config, story id and send().
 * @returns null when no re-prompt is needed (feature disabled, verdict not an
 *   empty pass, or a trail is present). Otherwise a turn whose cost is the sum
 *   of both turns and whose output is the second response when that response
 *   validates, or the original output when it does not.
 */
+ async function maybeRepromptForInspection(turn, parsed, rawObject, ctx) {
32387
// Only an explicit false disables the guard; an undefined setting keeps it on.
+ if (ctx.input.adversarialConfig.demandInspectionTrail === false)
32388
+ return null;
32389
// The guard targets empty passes only; failed or non-empty reviews are left alone.
+ if (!parsed.passed || parsed.findings.length !== 0)
32390
+ return null;
32391
+ if (hasInspectionTrail(rawObject))
32392
+ return null;
32393
+ const secondTurn = await ctx.send(AdversarialReviewPromptBuilder.demandInspection());
32394
+ const costUsd = (turn.estimatedCostUsd ?? 0) + (secondTurn.estimatedCostUsd ?? 0);
32395
+ const secondParsed = validateAdversarialShape(tryParseLLMJson(secondTurn.output));
32396
// NOTE(review): "recovered" reflects only that the second response validated;
// the second response is not re-checked for an inspection trail here.
+ getSafeLogger()?.warn("review", "Adversarial reviewer returned empty pass with no inspection trail \u2014 re-prompted", {
32397
+ storyId: ctx.input.story.id,
32398
+ event: "review.adversarial.inspection_trail.reprompted",
32399
+ recovered: secondParsed !== null
32400
+ });
32401
// When the second response does not validate, keep the first output but still bill both turns.
+ return secondParsed ? { ...turn, output: secondTurn.output, estimatedCostUsd: costUsd } : { ...turn, estimatedCostUsd: costUsd };
32402
+ }
32271
32403
  var FAIL_OPEN, ADVERSARIAL_REQUOTE_RECOVERED_EVENT = "review.adversarial.finding.requote_recovered", ADVERSARIAL_REQUOTE_FAILED_EVENT = "review.adversarial.finding.requote_failed", DEFAULT_MAX_REQUOTES = 5, adversarialParseRetry = (input) => makeParseRetryStrategy({
32272
32404
  validate: (parsed) => validateAdversarialShape(parsed) !== null,
32273
32405
  reviewerKind: "adversarial",
@@ -32305,11 +32437,15 @@ var init_adversarial_review = __esm(() => {
32305
32437
  retry: (input) => adversarialParseRetry(input),
32306
32438
  async hopBody(initialPrompt, ctx) {
32307
32439
  const turn = await ctx.sendWithParseRetry(initialPrompt);
32308
- const parsed = validateAdversarialShape(tryParseLLMJson(turn.output));
32440
+ const rawObject = tryParseLLMJson(turn.output);
32441
+ const parsed = validateAdversarialShape(rawObject);
32309
32442
  if (!parsed)
32310
32443
  return turn;
32311
32444
  if (ctx.input.mode !== "ref")
32312
32445
  return turn;
32446
+ const inspectionGuard = await maybeRepromptForInspection(turn, parsed, rawObject, ctx);
32447
+ if (inspectionGuard)
32448
+ return inspectionGuard;
32313
32449
  const regroundEnabled = ctx.input.adversarialConfig.acRegroundOnDrop !== false;
32314
32450
  if (regroundEnabled) {
32315
32451
  const firstShape = { passed: parsed.passed, findings: parsed.findings };
@@ -33945,6 +34081,18 @@ var init_adapters = __esm(() => {
33945
34081
  init_typecheck();
33946
34082
  });
33947
34083
 
34084
// src/operations/verbatim-warn.ts

/**
 * Log a warning when the PRD is missing acceptance criteria that the spec
 * tagged `[verbatim]`. This is diagnostics only: it never alters the PRD and
 * returns nothing.
 *
 * @param {object} prd - Parsed PRD to check.
 * @param {string} specContent - Raw spec text. When it is not a non-empty
 *   string there is nothing to compare, so the function returns quietly
 *   instead of letting a TypeError escape into the caller's verify step.
 * @param {string} featureName - Included in the log payload.
 * @returns {void}
 */
function warnOnDroppedVerbatimAcs(prd, specContent, featureName) {
  if (typeof specContent !== "string" || specContent.length === 0)
    return;
  const missing = findMissingVerbatimAcs(specContent, prd);
  if (missing.length === 0)
    return;
  getSafeLogger()?.warn("plan", "[verbatim] spec acceptance criteria dropped from PRD \u2014 run spec-review --prd before executing", { featureName, missingCount: missing.length, missing });
}
var init_verbatim_warn = __esm(() => {
  init_logger2();
  init_prd();
});
34095
+
33948
34096
  // src/operations/plan.ts
33949
34097
  var planInteractiveOp;
33950
34098
  var init_plan = __esm(() => {
@@ -33952,6 +34100,7 @@ var init_plan = __esm(() => {
33952
34100
  init_config();
33953
34101
  init_schema2();
33954
34102
  init_prompts();
34103
+ init_verbatim_warn();
33955
34104
  planInteractiveOp = {
33956
34105
  kind: "run",
33957
34106
  name: "plan-interactive",
@@ -33991,9 +34140,10 @@ ${outputFormat}`, overridable: false }
33991
34140
  parse(output, input, _ctx) {
33992
34141
  return validatePlanOutput(output, input.featureName, input.branchName);
33993
34142
  },
33994
- verify: async (parsed, _input, _ctx) => {
34143
+ verify: async (parsed, input, _ctx) => {
33995
34144
  if (!parsed.userStories || parsed.userStories.length === 0)
33996
34145
  return null;
34146
+ warnOnDroppedVerbatimAcs(parsed, input.specContent, input.featureName);
33997
34147
  return parsed;
33998
34148
  },
33999
34149
  recover: async (input, ctx) => {
@@ -34044,13 +34194,39 @@ function validateRefinedPrd(prd) {
34044
34194
  validateRefinedStory(story);
34045
34195
  return prd;
34046
34196
  }
34047
- var NEGATIVE_PATH_TOKENS, planRefineOp;
34197
/**
 * Read the draft PRD written to input.outputPath and return the [verbatim]
 * spec blocks it is missing.
 *
 * @param input - Op input; reads outputPath, featureName, branchName and
 *   specContent.
 * @returns the missing blocks, or an empty array when the file content is
 *   empty/unavailable or when parsing or comparison throws.
 */
+ async function readMissingVerbatimAcs(input) {
34198
+ const content = await _planRefineDeps.readFile(input.outputPath);
34199
+ if (!content)
34200
+ return [];
34201
+ try {
34202
+ const prd = validatePlanOutput(content, input.featureName, input.branchName);
34203
+ return findMissingVerbatimAcs(input.specContent, prd);
34204
// Best-effort: any error from validation or comparison skips the repair
// turn and is reported at debug level only.
+ } catch {
34205
+ getSafeLogger()?.debug("plan", "Skipped [verbatim] self-heal \u2014 draft PRD not yet parseable", {
34206
+ featureName: input.featureName
34207
+ });
34208
+ return [];
34209
+ }
34210
+ }
34211
+ var _planRefineDeps, NEGATIVE_PATH_TOKENS, planRefineOp;
34048
34212
  var init_plan_refine = __esm(() => {
34049
34213
  init_retry();
34050
34214
  init_config();
34051
34215
  init_errors();
34216
+ init_logger2();
34217
+ init_prd();
34052
34218
  init_schema2();
34053
34219
  init_prompts();
34220
+ init_verbatim_warn();
34221
+ _planRefineDeps = {
34222
+ readFile: async (path3) => {
34223
+ try {
34224
+ return await Bun.file(path3).text();
34225
+ } catch {
34226
+ return null;
34227
+ }
34228
+ }
34229
+ };
34054
34230
  NEGATIVE_PATH_TOKENS = [
34055
34231
  "error",
34056
34232
  "fail",
@@ -34114,19 +34290,30 @@ ${outputFormat}`,
34114
34290
  };
34115
34291
  },
34116
34292
  async hopBody(initialPrompt, ctx) {
34293
+ const builder = new PlanPromptBuilder;
34117
34294
  const turn1 = await ctx.sendWithParseRetry(initialPrompt);
34118
- const refinePrompt = new PlanPromptBuilder().buildRefineContinuation(ctx.input.outputPath);
34119
- const turn2 = await ctx.send(refinePrompt);
34120
- return {
34121
- ...turn2,
34122
- estimatedCostUsd: (turn1.estimatedCostUsd ?? 0) + (turn2.estimatedCostUsd ?? 0)
34123
- };
34295
+ const turn2 = await ctx.send(builder.buildRefineContinuation(ctx.input.outputPath));
34296
+ let totalCost = (turn1.estimatedCostUsd ?? 0) + (turn2.estimatedCostUsd ?? 0);
34297
+ let last = turn2;
34298
+ const missing = await readMissingVerbatimAcs(ctx.input);
34299
+ if (missing.length > 0) {
34300
+ getSafeLogger()?.info("plan", "Refine dropped [verbatim] spec ACs \u2014 issuing one repair turn", {
34301
+ featureName: ctx.input.featureName,
34302
+ missingCount: missing.length
34303
+ });
34304
+ const turn3 = await ctx.send(builder.buildVerbatimRepair(missing, ctx.input.outputPath));
34305
+ totalCost += turn3.estimatedCostUsd ?? 0;
34306
+ last = turn3;
34307
+ }
34308
+ return { ...last, estimatedCostUsd: totalCost };
34124
34309
  },
34125
34310
  parse(output, input) {
34126
34311
  return validatePlanOutput(output, input.featureName, input.branchName);
34127
34312
  },
34128
- verify: async (parsed, _input, _ctx) => {
34129
- return validateRefinedPrd(parsed);
34313
+ verify: async (parsed, input, _ctx) => {
34314
+ const validated = validateRefinedPrd(parsed);
34315
+ warnOnDroppedVerbatimAcs(validated, input.specContent, input.featureName);
34316
+ return validated;
34130
34317
  },
34131
34318
  recover: async (input, ctx) => {
34132
34319
  const content = await ctx.readFile(input.outputPath);
@@ -35260,6 +35447,17 @@ function parseRefinementResponse(response, criteria) {
35260
35447
  return fallbackCriteria(criteria);
35261
35448
  }
35262
35449
  }
35450
/**
 * Predict whether a refinement response contains no usable JSON array.
 *
 * @param response - Raw model output.
 * @returns true when the response is empty or whitespace-only, when the
 *   extracted and cleaned text fails JSON.parse, or when it parses to
 *   something other than an array; false otherwise.
 */
+ function refinementWouldFallback(response) {
35451
+ if (!response || !response.trim())
35452
+ return true;
35453
+ try {
35454
+ const fromFence = extractJsonFromMarkdown(response);
35455
// NOTE(review): both branches of this conditional yield the same string
// value, so it is equivalent to passing fromFence directly.
+ const cleaned = stripTrailingCommas(fromFence !== response ? fromFence : response);
35456
+ return !Array.isArray(JSON.parse(cleaned));
35457
+ } catch {
35458
+ return true;
35459
+ }
35460
+ }
35263
35461
  function fallbackCriteria(criteria, storyId = "") {
35264
35462
  return criteria.map((c) => ({
35265
35463
  original: c,
@@ -35275,6 +35473,7 @@ var acceptanceRefineOp;
35275
35473
  var init_acceptance_refine = __esm(() => {
35276
35474
  init_refinement();
35277
35475
  init_config();
35476
+ init_logger2();
35278
35477
  init_prompts();
35279
35478
  acceptanceRefineOp = {
35280
35479
  kind: "complete",
@@ -35297,6 +35496,9 @@ var init_acceptance_refine = __esm(() => {
35297
35496
  };
35298
35497
  },
35299
35498
  parse(output, input, _ctx) {
35499
+ if (refinementWouldFallback(output)) {
35500
+ getSafeLogger()?.warn("acceptance", "AC refinement returned no usable JSON \u2014 falling back to unrefined criteria", { storyId: input.storyId, criteriaCount: input.criteria.length, responseBytes: output?.length ?? 0 });
35501
+ }
35300
35502
  const items = parseRefinementResponse(output, input.criteria);
35301
35503
  return items.map((item) => ({ ...item, storyId: item.storyId || input.storyId }));
35302
35504
  }
@@ -35448,6 +35650,25 @@ function evaluateRepromptTrigger2(shape, input) {
35448
35650
  return { shouldReprompt: false };
35449
35651
  return { shouldReprompt: true, acDropped: dropped };
35450
35652
  }
35653
/**
 * Re-prompt the semantic reviewer once when it passed with no findings and no
 * inspection trail.
 *
 * @param turn - First reviewer turn; its fields are spread into the result.
 * @param parsed - Validated shape of the first response (passed, findings).
 * @param rawObject - Unvalidated parsed JSON, read only for inspectedFiles.
 * @param ctx - Hop context providing input config, story id and send().
 * @returns null when no re-prompt is needed (mode is not "ref", feature
 *   disabled, verdict not an empty pass, or a trail is present). Otherwise a
 *   turn whose cost is the sum of both turns and whose output is the second
 *   response when that response validates, or the original output when not.
 */
+ async function maybeRepromptForInspection2(turn, parsed, rawObject, ctx) {
35654
+ if (ctx.input.mode !== "ref")
35655
+ return null;
35656
// Only an explicit false disables the guard; an undefined setting keeps it on.
+ if (ctx.input.semanticConfig.demandInspectionTrail === false)
35657
+ return null;
35658
// The guard targets empty passes only; failed or non-empty reviews are left alone.
+ if (!parsed.passed || parsed.findings.length !== 0)
35659
+ return null;
35660
+ if (hasInspectionTrail(rawObject))
35661
+ return null;
35662
+ const secondTurn = await ctx.send(ReviewPromptBuilder.demandInspection());
35663
+ const costUsd = (turn.estimatedCostUsd ?? 0) + (secondTurn.estimatedCostUsd ?? 0);
35664
+ const secondParsed = validateLLMShape(tryParseLLMJson(secondTurn.output));
35665
// NOTE(review): "recovered" reflects only that the second response validated;
// the second response is not re-checked for an inspection trail here.
+ getSafeLogger()?.warn("review", "Semantic reviewer returned empty pass with no inspection trail \u2014 re-prompted", {
35666
+ storyId: ctx.input.story.id,
35667
+ event: "review.semantic.inspection_trail.reprompted",
35668
+ recovered: secondParsed !== null
35669
+ });
35670
// When the second response does not validate, keep the first output but still bill both turns.
+ return secondParsed ? { ...turn, output: secondTurn.output, estimatedCostUsd: costUsd } : { ...turn, estimatedCostUsd: costUsd };
35671
+ }
35451
35672
  async function performSemanticReground(turn, firstParsed, drops, ctx) {
35452
35673
  const threshold = ctx.input.blockingThreshold ?? "error";
35453
35674
  const acceptanceCriteria = ctx.input.story.acceptanceCriteria;
@@ -35568,9 +35789,13 @@ async function requoteBlockingFindings(findings, ctx) {
35568
35789
  }
35569
35790
  var FAIL_OPEN2, SEMANTIC_REQUOTE_RECOVERED_EVENT = "review.semantic.finding.requote_recovered", SEMANTIC_REQUOTE_FAILED_EVENT = "review.semantic.finding.requote_failed", DEFAULT_MAX_REQUOTES2 = 5, semanticReviewHopBody = async (initialPrompt, ctx) => {
35570
35791
  const turn = await ctx.sendWithParseRetry(initialPrompt);
35571
- const parsed = validateLLMShape(tryParseLLMJson(turn.output));
35792
+ const rawObject = tryParseLLMJson(turn.output);
35793
+ const parsed = validateLLMShape(rawObject);
35572
35794
  if (!parsed)
35573
35795
  return turn;
35796
+ const inspectionGuard = await maybeRepromptForInspection2(turn, parsed, rawObject, ctx);
35797
+ if (inspectionGuard)
35798
+ return inspectionGuard;
35574
35799
  const requoted = await requoteBlockingFindings(parsed.findings, ctx);
35575
35800
  if (requoted.changed) {
35576
35801
  const passed = !requoted.findings.some((finding) => isBlockingSeverity(finding.severity, ctx.input.blockingThreshold ?? "error"));
@@ -38925,6 +39150,7 @@ var init_operations = __esm(() => {
38925
39150
  init_call();
38926
39151
  init_plan();
38927
39152
  init_plan_refine();
39153
+ init_verbatim_warn();
38928
39154
  init_decompose2();
38929
39155
  init_build_hop_callback();
38930
39156
  init_classify_route();
@@ -41798,6 +42024,9 @@ Output ONLY the JSON object. Do not include markdown fences or explanation.`;
41798
42024
 
41799
42025
  Review the draft with a strict self-audit mindset. Re-read the codebase context and compare the PRD against it. Focus only on the issues below, then rewrite the PRD if needed.
41800
42026
 
42027
+ #### spec-ac-preservation
42028
+ Enumerate every acceptance criterion the spec states. Confirm each one appears in some story's acceptanceCriteria \u2014 never drop a spec AC during this audit. If an AC looks unsupported by the current codebase, keep it: the story may be adding that capability. Any AC the spec tags \`[verbatim]\` MUST appear character-for-character in an acceptanceCriteria entry \u2014 preserve every backtick-quoted command, file path, regex, and count exactly. If a \`[verbatim]\` AC is missing or altered, restore it verbatim.
42029
+
41801
42030
  #### ac-testable
41802
42031
  For each acceptance criterion, ask whether the assertion is observable through a return value, exception, log output, file content, or state change. If any AC is not directly testable, rewrite it so it is observable.
41803
42032
 
@@ -41810,7 +42039,7 @@ Check whether any sentence in any description contradicts an acceptance criterio
41810
42039
  #### codebase-fit
41811
42040
  For each story, verify:
41812
42041
  1. Proposed files, helpers, tests, dependencies, and implementation notes match the codebase context. Remove invented helpers, files, call sites, or dependencies unless the change clearly requires creating them.
41813
- 2. Each acceptance criterion's semantic meaning matches the spec's actual interface and data flow. Criteria that assert incorrect parameter semantics, wrong data flow, or behavior that contradicts the spec must be corrected or removed. Cross-check each AC against the spec's interface definitions, pseudocode, and design notes.
42042
+ 2. Each acceptance criterion's semantic meaning matches the spec's actual interface and data flow. Criteria that assert incorrect parameter semantics, wrong data flow, or behavior that contradicts the spec must be corrected. Never delete an AC that restates a spec AC \u2014 correct its wording to match the spec instead. Cross-check each AC against the spec's interface definitions, pseudocode, and design notes.
41814
42043
 
41815
42044
  #### contextfiles-spec-alignment
41816
42045
  For each story, compare contextFiles against files the spec explicitly lists as context (e.g., in "Context Files" sections). Ensure the most critical spec-recommended files are included, up to the 5-file limit. If a spec-recommended file is absent, add it (removing the least critical one if already at 5). Files the story will CREATE must not appear here.
@@ -41828,6 +42057,23 @@ If a story changes existing behavior, extracts a shared helper, extends an exist
41828
42057
  Check each story's title, description, scope, contextFiles, and acceptance criteria for internal consistency. If the story says a file or command is in scope anywhere else, do not list it as out of scope. If the title or acceptance criteria clearly include CLI, output, tests, or helper extraction work, the Scope section must reflect that accurately.
41829
42058
 
41830
42059
  Write the revised PRD to this file path: ${outputFilePath}
42060
+ Do not output the PRD in chat. After writing the file, reply with a brief text confirmation only.`;
42061
+ }
42062
+ buildVerbatimRepair(missingAcs, outputFilePath) {
42063
+ const list = missingAcs.map((ac) => `- ${ac}`).join(`
42064
+ `);
42065
+ return `Your revised PRD dropped or altered acceptance criteria the spec marked \`[verbatim]\`. These are load-bearing executable checks (greps, file-existence checks, regex/count assertions, or architectural invariants) and MUST survive character-for-character \u2014 paraphrasing destroys the verification mechanism.
42066
+
42067
+ The following \`[verbatim]\` spec acceptance criteria are missing or altered in the PRD:
42068
+
42069
+ ${list}
42070
+
42071
+ For each one:
42072
+ - Add it to the \`acceptanceCriteria\` array of the single most relevant user story.
42073
+ - Preserve every backtick-quoted command, file path, regex, and count exactly as written in the spec. Do not paraphrase, retag, split, or move them into a description.
42074
+ - Do not remove or weaken any acceptance criteria that are already correct.
42075
+
42076
+ Write the corrected PRD to this file path: ${outputFilePath}
41831
42077
  Do not output the PRD in chat. After writing the file, reply with a brief text confirmation only.`;
41832
42078
  }
41833
42079
  build(specContent, codebaseContext, outputFilePath, packages, packageDetails, projectProfile, proposers) {
@@ -58108,7 +58354,7 @@ var package_default;
58108
58354
  var init_package = __esm(() => {
58109
58355
  package_default = {
58110
58356
  name: "@nathapp/nax",
58111
- version: "0.68.2",
58357
+ version: "0.68.3",
58112
58358
  description: "AI Coding Agent Orchestrator \u2014 loops until done",
58113
58359
  type: "module",
58114
58360
  bin: {
@@ -58203,8 +58449,8 @@ var init_version = __esm(() => {
58203
58449
  NAX_VERSION = package_default.version;
58204
58450
  NAX_COMMIT = (() => {
58205
58451
  try {
58206
- if (/^[0-9a-f]{6,10}$/.test("27a81a5e"))
58207
- return "27a81a5e";
58452
+ if (/^[0-9a-f]{6,10}$/.test("a1007103"))
58453
+ return "a1007103";
58208
58454
  } catch {}
58209
58455
  try {
58210
58456
  const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
@@ -60854,7 +61100,7 @@ function extractQuoteTriples(reason) {
60854
61100
  }
60855
61101
  return triples;
60856
61102
  }
60857
- function normalizeWs2(s) {
61103
+ function normalizeWs3(s) {
60858
61104
  return s.replace(/\s+/g, " ").trim();
60859
61105
  }
60860
61106
  async function verifyQuoteTriple(triple, workdir, deps = _quoteIntegrityDeps) {
@@ -60868,7 +61114,7 @@ async function verifyQuoteTriple(triple, workdir, deps = _quoteIntegrityDeps) {
60868
61114
  const end = Math.min(lines.length, triple.line + CONTEXT_LINES);
60869
61115
  const window2 = lines.slice(start, end).join(`
60870
61116
  `);
60871
- return normalizeWs2(window2).toLowerCase().includes(normalizeWs2(triple.quote).toLowerCase());
61117
+ return normalizeWs3(window2).toLowerCase().includes(normalizeWs3(triple.quote).toLowerCase());
60872
61118
  }
60873
61119
  async function verifyEscalationQuotes(reason, workdir, storyId, deps = _quoteIntegrityDeps) {
60874
61120
  const triples = extractQuoteTriples(reason);
@@ -95189,6 +95435,7 @@ class DebatePlanStrategy {
95189
95435
  });
95190
95436
  if (debateResult.outcome !== "failed" && debateResult.output) {
95191
95437
  const prd2 = validatePlanOutput(debateResult.output, ctx.options.feature, ctx.branchName);
95438
+ warnOnDroppedVerbatimAcs(prd2, ctx.specContent, ctx.options.feature);
95192
95439
  const withProject2 = { ...prd2, project: ctx.projectName };
95193
95440
  return _debatePlanDeps.writeOrRecoverPrd(ctx, withProject2);
95194
95441
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.68.2",
3
+ "version": "0.68.3",
4
4
  "description": "AI Coding Agent Orchestrator — loops until done",
5
5
  "type": "module",
6
6
  "bin": {