@nathapp/nax 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. package/.claude/settings.json +15 -0
  2. package/.mcp.json +8 -0
  3. package/docs/20260304-review-nax.md +492 -0
  4. package/docs/ROADMAP.md +65 -18
  5. package/docs/adr/ADR-005-implementation-plan.md +655 -0
  6. package/docs/adr/ADR-005-pipeline-re-architecture.md +464 -0
  7. package/docs/specs/bug-039-orphan-processes.md +131 -0
  8. package/docs/specs/bug-040-review-rectification.md +82 -0
  9. package/docs/specs/bug-041-cross-story-test-isolation.md +88 -0
  10. package/docs/specs/bug-042-verifier-failure-capture.md +117 -0
  11. package/docs/specs/feat-010-smart-runner-git-history.md +96 -0
  12. package/docs/specs/feat-011-file-context-strategy.md +73 -0
  13. package/docs/specs/feat-012-tdd-writer-tier.md +79 -0
  14. package/docs/specs/feat-013-test-after-review.md +89 -0
  15. package/docs/specs/feat-014-heartbeat-observability.md +127 -0
  16. package/memory/topic/feat-010-baseref.md +28 -0
  17. package/memory/topic/feat-013-test-after-deprecation.md +22 -0
  18. package/nax/config.json +7 -4
  19. package/nax/features/bug-039-medium/prd.json +45 -0
  20. package/package.json +2 -2
  21. package/src/agents/claude.ts +109 -15
  22. package/src/config/types.ts +11 -0
  23. package/src/context/builder.ts +9 -1
  24. package/src/execution/dry-run.ts +81 -0
  25. package/src/execution/escalation/tier-outcome.ts +29 -44
  26. package/src/execution/executor-types.ts +65 -0
  27. package/src/execution/index.ts +0 -17
  28. package/src/execution/iteration-runner.ts +132 -0
  29. package/src/execution/lifecycle/index.ts +0 -1
  30. package/src/execution/lifecycle/run-regression.ts +5 -5
  31. package/src/execution/pipeline-result-handler.ts +51 -254
  32. package/src/execution/sequential-executor.ts +72 -315
  33. package/src/execution/story-selector.ts +75 -0
  34. package/src/pipeline/event-bus.ts +276 -0
  35. package/src/pipeline/runner.ts +51 -77
  36. package/src/pipeline/stages/autofix.ts +133 -0
  37. package/src/pipeline/stages/completion.ts +22 -30
  38. package/src/pipeline/stages/index.ts +30 -13
  39. package/src/pipeline/stages/rectify.ts +93 -0
  40. package/src/pipeline/stages/regression.ts +88 -0
  41. package/src/pipeline/stages/review.ts +19 -153
  42. package/src/pipeline/stages/verify.ts +19 -3
  43. package/src/pipeline/subscribers/hooks.ts +133 -0
  44. package/src/pipeline/subscribers/interaction.ts +68 -0
  45. package/src/pipeline/subscribers/reporters.ts +174 -0
  46. package/src/pipeline/types.ts +12 -1
  47. package/src/review/orchestrator.ts +105 -0
  48. package/src/review/runner.ts +39 -4
  49. package/src/routing/router.ts +3 -3
  50. package/src/routing/strategies/keyword.ts +5 -2
  51. package/src/routing/strategies/llm.ts +27 -1
  52. package/src/tdd/prompts.ts +1 -1
  53. package/src/utils/git.ts +49 -25
  54. package/src/verification/executor.ts +8 -2
  55. package/src/verification/index.ts +1 -1
  56. package/src/verification/orchestrator-types.ts +145 -0
  57. package/src/verification/orchestrator.ts +76 -0
  58. package/src/{execution/post-verify-rectification.ts → verification/rectification-loop.ts} +13 -20
  59. package/src/verification/{gate.ts → runners.ts} +17 -105
  60. package/src/verification/smart-runner.ts +6 -10
  61. package/src/verification/strategies/acceptance.ts +133 -0
  62. package/src/verification/strategies/regression.ts +90 -0
  63. package/src/verification/strategies/scoped.ts +123 -0
  64. package/test/COVERAGE-GAPS.md +333 -0
  65. package/test/{acceptance → e2e}/cm-003-default-view.test.ts +1 -0
  66. package/test/{integration/e2e.test.ts → e2e/plan-analyze-run.test.ts} +1 -0
  67. package/test/integration/{agent-validation.test.ts → cli/agent-validation.test.ts} +3 -3
  68. package/test/integration/{cli-config-default-edge-cases.test.ts → cli/cli-config-default-edge-cases.test.ts} +6 -5
  69. package/test/integration/{cli-config-default-view.test.ts → cli/cli-config-default-view.test.ts} +8 -7
  70. package/test/integration/{cli-config-diff.test.ts → cli/cli-config-diff.test.ts} +3 -2
  71. package/test/integration/{cli-config.test.ts → cli/cli-config.test.ts} +3 -2
  72. package/test/integration/{cli-diagnose.test.ts → cli/cli-diagnose.test.ts} +5 -4
  73. package/test/integration/{cli-logs.test.ts → cli/cli-logs.test.ts} +12 -3
  74. package/test/integration/{cli-plugins.test.ts → cli/cli-plugins.test.ts} +4 -3
  75. package/test/integration/{cli-precheck.test.ts → cli/cli-precheck.test.ts} +4 -3
  76. package/test/integration/{cli-run-headless.test.ts → cli/cli-run-headless.test.ts} +3 -2
  77. package/test/integration/{cli.test.ts → cli/cli.test.ts} +2 -1
  78. package/test/integration/{precheck-integration.test.ts → cli/precheck-integration.test.ts} +10 -9
  79. package/test/integration/{precheck-orchestrator.test.ts → cli/precheck-orchestrator.test.ts} +4 -3
  80. package/test/integration/{precheck.test.ts → cli/precheck.test.ts} +5 -4
  81. package/test/integration/{config-loader.test.ts → config/config-loader.test.ts} +2 -1
  82. package/test/integration/{config.test.ts → config/config.test.ts} +2 -2
  83. package/test/integration/config/merger.test.ts +1 -0
  84. package/test/integration/config/paths.test.ts +1 -0
  85. package/test/integration/{security-loader.test.ts → config/security-loader.test.ts} +2 -2
  86. package/test/integration/{context-integration.test.ts → context/context-integration.test.ts} +7 -6
  87. package/test/integration/{path-security.test.ts → context/context-path-security.test.ts} +2 -2
  88. package/test/integration/{context-provider-injection.test.ts → context/context-provider-injection.test.ts} +7 -6
  89. package/test/integration/{context-verification-integration.test.ts → context/context-verification-integration.test.ts} +5 -4
  90. package/test/integration/{s5-greenfield-fallback.test.ts → context/s5-greenfield-fallback.test.ts} +4 -3
  91. package/test/integration/{isolation.test.ts → execution/execution-isolation.test.ts} +1 -1
  92. package/test/integration/{execution.test.ts → execution/execution.test.ts} +8 -8
  93. package/test/integration/{parallel.test.ts → execution/parallel.test.ts} +2 -1
  94. package/test/integration/{prd-pause.test.ts → execution/prd-pause.test.ts} +2 -2
  95. package/test/integration/{prd-resolvers.test.ts → execution/prd-resolvers.test.ts} +3 -2
  96. package/test/integration/{progress.test.ts → execution/progress.test.ts} +1 -1
  97. package/test/integration/execution/runner-batching.test.ts +682 -0
  98. package/test/integration/{runner-config-plugins.test.ts → execution/runner-config-plugins.test.ts} +3 -2
  99. package/test/integration/execution/runner-escalation.test.ts +561 -0
  100. package/test/integration/{runner-fixes.test.ts → execution/runner-fixes.test.ts} +4 -3
  101. package/test/integration/{runner-plugin-integration.test.ts → execution/runner-plugin-integration.test.ts} +6 -5
  102. package/test/integration/execution/runner-queue-and-attempts.test.ts +476 -0
  103. package/test/integration/{status-file-integration.test.ts → execution/status-file-integration.test.ts} +9 -8
  104. package/test/integration/{status-file.test.ts → execution/status-file.test.ts} +3 -2
  105. package/test/integration/{status-writer.test.ts → execution/status-writer.test.ts} +5 -4
  106. package/test/integration/{story-id-in-events.test.ts → execution/story-id-in-events.test.ts} +9 -8
  107. package/test/integration/{interaction-chain-pipeline.test.ts → interaction/interaction-chain-pipeline.test.ts} +26 -14
  108. package/test/integration/{hooks.test.ts → pipeline/hooks.test.ts} +4 -2
  109. package/test/integration/{pipeline-acceptance.test.ts → pipeline/pipeline-acceptance.test.ts} +7 -6
  110. package/test/integration/{pipeline-events.test.ts → pipeline/pipeline-events.test.ts} +7 -6
  111. package/test/integration/{pipeline.test.ts → pipeline/pipeline.test.ts} +9 -7
  112. package/test/integration/{reporter-lifecycle.test.ts → pipeline/reporter-lifecycle.test.ts} +9 -7
  113. package/test/integration/{verify-stage.test.ts → pipeline/verify-stage.test.ts} +7 -5
  114. package/test/integration/{analyze-integration.test.ts → plan/analyze-integration.test.ts} +3 -2
  115. package/test/integration/{analyze-scanner.test.ts → plan/analyze-scanner.test.ts} +8 -7
  116. package/test/integration/{logger.test.ts → plan/logger.test.ts} +1 -1
  117. package/test/integration/{plan.test.ts → plan/plan.test.ts} +3 -3
  118. package/test/integration/plugins/config-integration.test.ts +1 -0
  119. package/test/integration/plugins/config-resolution.test.ts +1 -0
  120. package/test/integration/plugins/loader.test.ts +1 -0
  121. package/test/integration/plugins/{registry.test.ts → plugins-registry.test.ts} +1 -0
  122. package/test/integration/plugins/validator.test.ts +1 -0
  123. package/test/integration/{review-config-commands.test.ts → review/review-config-commands.test.ts} +4 -3
  124. package/test/integration/{review-config-schema.test.ts → review/review-config-schema.test.ts} +3 -2
  125. package/test/integration/{review-plugin-integration.test.ts → review/review-plugin-integration.test.ts} +5 -4
  126. package/test/integration/{review.test.ts → review/review.test.ts} +3 -2
  127. package/test/integration/routing/plugin-routing-advanced.test.ts +461 -0
  128. package/test/integration/{plugin-routing.test.ts → routing/plugin-routing-core.test.ts} +10 -404
  129. package/test/integration/{routing-stage-bug-021.test.ts → routing/routing-stage-bug-021.test.ts} +8 -7
  130. package/test/integration/{routing-stage-greenfield.test.ts → routing/routing-stage-greenfield.test.ts} +7 -6
  131. package/test/integration/{tdd-cleanup.test.ts → tdd/tdd-cleanup.test.ts} +1 -1
  132. package/test/integration/tdd/tdd-orchestrator-core.test.ts +565 -0
  133. package/test/integration/tdd/tdd-orchestrator-failureCategory.test.ts +355 -0
  134. package/test/integration/tdd/tdd-orchestrator-fallback.test.ts +311 -0
  135. package/test/integration/tdd/tdd-orchestrator-lite.test.ts +289 -0
  136. package/test/integration/tdd/tdd-orchestrator-prompts.test.ts +260 -0
  137. package/test/integration/tdd/tdd-orchestrator-verdict.test.ts +536 -0
  138. package/test/integration/tmp/headless-test/test.jsonl +30 -0
  139. package/test/integration/{test-scanner.test.ts → verification/test-scanner.test.ts} +1 -1
  140. package/test/integration/{verification-asset-check.test.ts → verification/verification-asset-check.test.ts} +3 -2
  141. package/test/unit/acceptance.test.ts +1 -0
  142. package/test/unit/agent-stderr-capture.test.ts +1 -0
  143. package/test/unit/agents/claude.test.ts +107 -0
  144. package/test/unit/analyze-classifier.test.ts +1 -0
  145. package/test/unit/auto-detect.test.ts +1 -0
  146. package/test/unit/cli-status.test.ts +1 -0
  147. package/test/unit/commands/common.test.ts +1 -0
  148. package/test/unit/commands/logs.test.ts +1 -0
  149. package/test/unit/commands/unlock.test.ts +1 -0
  150. package/test/unit/config/defaults.test.ts +1 -0
  151. package/test/unit/config/regression-gate-schema.test.ts +1 -0
  152. package/test/unit/config/smart-runner-flag.test.ts +1 -0
  153. package/test/unit/constitution-generators.test.ts +1 -0
  154. package/test/unit/constitution.test.ts +1 -0
  155. package/test/unit/context/context-autodetect.test.ts +297 -0
  156. package/test/unit/context/context-build.test.ts +575 -0
  157. package/test/unit/context/context-coverage.test.ts +236 -0
  158. package/test/unit/context/context-error.test.ts +93 -0
  159. package/test/unit/context/context-estimate-tokens.test.ts +201 -0
  160. package/test/unit/context/context-format.test.ts +302 -0
  161. package/test/unit/context/context-isolation.test.ts +267 -0
  162. package/test/unit/context/context-sort.test.ts +93 -0
  163. package/test/unit/context/context-story.test.ts +108 -0
  164. package/test/{context → unit/context}/prior-failures.test.ts +5 -4
  165. package/test/unit/context.test.ts +7 -3
  166. package/test/unit/crash-recovery.test.ts +1 -0
  167. package/test/unit/escalation.test.ts +1 -0
  168. package/test/unit/execution/lifecycle/run-completion.test.ts +1 -0
  169. package/test/unit/execution/lifecycle/run-regression.test.ts +2 -0
  170. package/test/{execution → unit/execution}/pid-registry.test.ts +2 -1
  171. package/test/{execution → unit/execution}/structured-failure.test.ts +3 -2
  172. package/test/unit/execution-logging-stderr.test.ts +1 -0
  173. package/test/unit/execution-stage.test.ts +1 -0
  174. package/test/unit/fix-generator.test.ts +1 -0
  175. package/test/unit/greenfield.test.ts +1 -0
  176. package/test/unit/interaction/human-review-trigger.test.ts +1 -0
  177. package/test/unit/interaction-network-failures.test.ts +1 -0
  178. package/test/unit/interaction-plugins.test.ts +1 -0
  179. package/test/unit/logging/formatter.test.ts +1 -0
  180. package/test/unit/merge.test.ts +1 -0
  181. package/test/unit/pipeline/event-bus.test.ts +105 -0
  182. package/test/unit/pipeline/routing-partial-override.test.ts +1 -0
  183. package/test/unit/pipeline/runner-retry.test.ts +89 -0
  184. package/test/unit/pipeline/stages/autofix.test.ts +97 -0
  185. package/test/unit/pipeline/stages/rectify.test.ts +101 -0
  186. package/test/unit/pipeline/stages/regression-stage.test.ts +69 -0
  187. package/test/unit/pipeline/stages/verify.test.ts +1 -0
  188. package/test/unit/pipeline/subscribers/hooks.test.ts +45 -0
  189. package/test/unit/pipeline/subscribers/interaction.test.ts +31 -0
  190. package/test/unit/pipeline/subscribers/reporters.test.ts +90 -0
  191. package/test/unit/pipeline/verify-smart-runner.test.ts +2 -1
  192. package/test/unit/prd-auto-default.test.ts +3 -2
  193. package/test/unit/prd-failure-category.test.ts +1 -0
  194. package/test/unit/prd-get-next-story.test.ts +1 -0
  195. package/test/unit/precheck-checks.test.ts +1 -0
  196. package/test/unit/precheck-story-size-gate.test.ts +1 -0
  197. package/test/unit/precheck-types.test.ts +1 -0
  198. package/test/unit/prompts.test.ts +1 -0
  199. package/test/unit/rectification.test.ts +2 -1
  200. package/test/unit/registry.test.ts +1 -0
  201. package/test/unit/routing/routing-stability.test.ts +2 -1
  202. package/test/unit/routing/strategies/llm.test.ts +251 -0
  203. package/test/unit/routing-advanced.test.ts +313 -0
  204. package/test/unit/routing-core.test.ts +341 -0
  205. package/test/unit/routing-strategies.test.ts +442 -0
  206. package/test/unit/storyid-events.test.ts +1 -0
  207. package/test/{ui → unit/ui}/tui-controls.test.ts +8 -7
  208. package/test/{ui → unit/ui}/tui-cost-and-pty.test.ts +4 -3
  209. package/test/{ui → unit/ui}/tui-layout.test.ts +5 -4
  210. package/test/{ui → unit/ui}/tui-stories.test.ts +5 -4
  211. package/test/unit/{isolation.test.ts → unit-isolation.test.ts} +1 -0
  212. package/test/unit/{helpers.test.ts → utils-helpers.test.ts} +1 -0
  213. package/test/unit/verdict.test.ts +1 -0
  214. package/test/unit/verification/orchestrator-types.test.ts +54 -0
  215. package/test/unit/verification/orchestrator.test.ts +66 -0
  216. package/test/unit/verification/smart-runner-config.test.ts +1 -0
  217. package/test/unit/verification/smart-runner-discovery.test.ts +8 -7
  218. package/test/unit/verification/strategies/acceptance.test.ts +33 -0
  219. package/test/unit/verification/strategies/regression.test.ts +87 -0
  220. package/test/unit/verification/strategies/scoped.test.ts +100 -0
  221. package/test/unit/worktree-manager.test.ts +1 -0
  222. package/src/execution/lifecycle/story-hooks.ts +0 -38
  223. package/src/execution/post-verify.ts +0 -193
  224. package/src/execution/rectification.ts +0 -13
  225. package/src/execution/verification.ts +0 -72
  226. package/test/integration/rectification-flow.test.ts +0 -512
  227. package/test/integration/runner.test.ts +0 -1679
  228. package/test/integration/tdd-orchestrator.test.ts +0 -1762
  229. package/test/unit/execution/post-verify-regression.test.ts +0 -362
  230. package/test/unit/execution/post-verify.test.ts +0 -236
  231. package/test/unit/routing.test.ts +0 -1039
  232. /package/test/{integration → helpers}/helpers.test.ts +0 -0
  233. /package/test/integration/worktree/{merge.test.ts → worktree-merge.test.ts} +0 -0
@@ -0,0 +1,88 @@
1
+ # BUG-041 — Cross-Story Test Isolation
2
+
3
+ **Status:** Won't Fix — superseded by FEAT-010 (baseRef tracking eliminates root cause)
4
+ **Target:** N/A
5
+ **Author:** Nax Dev
6
+ **Date:** 2026-03-06
7
+
8
+ ---
9
+
10
+ ## 1. Problem
11
+
12
+ **Scenario:**
13
+ 1. Story A touches `src/parser.ts`. Verify runs `test/unit/parser.test.ts` → 2 tests fail. Story A escalates.
14
+ 2. Story B touches `src/formatter.ts`. Smart runner also picks up `test/unit/parser.test.ts` (both changed since common base). Formatter tests pass, parser tests still fail (inherited from Story A).
15
+ 3. Story B is marked failed — its implementation was correct. It escalates needlessly.
16
+
17
+ **Root cause:** Verify has no memory of which test failures pre-existed before a story's session. All failures are attributed to the current story.
18
+
19
+ ---
20
+
21
+ ## 2. Root Cause
22
+
23
+ The verify stage runs tests and reports pass/fail with no concept of:
24
+ - Which tests were already failing before this story ran
25
+ - Whether a failure is "inherited" vs "introduced by this story"
26
+
27
+ ---
28
+
29
+ ## 3. Proposed Solution
30
+
31
+ ### 3.1 Baseline snapshot at story start
32
+
33
+ Before the agent session starts (same time as FEAT-010's `baseRef` capture), determine which test files the smart runner would pick up for this story and record the ones that are already failing. Store those files as `story.inheritedFailures: string[]`.
34
+
35
+ ### 3.2 Verify: filter inherited failures
36
+
37
+ After running tests and parsing `TestFailure[]`:
38
+ - If ALL failures are in `inheritedFailures` files → return `{ action: "continue" }` with warning: *"Failures are pre-existing — not attributed to this story"*
39
+ - If ANY failure is in a new file → escalate normally
40
+
41
+ ### 3.3 Re-verify when source story resolves
42
+
43
+ When Story A eventually passes verify, clear its test files from downstream stories' `inheritedFailures` so they get re-evaluated on the next run.
44
+
45
+ ---
46
+
47
+ ## 4. Data Model Changes
48
+
49
+ ```typescript
50
+ // src/prd/types.ts
51
+ interface UserStory {
52
+ baseRef?: string; // from FEAT-010
53
+ inheritedFailures?: string[]; // NEW — test files already failing before this story
54
+ }
55
+ ```
56
+
57
+ ---
58
+
59
+ ## 5. Files Affected
60
+
61
+ | File | Change |
62
+ |---|---|
63
+ | `src/prd/types.ts` | Add `inheritedFailures?: string[]` to `UserStory` |
64
+ | `src/execution/sequential-executor.ts` | Capture `inheritedFailures` baseline before agent runs |
65
+ | `src/verification/smart-runner.ts` | Export `runBaselineCheck(testFiles, workdir)` helper |
66
+ | `src/pipeline/stages/verify.ts` | Filter inherited failures from escalation decision |
67
+ | `src/execution/lifecycle/run-regression.ts` | Clear inherited failures when source story passes |
68
+
69
+ ---
70
+
71
+ ## 6. Edge Cases
72
+
73
+ | Scenario | Handling |
74
+ |---|---|
75
+ | Baseline check times out | `inheritedFailures: []` — conservative, may incorrectly blame story but no false passes |
76
+ | Flaky inherited failure disappears | Story B's verify finds no inherited failures → correct attribution |
77
+ | ALL test files in `inheritedFailures` | Return `continue` with warning |
78
+ | First story in a run | No prior failures → `inheritedFailures: []` → normal behavior |
79
+ | Deferred regression gate | Runs after all stories pass — inherited failures expected to be resolved |
80
+
81
+ ---
82
+
83
+ ## 7. Test Plan
84
+
85
+ - Story B inherits Story A's failing test file → verify returns `continue` (not escalated)
86
+ - Story B introduces new failing test → escalated normally
87
+ - Story A passes → Story B's `inheritedFailures` cleared for next run
88
+ - Baseline check timeout → `inheritedFailures: []` → conservative
@@ -0,0 +1,117 @@
1
+ # BUG-042 — Verifier Test Failure Capture
2
+
3
+ **Status:** Proposal
4
+ **Target:** v0.21.0
5
+ **Author:** Nax Dev
6
+ **Date:** 2026-03-06
7
+
8
+ ---
9
+
10
+ ## 1. Problem
11
+
12
+ The deferred regression gate (`run-regression.ts`) calls `parseBunTestOutput()` → gets structured `TestFailure[]` (file, testName, error, stackTrace) → targeted rectification works well.
13
+
14
+ The per-story verify stage (`verify.ts`) does NOT call `parseBunTestOutput()` on failure → passes raw output string to rectification → agent receives a wall of text and must parse it mentally.
15
+
16
+ **Same failure, two different agent experiences:**
17
+
18
+ | Path | Agent gets | Quality |
19
+ |---|---|---|
20
+ | Deferred regression | Structured `TestFailure[]` | ✅ Precise context |
21
+ | Per-story verify | Raw output (last 20 lines) | ⚠️ Noisy, may miss root cause |
22
+
23
+ ---
24
+
25
+ ## 2. Current vs Proposed Data Flow
26
+
27
+ **Current:**
28
+ ```
29
+ verify.ts → runVerification() → { success: false, output: "...raw..." }
30
+ → rectification: testOutput = raw string
31
+ → priorFailures[].testFailures = undefined
32
+ → agent prompt: wall of text
33
+ ```
34
+
35
+ **Proposed:**
36
+ ```
37
+ verify.ts → runVerification() → { success: false, output: "...raw..." }
38
+ → parseBunTestOutput(output) → TestFailure[]
39
+ → VerificationResult.failures = TestFailure[]
40
+ → rectification: testOutput + structured failures
41
+ → priorFailures[].testFailures = TestFailure[]
42
+ → agent prompt: structured failure table
43
+ ```
44
+
45
+ ---
46
+
47
+ ## 3. Code Changes
48
+
49
+ **`src/verification/types.ts`** — add failures field:
50
+ ```typescript
51
+ interface VerificationResult {
52
+ success: boolean;
53
+ output?: string;
54
+ status: "SUCCESS" | "TEST_FAILURE" | "TIMEOUT" | "ERROR";
55
+ passCount?: number;
56
+ failCount?: number;
57
+ failures?: TestFailure[]; // NEW
58
+ }
59
+ ```
60
+
61
+ **`src/pipeline/stages/verify.ts`** — parse on failure:
62
+ ```typescript
63
+ // Add to _verifyDeps:
64
+ export const _verifyDeps = {
65
+ regression,
66
+ parseBunTestOutput, // NEW — injectable for tests
67
+ };
68
+
69
+ // After runVerification() failure:
70
+ if (result.status === "TEST_FAILURE") {
71
+ result.failures = _verifyDeps.parseBunTestOutput(result.output ?? "").failures;
72
+ }
73
+ ```
74
+
75
+ **Structured log** — replace last-20-lines with failure summary:
76
+ ```typescript
77
+ // Current: logger.warn("verify", "Test failures", { output: last20lines });
78
+ // Proposed:
79
+ for (const f of (result.failures ?? []).slice(0, 5)) {
80
+ logger.warn("verify", `FAIL: ${f.testName}`, { file: f.file, error: f.error });
81
+ }
82
+ ```
83
+
84
+ **`src/execution/post-verify-rectification.ts`** — populate `testFailures` in `StructuredFailure`:
85
+ ```typescript
86
+ const structuredFailure: StructuredFailure = {
87
+ // ...existing fields
88
+ testFailures: result.failures?.map(f => ({
89
+ file: f.file ?? "",
90
+ testName: f.testName,
91
+ error: f.error,
92
+ stackTrace: f.stackTrace ?? [],
93
+ })),
94
+ };
95
+ ```
96
+
97
+ ---
98
+
99
+ ## 4. Files Affected
100
+
101
+ | File | Change |
102
+ |---|---|
103
+ | `src/verification/types.ts` | Add `failures?: TestFailure[]` to `VerificationResult` |
104
+ | `src/pipeline/stages/verify.ts` | Call `parseBunTestOutput()` on failure; add to `_verifyDeps` |
105
+ | `src/execution/post-verify-rectification.ts` | Populate `testFailures` from `result.failures` |
106
+ | `src/execution/verification.ts` | Pass `failures` through if available |
107
+
108
+ ---
109
+
110
+ ## 5. Test Plan
111
+
112
+ - `verify.ts` test failure → `result.failures` populated with `TestFailure[]`
113
+ - `result.failures` forwarded to rectification loop and `priorFailures`
114
+ - Agent prompt includes structured failure table (via existing priorFailures formatter)
115
+ - `parseBunTestOutput` in `_verifyDeps` is mockable
116
+ - Empty/no output → `result.failures = []` (no crash)
117
+ - Timeout → `result.failures` not set (timeout ≠ test failure)
@@ -0,0 +1,96 @@
1
+ # FEAT-010 — Smart Test Runner: Git-History Mode
2
+
3
+ **Status:** Proposal
4
+ **Target:** v0.21.0
5
+ **Author:** Nax Dev
6
+ **Date:** 2026-03-06
7
+
8
+ ---
9
+
10
+ ## 1. Problem with Current Approach
11
+
12
+ Smart Test Runner uses `git diff --name-only HEAD` (or `HEAD~1`) to find changed files. This breaks in several scenarios:
13
+
14
+ | Scenario | Problem |
15
+ |---|---|
16
+ | Agent makes 3 commits | `HEAD~1` only sees last commit; earlier changes missed |
17
+ | Agent uses `git commit --amend` | HEAD stays same; diff shows nothing |
18
+ | Uncommitted staged changes | Picks up unrelated staged changes |
19
+ | Story retried after partial commit | Baseline resets to wrong point |
20
+
21
+ Result: empty `[]` → full suite fallback (150s+) → deferred mode skips → no per-story tests.
22
+
23
+ ---
24
+
25
+ ## 2. Proposed Solution
26
+
27
+ Track a **base commit ref** per story at session start, stored as `story.baseRef`. On verify, diff `HEAD` against `story.baseRef` — this yields the exact files the agent touched, regardless of commit count.
28
+
29
+ ```
30
+ Story starts → capture git HEAD → store as story.baseRef
31
+ Agent runs → makes N commits (any pattern)
32
+ Verify runs → git diff --name-only story.baseRef HEAD → precise file list
33
+ ```
34
+
35
+ ---
36
+
37
+ ## 3. Implementation Details
38
+
39
+ **Capture baseRef** in `sequential-executor.ts` before agent launch:
40
+ ```typescript
41
+ story.baseRef = await captureGitRef(workdir); // already exists in utils/git.ts
42
+ await savePrd(prd, prdPath);
43
+ ```
44
+
45
+ **New mode branch** in `smart-runner.ts`:
46
+ ```typescript
47
+ if (mode === "git-history" && story?.baseRef) {
48
+ return gitWithTimeout(["diff", "--name-only", story.baseRef, "HEAD"], workdir);
49
+ }
50
+ // fallback: existing git-diff logic
51
+ ```
52
+
53
+ ---
54
+
55
+ ## 4. Files Affected
56
+
57
+ | File | Change |
58
+ |---|---|
59
+ | `src/prd/types.ts` | Add `baseRef?: string` to `UserStory` |
60
+ | `src/execution/sequential-executor.ts` | Capture `baseRef` before agent, persist to PRD |
61
+ | `src/verification/smart-runner.ts` | Add `"git-history"` mode |
62
+ | `src/config/schemas.ts` | Add `smartTestRunner.mode: "git-diff" \| "git-history"` |
63
+ | `src/config/types.ts` | Add `mode` to `SmartTestRunnerConfig` |
64
+
65
+ ---
66
+
67
+ ## 5. Config Changes
68
+
69
+ ```jsonc
70
+ {
71
+ "execution": {
72
+ "smartTestRunnerConfig": {
73
+ "mode": "git-history", // "git-diff" (default) | "git-history"
74
+ "enabled": true
75
+ }
76
+ }
77
+ }
78
+ ```
79
+
80
+ ---
81
+
82
+ ## 6. Migration / Compatibility
83
+
84
+ - Default: `"git-diff"` — no behavior change
85
+ - `"git-history"` opt-in
86
+ - Missing `story.baseRef` → falls back to `"git-diff"` (no crash)
87
+ - nax self-dev config should switch to `"git-history"` immediately
88
+
89
+ ---
90
+
91
+ ## 7. Test Plan
92
+
93
+ - `baseRef` captured and persisted before agent runs
94
+ - Multi-commit session: all files detected (not just last commit's)
95
+ - Missing `baseRef` → graceful fallback to `"git-diff"`
96
+ - `captureGitRef()` failure → `baseRef` undefined, fallback used
@@ -0,0 +1,73 @@
1
+ # FEAT-011 — File Context Strategy
2
+
3
+ **Status:** Proposal
4
+ **Target:** v0.21.0
5
+ **Author:** Nax Dev
6
+ **Date:** 2026-03-06
7
+
8
+ ---
9
+
10
+ ## 1. Problem
11
+
12
+ nax injects full file content into agent prompts for all relevant source files. For large files (500+ lines), this bloats the context window — increasing cost and reducing focus. The agent has tool access to read files directly, making full content injection for large files redundant.
13
+
14
+ ---
15
+
16
+ ## 2. Proposed Config
17
+
18
+ ```jsonc
19
+ {
20
+ "context": {
21
+ "fileContext": {
22
+ "strategy": "auto", // "auto" | "full" | "path-only"
23
+ "maxInlineLines": 500, // threshold for "auto" mode
24
+ "previewLines": 20 // lines shown in the large-file preview ("auto" mode, file > maxInlineLines)
25
+ }
26
+ }
27
+ }
28
+ ```
29
+
30
+ ---
31
+
32
+ ## 3. Injection Logic
33
+
34
+ | Strategy | Condition | Agent receives |
35
+ |---|---|---|
36
+ | `"full"` | always | Complete file content |
37
+ | `"path-only"` | always | Relative path + line count only |
38
+ | `"auto"` | file ≤ `maxInlineLines` | Complete file content |
39
+ | `"auto"` | file > `maxInlineLines` | Path + line count + first `previewLines` lines |
40
+
41
+ **Large file preview format:**
42
+ ```
43
+ // src/execution/sequential-executor.ts (847 lines — use Read tool for full content)
44
+ import { ... } from "...";
45
+ // first 20 lines...
46
+ ```
47
+
48
+ ---
49
+
50
+ ## 4. Files Affected
51
+
52
+ | File | Change |
53
+ |---|---|
54
+ | `src/config/schemas.ts` | Add `context.fileContext` schema |
55
+ | `src/config/types.ts` | Add `FileContextConfig` interface |
56
+ | `src/context/builder.ts` | Apply strategy when injecting file content |
57
+ | `src/context/providers/` | Update providers that inject raw file content |
58
+
59
+ ---
60
+
61
+ ## 5. Cost Impact
62
+
63
+ Primary benefit is **quality** (more focused context), not raw cost savings. Rough estimate for a typical 5-story run: ~3000 tokens saved if avg file is 800 lines. At sonnet pricing: <$0.01 per run — marginal, but compounds.
64
+
65
+ ---
66
+
67
+ ## 6. Test Plan
68
+
69
+ - `strategy: "full"` → always full content regardless of line count
70
+ - `strategy: "path-only"` → always path + count only
71
+ - `strategy: "auto"`, 300-line file → full content
72
+ - `strategy: "auto"`, 600-line file → path + 20-line preview
73
+ - Default: `"auto"` with `maxInlineLines: 500`
@@ -0,0 +1,79 @@
1
+ # FEAT-012 — TDD Test Writer Tier Validation
2
+
3
+ **Status:** Won't Fix — balanced tier is sufficient for test-writer; not worth the added complexity
4
+ **Target:** N/A
5
+ **Author:** Nax Dev
6
+ **Date:** 2026-03-06
7
+
8
+ ---
9
+
10
+ ## 1. Problem
11
+
12
+ nax TDD runs two sessions: **testWriter** then **implementer**. The testWriter tier is configured separately (`tdd.sessionTiers.testWriter`, default `"balanced"`). The implementer uses the story's routed `modelTier`.
13
+
14
+ **Risk:** If testWriter runs `"fast"` and the implementer runs `"powerful"`, the tests written may be too shallow — they test happy paths but miss edge cases a powerful model's implementation handles. Result: powerful implementer writes sophisticated code, all tests pass (trivially), then the deferred regression gate catches real failures.
15
+
16
+ ---
17
+
18
+ ## 2. Tier Ordering
19
+
20
+ ```
21
+ fast (1) < balanced (2) < powerful (3)
22
+ ```
23
+
24
+ **Invariant:** `testWriterTier >= implementerTier`
25
+
26
+ ---
27
+
28
+ ## 3. Validation Logic
29
+
30
+ In `src/tdd/session-runner.ts` before launching testWriter:
31
+
32
+ ```typescript
33
+ const tierOrder = { fast: 1, balanced: 2, powerful: 3 };
34
+ const writerTier = config.tdd.sessionTiers?.testWriter ?? "balanced";
35
+ const implementerTier = story.routing.modelTier ?? "balanced";
36
+
37
+ if (tierOrder[writerTier] < tierOrder[implementerTier]) {
38
+ if (config.tdd.enforceWriterTierParity) {
39
+ effectiveWriterTier = implementerTier; // auto-elevate
40
+ logger.warn("tdd", `Auto-elevated testWriter tier ${writerTier} → ${implementerTier}`);
41
+ } else {
42
+ logger.warn("tdd", `testWriter tier (${writerTier}) < implementer tier (${implementerTier}) — tests may be shallow`);
43
+ }
44
+ }
45
+ ```
46
+
47
+ ---
48
+
49
+ ## 4. Config Changes
50
+
51
+ ```jsonc
52
+ {
53
+ "tdd": {
54
+ "sessionTiers": { "testWriter": "balanced", "verifier": "fast" },
55
+ "enforceWriterTierParity": false // NEW — auto-elevates testWriter when true
56
+ }
57
+ }
58
+ ```
59
+
60
+ `nax config --explain`: *"testWriter tier should be ≥ implementer tier. Enable enforceWriterTierParity to auto-elevate."*
61
+
62
+ ---
63
+
64
+ ## 5. Files Affected
65
+
66
+ | File | Change |
67
+ |---|---|
68
+ | `src/tdd/session-runner.ts` | Tier comparison + warn/elevate logic |
69
+ | `src/config/schemas.ts` | Add `tdd.enforceWriterTierParity` (boolean, default false) |
70
+ | `src/config/types.ts` | Add `enforceWriterTierParity` to `TddConfig` |
71
+ | `src/config/defaults.ts` | Default: `false` |
72
+
73
+ ---
74
+
75
+ ## 6. Test Plan
76
+
77
+ - `writerTier < implementerTier`, `enforceWriterTierParity: false` → warning logged, tier unchanged
78
+ - `writerTier < implementerTier`, `enforceWriterTierParity: true` → tier elevated, warning logged
79
+ - `writerTier >= implementerTier` → no warning, no change
@@ -0,0 +1,89 @@
1
+ # FEAT-013 — Test-After Strategy Review & Deprecation Path
2
+
3
+ **Status:** Proposal
4
+ **Target:** v0.21.0
5
+ **Author:** Nax Dev
6
+ **Date:** 2026-03-06
7
+
8
+ ---
9
+
10
+ ## 1. Problem with `test-after`
11
+
12
+ `test-after` runs the agent in a single session: implement first, then write tests. The structural problem is that **the agent writes tests to match its own (possibly broken) implementation.** The tests confirm buggy behavior rather than guarding against it.
13
+
14
+ ---
15
+
16
+ ## 2. Strategy Comparison
17
+
18
+ | Strategy | Order | Sessions | Quality | Risk |
19
+ |---|---|---|---|---|
20
+ | `tdd-lite` | Tests → Impl | 2 | ✅ High | Low |
21
+ | `three-session-tdd` | Tests → Impl → Verify | 3 | ✅✅ Highest | Very low |
22
+ | `test-after` | Impl → Tests | 1 | ⚠️ Variable | High — tests may confirm bugs |
23
+
24
+ ---
25
+
26
+ ## 3. Proposed Changes
27
+
28
+ ### 3.1 Post-write isolation verify (opt-in fix)
29
+
30
+ After the agent's session completes, stash the implementation changes and run the new test files on their own — the tests should **fail** without the implementation (proving they actually test something):
31
+
32
+ ```
33
+ 1. Agent writes impl + tests
34
+ 2. git stash (hide impl changes)
35
+ 3. Run new test files → should FAIL (no impl)
36
+ 4. git stash pop
37
+ 5. If tests PASSED in step 3 → escalate ("trivially passing tests")
38
+ 6. Normal verify (impl + tests together)
39
+ ```
40
+
41
+ Config: `tdd.testAfterIsolationVerify: true` (default: false)
42
+
43
+ ### 3.2 Remove from auto-routing
44
+
45
+ LLM router and keyword router no longer auto-assign `test-after`. It only runs when:
46
+ - Explicitly set in PRD (`testStrategy: "test-after"`)
47
+ - OR `execution.allowTestAfter: true` and router returns it
48
+
49
+ ### 3.3 Warning in `nax config --explain`
50
+
51
+ ### 3.4 Config gate
52
+
53
+ ```jsonc
54
+ {
55
+ "execution": { "allowTestAfter": true }, // NEW — false blocks test-after
56
+ "tdd": { "testAfterIsolationVerify": false } // NEW — opt-in isolation check
57
+ }
58
+ ```
59
+
60
+ ---
61
+
62
+ ## 4. Migration Path
63
+
64
+ | Version | Change |
65
+ |---|---|
66
+ | v0.21.0 | Warning in --explain. Remove from auto-routing. Add `allowTestAfter` config. |
67
+ | v0.22.0 | `allowTestAfter` default → `false`. Explicit opt-in required. |
68
+ | v0.23.0+ | Evaluate full removal. |
69
+
70
+ ---
71
+
72
+ ## 5. Files Affected
73
+
74
+ | File | Change |
75
+ |---|---|
76
+ | `src/routing/strategies/llm.ts` | Remove `test-after` from auto-assignable set |
77
+ | `src/routing/strategies/keyword.ts` | Remove `test-after` from auto-assignable set |
78
+ | `src/tdd/session-runner.ts` | Add isolation verify step for `test-after` |
79
+ | `src/config/schemas.ts` | Add `execution.allowTestAfter`, `tdd.testAfterIsolationVerify` |
80
+ | `src/cli/config.ts` | Add warning in `--explain` for `test-after` |
81
+
82
+ ---
83
+
84
+ ## 6. Test Plan
85
+
86
+ - `allowTestAfter: false` + router selects `test-after` → fallback to `tdd-lite` + warning
87
+ - `testAfterIsolationVerify: true` + tests pass on clean stash → escalate
88
+ - `testAfterIsolationVerify: true` + tests fail on clean stash → normal (tests are genuine)
89
+ - LLM router no longer returns `test-after` in auto-routing
@@ -0,0 +1,127 @@
1
+ # FEAT-014 — Structured Log & Heartbeat
2
+
3
+ **Status:** Proposal
4
+ **Target:** v0.21.0
5
+ **Author:** Nax Dev
6
+ **Date:** 2026-03-06
7
+
8
+ ---
9
+
10
+ ## 1. Problem
11
+
12
+ nax runs take 30–120 minutes for multi-story features, yet there is no "where are we?" view:
13
+ - `nax status` shows last known state (stale, no stage detail)
14
+ - `nax logs --follow` is raw JSONL event stream (too noisy)
15
+
16
+ Users have no visibility into current story, current stage, elapsed time, cost, or pass/fail counts during a run.
17
+
18
+ ---
19
+
20
+ ## 2. Heartbeat Data Model
21
+
22
+ ```typescript
23
+ // src/events/types.ts
24
+ interface RunHeartbeat {
25
+ type: "run.heartbeat";
26
+ timestamp: string;
27
+ runId: string;
28
+ elapsedSeconds: number;
29
+ currentStory: {
30
+ id: string;
31
+ title: string;
32
+ status: string;
33
+ currentStage: string; // "routing" | "execution" | "verify" | "review" | "completion"
34
+ stageElapsedSeconds: number;
35
+ attempts: number;
36
+ modelTier: string;
37
+ } | null; // null between stories (e.g. deferred regression)
38
+ storyCounts: {
39
+ total: number;
40
+ passed: number;
41
+ failed: number;
42
+ pending: number;
43
+ running: number;
44
+ };
45
+ estimatedCostUsd: number;
46
+ lastActivityAt: string;
47
+ }
48
+ ```
49
+
50
+ ---
51
+
52
+ ## 3. Implementation Plan
53
+
54
+ **Heartbeat emitter** (`runner.ts`):
55
+ ```typescript
56
+ const intervalSec = config.logging?.heartbeatIntervalSeconds ?? 30;
57
+ if (intervalSec > 0) {
58
+ const id = setInterval(async () => {
59
+ const hb = buildHeartbeat(runState);
60
+ emitEvent("run.heartbeat", hb);
61
+ await statusWriter.writeHeartbeat(hb);
62
+ }, intervalSec * 1000);
63
+ runCleanup(() => clearInterval(id));
64
+ }
65
+ ```
66
+
67
+ **Stage transition events** — each pipeline stage emits:
68
+ ```typescript
69
+ emitEvent("stage.enter", { storyId, stage: "verify", timestamp });
70
+ // ... logic ...
71
+ emitEvent("stage.exit", { storyId, stage: "verify", result: action, durationMs });
72
+ ```
73
+
74
+ ---
75
+
76
+ ## 4. CLI Changes
77
+
78
+ **`nax status`** (extended output):
79
+ ```
80
+ ┌─ Run Status ──────────────────────────────────────────────────┐
81
+ │ Feature: verify-v2 Elapsed: 12m 34s Cost: $0.42 │
82
+ │ Stories: ✅ 2 passed ❌ 0 failed ⏳ 3 pending │
83
+ ├─ Current Story ───────────────────────────────────────────────┤
84
+ │ US-003: Smart test runner baseline tracking │
85
+ │ Stage: execution (fast tier, attempt 1) — 2m 18s in stage │
86
+ └───────────────────────────────────────────────────────────────┘
87
+ ```
88
+
89
+ **`nax logs --follow --heartbeat`** — filter to heartbeat-only lines (progress bar style, replaces previous line).
90
+
91
+ ---
92
+
93
+ ## 5. Files Affected
94
+
95
+ | File | Change |
96
+ |---|---|
97
+ | `src/execution/runner.ts` | Add heartbeat `setInterval`, clear on cleanup |
98
+ | `src/events/types.ts` | Add `RunHeartbeat` interface |
99
+ | `src/execution/status-writer.ts` | Add `writeHeartbeat()` method |
100
+ | `src/pipeline/stages/*.ts` | Emit `stage.enter` / `stage.exit` |
101
+ | `src/cli/status.ts` | Render heartbeat table from `status.json` |
102
+ | `src/cli/logs.ts` | Add `--heartbeat` filter flag |
103
+ | `src/config/schemas.ts` | Add `logging.heartbeatIntervalSeconds` |
104
+ | `src/config/types.ts` | Add `LoggingConfig` interface |
105
+
106
+ ---
107
+
108
+ ## 6. Config Changes
109
+
110
+ ```jsonc
111
+ {
112
+ "logging": {
113
+ "heartbeatIntervalSeconds": 30 // 0 = disabled
114
+ }
115
+ }
116
+ ```
117
+
118
+ ---
119
+
120
+ ## 7. Test Plan
121
+
122
+ - Heartbeat emitted every N seconds (mock `setInterval`)
123
+ - Heartbeat written to `status.json`
124
+ - `stage.enter` / `stage.exit` emitted by each pipeline stage
125
+ - `heartbeatIntervalSeconds: 0` → no interval, no events
126
+ - Interval cleared on run completion (no leak)
127
+ - `nax status` renders table when `status.json` has `heartbeat` field
@@ -0,0 +1,28 @@
1
+ # FEAT-010 — baseRef Tracking Design Decision
2
+
3
+ ## Decision
4
+ Capture `baseRef = current HEAD` **in-memory at each attempt start** (not stored in PRD).
5
+ Use `git diff <baseRef>..HEAD` in smart-runner instead of `HEAD~1`.
6
+
7
+ ## Why per-attempt, not per-story
8
+ - Story may retry after other stories have committed
9
+ - Storing in PRD: retry would use stale baseRef from first attempt → includes other stories' files ❌
10
+ - Capturing fresh per attempt: retry anchors to HEAD at that moment → only sees its own commits ✅
11
+
12
+ ## Why no cross-story pollution
13
+ - Story 1 retry baseRef = HEAD after stories 2+3 committed
14
+ - `git diff <baseRef>..HEAD` = only story 1 retry's own commits
15
+ - Other stories' commits are BEFORE baseRef → excluded automatically
16
+
17
+ ## Flow
18
+ ```
19
+ attempt start → captureGitRef() → baseRef (in-memory)
20
+ agent runs → makes N commits
21
+ verify → getChangedSourceFiles(workdir, baseRef)
22
+ → git diff <baseRef>..HEAD
23
+ → only this attempt's changed files ✅
24
+ ```
25
+
26
+ ## Edge Cases
27
+ - Agent makes 0 commits → diff = empty → fall back to full suite (existing behavior)
28
+ - Partial commits on failure → next attempt captures new baseRef → clean isolation