@nathapp/nax 0.18.2 → 0.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/.claude/rules/01-project-conventions.md +34 -0
  2. package/.claude/rules/02-test-architecture.md +39 -0
  3. package/.claude/rules/03-test-writing.md +58 -0
  4. package/.claude/rules/04-forbidden-patterns.md +29 -0
  5. package/.githooks/pre-commit +13 -0
  6. package/.gitlab-ci.yml +11 -5
  7. package/CHANGELOG.md +9 -0
  8. package/CLAUDE.md +45 -122
  9. package/bun.lock +1 -1
  10. package/bunfig.toml +2 -1
  11. package/docker-compose.test.yml +15 -0
  12. package/docs/ROADMAP.md +83 -14
  13. package/docs/specs/verification-architecture-v2.md +343 -0
  14. package/nax/config.json +7 -7
  15. package/nax/features/v0.18.3-execution-reliability/prd.json +80 -0
  16. package/nax/features/v0.18.3-execution-reliability/progress.txt +3 -0
  17. package/package.json +2 -2
  18. package/src/config/defaults.ts +1 -0
  19. package/src/config/schema.ts +1 -0
  20. package/src/config/schemas.ts +26 -1
  21. package/src/config/types.ts +21 -4
  22. package/src/context/builder.ts +11 -0
  23. package/src/context/elements.ts +38 -1
  24. package/src/execution/escalation/tier-escalation.ts +28 -3
  25. package/src/execution/post-verify-rectification.ts +4 -2
  26. package/src/execution/post-verify.ts +102 -20
  27. package/src/execution/progress.ts +2 -0
  28. package/src/pipeline/stages/execution.ts +10 -2
  29. package/src/pipeline/stages/review.ts +5 -3
  30. package/src/pipeline/stages/routing.ts +28 -9
  31. package/src/pipeline/stages/verify.ts +49 -8
  32. package/src/prd/index.ts +16 -1
  33. package/src/prd/types.ts +33 -0
  34. package/src/routing/strategies/keyword.ts +7 -4
  35. package/src/routing/strategies/llm.ts +45 -4
  36. package/src/verification/gate.ts +2 -1
  37. package/src/verification/smart-runner.ts +68 -0
  38. package/src/verification/types.ts +2 -0
  39. package/test/context/prior-failures.test.ts +462 -0
  40. package/test/execution/structured-failure.test.ts +414 -0
  41. package/test/integration/logger.test.ts +1 -1
  42. package/test/{US-002-orchestrator.test.ts → integration/precheck-orchestrator.test.ts} +3 -3
  43. package/test/integration/review-plugin-integration.test.ts +2 -1
  44. package/test/integration/story-id-in-events.test.ts +1 -1
  45. package/test/unit/config/smart-runner-flag.test.ts +36 -12
  46. package/test/unit/execution/post-verify-regression.test.ts +415 -0
  47. package/test/{execution → unit/execution}/post-verify.test.ts +33 -1
  48. package/test/unit/pipeline/routing-partial-override.test.ts +15 -36
  49. package/test/unit/pipeline/verify-smart-runner.test.ts +8 -6
  50. package/test/unit/prd-get-next-story.test.ts +28 -0
  51. package/test/unit/routing/routing-stability.test.ts +207 -0
  52. package/test/unit/routing.test.ts +102 -0
  53. package/test/unit/storyid-events.test.ts +20 -32
  54. package/test/unit/verification/smart-runner-config.test.ts +162 -0
  55. package/test/unit/verification/smart-runner-discovery.test.ts +353 -0
  56. package/test/TEST_COVERAGE_US001.md +0 -217
  57. package/test/TEST_COVERAGE_US003.md +0 -84
  58. package/test/TEST_COVERAGE_US005.md +0 -86
@@ -0,0 +1,343 @@
1
+ # Verification Architecture v2
2
+
3
+ **Status:** Proposal
4
+ **Target:** v0.19.0
5
+ **Author:** Nax Dev
6
+ **Date:** 2026-03-04
7
+ **Fixes:** BUG-026, BUG-028, plus architectural debt in verification pipeline
8
+
9
+ ---
10
+
11
+ ## 1. Problems with Current Architecture
12
+
13
+ ### 1.1 Triple Test Execution (Waste)
14
+
15
+ Current per-story flow runs tests in up to 3 stages (post-verify runs them twice: scoped, then full suite):
16
+
17
+ ```
18
+ Pipeline verify stage → scoped tests (Smart Test Runner)
19
+ Pipeline review stage → test command (if review.commands.test configured)
20
+ Post-verify → scoped tests AGAIN + full regression gate
21
+ ```
22
+
23
+ On Mac01 with ~2000 tests, this means:
24
+ - Scoped: ~10-20s × 2 (duplicate) = 20-40s, of which 10-20s is wasted
25
+ - Full regression: ~125s per story
26
+ - Total: ~150s+ of test execution per story
27
+
28
+ ### 1.2 Regression Gate Per Story (BUG-026)
29
+
30
+ The regression gate runs a **full test suite after every story**. Problems:
31
+ - **Timeout:** Full suite frequently times out on Mac01 (~125s)
32
+ - **False escalation:** Timeout is treated as story failure → bumps `story.attempts` → triggers tier escalation
33
+ - **Wasted compute:** Agent's implementation was correct (scoped tests passed), but full suite timeout causes a complete redo at a higher (more expensive) tier
34
+ - **Cascading waste:** N stories × 1 full suite each = N full suite runs. Most are redundant.
35
+
36
+ ### 1.3 Escalation Context Loss
37
+
38
+ When a story fails and escalates to a higher tier, the error context passed is:
39
+
40
+ ```
41
+ priorErrors: ["Attempt 1 failed with model tier: fast"]
42
+ ```
43
+
44
+ The actual test output — which tests failed, error messages, stack traces — is **discarded**. The escalated agent gets a vague hint instead of actionable failure context.
45
+
46
+ | Stage | Context Available | What's Stored in priorErrors |
47
+ |-------|-------------------|------------------------------|
48
+ | Rectification loop | Full `TestFailure[]` with file, testName, error, stackTrace | *(used internally, then discarded)* |
49
+ | Post-verify failure | `verificationResult.error` (summary string) | Generic: `"Verification failed: TEST_FAILURE"` |
50
+ | Regression gate failure | Full test output | Generic: `"REGRESSION: full-suite regression detected"` |
51
+ | Tier escalation | Nothing new | `"Attempt N failed with model tier: X"` |
52
+
53
+ Result: the `fast → balanced → powerful` escalation chain has **zero actionable context** about what actually failed.
54
+
55
+ ### 1.4 Routing Cache Ignores Escalation Tier (BUG-028)
56
+
57
+ LLM routing cache is keyed by `story.id` only. When escalation updates `story.routing.modelTier` from `balanced` → `powerful`, the next iteration hits the cache and returns the old `balanced` routing decision, overriding the escalation.
58
+
59
+ ---
60
+
61
+ ## 2. Proposed Architecture
62
+
63
+ ### 2.1 Verification Flow (Simplified)
64
+
65
+ ```
66
+ Pipeline per-story:
67
+ 1. Agent execution
68
+ 2. Scoped verify (Smart Test Runner) ← ONLY test run per story
69
+ 3. Scoped rectification (if verify fails) ← has full test failure context
70
+ 4. Review (typecheck + lint only) ← NO test re-run
71
+ 5. Story marked "passed" or escalated
72
+
73
+ Run-end (after all stories have finished):
74
+ 6. Deferred regression gate (full suite) ← ONE full suite run total
75
+ 7. Targeted regression rectification ← per-story, with failure context
76
+ 8. Run marked complete or stalled
77
+ ```
78
+
79
+ **Key changes:**
80
+ - **Remove duplicate test runs** — pipeline verify is the single source of truth
81
+ - **Review stage runs typecheck + lint only** — no test command
82
+ - **Remove post-verify scoped re-test** — pipeline verify already did this
83
+ - **Move regression gate to run-end** — one full suite run instead of N
84
+ - **Targeted regression rectification** — map failing tests back to responsible stories
85
+
86
+ ### 2.2 Deferred Regression Gate
87
+
88
+ Instead of running the full suite after every story, run it **once** after all stories complete.
89
+
90
+ ```typescript
91
+ // New: src/execution/lifecycle/run-regression.ts
92
+
93
+ interface DeferredRegressionOptions {
94
+ config: NaxConfig;
95
+ workdir: string;
96
+ prd: PRD;
97
+ prdPath: string;
98
+ allStoryMetrics: StoryMetrics[];
99
+ }
100
+
101
+ interface DeferredRegressionResult {
102
+ passed: boolean;
103
+ failedTests?: TestFailure[];
104
+ storyMapping?: Map<string, TestFailure[]>; // storyId → failures caused by that story
105
+ }
106
+ ```
107
+
108
+ **Failure handling:**
109
+ 1. Run full suite
110
+ 2. Parse failures into `TestFailure[]`
111
+ 3. For each failing test, use reverse Smart Test Runner mapping:
112
+ - `test/unit/foo/bar.test.ts` → `src/foo/bar.ts` → which story touched this file? (from git log per story)
113
+ 4. Group failures by responsible story
114
+ 5. Attempt targeted rectification per story (agent gets FULL failure context)
115
+ 6. Re-run full suite to confirm fix
116
+ 7. If still failing → mark responsible stories as failed
117
+
118
+ **Config:**
119
+
120
+ ```jsonc
121
+ {
122
+ "execution": {
123
+ "regressionGate": {
124
+ "enabled": true,
125
+ "mode": "deferred", // "deferred" | "per-story" | "disabled"
126
+ "timeoutSeconds": 300,
127
+ "maxRectificationAttempts": 2
128
+ }
129
+ }
130
+ }
131
+ ```
132
+
133
+ ### 2.3 Structured Failure Context for Escalation
134
+
135
+ Replace vague `priorErrors` strings with structured failure data.
136
+
137
+ **New PRD field:** `priorFailures` (alongside existing `priorErrors` for backward compat)
138
+
139
+ ```typescript
140
+ // In src/prd/types.ts
141
+
142
+ interface StructuredFailure {
143
+ /** Which attempt this failure occurred on */
144
+ attempt: number;
145
+ /** Model tier that was used */
146
+ modelTier: string;
147
+ /** What stage failed */
148
+ stage: "verify" | "review" | "regression" | "rectification" | "agent-session";
149
+ /** Human-readable summary */
150
+ summary: string;
151
+ /** Structured test failures (if applicable) */
152
+ testFailures?: TestFailureContext[];
153
+ /** Timestamp */
154
+ timestamp: string;
155
+ }
156
+
157
+ interface TestFailureContext {
158
+ file: string;
159
+ testName: string;
160
+ error: string;
161
+ /** First 5 lines of stack trace */
162
+ stackTrace: string[];
163
+ }
164
+ ```
165
+
166
+ **How it flows through escalation:**
167
+
168
+ ```
169
+ fast attempt 1 → verify fails
170
+ → priorFailures: [{
171
+ attempt: 1,
172
+ modelTier: "fast",
173
+ stage: "verify",
174
+ summary: "3 tests failed in src/routing/router.ts",
175
+ testFailures: [
176
+ { file: "test/unit/routing/router.test.ts",
177
+ testName: "should route to balanced",
178
+ error: "Expected 'balanced' got 'fast'",
179
+ stackTrace: [...] },
180
+ ...
181
+ ]
182
+ }]
183
+
184
+ balanced attempt 1 → agent gets FULL context of what fast couldn't fix
185
+ ```
186
+
187
+ **Context injection** (`context/builder.ts`):
188
+
189
+ Format `priorFailures` into actionable markdown for the agent prompt:
190
+
191
+ ```markdown
192
+ ## Prior Attempt 1 (fast, verify)
193
+ 3 tests failed in src/routing/router.ts
194
+
195
+ ### Test Failures:
196
+ - **test/unit/routing/router.test.ts** > should route to balanced
197
+ Error: Expected 'balanced' got 'fast'
198
+ Stack: at Router.route (src/routing/router.ts:42)
199
+ ```
200
+
201
+ ### 2.4 BUG-028 Fix: Cache Invalidation on Escalation
202
+
203
+ Add `clearCacheForStory(storyId)` to `src/routing/strategies/llm.ts`.
204
+
205
+ Call it in `tier-escalation.ts` when updating `story.routing.modelTier`.
206
+
207
+ ---
208
+
209
+ ## 3. Migration Plan
210
+
211
+ ### Phase 1: v0.18.3 — Minimal Fixes (no architecture change)
212
+
213
+ 1. **BUG-026 quick fix:** Regression gate timeout → accept scoped pass + warn (not escalate)
214
+ 2. **BUG-028 fix:** `clearCacheForStory()` on escalation
215
+ 3. **Store structured failures:** Start populating `priorFailures` alongside `priorErrors` (backward compat)
216
+
217
+ ### Phase 2: v0.19.0 — Architecture v2
218
+
219
+ 1. **Remove post-verify duplicate test run** — pipeline verify is authoritative
220
+ 2. **Review stage: typecheck + lint only** — remove test command from review
221
+ 3. **Deferred regression gate** — run-end full suite with targeted rectification
222
+ 4. **Reverse Smart Test Runner mapping** — failing test → source file → responsible story
223
+ 5. **Full structured failure context** — `priorFailures` injected into agent prompts
224
+ 6. **Config:** `regressionGate.mode: "deferred"` (default)
225
+
226
+ ### Phase 3: Future
227
+
228
+ - **Incremental regression:** Only run tests related to ALL changed files across all stories (union of Smart Test Runner scopes)
229
+ - **Test impact analysis:** AST-based dependency graph for more precise test scoping
230
+ - **Parallel story regression:** Run rectification for multiple stories concurrently
231
+
232
+ ---
233
+
234
+ ## 4. Files Affected
235
+
236
+ ### Phase 1 (v0.18.3)
237
+
238
+ | File | Change |
239
+ |------|--------|
240
+ | `src/execution/post-verify.ts` | Regression gate timeout → accept + warn |
241
+ | `src/routing/strategies/llm.ts` | Add `clearCacheForStory()` export |
242
+ | `src/execution/escalation/tier-escalation.ts` | Call `clearCacheForStory()` on escalation |
243
+ | `src/execution/post-verify-rectification.ts` | Store `StructuredFailure` in `priorFailures` |
244
+ | `src/prd/types.ts` | Add `priorFailures?: StructuredFailure[]` to `UserStory` |
245
+
246
+ ### Phase 2 (v0.19.0)
247
+
248
+ | File | Change |
249
+ |------|--------|
250
+ | `src/pipeline/stages/review.ts` | Remove test command execution |
251
+ | `src/execution/post-verify.ts` | Remove scoped re-test, keep regression call only |
252
+ | `src/execution/lifecycle/run-regression.ts` | **New:** Deferred regression gate + targeted rectification |
253
+ | `src/execution/lifecycle/run-completion.ts` | Call deferred regression before final metrics |
254
+ | `src/verification/smart-runner.ts` | Add reverse mapping: test file → source file → story |
255
+ | `src/context/builder.ts` | Format `priorFailures` into agent prompt |
256
+ | `src/config/schemas.ts` | Add `regressionGate.mode` enum |
257
+
258
+ ---
259
+
260
+ ## 5. Test Plan
261
+
262
+ ### Phase 1 Tests
263
+ - Regression gate timeout returns "passed" with warning (not "failed")
264
+ - `clearCacheForStory()` removes cached decision; next route() re-evaluates
265
+ - `priorFailures` populated with structured `TestFailureContext` on verify failure
266
+ - Backward compat: `priorErrors` still populated alongside `priorFailures`
267
+
268
+ ### Phase 2 Tests
269
+ - Pipeline verify is single test execution (no duplicate)
270
+ - Review stage skips test command
271
+ - Deferred regression runs once at run-end
272
+ - Reverse mapping correctly identifies responsible story
273
+ - Targeted rectification receives full failure context
274
+ - Escalated agent prompt includes formatted `priorFailures`
275
+ - Config `regressionGate.mode: "per-story"` preserves current behavior
276
+
277
+ ---
278
+
279
+ ## 6. Historical Context (Why It's Like This)
280
+
281
+ ### Why post-verify exists separately from pipeline verify
282
+
283
+ The pipeline (`src/pipeline/pipeline.ts`) runs stages in sequence: routing → context → prompt → execution → **verify** → review → completion. This was the original single verification point.
284
+
285
+ Later, **post-agent verification** was added in `src/execution/pipeline-result-handler.ts` → `handlePipelineSuccess()` → `runPostAgentVerification()`. This was meant to handle:
286
+ - **Scoped verification** with git-diff-based test file detection (before Smart Test Runner existed in the pipeline)
287
+ - **Rectification** — retry loop with agent when tests fail
288
+ - **Regression gate** (BUG-009 fix) — full suite after scoped pass
289
+
290
+ When Smart Test Runner was added to the **pipeline verify stage** (v0.18.2), it duplicated the scoped test logic that post-verify already had. Nobody removed the post-verify scoped test.
291
+
292
+ ### Current code flow with exact locations
293
+
294
+ ```
295
+ sequential-executor.ts:170 → pipelineRunner.run(story)
296
+ pipeline.ts:execute() → runs stages in order:
297
+ verify.ts:execute() → Smart Test Runner scoped tests [TEST RUN #1]
298
+ review.ts:execute() → runReview() which may run tests [TEST RUN #2 if review.commands.test set]
299
+
300
+ pipeline-result-handler.ts:76 → runPostAgentVerification()
301
+ post-verify.ts:85 → runVerification(scopedCommand) [TEST RUN #3 — duplicate of #1]
302
+ post-verify.ts:118 → runRegressionGate()
303
+ post-verify.ts:180 → runVerification(fullSuite) [TEST RUN #4 — full suite]
304
+ ```
305
+
306
+ ### Review stage test command
307
+
308
+ `review.ts` calls `runReview()` from `src/review/index.ts` which runs `config.review.commands.test` if configured. In default config, `review.commands` includes `test`, `typecheck`, and `lint`. So yes — review runs tests by default, creating the triple-test problem.
309
+
310
+ ### Decision rationale
311
+
312
+ **Why deferred regression (Option C) over per-story (A) or disabled (B):**
313
+ - **Option A (keep per-story):** 125s timeout per story is the root cause of BUG-026. Even with timeout-acceptance, it's wasteful.
314
+ - **Option B (disable entirely):** Too risky — cross-story regressions are real (BUG-009 was filed for this exact reason).
315
+ - **Option C (deferred):** One full suite run at the end. If it fails, we can trace back to responsible stories via reverse file mapping. Best balance of safety vs speed.
316
+
317
+ **Why cache invalidation (Option C for BUG-028) over cache key change (A) or bypass (B):**
318
+ - **Option A (include tier in key):** Works but creates multiple cache entries per story. If a story is re-routed 3 times, 3 entries exist. Cache eviction becomes unpredictable.
319
+ - **Option B (bypass when routing set):** Almost all stories have `story.routing` set after first pass, so cache would rarely be used at all — defeats the purpose.
320
+ - **Option C (clear on escalation):** Surgical — one `delete()` call at the exact moment routing changes. Cache works normally for non-escalated stories.
321
+
322
+ ## 7. Edge Cases
323
+
324
+ ### Partial completion (stalled run)
325
+
326
+ If only 3 of 5 stories pass and nax stalls (remaining stories failed/paused):
327
+ - Deferred regression still runs on the 3 passed stories
328
+ - If regression fails, only the passed stories are candidates for rectification
329
+ - Failed/paused stories are untouched
330
+
331
+ ### Stories that touch the same files
332
+
333
+ If story A and story B both modify `src/utils/parser.ts`:
334
+ - Reverse mapping may attribute the same failing test to both stories
335
+ - Rectification should try the **last story that touched the file** first (git log order)
336
+ - If that doesn't fix it, try the other story
337
+
338
+ ### No test mapping possible
339
+
340
+ If a failing test can't be mapped to any story's changed files:
341
+ - Log warning: "Unmapped regression — cannot attribute to a specific story"
342
+ - Mark ALL passed stories as needing re-verification
343
+ - This is the worst case but should be rare with good test naming conventions
package/nax/config.json CHANGED
@@ -63,18 +63,19 @@
63
63
  "verificationTimeoutSeconds": 300,
64
64
  "maxStoriesPerFeature": 15,
65
65
  "rectification": {
66
- "enabled": false,
66
+ "enabled": true,
67
67
  "maxRetries": 2,
68
68
  "fullSuiteTimeoutSeconds": 600,
69
69
  "maxFailureSummaryChars": 2000,
70
70
  "abortOnIncreasingFailures": true
71
71
  },
72
72
  "regressionGate": {
73
- "enabled": false
73
+ "enabled": false,
74
+ "timeoutSeconds": 300
74
75
  }
75
76
  },
76
77
  "quality": {
77
- "requireTypecheck": false,
78
+ "requireTypecheck": true,
78
79
  "requireLint": true,
79
80
  "requireTests": true,
80
81
  "commands": {
@@ -117,13 +118,12 @@
117
118
  "maxCodebaseSummaryTokens": 5000
118
119
  },
119
120
  "review": {
120
- "enabled": false,
121
+ "enabled": true,
121
122
  "checks": [
122
- "test",
123
+ "typecheck",
123
124
  "lint"
124
125
  ],
125
126
  "commands": {
126
- "test": "bun run test",
127
127
  "typecheck": "bun run typecheck",
128
128
  "lint": "bun run lint"
129
129
  }
@@ -147,4 +147,4 @@
147
147
  "scopeToStory": true
148
148
  }
149
149
  }
150
- }
150
+ }
@@ -0,0 +1,80 @@
1
+ {
2
+ "project": "nax",
3
+ "branchName": "feat/v0.18.3-execution-reliability",
4
+ "feature": "v0.18.3-execution-reliability",
5
+ "userStories": [
6
+ {
7
+ "id": "BUG-026",
8
+ "title": "Regression gate timeout accepts scoped pass instead of escalating",
9
+ "description": "In src/execution/post-verify.ts, the runRegressionGate() function returns 'failed' when the full suite times out. This causes revertStoriesOnFailure() to bump story.attempts and trigger tier escalation, even though scoped verification already passed. Fix: when regressionResult.status === 'TIMEOUT', return 'passed' with a warning log instead of 'failed'. The scoped Smart Test Runner already verified the story's changes. A timeout is not evidence of regression \u2014 only TEST_FAILURE is. Keep existing behavior for TEST_FAILURE (attempt rectification). Add config flag execution.regressionGate.acceptOnTimeout (default true).",
10
+ "complexity": 2,
11
+ "status": "passed",
12
+ "testStrategy": "test-after",
13
+ "attempts": 0,
14
+ "priorErrors": [
15
+ "Attempt 1 failed with model tier: fast"
16
+ ],
17
+ "escalations": [],
18
+ "dependencies": [],
19
+ "tags": [],
20
+ "acceptanceCriteria": [],
21
+ "storyPoints": 1,
22
+ "routing": {
23
+ "complexity": "simple",
24
+ "modelTier": "balanced",
25
+ "testStrategy": "three-session-tdd-lite",
26
+ "reasoning": "three-session-tdd-lite: strategy:lite"
27
+ },
28
+ "passes": true
29
+ },
30
+ {
31
+ "id": "BUG-028",
32
+ "title": "Clear LLM routing cache on tier escalation",
33
+ "description": "In src/routing/strategies/llm.ts, the routing cache is keyed by story.id only. When tier escalation updates story.routing.modelTier (e.g. balanced \u2192 powerful), the next iteration hits the cache and returns the old routing decision, overriding the escalation. Fix: add a clearCacheForStory(storyId: string) export to llm.ts that calls cachedDecisions.delete(storyId). Call clearCacheForStory() in src/execution/escalation/tier-escalation.ts in both escalation paths: (1) preIterationTierCheck() after updating story routing, (2) handleTierEscalation() after updating story routing. Write unit tests verifying: cache hit returns old decision, clearCacheForStory removes it, next route() call re-evaluates.",
34
+ "complexity": 2,
35
+ "status": "passed",
36
+ "testStrategy": "test-after",
37
+ "attempts": 0,
38
+ "priorErrors": [],
39
+ "escalations": [],
40
+ "dependencies": [],
41
+ "tags": [],
42
+ "acceptanceCriteria": [],
43
+ "storyPoints": 1,
44
+ "passes": true
45
+ },
46
+ {
47
+ "id": "SFC-001",
48
+ "title": "Add StructuredFailure type and populate priorFailures on verify failure",
49
+ "description": "Add structured failure context so escalated tiers know exactly what failed. Changes: (1) In src/prd/types.ts, add StructuredFailure interface with fields: attempt (number), modelTier (string), stage ('verify'|'review'|'regression'|'rectification'|'agent-session'), summary (string), testFailures (optional TestFailureContext[]), timestamp (string). Add TestFailureContext interface with fields: file (string), testName (string), error (string), stackTrace (string[]). Add priorFailures?: StructuredFailure[] to UserStory type. (2) In src/execution/post-verify.ts, when verification fails (both scoped and regression), build a StructuredFailure from the VerificationResult and parseBunTestOutput(), and push to story.priorFailures. (3) In src/execution/post-verify-rectification.ts revertStoriesOnFailure(), also populate priorFailures alongside existing priorErrors. (4) In src/execution/escalation/tier-escalation.ts, populate priorFailures with stage='escalation' when escalating. (5) Ensure priorFailures is initialized to [] in prd/index.ts loadPRD() like priorErrors.",
50
+ "complexity": 3,
51
+ "status": "passed",
52
+ "testStrategy": "test-after",
53
+ "attempts": 1,
54
+ "priorErrors": [],
55
+ "escalations": [],
56
+ "dependencies": [],
57
+ "tags": [],
58
+ "acceptanceCriteria": [],
59
+ "storyPoints": 1,
60
+ "passes": true
61
+ },
62
+ {
63
+ "id": "SFC-002",
64
+ "title": "Format priorFailures into agent prompt context",
65
+ "description": "In src/context/builder.ts, the existing code injects priorErrors as high-priority context elements (priority 90). Add similar handling for priorFailures: format each StructuredFailure into actionable markdown showing the attempt number, model tier, stage, summary, and detailed test failures (file, testName, error, first stack trace line). Inject as priority 95 (higher than priorErrors) so the agent sees structured failures first. Keep priorErrors injection for backward compat. Write unit tests verifying: (1) priorFailures formatted correctly, (2) test failure details included, (3) empty priorFailures produces no context element, (4) priority ordering is correct.",
66
+ "complexity": 2,
67
+ "status": "passed",
68
+ "testStrategy": "test-after",
69
+ "attempts": 1,
70
+ "priorErrors": [],
71
+ "escalations": [],
72
+ "dependencies": [],
73
+ "tags": [],
74
+ "acceptanceCriteria": [],
75
+ "storyPoints": 1,
76
+ "passes": true
77
+ }
78
+ ],
79
+ "updatedAt": "2026-03-04T05:01:36.021Z"
80
+ }
@@ -0,0 +1,3 @@
1
+ [2026-03-04T04:36:19.998Z] BUG-028 — PASSED — Clear LLM routing cache on tier escalation — Cost: $0.0779
2
+ [2026-03-04T04:51:46.868Z] SFC-001 — FAILED — Add StructuredFailure type and populate priorFailures on verify failure — Review failed: lint failed (exit code 1)
3
+ [2026-03-04T05:01:36.022Z] SFC-002 — FAILED — Format priorFailures into agent prompt context — Review failed: lint failed (exit code 1)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.18.2",
3
+ "version": "0.18.4",
4
4
  "description": "AI Coding Agent Orchestrator \u2014 loops until done",
5
5
  "type": "module",
6
6
  "bin": {
@@ -31,7 +31,7 @@
31
31
  },
32
32
  "devDependencies": {
33
33
  "@biomejs/biome": "^1.9.4",
34
- "@types/bun": "^1.2.4",
34
+ "@types/bun": "^1.3.8",
35
35
  "react-devtools-core": "^7.0.1",
36
36
  "typescript": "^5.7.3"
37
37
  },
@@ -66,6 +66,7 @@ export const DEFAULT_CONFIG: NaxConfig = {
66
66
  regressionGate: {
67
67
  enabled: true,
68
68
  timeoutSeconds: 120,
69
+ acceptOnTimeout: true,
69
70
  },
70
71
  contextProviderTokenBudget: 2000,
71
72
  smartTestRunner: true,
@@ -44,6 +44,7 @@ export type {
44
44
  RoutingConfig,
45
45
  StorySizeGateConfig,
46
46
  PrecheckConfig,
47
+ SmartTestRunnerConfig,
47
48
  NaxConfig,
48
49
  } from "./types";
49
50
 
@@ -62,8 +62,31 @@ const RectificationConfigSchema = z.object({
62
62
  const RegressionGateConfigSchema = z.object({
63
63
  enabled: z.boolean().default(true),
64
64
  timeoutSeconds: z.number().int().min(10).max(600).default(120),
65
+ acceptOnTimeout: z.boolean().default(true),
65
66
  });
66
67
 
68
+ const SmartTestRunnerConfigSchema = z.object({
69
+ enabled: z.boolean().default(true),
70
+ testFilePatterns: z.array(z.string()).default(["test/**/*.test.ts"]),
71
+ fallback: z.enum(["import-grep", "full-suite"]).default("import-grep"),
72
+ });
73
+
74
+ const SMART_TEST_RUNNER_DEFAULT = {
75
+ enabled: true,
76
+ testFilePatterns: ["test/**/*.test.ts"],
77
+ fallback: "import-grep" as const,
78
+ };
79
+
80
+ /** Coerces boolean → SmartTestRunnerConfig for backward compat */
81
+ const smartTestRunnerFieldSchema = z
82
+ .preprocess((val) => {
83
+ if (typeof val === "boolean") {
84
+ return { enabled: val, testFilePatterns: ["test/**/*.test.ts"], fallback: "import-grep" };
85
+ }
86
+ return val;
87
+ }, SmartTestRunnerConfigSchema)
88
+ .default(SMART_TEST_RUNNER_DEFAULT);
89
+
67
90
  const ExecutionConfigSchema = z.object({
68
91
  maxIterations: z.number().int().positive({ message: "maxIterations must be > 0" }),
69
92
  iterationDelayMs: z.number().int().nonnegative(),
@@ -81,7 +104,7 @@ const ExecutionConfigSchema = z.object({
81
104
  lintCommand: z.string().nullable().optional(),
82
105
  typecheckCommand: z.string().nullable().optional(),
83
106
  dangerouslySkipPermissions: z.boolean().default(true),
84
- smartTestRunner: z.boolean().default(true),
107
+ smartTestRunner: smartTestRunnerFieldSchema,
85
108
  });
86
109
 
87
110
  const QualityConfigSchema = z.object({
@@ -189,6 +212,8 @@ const LlmRoutingConfigSchema = z.object({
189
212
  mode: z.enum(["one-shot", "per-story", "hybrid"]).optional(),
190
213
  batchMode: z.boolean().optional(), // deprecated, for backward compat
191
214
  timeoutMs: z.number().int().positive({ message: "llm.timeoutMs must be > 0" }).optional(),
215
+ retries: z.number().int().min(0, { message: "llm.retries must be >= 0" }).optional(),
216
+ retryDelayMs: z.number().int().min(0, { message: "llm.retryDelayMs must be >= 0" }).optional(),
192
217
  });
193
218
 
194
219
  const RoutingConfigSchema = z
@@ -70,12 +70,24 @@ export interface RectificationConfig {
70
70
  abortOnIncreasingFailures: boolean;
71
71
  }
72
72
 
73
- /** Regression gate config (BUG-009) */
73
+ /** Regression gate config (BUG-009, BUG-026) */
74
74
  export interface RegressionGateConfig {
75
75
  /** Enable full-suite regression gate after scoped verification (default: true) */
76
76
  enabled: boolean;
77
77
  /** Timeout for full-suite regression run in seconds (default: 120) */
78
78
  timeoutSeconds: number;
79
+ /** Accept timeout as pass instead of failing (BUG-026, default: true) */
80
+ acceptOnTimeout?: boolean;
81
+ }
82
+
83
+ /** Smart test runner configuration (STR-007) */
84
+ export interface SmartTestRunnerConfig {
85
+ /** Enable smart test runner (default: true) */
86
+ enabled: boolean;
87
+ /** Glob patterns to scan for test files during import-grep fallback */
88
+ testFilePatterns: string[];
89
+ /** Fallback strategy when path-convention mapping yields no results */
90
+ fallback: "import-grep" | "full-suite";
79
91
  }
80
92
 
81
93
  /** Execution limits */
@@ -106,8 +118,9 @@ export interface ExecutionConfig {
106
118
  typecheckCommand?: string | null;
107
119
  /** Use --dangerously-skip-permissions flag for agent (default: true for backward compat, SEC-1 fix) */
108
120
  dangerouslySkipPermissions?: boolean;
109
- /** Enable smart test runner to scope test runs to changed files (default: true) */
110
- smartTestRunner?: boolean;
121
+ /** Enable smart test runner to scope test runs to changed files (default: true).
122
+ * Accepts boolean for backward compat or a SmartTestRunnerConfig object. */
123
+ smartTestRunner?: boolean | SmartTestRunnerConfig;
111
124
  }
112
125
 
113
126
  /** Quality gate config */
@@ -352,8 +365,12 @@ export interface LlmRoutingConfig {
352
365
  mode?: LlmRoutingMode;
353
366
  /** @deprecated Use mode instead. Will be removed in v1.0 */
354
367
  batchMode?: boolean;
355
- /** Timeout for LLM call in milliseconds (default: 15000) */
368
+ /** Timeout for LLM call in milliseconds (default: 30000) */
356
369
  timeoutMs?: number;
370
+ /** Number of retries on LLM timeout or transient failure (default: 1) */
371
+ retries?: number;
372
+ /** Delay between retries in milliseconds (default: 1000) */
373
+ retryDelayMs?: number;
357
374
  }
358
375
 
359
376
  /** Routing config */
@@ -14,6 +14,7 @@ import {
14
14
  createDependencyContext,
15
15
  createErrorContext,
16
16
  createFileContext,
17
+ createPriorFailuresContext,
17
18
  createProgressContext,
18
19
  createStoryContext,
19
20
  createTestCoverageContext,
@@ -31,6 +32,7 @@ export {
31
32
  createProgressContext,
32
33
  createFileContext,
33
34
  createTestCoverageContext,
35
+ createPriorFailuresContext,
34
36
  } from "./elements";
35
37
  export { formatContextAsMarkdown } from "./formatter";
36
38
 
@@ -89,6 +91,15 @@ export async function buildContext(storyContext: StoryContext, budget: ContextBu
89
91
  // Add progress summary (highest priority)
90
92
  elements.push(createProgressContext(generateProgressSummary(prd), 100));
91
93
 
94
+ // Add prior failures (highest priority after progress, priority 95)
95
+ if (
96
+ currentStory.priorFailures &&
97
+ Array.isArray(currentStory.priorFailures) &&
98
+ currentStory.priorFailures.length > 0
99
+ ) {
100
+ elements.push(createPriorFailuresContext(currentStory.priorFailures, 95));
101
+ }
102
+
92
103
  // Add prior errors (high priority)
93
104
  if (currentStory.priorErrors && Array.isArray(currentStory.priorErrors) && currentStory.priorErrors.length > 0) {
94
105
  for (const error of currentStory.priorErrors) {