@nathapp/nax 0.18.1 → 0.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/.gitlab-ci.yml +12 -6
  2. package/bun.lock +1 -1
  3. package/bunfig.toml +2 -1
  4. package/docker-compose.test.yml +17 -0
  5. package/docs/ROADMAP.md +121 -36
  6. package/docs/specs/verification-architecture-v2.md +343 -0
  7. package/nax/config.json +13 -10
  8. package/nax/features/smart-test-runner/plan.md +7 -0
  9. package/nax/features/smart-test-runner/prd.json +203 -0
  10. package/nax/features/smart-test-runner/progress.txt +13 -0
  11. package/nax/features/smart-test-runner/spec.md +7 -0
  12. package/nax/features/smart-test-runner/tasks.md +8 -0
  13. package/nax/features/v0.18.3-execution-reliability/prd.json +80 -0
  14. package/nax/features/v0.18.3-execution-reliability/progress.txt +3 -0
  15. package/package.json +2 -2
  16. package/src/config/defaults.ts +2 -0
  17. package/src/config/schema.ts +1 -0
  18. package/src/config/schemas.ts +24 -0
  19. package/src/config/types.ts +16 -1
  20. package/src/context/builder.ts +11 -0
  21. package/src/context/elements.ts +38 -1
  22. package/src/execution/escalation/tier-escalation.ts +28 -3
  23. package/src/execution/post-verify-rectification.ts +4 -2
  24. package/src/execution/post-verify.ts +73 -9
  25. package/src/execution/progress.ts +2 -0
  26. package/src/pipeline/stages/review.ts +5 -3
  27. package/src/pipeline/stages/routing.ts +14 -9
  28. package/src/pipeline/stages/verify.ts +54 -1
  29. package/src/prd/index.ts +16 -1
  30. package/src/prd/types.ts +33 -0
  31. package/src/precheck/index.ts +9 -4
  32. package/src/routing/strategies/llm.ts +5 -0
  33. package/src/verification/gate.ts +2 -1
  34. package/src/verification/smart-runner.ts +214 -0
  35. package/src/verification/types.ts +2 -0
  36. package/test/US-002-orchestrator.test.ts +5 -5
  37. package/test/context/prior-failures.test.ts +462 -0
  38. package/test/execution/post-verify-bug026.test.ts +443 -0
  39. package/test/execution/post-verify.test.ts +32 -0
  40. package/test/execution/structured-failure.test.ts +414 -0
  41. package/test/integration/logger.test.ts +1 -1
  42. package/test/integration/review-plugin-integration.test.ts +2 -1
  43. package/test/integration/story-id-in-events.test.ts +1 -1
  44. package/test/unit/config/smart-runner-flag.test.ts +249 -0
  45. package/test/unit/pipeline/routing-partial-override.test.ts +141 -0
  46. package/test/unit/pipeline/verify-smart-runner.test.ts +344 -0
  47. package/test/unit/prd-get-next-story.test.ts +28 -0
  48. package/test/unit/routing.test.ts +102 -0
  49. package/test/unit/smart-test-runner.test.ts +512 -0
  50. package/test/unit/verification/smart-runner.test.ts +246 -0
@@ -0,0 +1,443 @@
1
+ /**
2
+ * BUG-026: Regression gate timeout accepts scoped pass instead of escalating
3
+ *
4
+ * Tests that runRegressionGate (via runPostAgentVerification):
5
+ * - Returns passed when regression gate TIMES OUT and acceptOnTimeout=true (default)
6
+ * - Returns failed when regression gate TIMES OUT and acceptOnTimeout=false
7
+ * - Returns failed when regression gate returns TEST_FAILURE (existing behavior unchanged)
8
+ * - Defaults acceptOnTimeout to true when not set in config
9
+ *
10
+ * These are behavioral tests that call the actual function with mocked dependencies.
11
+ * They complement the type-level tests already in post-verify.test.ts.
12
+ */
13
+
14
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
15
+ import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
16
+ import { join } from "node:path";
17
+ import { tmpdir } from "node:os";
18
+ import type { NaxConfig } from "../../src/config";
19
+ import type { PRD, UserStory } from "../../src/prd/types";
20
+ import type { StoryMetrics } from "../../src/metrics";
21
+ import type { VerificationResult } from "../../src/verification";
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // Mock runVerification with call-order-based responses
25
+ // ---------------------------------------------------------------------------
26
+
27
/** Subset of VerificationResult fields that the stubbed verifier has to supply. */
type VerResult = Pick<VerificationResult, "success" | "status" | "countsTowardEscalation" | "output" | "error">;

/** Scripted results, handed out one per runVerification call in call order. */
let _verificationResponses: VerResult[] = [];
/** How many times the runVerification stub has been invoked so far. */
let _verificationCallIndex = 0;

/**
 * Stand-in for runVerification. Each call returns the scripted result at the
 * current call position; once the script runs out, the last scripted result
 * is returned again.
 */
const mockRunVerification = mock(async (): Promise<VerResult> => {
  const position = _verificationCallIndex;
  _verificationCallIndex += 1;

  const lastScripted = _verificationResponses[_verificationResponses.length - 1];
  return _verificationResponses[position] ?? lastScripted;
});

/** Stand-in for revertStoriesOnFailure: resolves to the PRD it was given, untouched. */
const mockRevertStoriesOnFailure = mock(async (args: { prd: PRD; [k: string]: unknown }) => args.prd);
/** Stand-in for runRectificationLoop: always resolves to false. */
const mockRunRectificationLoop = mock(async () => false);
42
+
43
+ // ---------------------------------------------------------------------------
44
+ // Module mocks — must be top-level (Bun ESM hoisting)
45
+ // ---------------------------------------------------------------------------
46
+
47
// Verification layer: runVerification is scripted per test, the parsers return fixed values.
mock.module("../../src/execution/verification", () => {
  return {
    runVerification: mockRunVerification,
    parseTestOutput: () => {
      return { passCount: 5, failCount: 0, isEnvironmentalFailure: false };
    },
    getEnvironmentalEscalationThreshold: () => {
      return 3;
    },
  };
});

// Rectification helpers are replaced by the observable stubs declared above.
mock.module("../../src/execution/post-verify-rectification", () => {
  return {
    revertStoriesOnFailure: mockRevertStoriesOnFailure,
    runRectificationLoop: mockRunRectificationLoop,
  };
});

// PRD helpers: no expected files, and saving is a no-op.
mock.module("../../src/prd", () => {
  return {
    getExpectedFiles: () => {
      return [];
    },
    savePRD: mock(async () => {}),
  };
});

// Progress logging is a no-op.
mock.module("../../src/execution/progress", () => {
  return {
    appendProgress: mock(async () => {}),
  };
});

// No tier configuration is ever resolved.
mock.module("../../src/execution/escalation", () => {
  return {
    getTierConfig: () => {
      return undefined;
    },
  };
});

// Parsed bun output always reports five passes and no failures.
mock.module("../../src/verification/parser", () => {
  return {
    parseBunTestOutput: () => {
      return { failed: 0, passed: 5, failures: [] };
    },
  };
});

// Both logger accessors hand back a logger whose four level methods do nothing.
mock.module("../../src/logger", () => {
  const silentLogger = () => {
    return {
      info: () => {},
      warn: () => {},
      debug: () => {},
      error: () => {},
    };
  };
  return {
    getSafeLogger: () => silentLogger(),
    getLogger: () => silentLogger(),
  };
});

// Dynamic import after mocks
const { runPostAgentVerification } = await import("../../src/execution/post-verify");
92
+
93
+ // ---------------------------------------------------------------------------
94
+ // Fixtures
95
+ // ---------------------------------------------------------------------------
96
+
97
/**
 * Runs `git <args>` synchronously in `cwd` through Bun.spawnSync with stdin,
 * stdout and stderr all ignored.
 *
 * @param args - Arguments passed to git (the subcommand comes first).
 * @param cwd - Directory the command runs in.
 * @throws Error naming the subcommand and directory when git exits non-zero.
 */
function gitSync(args: string[], cwd: string): void {
  const { exitCode } = Bun.spawnSync(["git", ...args], {
    cwd,
    stdin: "ignore",
    stdout: "ignore",
    stderr: "ignore",
  });

  if (exitCode === 0) return;
  throw new Error(`git ${args[0]} failed in ${cwd}`);
}
104
+
105
/**
 * Runs `git <args>` synchronously in `cwd` and returns its stdout, decoded as
 * text and trimmed.
 *
 * The exit code is checked so that a failing command (for example `rev-parse`
 * in a repository with no commits) raises immediately instead of silently
 * yielding an empty string that would later be used as a git ref.
 *
 * @param args - Arguments passed to git (the subcommand comes first).
 * @param cwd - Directory the command runs in.
 * @returns Trimmed stdout of the command.
 * @throws Error naming the subcommand and directory when git exits non-zero.
 */
function gitOutput(args: string[], cwd: string): string {
  const proc = Bun.spawnSync(["git", ...args], { cwd, stdin: "ignore", stdout: "pipe", stderr: "ignore" });
  if (proc.exitCode !== 0) {
    // Same failure contract and message shape as gitSync.
    throw new Error(`git ${args[0]} failed in ${cwd}`);
  }
  return new TextDecoder().decode(proc.stdout).trim();
}
110
+
111
/**
 * Builds a throwaway git repository under the OS temp directory containing
 * two commits: an initial one (whose hash is returned as `storyGitRef`) and a
 * second one that adds `test/example.test.ts`. A diff from `storyGitRef` to
 * HEAD therefore lists a test file, which the regression gate needs in order
 * to activate.
 *
 * @returns The repository directory and the hash of the first commit.
 */
function makeGitRepo(): { dir: string; storyGitRef: string } {
  const dir = mkdtempSync(join(tmpdir(), "nax-bug026-"));
  const git = (...args: string[]): void => gitSync(args, dir);
  const commitEverything = (message: string): void => {
    git("add", ".");
    git("commit", "-m", message);
  };

  git("init");
  git("config", "user.email", "test@example.com");
  git("config", "user.name", "test");

  // First commit: its hash is the reference point for the story.
  writeFileSync(join(dir, "src.ts"), "export const x = 1;");
  commitEverything("initial");
  const storyGitRef = gitOutput(["rev-parse", "HEAD"], dir);

  // Second commit: a test file that only exists after storyGitRef.
  const testDir = join(dir, "test");
  mkdirSync(testDir, { recursive: true });
  const testFileBody = ['import { test, expect } from "bun:test";', 'test("x", () => expect(1).toBe(1));'].join("\n");
  writeFileSync(join(testDir, "example.test.ts"), testFileBody);
  commitEverything("add test");

  return { dir, storyGitRef };
}
139
+
140
/**
 * Builds the config fixture used by these tests. Every value is fixed except
 * `execution.regressionGate`, which starts from `{ enabled: true,
 * timeoutSeconds: 120 }` and is then overlaid with the caller's overrides.
 *
 * @param regressionGateOverrides - Keys to set or replace on the regression gate section.
 * @returns A freshly built object, cast to NaxConfig.
 */
function makeConfig(
  regressionGateOverrides: Partial<NaxConfig["execution"]["regressionGate"]> = {},
): NaxConfig {
  // Defaults come first so that any caller-supplied key wins.
  const regressionGate = { enabled: true, timeoutSeconds: 120, ...regressionGateOverrides };

  const models = {
    fast: "claude-sonnet-4-5",
    balanced: "claude-sonnet-4-5",
    powerful: "claude-opus-4-6",
  };

  const autoMode = {
    enabled: true,
    defaultAgent: "nax-agent-claude",
    fallbackOrder: ["nax-agent-claude"],
    complexityRouting: { simple: "fast", medium: "balanced", complex: "powerful", expert: "powerful" },
    escalation: { enabled: true, tierOrder: [] },
  };

  const rectification = {
    enabled: false,
    maxRetries: 2,
    fullSuiteTimeoutSeconds: 120,
    maxFailureSummaryChars: 2000,
    abortOnIncreasingFailures: true,
  };

  const execution = {
    maxIterations: 100,
    iterationDelayMs: 0,
    costLimit: 50,
    sessionTimeoutSeconds: 600,
    verificationTimeoutSeconds: 30,
    maxStoriesPerFeature: 50,
    smartTestRunner: false,
    rectification,
    regressionGate,
    contextProviderTokenBudget: 2000,
  };

  const quality = {
    requireTypecheck: false,
    requireLint: false,
    requireTests: true,
    commands: { test: "bun test" },
    forceExit: false,
    detectOpenHandles: false,
    detectOpenHandlesRetries: 0,
    gracePeriodMs: 0,
    drainTimeoutMs: 0,
    shell: false,
    stripEnvVars: [],
    environmentalEscalationDivisor: 3,
  };

  const tdd = {
    maxRetries: 2,
    autoVerifyIsolation: false,
    strategy: "off",
    autoApproveVerifier: false,
  };

  const analyze = {
    llmEnhanced: false,
    model: "balanced",
    fallbackToKeywords: true,
    maxCodebaseSummaryTokens: 4000,
  };

  const context = {
    testCoverage: {
      enabled: false,
      detail: "names-only",
      maxTokens: 500,
      testPattern: "**/*.test.ts",
      scopeToStory: false,
    },
    autoDetect: { enabled: false, maxFiles: 10, traceImports: false },
  };

  const fixture = {
    version: 1,
    models,
    autoMode,
    execution,
    quality,
    tdd,
    constitution: { enabled: false, path: "constitution.md", maxTokens: 2000 },
    analyze,
    review: { enabled: false, checks: [], commands: {} },
    plan: { model: "balanced", outputPath: "features" },
    acceptance: { enabled: false, maxRetries: 2, generateTests: false, testPath: "acceptance.test.ts" },
    routing: { strategy: "keyword" },
    context,
  };

  // The fixture only carries the fields listed above, hence the double cast.
  return fixture as unknown as NaxConfig;
}
230
+
231
/**
 * Builds an in-progress, not-yet-passing story fixture with empty
 * collections and zero attempts.
 *
 * @param id - Story identifier; defaults to "US-001".
 * @returns A freshly built object, cast to UserStory.
 */
function makeStory(id = "US-001"): UserStory {
  const fixture = {
    id,
    title: "Test story",
    description: "Test",
    acceptanceCriteria: [],
    tags: [],
    dependencies: [],
    status: "in-progress",
    passes: false,
    escalations: [],
    attempts: 0,
    contextFiles: [],
  };
  return fixture as unknown as UserStory;
}
246
+
247
/**
 * Wraps a single story in a PRD fixture whose creation and update timestamps
 * are both taken from the current time.
 *
 * @param story - The only entry placed in `userStories`.
 * @returns A freshly built object, cast to PRD.
 */
function makePRD(story: UserStory): PRD {
  const fixture = {
    id: "prd-001",
    title: "Test PRD",
    userStories: [story],
    version: "1.0",
    createdAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
  };
  return fixture as unknown as PRD;
}
257
+
258
/**
 * Assembles the options object passed to runPostAgentVerification in these tests.
 *
 * @param workdir - Working directory; also the directory `prdPath` points into.
 * @param storyGitRef - Git ref forwarded unchanged.
 * @param config - Config forwarded unchanged.
 * @param story - Story under verification; also the sole entry of `storiesToExecute`.
 * @param prd - PRD forwarded unchanged.
 * @returns Options with an empty metrics list and an empty timeout-retry map.
 */
function makeOpts(
  workdir: string,
  storyGitRef: string,
  config: NaxConfig,
  story: UserStory,
  prd: PRD,
) {
  const prdPath = join(workdir, "prd.json");
  const allStoryMetrics = [] as StoryMetrics[];
  const timeoutRetryCountMap = new Map<string, number>();

  return {
    config,
    prd,
    prdPath,
    workdir,
    story,
    storiesToExecute: [story],
    allStoryMetrics,
    timeoutRetryCountMap,
    storyGitRef,
  };
}
277
+
278
+ // ---------------------------------------------------------------------------
279
+ // Test lifecycle
280
+ // ---------------------------------------------------------------------------
281
+
282
/** Directory of the git repository created for the current test. */
let tempDir: string;
/** Hash of that repository's first commit. */
let storyGitRef: string;

// Before each test: forget recorded stub calls, rewind the scripted responses,
// and create a fresh two-commit repository.
beforeEach(() => {
  mockRunVerification.mockClear();
  mockRevertStoriesOnFailure.mockClear();
  mockRunRectificationLoop.mockClear();

  _verificationCallIndex = 0;
  _verificationResponses = [];

  ({ dir: tempDir, storyGitRef } = makeGitRepo());
});

// After each test: delete the repository created for it.
afterEach(() => {
  rmSync(tempDir, { force: true, recursive: true });
});
300
+
301
+ // ---------------------------------------------------------------------------
302
+ // BUG-026 behavioral tests
303
+ // ---------------------------------------------------------------------------
304
+
305
describe("BUG-026: regression gate TIMEOUT acceptance", () => {
  // Response factories — each call builds a fresh object so no test shares state with another.
  const scopedPass = (): VerResult => ({
    success: true,
    status: "SUCCESS",
    countsTowardEscalation: true,
    output: "pass 5",
  });
  const gateTimeout = (): VerResult => ({ success: false, status: "TIMEOUT", countsTowardEscalation: false });
  const gateTestFailure = (): VerResult => ({
    success: false,
    status: "TEST_FAILURE",
    countsTowardEscalation: true,
    output: "FAIL 1",
  });

  /**
   * Scripts the runVerification stub with `responses` (first entry answers the
   * scoped run, second the regression gate), builds the fixtures with the
   * given regression gate overrides, and runs post-agent verification.
   */
  async function verifyWith(responses: VerResult[], gateOverrides: Parameters<typeof makeConfig>[0]) {
    _verificationResponses = responses;

    const config = makeConfig(gateOverrides);
    const story = makeStory();
    const prd = makePRD(story);

    return runPostAgentVerification(makeOpts(tempDir, storyGitRef, config, story, prd));
  }

  test("TIMEOUT + acceptOnTimeout=true → runPostAgentVerification returns passed", async () => {
    const result = await verifyWith([scopedPass(), gateTimeout()], { acceptOnTimeout: true });

    expect(result.passed).toBe(true);
  });

  test("TIMEOUT + acceptOnTimeout=true → revertStoriesOnFailure is NOT called", async () => {
    await verifyWith([scopedPass(), gateTimeout()], { acceptOnTimeout: true });

    expect(mockRevertStoriesOnFailure).not.toHaveBeenCalled();
  });

  test("TIMEOUT + acceptOnTimeout=false → runPostAgentVerification returns failed", async () => {
    const result = await verifyWith([scopedPass(), gateTimeout()], { acceptOnTimeout: false });

    expect(result.passed).toBe(false);
  });

  test("TIMEOUT + acceptOnTimeout=false → revertStoriesOnFailure IS called", async () => {
    await verifyWith([scopedPass(), gateTimeout()], { acceptOnTimeout: false });

    expect(mockRevertStoriesOnFailure).toHaveBeenCalledTimes(1);
  });

  test("TIMEOUT + acceptOnTimeout not set → defaults to true → returns passed", async () => {
    // Empty overrides: acceptOnTimeout is left out and should default to true per the BUG-026 spec.
    const result = await verifyWith([scopedPass(), gateTimeout()], {});

    expect(result.passed).toBe(true);
  });

  test("TEST_FAILURE in regression gate → returns failed regardless of acceptOnTimeout", async () => {
    const result = await verifyWith([scopedPass(), gateTestFailure()], { acceptOnTimeout: true });

    expect(result.passed).toBe(false);
  });

  test("TEST_FAILURE in regression gate → revertStoriesOnFailure IS called", async () => {
    await verifyWith([scopedPass(), gateTestFailure()], { acceptOnTimeout: true });

    expect(mockRevertStoriesOnFailure).toHaveBeenCalledTimes(1);
  });

  test("regression gate runs second → runVerification called twice (scoped + full suite)", async () => {
    await verifyWith([scopedPass(), gateTimeout()], { acceptOnTimeout: true });

    // One call for the scoped run, one for the regression gate.
    expect(mockRunVerification).toHaveBeenCalledTimes(2);
  });

  test("regression gate disabled → only scoped test runs (one call to runVerification)", async () => {
    const result = await verifyWith([scopedPass()], { enabled: false, timeoutSeconds: 120 });

    expect(result.passed).toBe(true);
    expect(mockRunVerification).toHaveBeenCalledTimes(1);
  });
});
@@ -146,6 +146,38 @@ describe("Regression Gate Timeout", () => {
146
146
  expect(regressionGateTimeoutSeconds).not.toBe(verificationTimeoutSeconds);
147
147
  expect(regressionGateTimeoutSeconds).toBeLessThan(verificationTimeoutSeconds);
148
148
  });
149
+
150
// Type-level check: RegressionGateConfig accepts an explicit acceptOnTimeout of true.
test("should have acceptOnTimeout config option (BUG-026)", () => {
  const gate: RegressionGateConfig = { enabled: true, timeoutSeconds: 120, acceptOnTimeout: true };

  expect(gate.acceptOnTimeout).toBe(true);
});

// Type-level check: acceptOnTimeout may be omitted, and an omitted value coalesces to true.
test("should default acceptOnTimeout to true (BUG-026)", () => {
  // acceptOnTimeout is deliberately left out here.
  const gate: RegressionGateConfig = { enabled: true, timeoutSeconds: 120 };

  // An undefined flag is meant to be read as true.
  const effectiveFlag = gate.acceptOnTimeout ?? true;
  expect(effectiveFlag).toBe(true);
});

// Type-level check: RegressionGateConfig accepts an explicit acceptOnTimeout of false.
test("should allow disabling acceptOnTimeout (BUG-026)", () => {
  const gate: RegressionGateConfig = { enabled: true, timeoutSeconds: 120, acceptOnTimeout: false };

  expect(gate.acceptOnTimeout).toBe(false);
});
149
181
  });
150
182
 
151
183
  describe("Story State After Regression Failure", () => {