npm - @nathapp/nax - Versions diffs - 0.18.3 → 0.18.4 - Mend

@nathapp/nax 0.18.3 → 0.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/.claude/rules/01-project-conventions.md +34 -0
package/.claude/rules/02-test-architecture.md +39 -0
package/.claude/rules/03-test-writing.md +58 -0
package/.claude/rules/04-forbidden-patterns.md +29 -0
package/.githooks/pre-commit +13 -0
package/CHANGELOG.md +9 -0
package/CLAUDE.md +45 -122
package/docker-compose.test.yml +1 -3
package/docs/ROADMAP.md +9 -27
package/package.json +1 -1
package/src/config/schemas.ts +2 -0
package/src/config/types.ts +5 -1
package/src/execution/post-verify.ts +30 -12
package/src/pipeline/stages/execution.ts +10 -2
package/src/pipeline/stages/routing.ts +18 -4
package/src/pipeline/stages/verify.ts +8 -1
package/src/routing/strategies/keyword.ts +7 -4
package/src/routing/strategies/llm.ts +40 -4
package/test/{US-002-orchestrator.test.ts → integration/precheck-orchestrator.test.ts} +3 -3
package/test/{execution/post-verify-bug026.test.ts → unit/execution/post-verify-regression.test.ts} +22 -50
package/test/{execution → unit/execution}/post-verify.test.ts +1 -1
package/test/unit/pipeline/routing-partial-override.test.ts +15 -36
package/test/unit/pipeline/verify-smart-runner.test.ts +5 -6
package/test/unit/routing/routing-stability.test.ts +207 -0
package/test/unit/storyid-events.test.ts +20 -32
package/test/unit/verification/smart-runner-config.test.ts +162 -0
package/test/unit/{smart-test-runner.test.ts → verification/smart-runner-discovery.test.ts} +5 -164
package/test/TEST_COVERAGE_US001.md +0 -217
package/test/TEST_COVERAGE_US003.md +0 -84
package/test/TEST_COVERAGE_US005.md +0 -86

package/src/pipeline/stages/execution.ts CHANGED

@@ -85,7 +85,7 @@ export const executionStage: PipelineStage = {
     const logger = getLogger();
     // HARD FAILURE: No agent available — cannot proceed without an agent
-    const agent = getAgent(ctx.config.autoMode.defaultAgent);
+    const agent = _executionDeps.getAgent(ctx.config.autoMode.defaultAgent);
     if (!agent) {
       return {
         action: "fail",
@@ -152,7 +152,7 @@ export const executionStage: PipelineStage = {
     }
     // Validate agent supports the requested tier
-    if (!validateAgentForTier(agent, ctx.routing.modelTier)) {
+    if (!_executionDeps.validateAgentForTier(agent, ctx.routing.modelTier)) {
       logger.warn("execution", "Agent tier mismatch", {
         storyId: ctx.story.id,
         agentName: agent.name,
@@ -192,3 +192,11 @@ export const executionStage: PipelineStage = {
     return { action: "continue" };
   },
 };
+/**
+ * Swappable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
+ */
+export const _executionDeps = {
+  getAgent,
+  validateAgentForTier,
+};

package/src/pipeline/stages/routing.ts CHANGED

@@ -35,7 +35,7 @@ export const routingStage: PipelineStage = {
     let routing: { complexity: string; testStrategy: string; modelTier: string; reasoning?: string };
     if (ctx.story.routing) {
       // Use cached complexity/testStrategy/modelTier
-      routing = await routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
+      routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
       // Override with cached values only when they are actually set
       if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
       if (ctx.story.routing?.testStrategy) routing.testStrategy = ctx.story.routing.testStrategy;
@@ -44,17 +44,20 @@ export const routingStage: PipelineStage = {
       if (ctx.story.routing?.modelTier) {
         routing.modelTier = ctx.story.routing.modelTier;
       } else {
-        routing.modelTier = complexityToModelTier(routing.complexity as import("../../config").Complexity, ctx.config);
+        routing.modelTier = _routingDeps.complexityToModelTier(
+          routing.complexity as import("../../config").Complexity,
+          ctx.config,
+        );
       }
     } else {
       // Fresh classification
-      routing = await routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
+      routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
     }
     // BUG-010: Greenfield detection — force test-after if no test files exist
     const greenfieldDetectionEnabled = ctx.config.tdd.greenfieldDetection ?? true;
     if (greenfieldDetectionEnabled && routing.testStrategy.startsWith("three-session-tdd")) {
-      const isGreenfield = await isGreenfieldStory(ctx.story, ctx.workdir);
+      const isGreenfield = await _routingDeps.isGreenfieldStory(ctx.story, ctx.workdir);
       if (isGreenfield) {
         logger.info("routing", "Greenfield detected — forcing test-after strategy", {
           storyId: ctx.story.id,
@@ -84,3 +87,14 @@ export const routingStage: PipelineStage = {
     return { action: "continue" };
   },
 };
+/**
+ * Swappable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
+ * Tests can override individual functions without poisoning the module registry.
+ */
+export const _routingDeps = {
+  routeStory,
+  complexityToModelTier,
+  isGreenfieldStory,
+  clearCache,
+};

package/src/pipeline/stages/verify.ts CHANGED

@@ -99,7 +99,7 @@ export const verifyStage: PipelineStage = {
     }
     // Use unified regression gate (includes 2s wait for agent process cleanup)
-    const result = await regression({
+    const result = await _verifyDeps.regression({
       workdir: ctx.workdir,
       command: effectiveCommand,
       timeoutSeconds: ctx.config.execution.verificationTimeoutSeconds,
@@ -151,3 +151,10 @@ export const verifyStage: PipelineStage = {
     return { action: "continue" };
   },
 };
+/**
+ * Swappable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
+ */
+export const _verifyDeps = {
+  regression,
+};

package/src/routing/strategies/keyword.ts CHANGED

@@ -71,11 +71,13 @@ const PUBLIC_API_KEYWORDS = [
  */
 function classifyComplexity(
   title: string,
-  description: string,
+  _description: string,
   acceptanceCriteria: string[],
   tags: string[] = [],
 ): Complexity {
-  const text = [title, description, ...acceptanceCriteria, ...tags].join(" ").toLowerCase();
+  // BUG-031: Exclude description — it accumulates priorErrors across retries and
+  // causes classification drift. Only use stable, immutable story fields.
+  const text = [title, ...acceptanceCriteria, ...tags].join(" ").toLowerCase();
   if (EXPERT_KEYWORDS.some((kw) => text.includes(kw))) {
     return "expert";
@@ -98,10 +100,11 @@ function classifyComplexity(
 function determineTestStrategy(
   complexity: Complexity,
   title: string,
-  description: string,
+  _description: string,
   tags: string[] = [],
 ): TestStrategy {
-  const text = [title, description, ...tags].join(" ").toLowerCase();
+  // BUG-031: Exclude description — only use stable, immutable story fields.
+  const text = [title, ...tags].join(" ").toLowerCase();
   const isSecurityCritical = SECURITY_KEYWORDS.some((kw) => text.includes(kw));
   const isPublicApi = PUBLIC_API_KEYWORDS.some((kw) => text.includes(kw));

package/src/routing/strategies/llm.ts CHANGED

@@ -58,10 +58,7 @@ function evictOldest(): void {
  * @returns LLM response text
  * @throws Error on timeout or spawn failure
  */
-async function callLlm(modelTier: string, prompt: string, config: NaxConfig): Promise<string> {
-  const llmConfig = config.routing.llm;
-  const timeoutMs = llmConfig?.timeoutMs ?? 15000;
+async function callLlmOnce(modelTier: string, prompt: string, config: NaxConfig, timeoutMs: number): Promise<string> {
   // Resolve model tier to actual model identifier
   const modelEntry = config.models[modelTier];
   if (!modelEntry) {
@@ -108,6 +105,45 @@ async function callLlm(modelTier: string, prompt: string, config: NaxConfig): Pr
   }
 }
+/**
+ * Call LLM via claude CLI with timeout and retry (BUG-033).
+ *
+ * @param modelTier - Model tier to use for routing call
+ * @param prompt - Prompt to send to LLM
+ * @param config - nax configuration
+ * @returns LLM response text
+ * @throws Error after all retries exhausted
+ */
+async function callLlm(modelTier: string, prompt: string, config: NaxConfig): Promise<string> {
+  const llmConfig = config.routing.llm;
+  const timeoutMs = llmConfig?.timeoutMs ?? 30000;
+  const maxRetries = llmConfig?.retries ?? 1;
+  const retryDelayMs = llmConfig?.retryDelayMs ?? 1000;
+  let lastError: Error | undefined;
+  for (let attempt = 0; attempt <= maxRetries; attempt++) {
+    try {
+      return await callLlmOnce(modelTier, prompt, config, timeoutMs);
+    } catch (err) {
+      lastError = err as Error;
+      if (attempt < maxRetries) {
+        const logger = getLogger();
+        logger.warn(
+          "routing",
+          `LLM call failed (attempt ${attempt + 1}/${maxRetries + 1}), retrying in ${retryDelayMs}ms`,
+          {
+            error: lastError.message,
+          },
+        );
+        await Bun.sleep(retryDelayMs);
+      }
+    }
+  }
+  throw lastError ?? new Error("LLM call failed with unknown error");
+}
 /**
  * Route multiple stories in a single batch LLM call.
  *

package/test/{US-002-orchestrator.test.ts → integration/precheck-orchestrator.test.ts} RENAMED

@@ -14,9 +14,9 @@ const skipInCI = process.env.CI ? test.skip : test;
 import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
-import type { NaxConfig } from "../src/config";
-import type { PRD } from "../src/prd/types";
-import { EXIT_CODES, runPrecheck } from "../src/precheck";
+import type { NaxConfig } from "../../src/config";
+import type { PRD } from "../../src/prd/types";
+import { EXIT_CODES, runPrecheck } from "../../src/precheck";
 // Helper to create a minimal valid git environment
 async function setupGitRepo(dir: string): Promise<void> {

package/test/{execution/post-verify-bug026.test.ts → unit/execution/post-verify-regression.test.ts} RENAMED

@@ -15,10 +15,10 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
 import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
 import { tmpdir } from "node:os";
-import type { NaxConfig } from "../../src/config";
-import type { PRD, UserStory } from "../../src/prd/types";
-import type { StoryMetrics } from "../../src/metrics";
-import type { VerificationResult } from "../../src/verification";
+import type { NaxConfig } from "../../../src/config";
+import type { PRD, UserStory } from "../../../src/prd/types";
+import type { StoryMetrics } from "../../../src/metrics";
+import type { VerificationResult } from "../../../src/verification";
 // ---------------------------------------------------------------------------
 // Mock runVerification with call-order-based responses
@@ -41,54 +41,13 @@ const mockRevertStoriesOnFailure = mock(async ({ prd }: { prd: PRD; [k: string]:
 const mockRunRectificationLoop = mock(async () => false);
 // ---------------------------------------------------------------------------
-// Module mocks — must be top-level (Bun ESM hoisting)
+// Static imports — uses _postVerifyDeps pattern (no mock.module() needed)
 // ---------------------------------------------------------------------------
-mock.module("../../src/execution/verification", () => ({
-  runVerification: mockRunVerification,
-  parseTestOutput: () => ({ passCount: 5, failCount: 0, isEnvironmentalFailure: false }),
-  getEnvironmentalEscalationThreshold: () => 3,
-}));
-mock.module("../../src/execution/post-verify-rectification", () => ({
-  revertStoriesOnFailure: mockRevertStoriesOnFailure,
-  runRectificationLoop: mockRunRectificationLoop,
-}));
-mock.module("../../src/prd", () => ({
-  getExpectedFiles: () => [],
-  savePRD: mock(async () => {}),
-}));
-mock.module("../../src/execution/progress", () => ({
-  appendProgress: mock(async () => {}),
-}));
-mock.module("../../src/execution/escalation", () => ({
-  getTierConfig: () => undefined,
-}));
-mock.module("../../src/verification/parser", () => ({
-  parseBunTestOutput: () => ({ failed: 0, passed: 5, failures: [] }),
-}));
-mock.module("../../src/logger", () => ({
-  getSafeLogger: () => ({
-    info: () => {},
-    warn: () => {},
-    debug: () => {},
-    error: () => {},
-  }),
-  getLogger: () => ({
-    info: () => {},
-    warn: () => {},
-    debug: () => {},
-    error: () => {},
-  }),
-}));
-// Dynamic import after mocks
-const { runPostAgentVerification } = await import("../../src/execution/post-verify");
+import { _postVerifyDeps, runPostAgentVerification } from "../../../src/execution/post-verify";
+// ── Capture originals for afterEach restoration ───────────────────────────────
+const _origPostVerifyDeps = { ..._postVerifyDeps };
 // ---------------------------------------------------------------------------
 // Fixtures
@@ -283,6 +242,17 @@ let tempDir: string;
 let storyGitRef: string;
 beforeEach(() => {
+  // Wire _postVerifyDeps to mocks
+  _postVerifyDeps.runVerification = mockRunVerification as typeof _postVerifyDeps.runVerification;
+  _postVerifyDeps.parseTestOutput = () => ({ passCount: 5, failCount: 0, isEnvironmentalFailure: false }) as any;
+  _postVerifyDeps.getEnvironmentalEscalationThreshold = () => 3;
+  _postVerifyDeps.revertStoriesOnFailure = mockRevertStoriesOnFailure as typeof _postVerifyDeps.revertStoriesOnFailure;
+  _postVerifyDeps.runRectificationLoop = mockRunRectificationLoop as typeof _postVerifyDeps.runRectificationLoop;
+  _postVerifyDeps.getExpectedFiles = () => [];
+  _postVerifyDeps.savePRD = mock(async () => {}) as typeof _postVerifyDeps.savePRD;
+  _postVerifyDeps.appendProgress = mock(async () => {}) as typeof _postVerifyDeps.appendProgress;
+  _postVerifyDeps.getTierConfig = () => undefined as any;
+  _postVerifyDeps.parseBunTestOutput = () => ({ failed: 0, passed: 5, failures: [] }) as any;
   mockRunVerification.mockClear();
   mockRevertStoriesOnFailure.mockClear();
   mockRunRectificationLoop.mockClear();
@@ -295,6 +265,8 @@ beforeEach(() => {
 });
 afterEach(() => {
+  Object.assign(_postVerifyDeps, _origPostVerifyDeps);
+  mock.restore();
   rmSync(tempDir, { recursive: true, force: true });
 });

package/test/{execution → unit/execution}/post-verify.test.ts RENAMED

@@ -8,7 +8,7 @@
  */
 import { describe, expect, test } from "bun:test";
-import type { RegressionGateConfig } from "../../src/config/schema";
+import type { RegressionGateConfig } from "../../../src/config/schema";
 describe("RegressionGateConfig", () => {
   test("should have correct default values", () => {

package/test/unit/pipeline/routing-partial-override.test.ts CHANGED

@@ -6,13 +6,14 @@
  * a fresh classification.
  */
-import { beforeEach, afterEach, describe, expect, mock, test } from "bun:test";
+import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
 import { initLogger, resetLogger } from "../../../src/logger";
-import type { PipelineContext } from "../../../src/pipeline/types";
+import { _routingDeps, routingStage } from "../../../src/pipeline/stages/routing";
 import type { NaxConfig } from "../../../src/config";
+import type { PipelineContext } from "../../../src/pipeline/types";
 import type { UserStory } from "../../../src/prd/types";
-// ── Module mocks (must be declared before dynamic imports) ────────────────────
+// ── Mock functions ────────────────────────────────────────────────────────────
 const mockRouteStory = mock(async () => ({
   complexity: "medium",
@@ -22,26 +23,11 @@ const mockRouteStory = mock(async () => ({
 }));
 const mockComplexityToModelTier = mock((_complexity: string, _config: unknown) => "balanced" as const);
+const mockIsGreenfieldStory = mock(async () => false);
-mock.module("../../../src/routing", () => ({
-  routeStory: mockRouteStory,
-  complexityToModelTier: mockComplexityToModelTier,
-}));
-// Greenfield check: return false so it never interferes with test strategy
-mock.module("../../../src/context/greenfield", () => ({
-  isGreenfieldStory: mock(async () => false),
-}));
-// LLM batch cache is not relevant here
-mock.module("../../../src/routing/strategies/llm", () => ({
-  clearCache: mock(() => {}),
-  routeBatch: mock(async () => []),
-}));
-// ── Dynamic imports after mocks ───────────────────────────────────────────────
+// ── Capture originals for afterEach restoration ───────────────────────────────
-const { routingStage } = await import("../../../src/pipeline/stages/routing");
+const _origDeps = { ..._routingDeps };
 // ── Fixtures ──────────────────────────────────────────────────────────────────
@@ -58,11 +44,9 @@ function makeStory(routingOverride?: Partial<UserStory["routing"]>): UserStory {
     tags: [],
     dependencies: [],
   };
   if (routingOverride !== undefined) {
     story.routing = routingOverride as UserStory["routing"];
   }
   return story;
 }
@@ -82,16 +66,22 @@ function makeCtx(story: UserStory): PipelineContext {
   } as PipelineContext;
 }
-// ── Logger setup ──────────────────────────────────────────────────────────────
+// ── Lifecycle ─────────────────────────────────────────────────────────────────
 beforeEach(() => {
   resetLogger();
   initLogger({ level: "error", useChalk: false });
+  _routingDeps.routeStory = mockRouteStory as typeof _routingDeps.routeStory;
+  _routingDeps.complexityToModelTier = mockComplexityToModelTier as typeof _routingDeps.complexityToModelTier;
+  _routingDeps.isGreenfieldStory = mockIsGreenfieldStory as typeof _routingDeps.isGreenfieldStory;
   mockRouteStory.mockClear();
   mockComplexityToModelTier.mockClear();
+  mockIsGreenfieldStory.mockClear();
 });
 afterEach(() => {
+  Object.assign(_routingDeps, _origDeps);
+  mock.restore();
   resetLogger();
 });
@@ -99,42 +89,31 @@ afterEach(() => {
 describe("routing stage — partial override (FIX-001)", () => {
   test("(1) partial override with only testStrategy preserves LLM complexity", async () => {
-    // Story sets only testStrategy — complexity should come from LLM
     const story = makeStory({ testStrategy: "test-after", complexity: undefined as any, reasoning: "manual" });
     const ctx = makeCtx(story);
     await routingStage.execute(ctx);
-    // testStrategy is overridden by the story field
     expect(ctx.routing.testStrategy).toBe("test-after");
-    // complexity should remain from the LLM result ("medium"), not undefined
     expect(ctx.routing.complexity).toBe("medium");
   });
   test("(2) LLM-classified complexity is preserved when story.routing has no complexity", async () => {
-    // story.routing is present but complexity is undefined (falsy)
     const story = makeStory({ testStrategy: "test-after", complexity: undefined as any, reasoning: "" });
     const ctx = makeCtx(story);
     await routingStage.execute(ctx);
-    // LLM returned "medium" — it must not be overwritten with undefined
     expect(ctx.routing.complexity).toBe("medium");
     expect(ctx.routing.complexity).not.toBeUndefined();
   });
   test("(3) full override works when both complexity and testStrategy are set", async () => {
-    // Story has explicit values for both fields
-    const story = makeStory({
-      complexity: "simple",
-      testStrategy: "test-after",
-      reasoning: "manual override",
-    });
+    const story = makeStory({ complexity: "simple", testStrategy: "test-after", reasoning: "manual override" });
     const ctx = makeCtx(story);
     await routingStage.execute(ctx);
-    // Both fields should be overridden from the story
     expect(ctx.routing.complexity).toBe("simple");
     expect(ctx.routing.testStrategy).toBe("test-after");
   });

package/test/unit/pipeline/verify-smart-runner.test.ts CHANGED

@@ -23,15 +23,12 @@ import type { PRD, UserStory } from "../../../src/prd/types";
 const mockRegression = mock(async () => ({ success: true, status: "SUCCESS" as const }));
-mock.module("../../../src/verification/gate", () => ({
-  regression: mockRegression,
-}));
+// ---- Static imports — no mock.module() needed (uses _deps pattern) ----------
+import { _verifyDeps, verifyStage } from "../../../src/pipeline/stages/verify";
 // ---- Capture originals for afterEach restoration ----------------------------
 const _origDeps = { ..._smartRunnerDeps };
-// ---- Dynamic import after gate mock -----------------------------------------
-const { verifyStage } = await import("../../../src/pipeline/stages/verify");
+const _origVerifyDeps = { ..._verifyDeps };
 // ---- Mock functions ---------------------------------------------------------
@@ -160,6 +157,7 @@ describe("Verify Stage --- Smart Runner Integration", () => {
     _smartRunnerDeps.mapSourceToTests = mockMapSourceToTests;
     _smartRunnerDeps.importGrepFallback = mockImportGrepFallback;
     _smartRunnerDeps.buildSmartTestCommand = mockBuildSmartTestCommand;
+    _verifyDeps.regression = mockRegression as typeof _verifyDeps.regression;
     mockRegression.mockClear();
     mockGetChangedSourceFiles.mockClear();
     mockMapSourceToTests.mockClear();
@@ -170,6 +168,7 @@ describe("Verify Stage --- Smart Runner Integration", () => {
   afterEach(() => {
     resetLogger();
     Object.assign(_smartRunnerDeps, _origDeps);
+    Object.assign(_verifyDeps, _origVerifyDeps);
   });
   describe("AC1: uses scoped test command when smart runner finds test files", () => {