npm - @nathapp/nax - Versions diffs - 0.24.0 → 0.26.0 - Mend

@nathapp/nax 0.24.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

package/CLAUDE.md +70 -56
package/docs/ROADMAP.md +45 -15
package/docs/specs/trigger-completion.md +145 -0
package/nax/features/routing-persistence/prd.json +104 -0
package/nax/features/routing-persistence/progress.txt +1 -0
package/nax/features/trigger-completion/prd.json +150 -0
package/nax/features/trigger-completion/progress.txt +7 -0
package/nax/status.json +15 -16
package/package.json +1 -1
package/src/config/types.ts +3 -1
package/src/execution/crash-recovery.ts +11 -0
package/src/execution/executor-types.ts +1 -1
package/src/execution/iteration-runner.ts +1 -0
package/src/execution/lifecycle/run-setup.ts +4 -0
package/src/execution/sequential-executor.ts +45 -7
package/src/interaction/plugins/auto.ts +10 -1
package/src/metrics/aggregator.ts +2 -1
package/src/metrics/tracker.ts +26 -14
package/src/metrics/types.ts +2 -0
package/src/pipeline/event-bus.ts +14 -1
package/src/pipeline/stages/completion.ts +20 -0
package/src/pipeline/stages/execution.ts +62 -0
package/src/pipeline/stages/review.ts +25 -1
package/src/pipeline/stages/routing.ts +42 -8
package/src/pipeline/subscribers/hooks.ts +32 -0
package/src/pipeline/subscribers/interaction.ts +36 -1
package/src/pipeline/types.ts +2 -0
package/src/prd/types.ts +4 -0
package/src/routing/content-hash.ts +25 -0
package/src/routing/index.ts +3 -0
package/src/routing/router.ts +3 -2
package/src/routing/strategies/keyword.ts +2 -1
package/src/routing/strategies/llm-prompts.ts +29 -28
package/src/utils/git.ts +21 -0
package/test/integration/routing/plugin-routing-core.test.ts +1 -1
package/test/unit/execution/sequential-executor.test.ts +235 -0
package/test/unit/interaction/auto-plugin.test.ts +162 -0
package/test/unit/interaction-plugins.test.ts +308 -1
package/test/unit/metrics/aggregator.test.ts +164 -0
package/test/unit/metrics/tracker.test.ts +186 -0
package/test/unit/pipeline/stages/completion-review-gate.test.ts +218 -0
package/test/unit/pipeline/stages/execution-ambiguity.test.ts +311 -0
package/test/unit/pipeline/stages/execution-merge-conflict.test.ts +218 -0
package/test/unit/pipeline/stages/review.test.ts +201 -0
package/test/unit/pipeline/stages/routing-idempotence.test.ts +139 -0
package/test/unit/pipeline/stages/routing-initial-complexity.test.ts +321 -0
package/test/unit/pipeline/stages/routing-persistence.test.ts +380 -0
package/test/unit/pipeline/subscribers/hooks.test.ts +43 -4
package/test/unit/pipeline/subscribers/interaction.test.ts +284 -2
package/test/unit/prd-auto-default.test.ts +2 -2
package/test/unit/routing/content-hash.test.ts +99 -0
package/test/unit/routing/routing-stability.test.ts +1 -1
package/test/unit/routing-core.test.ts +5 -5
package/test/unit/routing-strategies.test.ts +1 -3
package/test/unit/utils/git.test.ts +50 -0

package/src/pipeline/stages/review.ts CHANGED Viewed

@@ -6,10 +6,12 @@
  * @returns
  * - `continue`: Review passed
  * - `escalate`: Built-in check failed (lint/typecheck) — autofix stage handles retry
- * - `fail`: Plugin reviewer hard-failed
+ * - `escalate`: Plugin reviewer failed and security-review trigger responded non-abort
+ * - `fail`: Plugin reviewer hard-failed (no trigger, or trigger responded abort)
  */
 // RE-ARCH: rewrite
+import { checkSecurityReview, isTriggerEnabled } from "../../interaction/triggers";
 import { getLogger } from "../../logger";
 import { reviewOrchestrator } from "../../review/orchestrator";
 import type { PipelineContext, PipelineStage, StageResult } from "../types";
@@ -29,6 +31,21 @@ export const reviewStage: PipelineStage = {
     if (!result.success) {
       if (result.pluginFailed) {
+        // security-review trigger: prompt before permanently failing
+        if (ctx.interaction && isTriggerEnabled("security-review", ctx.config)) {
+          const shouldContinue = await _reviewDeps.checkSecurityReview(
+            { featureName: ctx.prd.feature, storyId: ctx.story.id },
+            ctx.config,
+            ctx.interaction,
+          );
+          if (!shouldContinue) {
+            logger.error("review", `Plugin reviewer failed: ${result.failureReason}`, { storyId: ctx.story.id });
+            return { action: "fail", reason: `Review failed: ${result.failureReason}` };
+          }
+          logger.warn("review", "Security-review trigger escalated — retrying story", { storyId: ctx.story.id });
+          return { action: "escalate", reason: `Review failed: ${result.failureReason}` };
+        }
         logger.error("review", `Plugin reviewer failed: ${result.failureReason}`, { storyId: ctx.story.id });
         return { action: "fail", reason: `Review failed: ${result.failureReason}` };
       }
@@ -47,3 +64,10 @@ export const reviewStage: PipelineStage = {
     return { action: "continue" };
   },
 };
+/**
+ * Swappable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
+ */
+export const _reviewDeps = {
+  checkSecurityReview,
+};

package/src/pipeline/stages/routing.ts CHANGED Viewed

@@ -2,15 +2,18 @@
  * Routing Stage
  *
  * Classifies story complexity and determines model tier + test strategy.
- * Uses cached complexity/testStrategy/modelTier from story if available.
+ * Uses cached complexity/testStrategy/modelTier from story if contentHash matches.
  * modelTier: uses escalated tier if explicitly set (BUG-032), otherwise derives from config.
  *
+ * RRP-003: contentHash staleness detection — if story.routing.contentHash is missing or
+ * does not match the current story content, treats cached routing as a miss and re-classifies.
+ *
  * @returns
  * - `continue`: Routing determined, proceed to next stage
  *
  * @example
  * ```ts
- * // Story has cached routing with complexity
+ * // Story has cached routing with matching contentHash
  * await routingStage.execute(ctx);
  * // ctx.routing: { complexity: "simple", modelTier: "fast", testStrategy: "test-after", reasoning: "..." }
  * // modelTier is derived from current config.autoMode.complexityRouting
@@ -19,7 +22,8 @@
 import { isGreenfieldStory } from "../../context/greenfield";
 import { getLogger } from "../../logger";
-import { complexityToModelTier, routeStory } from "../../routing";
+import { savePRD } from "../../prd";
+import { complexityToModelTier, computeStoryContentHash, routeStory } from "../../routing";
 import { clearCache, routeBatch } from "../../routing/strategies/llm";
 import type { PipelineContext, PipelineStage, RoutingResult, StageResult } from "../types";
@@ -30,11 +34,25 @@ export const routingStage: PipelineStage = {
   async execute(ctx: PipelineContext): Promise<StageResult> {
     const logger = getLogger();
-    // If story has cached routing, use cached values (escalated modelTier takes priority)
-    // Otherwise, perform fresh classification
+    // Staleness detection (RRP-003):
+    // - story.routing absent                   → cache miss (no prior routing)
+    // - story.routing + no contentHash         → legacy cache hit (manual / pre-RRP-003 routing, honor as-is)
+    // - story.routing + contentHash matches    → cache hit
+    // - story.routing + contentHash mismatches → cache miss (stale, re-classify)
+    const hasExistingRouting = ctx.story.routing !== undefined;
+    const hasContentHash = ctx.story.routing?.contentHash !== undefined;
+    let currentHash: string | undefined;
+    let hashMatch = false;
+    if (hasContentHash) {
+      currentHash = _routingDeps.computeStoryContentHash(ctx.story);
+      hashMatch = ctx.story.routing?.contentHash === currentHash;
+    }
+    const isCacheHit = hasExistingRouting && (!hasContentHash || hashMatch);
     let routing: { complexity: string; testStrategy: string; modelTier: string; reasoning?: string };
-    if (ctx.story.routing) {
-      // Use cached complexity/testStrategy/modelTier
+    if (isCacheHit) {
+      // Cache hit: legacy routing (no contentHash) or matching contentHash — use cached values
       routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
       // Override with cached values only when they are actually set
       if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
@@ -50,8 +68,22 @@ export const routingStage: PipelineStage = {
         );
       }
     } else {
-      // Fresh classification
+      // Cache miss: no routing, or contentHash present but mismatched — fresh classification
       routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
+      // currentHash already computed if a mismatch was detected; compute now if starting fresh
+      currentHash = currentHash ?? _routingDeps.computeStoryContentHash(ctx.story);
+      ctx.story.routing = {
+        ...(ctx.story.routing ?? {}),
+        complexity: routing.complexity as import("../../config").Complexity,
+        initialComplexity:
+          ctx.story.routing?.initialComplexity ?? (routing.complexity as import("../../config").Complexity),
+        testStrategy: routing.testStrategy as import("../../config").TestStrategy,
+        reasoning: routing.reasoning ?? "",
+        contentHash: currentHash,
+      };
+      if (ctx.prdPath) {
+        await _routingDeps.savePRD(ctx.prd, ctx.prdPath);
+      }
     }
     // BUG-010: Greenfield detection — force test-after if no test files exist
@@ -97,4 +129,6 @@ export const _routingDeps = {
   complexityToModelTier,
   isGreenfieldStory,
   clearCache,
+  savePRD,
+  computeStoryContentHash,
 };

package/src/pipeline/subscribers/hooks.ts CHANGED Viewed

@@ -127,6 +127,38 @@ export function wireHooks(
     }),
   );
+  // run:resumed → on-resume
+  unsubs.push(
+    bus.on("run:resumed", (ev) => {
+      safe("on-resume", () => fireHook(hooks, "on-resume", hookCtx(feature, { status: "running" }), workdir));
+    }),
+  );
+  // story:completed → on-session-end (passed)
+  unsubs.push(
+    bus.on("story:completed", (ev) => {
+      safe("on-session-end (completed)", () =>
+        fireHook(hooks, "on-session-end", hookCtx(feature, { storyId: ev.storyId, status: "passed" }), workdir),
+      );
+    }),
+  );
+  // story:failed → on-session-end (failed)
+  unsubs.push(
+    bus.on("story:failed", (ev) => {
+      safe("on-session-end (failed)", () =>
+        fireHook(hooks, "on-session-end", hookCtx(feature, { storyId: ev.storyId, status: "failed" }), workdir),
+      );
+    }),
+  );
+  // run:errored → on-error
+  unsubs.push(
+    bus.on("run:errored", (ev) => {
+      safe("on-error", () => fireHook(hooks, "on-error", hookCtx(feature, { reason: ev.reason }), workdir));
+    }),
+  );
   return () => {
     for (const u of unsubs) u();
   };

package/src/pipeline/subscribers/interaction.ts CHANGED Viewed

@@ -19,7 +19,7 @@ import type { NaxConfig } from "../../config";
 import type { InteractionChain } from "../../interaction/chain";
 import { executeTrigger, isTriggerEnabled } from "../../interaction/triggers";
 import { getSafeLogger } from "../../logger";
-import type { PipelineEventBus } from "../event-bus";
+import type { PipelineEventBus, StoryFailedEvent } from "../event-bus";
 import type { UnsubscribeFn } from "./hooks";
 /**
@@ -62,6 +62,41 @@ export function wireInteraction(
     );
   }
+  // story:failed (countsTowardEscalation=true) → executeTrigger("max-retries")
+  if (interactionChain && isTriggerEnabled("max-retries", config)) {
+    unsubs.push(
+      bus.on("story:failed", (ev: StoryFailedEvent) => {
+        if (!ev.countsTowardEscalation) {
+          return;
+        }
+        executeTrigger(
+          "max-retries",
+          {
+            featureName: ev.feature ?? "",
+            storyId: ev.storyId,
+            iteration: ev.attempts ?? 0,
+          },
+          config,
+          interactionChain,
+        )
+          .then((response) => {
+            if (response.action === "abort") {
+              logger?.warn("interaction-subscriber", "max-retries abort requested", {
+                storyId: ev.storyId,
+              });
+            }
+          })
+          .catch((err) => {
+            logger?.warn("interaction-subscriber", "max-retries trigger failed", {
+              storyId: ev.storyId,
+              error: String(err),
+            });
+          });
+      }),
+    );
+  }
   return () => {
     for (const u of unsubs) u();
   };

package/src/pipeline/types.ts CHANGED Viewed

@@ -65,6 +65,8 @@ export interface PipelineContext {
   routing: RoutingResult;
   /** Working directory (project root) */
   workdir: string;
+  /** Absolute path to the prd.json file (used by routing stage to persist initial classification) */
+  prdPath?: string;
   /** Feature directory (optional, e.g., nax/features/my-feature/) */
   featureDir?: string;
   /** Hooks configuration */

package/src/prd/types.ts CHANGED Viewed

@@ -45,6 +45,10 @@ export interface StructuredFailure {
 /** Routing metadata per story */
 export interface StoryRouting {
   complexity: Complexity;
+  /** Initial complexity from first classification — written once, never overwritten by escalation */
+  initialComplexity?: Complexity;
+  /** Content hash of story fields at time of routing — used to detect stale cached routing (RRP-003) */
+  contentHash?: string;
   /** Model tier (derived at runtime from config, not persisted) */
   modelTier?: ModelTier;
   testStrategy: TestStrategy;

package/src/routing/content-hash.ts ADDED Viewed

@@ -0,0 +1,25 @@
+/**
+ * Story Content Hash
+ *
+ * Computes a deterministic hash of the story content fields used for routing.
+ * Used by the routing stage (RRP-003) to detect stale cached routing.
+ */
+import type { UserStory } from "../prd/types";
+/**
+ * Compute a deterministic hash of the story content fields used for routing.
+ * Hash input: title + "\0" + description + "\0" + acceptanceCriteria.join("") + "\0" + tags.join("")
+ *
+ * Null-byte separators between fields prevent cross-field collisions.
+ *
+ * @param story - The user story to hash
+ * @returns A hex string content hash
+ */
+export function computeStoryContentHash(story: UserStory): string {
+  const input = `${story.title}\0${story.description}\0${story.acceptanceCriteria.join("")}\0${story.tags.join("")}`;
+  const hasher = new Bun.CryptoHasher("sha256");
+  hasher.update(input);
+  return hasher.digest("hex");
+}

package/src/routing/index.ts CHANGED Viewed

@@ -15,3 +15,6 @@ export { keywordStrategy, llmStrategy, manualStrategy } from "./strategies";
 // Custom strategy loader
 export { loadCustomStrategy } from "./loader";
 export { tryLlmBatchRoute } from "./batch-route";
+// Content hash for staleness detection (RRP-003)
+export { computeStoryContentHash } from "./content-hash";

package/src/routing/router.ts CHANGED Viewed

@@ -152,7 +152,7 @@ const LITE_TAGS = ["ui", "layout", "cli", "integration", "polyglot"];
  * - 'auto'   → existing heuristic logic, plus:
  *              if tags include ui/layout/cli/integration/polyglot → three-session-tdd-lite
  *              if security/public-api/complex/expert → three-session-tdd
- *              otherwise → three-session-tdd-lite (test-after deprecated from auto mode)
+ *              simple → test-after, medium → three-session-tdd-lite (BUG-045)
  *
  * @param complexity - Pre-classified complexity level
  * @param title - Story title
@@ -201,7 +201,8 @@ export function determineTestStrategy(
     return hasLiteTag ? "three-session-tdd-lite" : "three-session-tdd";
   }
-  // Simple/medium → three-session-tdd-lite (FEAT-013: test-after deprecated from auto mode)
+  // BUG-045: simple → test-after (low overhead), medium → tdd-lite (sweet spot)
+  if (complexity === "simple") return "test-after";
   return "three-session-tdd-lite";
 }

package/src/routing/strategies/keyword.ts CHANGED Viewed

@@ -117,7 +117,8 @@ function determineTestStrategy(
     return "three-session-tdd";
   }
-  // FEAT-013: test-after deprecated from auto mode — use three-session-tdd-lite as default
+  // BUG-045: simple → test-after (low overhead), medium → tdd-lite (sweet spot)
+  if (complexity === "simple") return "test-after";
   return "three-session-tdd-lite";
 }

package/src/routing/strategies/llm-prompts.ts CHANGED Viewed

@@ -5,8 +5,9 @@
  * for LLM-based routing decisions.
  */
-import type { Complexity, ModelTier, NaxConfig, TestStrategy } from "../../config";
+import type { Complexity, ModelTier, NaxConfig, TddStrategy, TestStrategy } from "../../config";
 import type { UserStory } from "../../prd/types";
+import { determineTestStrategy } from "../router";
 import type { RoutingDecision } from "../strategy";
 /**
@@ -34,18 +35,13 @@ Tags: ${tags.join(", ")}
 - balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
 - powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
-## Available Test Strategies
-- test-after: Write implementation first, add tests after. For straightforward work.
-- three-session-tdd: Separate test-writer → implementer → verifier sessions. For complex/critical work where test design matters.
 ## Rules
-- Default to the CHEAPEST option that will succeed.
-- three-session-tdd ONLY when: (a) security/auth logic, (b) complex algorithms, (c) public API contracts that consumers depend on.
-- Simple barrel exports, re-exports, or index files are ALWAYS test-after + fast, regardless of keywords.
+- Default to the CHEAPEST tier that will succeed.
+- Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
 - A story touching many files doesn't automatically mean complex — copy-paste refactors are simple.
 Respond with ONLY this JSON (no markdown, no explanation):
-{"complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","testStrategy":"test-after|three-session-tdd","reasoning":"<one line>"}`;
+{"complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}`;
 }
 /**
@@ -77,18 +73,13 @@ ${storyBlocks}
 - balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
 - powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
-## Available Test Strategies
-- test-after: Write implementation first, add tests after. For straightforward work.
-- three-session-tdd: Separate test-writer → implementer → verifier sessions. For complex/critical work where test design matters.
 ## Rules
-- Default to the CHEAPEST option that will succeed.
-- three-session-tdd ONLY when: (a) security/auth logic, (b) complex algorithms, (c) public API contracts that consumers depend on.
-- Simple barrel exports, re-exports, or index files are ALWAYS test-after + fast, regardless of keywords.
+- Default to the CHEAPEST tier that will succeed.
+- Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
 - A story touching many files doesn't automatically mean complex — copy-paste refactors are simple.
 Respond with ONLY a JSON array (no markdown, no explanation):
-[{"id":"US-001","complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","testStrategy":"test-after|three-session-tdd","reasoning":"<one line>"}]`;
+[{"id":"US-001","complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}]`;
 }
 /**
@@ -99,33 +90,43 @@ Respond with ONLY a JSON array (no markdown, no explanation):
  * @returns Validated routing decision
  * @throws Error if validation fails
  */
-export function validateRoutingDecision(parsed: Record<string, unknown>, config: NaxConfig): RoutingDecision {
-  // Validate required fields
-  if (!parsed.complexity || !parsed.modelTier || !parsed.testStrategy || !parsed.reasoning) {
+export function validateRoutingDecision(
+  parsed: Record<string, unknown>,
+  config: NaxConfig,
+  story?: UserStory,
+): RoutingDecision {
+  // Validate required fields (testStrategy no longer required from LLM — derived via BUG-045)
+  if (!parsed.complexity || !parsed.modelTier || !parsed.reasoning) {
     throw new Error(`Missing required fields in LLM response: ${JSON.stringify(parsed)}`);
   }
   // Validate field values
   const validComplexities: Complexity[] = ["simple", "medium", "complex", "expert"];
-  const validTestStrategies: TestStrategy[] = ["test-after", "three-session-tdd"];
   if (!validComplexities.includes(parsed.complexity as Complexity)) {
     throw new Error(`Invalid complexity: ${parsed.complexity}`);
   }
-  if (!validTestStrategies.includes(parsed.testStrategy as TestStrategy)) {
-    throw new Error(`Invalid testStrategy: ${parsed.testStrategy}`);
-  }
   // Validate modelTier exists in config
   if (!config.models[parsed.modelTier as string]) {
     throw new Error(`Invalid modelTier: ${parsed.modelTier} (not in config.models)`);
   }
+  // BUG-045: Derive testStrategy from determineTestStrategy() — single source of truth.
+  // LLM decides complexity; testStrategy is a policy decision, not a judgment call.
+  const tddStrategy: TddStrategy = config.tdd?.strategy ?? "auto";
+  const testStrategy = determineTestStrategy(
+    parsed.complexity as Complexity,
+    story?.title ?? "",
+    story?.description ?? "",
+    story?.tags ?? [],
+    tddStrategy,
+  );
   return {
     complexity: parsed.complexity as Complexity,
     modelTier: parsed.modelTier as ModelTier,
-    testStrategy: parsed.testStrategy as TestStrategy,
+    testStrategy,
     reasoning: parsed.reasoning as string,
   };
 }
@@ -155,7 +156,7 @@ export function stripCodeFences(text: string): string {
 export function parseRoutingResponse(output: string, story: UserStory, config: NaxConfig): RoutingDecision {
   const jsonText = stripCodeFences(output);
   const parsed = JSON.parse(jsonText);
-  return validateRoutingDecision(parsed, config);
+  return validateRoutingDecision(parsed, config, story);
 }
 /**
@@ -201,7 +202,7 @@ export function parseBatchResponse(
     }
     // Validate entry directly (no re-serialization needed)
-    const decision = validateRoutingDecision(entry, config);
+    const decision = validateRoutingDecision(entry, config, story);
     decisions.set(entry.id, decision);
   }

package/src/utils/git.ts CHANGED Viewed

@@ -105,3 +105,24 @@ export async function hasCommitsForStory(workdir: string, storyId: string, maxCo
     return false;
   }
 }
+/**
+ * Detect if git operation output contains merge conflict markers.
+ *
+ * Git outputs "CONFLICT" in uppercase for merge/rebase conflicts.
+ * Also checks lowercase "conflict" for edge cases.
+ *
+ * @param output - Combined stdout/stderr output from a git operation
+ * @returns true if output contains CONFLICT markers
+ *
+ * @example
+ * ```typescript
+ * const hasConflict = detectMergeConflict(agentOutput);
+ * if (hasConflict) {
+ *   // fire merge-conflict trigger
+ * }
+ * ```
+ */
+export function detectMergeConflict(output: string): boolean {
+  return output.includes("CONFLICT") || output.includes("conflict");
+}

package/test/integration/routing/plugin-routing-core.test.ts CHANGED Viewed

@@ -318,7 +318,7 @@ describe("Plugin router fallback to built-in strategy", () => {
     // Keyword strategy decision (not from plugin)
     expect(decision.complexity).toBe("simple");
     expect(decision.modelTier).toBe("fast");
-    expect(decision.testStrategy).toBe("three-session-tdd-lite");
+    expect(decision.testStrategy).toBe("test-after");
   });
   test("keyword strategy handles complex story when plugins return null", async () => {