npm - @nathapp/nax - Versions diffs - 0.24.0 → 0.25.0 - Mend

@nathapp/nax 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/docs/ROADMAP.md +33 -15
package/docs/specs/trigger-completion.md +145 -0
package/nax/features/trigger-completion/prd.json +150 -0
package/nax/features/trigger-completion/progress.txt +7 -0
package/nax/status.json +14 -24
package/package.json +1 -1
package/src/config/types.ts +3 -1
package/src/execution/crash-recovery.ts +11 -0
package/src/execution/executor-types.ts +1 -1
package/src/execution/lifecycle/run-setup.ts +4 -0
package/src/execution/sequential-executor.ts +45 -7
package/src/interaction/plugins/auto.ts +10 -1
package/src/pipeline/event-bus.ts +14 -1
package/src/pipeline/stages/completion.ts +20 -0
package/src/pipeline/stages/execution.ts +62 -0
package/src/pipeline/stages/review.ts +25 -1
package/src/pipeline/subscribers/hooks.ts +32 -0
package/src/pipeline/subscribers/interaction.ts +36 -1
package/src/routing/router.ts +3 -2
package/src/routing/strategies/keyword.ts +2 -1
package/src/routing/strategies/llm-prompts.ts +29 -28
package/src/utils/git.ts +21 -0
package/test/integration/routing/plugin-routing-core.test.ts +1 -1
package/test/unit/execution/sequential-executor.test.ts +235 -0
package/test/unit/interaction/auto-plugin.test.ts +162 -0
package/test/unit/interaction-plugins.test.ts +308 -1
package/test/unit/pipeline/stages/completion-review-gate.test.ts +218 -0
package/test/unit/pipeline/stages/execution-ambiguity.test.ts +311 -0
package/test/unit/pipeline/stages/execution-merge-conflict.test.ts +218 -0
package/test/unit/pipeline/stages/review.test.ts +201 -0
package/test/unit/pipeline/subscribers/hooks.test.ts +43 -4
package/test/unit/pipeline/subscribers/interaction.test.ts +284 -2
package/test/unit/prd-auto-default.test.ts +2 -2
package/test/unit/routing/routing-stability.test.ts +1 -1
package/test/unit/routing-core.test.ts +5 -5
package/test/unit/routing-strategies.test.ts +1 -3
package/test/unit/utils/git.test.ts +50 -0

package/src/interaction/plugins/auto.ts CHANGED Viewed

@@ -38,6 +38,14 @@ interface DecisionResponse {
   reasoning: string;
 }
+/**
+ * Module-level deps for testability (_deps pattern).
+ *
+ * `callLlm` is `null` by default. While it is `null`, AutoInteractionPlugin
+ * falls back to its own `callLlm` method; when a function is assigned here,
+ * that function is called with the interaction request instead.
+ * Override callLlm in tests to avoid spawning the claude CLI.
+ */
+export const _deps = {
+  callLlm: null as ((request: InteractionRequest) => Promise<DecisionResponse>) | null,
+};
 /**
  * Auto plugin for AI-powered interaction responses
  */
@@ -80,7 +88,8 @@ export class AutoInteractionPlugin implements InteractionPlugin {
     }
     try {
-      const decision = await this.callLlm(request);
+      const callFn = _deps.callLlm ?? this.callLlm.bind(this);
+      const decision = await callFn(request);
       // Check confidence threshold
       if (decision.confidence < (this.config.confidenceThreshold ?? 0.7)) {

package/src/pipeline/event-bus.ts CHANGED Viewed

@@ -135,6 +135,17 @@ export interface StoryPausedEvent {
   cost: number;
 }
+export interface RunResumedEvent {
+  type: "run:resumed"; // discriminant within PipelineEvent; wireHooks maps it to the "on-resume" hook
+  feature: string;
+}
+export interface RunErroredEvent {
+  type: "run:errored"; // discriminant within PipelineEvent; wireHooks maps it to the "on-error" hook
+  reason: string; // passed into the "on-error" hook context by wireHooks
+  feature?: string;
+}
 /** Discriminated union of all pipeline events. */
 export type PipelineEvent =
   | StoryStartedEvent
@@ -150,7 +161,9 @@ export type PipelineEvent =
   | HumanReviewRequestedEvent
   | RunStartedEvent
   | RunPausedEvent
-  | StoryPausedEvent;
+  | StoryPausedEvent
+  | RunResumedEvent
+  | RunErroredEvent;
 export type PipelineEventType = PipelineEvent["type"];

package/src/pipeline/stages/completion.ts CHANGED Viewed

@@ -13,6 +13,7 @@
  */
 import { appendProgress } from "../../execution/progress";
+import { checkReviewGate, isTriggerEnabled } from "../../interaction/triggers";
 import { getLogger } from "../../logger";
 import { collectBatchMetrics, collectStoryMetrics } from "../../metrics";
 import { countStories, markStoryPassed, savePRD } from "../../prd";
@@ -72,6 +73,18 @@ export const completionStage: PipelineStage = {
         modelTier: ctx.routing?.modelTier,
         testStrategy: ctx.routing?.testStrategy,
       });
+      // review-gate trigger: check if story needs re-review after passing
+      if (ctx.interaction && isTriggerEnabled("review-gate", ctx.config)) {
+        const shouldContinue = await _completionDeps.checkReviewGate(
+          { featureName: ctx.prd.feature, storyId: completedStory.id },
+          ctx.config,
+          ctx.interaction,
+        );
+        if (!shouldContinue) {
+          logger.warn("completion", "Story marked for re-review", { storyId: completedStory.id });
+        }
+      }
     }
     // Save PRD
@@ -89,3 +102,10 @@ export const completionStage: PipelineStage = {
     return { action: "continue" };
   },
 };
+/**
+ * Swappable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
+ *
+ * completionStage calls `checkReviewGate` through this object for the
+ * review-gate trigger, so a test can replace the property with a stub.
+ */
+export const _completionDeps = {
+  checkReviewGate,
+};

package/src/pipeline/stages/execution.ts CHANGED Viewed

@@ -32,11 +32,33 @@
 import { getAgent, validateAgentForTier } from "../../agents";
 import { resolveModel } from "../../config";
+import { checkMergeConflict, checkStoryAmbiguity, isTriggerEnabled } from "../../interaction/triggers";
 import { getLogger } from "../../logger";
 import type { FailureCategory } from "../../tdd";
 import { runThreeSessionTdd } from "../../tdd";
+import { detectMergeConflict } from "../../utils/git";
 import type { PipelineContext, PipelineStage, StageResult } from "../types";
+/**
+ * Detect whether agent output contains ambiguity signals.
+ *
+ * The output is lowercased and searched for a fixed list of phrases (for
+ * example "unclear" or "please clarify"). Matching is a plain substring test,
+ * so a phrase also matches when it appears inside a longer word or sentence.
+ *
+ * @param output - Agent output text to scan
+ * @returns true if any listed phrase occurs in the output; false for empty output
+ */
+export function isAmbiguousOutput(output: string): boolean {
+  if (!output) return false;
+  const ambiguityKeywords = [
+    "unclear",
+    "ambiguous",
+    "need clarification",
+    "please clarify",
+    "which one",
+    "not sure which",
+  ];
+  const lowerOutput = output.toLowerCase();
+  return ambiguityKeywords.some((keyword) => lowerOutput.includes(keyword));
+}
 /**
  * Determine the pipeline action for a failed TDD result, based on its failureCategory.
  *
@@ -172,6 +194,42 @@ export const executionStage: PipelineStage = {
     ctx.agentResult = result;
+    // merge-conflict trigger: detect CONFLICT markers in agent output
+    const combinedOutput = (result.output ?? "") + (result.stderr ?? "");
+    if (
+      _executionDeps.detectMergeConflict(combinedOutput) &&
+      ctx.interaction &&
+      isTriggerEnabled("merge-conflict", ctx.config)
+    ) {
+      const shouldProceed = await _executionDeps.checkMergeConflict(
+        { featureName: ctx.prd.feature, storyId: ctx.story.id },
+        ctx.config,
+        ctx.interaction,
+      );
+      if (!shouldProceed) {
+        logger.error("execution", "Merge conflict detected — aborting story", { storyId: ctx.story.id });
+        return { action: "fail", reason: "Merge conflict detected" };
+      }
+    }
+    // story-ambiguity trigger: detect ambiguity signals in agent output
+    if (
+      result.success &&
+      _executionDeps.isAmbiguousOutput(combinedOutput) &&
+      ctx.interaction &&
+      isTriggerEnabled("story-ambiguity", ctx.config)
+    ) {
+      const shouldContinue = await _executionDeps.checkStoryAmbiguity(
+        { featureName: ctx.prd.feature, storyId: ctx.story.id, reason: "Agent output suggests ambiguity" },
+        ctx.config,
+        ctx.interaction,
+      );
+      if (!shouldContinue) {
+        logger.warn("execution", "Story ambiguity detected — escalating story", { storyId: ctx.story.id });
+        return { action: "escalate", reason: "Story ambiguity detected — needs clarification" };
+      }
+    }
     if (!result.success) {
       logger.error("execution", "Agent session failed", {
         exitCode: result.exitCode,
@@ -199,4 +257,8 @@ export const executionStage: PipelineStage = {
 export const _executionDeps = {
   getAgent,
   validateAgentForTier,
+  detectMergeConflict, // applied to the agent's combined output/stderr before the merge-conflict trigger
+  checkMergeConflict, // merge-conflict trigger; a falsy result fails the story
+  isAmbiguousOutput, // applied to the combined output of a successful run
+  checkStoryAmbiguity, // story-ambiguity trigger; a falsy result escalates the story
 };

package/src/pipeline/stages/review.ts CHANGED Viewed

@@ -6,10 +6,12 @@
  * @returns
  * - `continue`: Review passed
  * - `escalate`: Built-in check failed (lint/typecheck) — autofix stage handles retry
- * - `fail`: Plugin reviewer hard-failed
+ * - `escalate`: Plugin reviewer failed and security-review trigger responded non-abort
+ * - `fail`: Plugin reviewer hard-failed (no trigger, or trigger responded abort)
  */
 // RE-ARCH: rewrite
+import { checkSecurityReview, isTriggerEnabled } from "../../interaction/triggers";
 import { getLogger } from "../../logger";
 import { reviewOrchestrator } from "../../review/orchestrator";
 import type { PipelineContext, PipelineStage, StageResult } from "../types";
@@ -29,6 +31,21 @@ export const reviewStage: PipelineStage = {
     if (!result.success) {
       if (result.pluginFailed) {
+        // security-review trigger: prompt before permanently failing
+        if (ctx.interaction && isTriggerEnabled("security-review", ctx.config)) {
+          const shouldContinue = await _reviewDeps.checkSecurityReview(
+            { featureName: ctx.prd.feature, storyId: ctx.story.id },
+            ctx.config,
+            ctx.interaction,
+          );
+          if (!shouldContinue) {
+            logger.error("review", `Plugin reviewer failed: ${result.failureReason}`, { storyId: ctx.story.id });
+            return { action: "fail", reason: `Review failed: ${result.failureReason}` };
+          }
+          logger.warn("review", "Security-review trigger escalated — retrying story", { storyId: ctx.story.id });
+          return { action: "escalate", reason: `Review failed: ${result.failureReason}` };
+        }
         logger.error("review", `Plugin reviewer failed: ${result.failureReason}`, { storyId: ctx.story.id });
         return { action: "fail", reason: `Review failed: ${result.failureReason}` };
       }
@@ -47,3 +64,10 @@ export const reviewStage: PipelineStage = {
     return { action: "continue" };
   },
 };
+/**
+ * Swappable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
+ *
+ * reviewStage calls `checkSecurityReview` through this object when a plugin
+ * reviewer has failed and the security-review trigger is enabled, so a test
+ * can replace the property with a stub.
+ */
+export const _reviewDeps = {
+  checkSecurityReview,
+};

package/src/pipeline/subscribers/hooks.ts CHANGED Viewed

@@ -127,6 +127,38 @@ export function wireHooks(
     }),
   );
+  // run:resumed → on-resume
+  unsubs.push(
+    bus.on("run:resumed", (ev) => {
+      safe("on-resume", () => fireHook(hooks, "on-resume", hookCtx(feature, { status: "running" }), workdir));
+    }),
+  );
+  // story:completed → on-session-end (passed)
+  unsubs.push(
+    bus.on("story:completed", (ev) => {
+      safe("on-session-end (completed)", () =>
+        fireHook(hooks, "on-session-end", hookCtx(feature, { storyId: ev.storyId, status: "passed" }), workdir),
+      );
+    }),
+  );
+  // story:failed → on-session-end (failed)
+  unsubs.push(
+    bus.on("story:failed", (ev) => {
+      safe("on-session-end (failed)", () =>
+        fireHook(hooks, "on-session-end", hookCtx(feature, { storyId: ev.storyId, status: "failed" }), workdir),
+      );
+    }),
+  );
+  // run:errored → on-error
+  unsubs.push(
+    bus.on("run:errored", (ev) => {
+      safe("on-error", () => fireHook(hooks, "on-error", hookCtx(feature, { reason: ev.reason }), workdir));
+    }),
+  );
   return () => {
     for (const u of unsubs) u();
   };

package/src/pipeline/subscribers/interaction.ts CHANGED Viewed

@@ -19,7 +19,7 @@ import type { NaxConfig } from "../../config";
 import type { InteractionChain } from "../../interaction/chain";
 import { executeTrigger, isTriggerEnabled } from "../../interaction/triggers";
 import { getSafeLogger } from "../../logger";
-import type { PipelineEventBus } from "../event-bus";
+import type { PipelineEventBus, StoryFailedEvent } from "../event-bus";
 import type { UnsubscribeFn } from "./hooks";
 /**
@@ -62,6 +62,41 @@ export function wireInteraction(
     );
   }
+  // story:failed (countsTowardEscalation=true) → executeTrigger("max-retries")
+  if (interactionChain && isTriggerEnabled("max-retries", config)) {
+    unsubs.push(
+      bus.on("story:failed", (ev: StoryFailedEvent) => {
+        if (!ev.countsTowardEscalation) {
+          return;
+        }
+        executeTrigger(
+          "max-retries",
+          {
+            featureName: ev.feature ?? "",
+            storyId: ev.storyId,
+            iteration: ev.attempts ?? 0,
+          },
+          config,
+          interactionChain,
+        )
+          .then((response) => {
+            if (response.action === "abort") {
+              logger?.warn("interaction-subscriber", "max-retries abort requested", {
+                storyId: ev.storyId,
+              });
+            }
+          })
+          .catch((err) => {
+            logger?.warn("interaction-subscriber", "max-retries trigger failed", {
+              storyId: ev.storyId,
+              error: String(err),
+            });
+          });
+      }),
+    );
+  }
   return () => {
     for (const u of unsubs) u();
   };

package/src/routing/router.ts CHANGED Viewed

@@ -152,7 +152,7 @@ const LITE_TAGS = ["ui", "layout", "cli", "integration", "polyglot"];
  * - 'auto'   → existing heuristic logic, plus:
  *              if tags include ui/layout/cli/integration/polyglot → three-session-tdd-lite
  *              if security/public-api/complex/expert → three-session-tdd
- *              otherwise → three-session-tdd-lite (test-after deprecated from auto mode)
+ *              simple → test-after, medium → three-session-tdd-lite (BUG-045)
  *
  * @param complexity - Pre-classified complexity level
  * @param title - Story title
@@ -201,7 +201,8 @@ export function determineTestStrategy(
     return hasLiteTag ? "three-session-tdd-lite" : "three-session-tdd";
   }
-  // Simple/medium → three-session-tdd-lite (FEAT-013: test-after deprecated from auto mode)
+  // BUG-045: simple → test-after (low overhead), medium → tdd-lite (sweet spot)
+  if (complexity === "simple") return "test-after";
   return "three-session-tdd-lite";
 }

package/src/routing/strategies/keyword.ts CHANGED Viewed

@@ -117,7 +117,8 @@ function determineTestStrategy(
     return "three-session-tdd";
   }
-  // FEAT-013: test-after deprecated from auto mode — use three-session-tdd-lite as default
+  // BUG-045: simple → test-after (low overhead), medium → tdd-lite (sweet spot)
+  if (complexity === "simple") return "test-after";
   return "three-session-tdd-lite";
 }

package/src/routing/strategies/llm-prompts.ts CHANGED Viewed

@@ -5,8 +5,9 @@
  * for LLM-based routing decisions.
  */
-import type { Complexity, ModelTier, NaxConfig, TestStrategy } from "../../config";
+import type { Complexity, ModelTier, NaxConfig, TddStrategy, TestStrategy } from "../../config";
 import type { UserStory } from "../../prd/types";
+import { determineTestStrategy } from "../router";
 import type { RoutingDecision } from "../strategy";
 /**
@@ -34,18 +35,13 @@ Tags: ${tags.join(", ")}
 - balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
 - powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
-## Available Test Strategies
-- test-after: Write implementation first, add tests after. For straightforward work.
-- three-session-tdd: Separate test-writer → implementer → verifier sessions. For complex/critical work where test design matters.
 ## Rules
-- Default to the CHEAPEST option that will succeed.
-- three-session-tdd ONLY when: (a) security/auth logic, (b) complex algorithms, (c) public API contracts that consumers depend on.
-- Simple barrel exports, re-exports, or index files are ALWAYS test-after + fast, regardless of keywords.
+- Default to the CHEAPEST tier that will succeed.
+- Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
 - A story touching many files doesn't automatically mean complex — copy-paste refactors are simple.
 Respond with ONLY this JSON (no markdown, no explanation):
-{"complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","testStrategy":"test-after|three-session-tdd","reasoning":"<one line>"}`;
+{"complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}`;
 }
 /**
@@ -77,18 +73,13 @@ ${storyBlocks}
 - balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
 - powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
-## Available Test Strategies
-- test-after: Write implementation first, add tests after. For straightforward work.
-- three-session-tdd: Separate test-writer → implementer → verifier sessions. For complex/critical work where test design matters.
 ## Rules
-- Default to the CHEAPEST option that will succeed.
-- three-session-tdd ONLY when: (a) security/auth logic, (b) complex algorithms, (c) public API contracts that consumers depend on.
-- Simple barrel exports, re-exports, or index files are ALWAYS test-after + fast, regardless of keywords.
+- Default to the CHEAPEST tier that will succeed.
+- Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
 - A story touching many files doesn't automatically mean complex — copy-paste refactors are simple.
 Respond with ONLY a JSON array (no markdown, no explanation):
-[{"id":"US-001","complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","testStrategy":"test-after|three-session-tdd","reasoning":"<one line>"}]`;
+[{"id":"US-001","complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}]`;
 }
 /**
@@ -99,33 +90,43 @@ Respond with ONLY a JSON array (no markdown, no explanation):
  * @returns Validated routing decision
  * @throws Error if validation fails
  */
-export function validateRoutingDecision(parsed: Record<string, unknown>, config: NaxConfig): RoutingDecision {
-  // Validate required fields
-  if (!parsed.complexity || !parsed.modelTier || !parsed.testStrategy || !parsed.reasoning) {
+export function validateRoutingDecision(
+  parsed: Record<string, unknown>,
+  config: NaxConfig,
+  story?: UserStory,
+): RoutingDecision {
+  // Validate required fields (testStrategy no longer required from LLM — derived via BUG-045)
+  if (!parsed.complexity || !parsed.modelTier || !parsed.reasoning) {
     throw new Error(`Missing required fields in LLM response: ${JSON.stringify(parsed)}`);
   }
   // Validate field values
   const validComplexities: Complexity[] = ["simple", "medium", "complex", "expert"];
-  const validTestStrategies: TestStrategy[] = ["test-after", "three-session-tdd"];
   if (!validComplexities.includes(parsed.complexity as Complexity)) {
     throw new Error(`Invalid complexity: ${parsed.complexity}`);
   }
-  if (!validTestStrategies.includes(parsed.testStrategy as TestStrategy)) {
-    throw new Error(`Invalid testStrategy: ${parsed.testStrategy}`);
-  }
   // Validate modelTier exists in config
   if (!config.models[parsed.modelTier as string]) {
     throw new Error(`Invalid modelTier: ${parsed.modelTier} (not in config.models)`);
   }
+  // BUG-045: Derive testStrategy from determineTestStrategy() — single source of truth.
+  // LLM decides complexity; testStrategy is a policy decision, not a judgment call.
+  const tddStrategy: TddStrategy = config.tdd?.strategy ?? "auto";
+  const testStrategy = determineTestStrategy(
+    parsed.complexity as Complexity,
+    story?.title ?? "",
+    story?.description ?? "",
+    story?.tags ?? [],
+    tddStrategy,
+  );
   return {
     complexity: parsed.complexity as Complexity,
     modelTier: parsed.modelTier as ModelTier,
-    testStrategy: parsed.testStrategy as TestStrategy,
+    testStrategy,
     reasoning: parsed.reasoning as string,
   };
 }
@@ -155,7 +156,7 @@ export function stripCodeFences(text: string): string {
 export function parseRoutingResponse(output: string, story: UserStory, config: NaxConfig): RoutingDecision {
   const jsonText = stripCodeFences(output);
   const parsed = JSON.parse(jsonText);
-  return validateRoutingDecision(parsed, config);
+  return validateRoutingDecision(parsed, config, story); // story supplies title, description and tags for deriving testStrategy
 }
 /**
@@ -201,7 +202,7 @@ export function parseBatchResponse(
     }
     // Validate entry directly (no re-serialization needed)
-    const decision = validateRoutingDecision(entry, config);
+    const decision = validateRoutingDecision(entry, config, story);
     decisions.set(entry.id, decision);
   }

package/src/utils/git.ts CHANGED Viewed

@@ -105,3 +105,24 @@ export async function hasCommitsForStory(workdir: string, storyId: string, maxCo
     return false;
   }
 }
+/**
+ * Detect if git operation output mentions a merge conflict.
+ *
+ * Git outputs "CONFLICT" in uppercase for merge/rebase conflicts.
+ * Also checks lowercase "conflict" for edge cases.
+ *
+ * NOTE: this is a plain, case-sensitive substring test for the two spellings
+ * "CONFLICT" and "conflict". Any output containing either one (for example
+ * "no conflicts found") returns true, while other casings such as "Conflict"
+ * do not match.
+ *
+ * @param output - Combined stdout/stderr output from a git operation
+ * @returns true if output contains the substring "CONFLICT" or "conflict"
+ *
+ * @example
+ * ```typescript
+ * const hasConflict = detectMergeConflict(agentOutput);
+ * if (hasConflict) {
+ *   // fire merge-conflict trigger
+ * }
+ * ```
+ */
+export function detectMergeConflict(output: string): boolean {
+  return output.includes("CONFLICT") || output.includes("conflict");
+}

package/test/integration/routing/plugin-routing-core.test.ts CHANGED Viewed

@@ -318,7 +318,7 @@ describe("Plugin router fallback to built-in strategy", () => {
     // Keyword strategy decision (not from plugin)
     expect(decision.complexity).toBe("simple");
     expect(decision.modelTier).toBe("fast");
-    expect(decision.testStrategy).toBe("three-session-tdd-lite");
+    expect(decision.testStrategy).toBe("test-after");
   });
   test("keyword strategy handles complex story when plugins return null", async () => {