npm - @nathapp/nax - Versions diffs - 0.43.1 → 0.45.0 - Mend

@nathapp/nax 0.43.1 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/bin/nax.ts +22 -0
package/dist/nax.js +320 -88
package/package.json +1 -1
package/src/agents/acp/adapter.ts +98 -5
package/src/agents/claude-decompose.ts +6 -21
package/src/agents/types-extended.ts +1 -1
package/src/cli/plan.ts +4 -11
package/src/cli/status-features.ts +19 -0
package/src/config/test-strategy.ts +70 -0
package/src/execution/lifecycle/acceptance-loop.ts +2 -0
package/src/execution/lifecycle/run-setup.ts +4 -0
package/src/execution/parallel-coordinator.ts +3 -1
package/src/execution/parallel-executor.ts +3 -0
package/src/execution/runner-execution.ts +16 -2
package/src/execution/runner.ts +4 -0
package/src/execution/story-context.ts +6 -0
package/src/prd/schema.ts +4 -14
package/src/precheck/index.ts +155 -44
package/src/verification/rectification-loop.ts +18 -5

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@nathapp/nax",
-  "version": "0.43.1",
+  "version": "0.45.0",
   "description": "AI Coding Agent Orchestrator — loops until done",
   "type": "module",
   "bin": {

package/src/agents/acp/adapter.ts CHANGED Viewed

@@ -307,6 +307,88 @@ export async function readAcpSession(workdir: string, featureName: string, story
   }
 }
+// ─────────────────────────────────────────────────────────────────────────────
+// Session sweep — close open sessions at run boundaries
+// ─────────────────────────────────────────────────────────────────────────────
+const MAX_SESSION_AGE_MS = 2 * 60 * 60 * 1000; // 2 hours
+/**
+ * Close all open sessions tracked in the sidecar file for a feature.
+ * Called at run-end to ensure no sessions leak past the run boundary.
+ */
+export async function sweepFeatureSessions(workdir: string, featureName: string): Promise<void> {
+  const path = acpSessionsPath(workdir, featureName);
+  let sessions: Record<string, string>;
+  try {
+    const text = await Bun.file(path).text();
+    sessions = JSON.parse(text) as Record<string, string>;
+  } catch {
+    return; // No sidecar — nothing to sweep
+  }
+  const entries = Object.entries(sessions);
+  if (entries.length === 0) return;
+  const logger = getSafeLogger();
+  logger?.info("acp-adapter", `[sweep] Closing ${entries.length} open sessions for feature: ${featureName}`);
+  const cmdStr = "acpx claude";
+  const client = _acpAdapterDeps.createClient(cmdStr, workdir);
+  try {
+    await client.start();
+    for (const [, sessionName] of entries) {
+      try {
+        if (client.loadSession) {
+          const session = await client.loadSession(sessionName, "claude", "approve-reads");
+          if (session) {
+            await session.close().catch(() => {});
+          }
+        }
+      } catch (err) {
+        logger?.warn("acp-adapter", `[sweep] Failed to close session ${sessionName}`, { error: String(err) });
+      }
+    }
+  } finally {
+    await client.close().catch(() => {});
+  }
+  // Clear sidecar after sweep
+  try {
+    await Bun.write(path, JSON.stringify({}, null, 2));
+  } catch (err) {
+    logger?.warn("acp-adapter", "[sweep] Failed to clear sidecar after sweep", { error: String(err) });
+  }
+}
+/**
+ * Sweep stale sessions if the sidecar file is older than maxAgeMs.
+ * Called at startup as a safety net for sessions orphaned by crashes.
+ */
+export async function sweepStaleFeatureSessions(
+  workdir: string,
+  featureName: string,
+  maxAgeMs = MAX_SESSION_AGE_MS,
+): Promise<void> {
+  const path = acpSessionsPath(workdir, featureName);
+  const file = Bun.file(path);
+  if (!(await file.exists())) return;
+  const ageMs = Date.now() - file.lastModified;
+  if (ageMs < maxAgeMs) return; // Recent sidecar — skip
+  getSafeLogger()?.info(
+    "acp-adapter",
+    `[sweep] Sidecar is ${Math.round(ageMs / 60000)}m old — sweeping stale sessions`,
+    {
+      featureName,
+      ageMs,
+    },
+  );
+  await sweepFeatureSessions(workdir, featureName);
+}
 // ─────────────────────────────────────────────────────────────────────────────
 // Output helpers
 // ─────────────────────────────────────────────────────────────────────────────
@@ -470,6 +552,9 @@ export class AcpAgentAdapter implements AgentAdapter {
     let lastResponse: AcpSessionResponse | null = null;
     let timedOut = false;
+    // Tracks whether the run completed successfully — used by finally to decide
+    // whether to close the session (success) or keep it open for retry (failure).
+    const runState = { succeeded: false };
     const totalTokenUsage = { input_tokens: 0, output_tokens: 0 };
     try {
@@ -525,13 +610,21 @@ export class AcpAgentAdapter implements AgentAdapter {
       if (turnCount >= MAX_TURNS && options.interactionBridge) {
         getSafeLogger()?.warn("acp-adapter", "Reached max turns limit", { sessionName, maxTurns: MAX_TURNS });
       }
+      // Compute success here so finally can use it for conditional close.
+      runState.succeeded = !timedOut && lastResponse?.stopReason === "end_turn";
     } finally {
-      // 6. Cleanup — always close session and client, then clear sidecar
-      await closeAcpSession(session);
-      await client.close().catch(() => {});
-      if (options.featureName && options.storyId) {
-        await clearAcpSession(options.workdir, options.featureName, options.storyId);
+      // 6. Cleanup — close session and clear sidecar only on success.
+      // On failure, keep session open so retry can resume with full context.
+      if (runState.succeeded) {
+        await closeAcpSession(session);
+        if (options.featureName && options.storyId) {
+          await clearAcpSession(options.workdir, options.featureName, options.storyId);
+        }
+      } else {
+        getSafeLogger()?.info("acp-adapter", "Keeping session open for retry", { sessionName });
       }
+      await client.close().catch(() => {});
     }
     const durationMs = Date.now() - startTime;

package/src/agents/claude-decompose.ts CHANGED Viewed

@@ -5,6 +5,7 @@
  * parseDecomposeOutput(), validateComplexity()
  */
+import { COMPLEXITY_GUIDE, GROUPING_RULES, TEST_STRATEGY_GUIDE, resolveTestStrategy } from "../config/test-strategy";
 import type { DecomposeOptions, DecomposeResult, DecomposedStory } from "./types";
 /**
@@ -31,24 +32,13 @@ Decompose this spec into user stories. For each story, provide:
 9. reasoning: Why this complexity level
 10. estimatedLOC: Estimated lines of code to change
 11. risks: Array of implementation risks
-12. testStrategy: "three-session-tdd" | "test-after"
+12. testStrategy: "test-after" | "tdd-simple" | "three-session-tdd" | "three-session-tdd-lite"
-testStrategy rules:
-- "three-session-tdd": ONLY for complex/expert tasks that are security-critical (auth, encryption, tokens, credentials) or define public API contracts consumers depend on
-- "test-after": for all other tasks including simple/medium complexity
-- A "simple" complexity task should almost never be "three-session-tdd"
+${COMPLEXITY_GUIDE}
-Complexity classification rules:
-- simple: 1-3 files, <100 LOC, straightforward implementation, existing patterns
-- medium: 3-6 files, 100-300 LOC, moderate logic, some new patterns
-- complex: 6+ files, 300-800 LOC, architectural changes, cross-cutting concerns
-- expert: Security/crypto/real-time/distributed systems, >800 LOC, new infrastructure
+${TEST_STRATEGY_GUIDE}
-Grouping Guidelines:
-- Combine small, related tasks (e.g., multiple utility functions, interfaces) into a single "simple" or "medium" story.
-- Do NOT create separate stories for every single file or function unless complex.
-- Aim for coherent units of value (e.g., "Implement User Authentication" vs "Create User Interface", "Create Login Service").
-- Maximum recommended stories: 10-15 per feature. Group aggressively if list grows too long.
+${GROUPING_RULES}
 Consider:
 1. Does infrastructure exist? (e.g., "add caching" when no cache layer exists = complex)
@@ -141,12 +131,7 @@ export function parseDecomposeOutput(output: string): DecomposedStory[] {
       reasoning: String(record.reasoning || "No reasoning provided"),
       estimatedLOC: Number(record.estimatedLOC) || 0,
       risks: Array.isArray(record.risks) ? record.risks : [],
-      testStrategy:
-        record.testStrategy === "three-session-tdd"
-          ? "three-session-tdd"
-          : record.testStrategy === "test-after"
-            ? "test-after"
-            : undefined,
+      testStrategy: resolveTestStrategy(typeof record.testStrategy === "string" ? record.testStrategy : undefined),
     };
   });

package/src/agents/types-extended.ts CHANGED Viewed

@@ -117,7 +117,7 @@ export interface DecomposedStory {
   /** Implementation risks */
   risks: string[];
   /** Test strategy recommendation from LLM */
-  testStrategy?: "three-session-tdd" | "test-after";
+  testStrategy?: import("../config/test-strategy").TestStrategy;
 }
 /**

package/src/cli/plan.ts CHANGED Viewed

@@ -16,6 +16,7 @@ import { scanCodebase } from "../analyze/scanner";
 import type { CodebaseScan } from "../analyze/types";
 import type { NaxConfig } from "../config";
 import { resolvePermissions } from "../config/permissions";
+import { COMPLEXITY_GUIDE, GROUPING_RULES, TEST_STRATEGY_GUIDE } from "../config/test-strategy";
 import { PidRegistry } from "../execution/pid-registry";
 import { getLogger } from "../logger";
 import { validatePlanOutput } from "../prd/schema";
@@ -320,19 +321,11 @@ Generate a JSON object with this exact structure (no markdown, no explanation
   ]
 }
-## Complexity Classification Guide
+${COMPLEXITY_GUIDE}
-- simple: ≤50 LOC, single-file change, purely additive, no new dependencies → test-after
-- medium: 50–200 LOC, 2–5 files, standard patterns, clear requirements → tdd-simple
-- complex: 200–500 LOC, multiple modules, new abstractions or integrations → three-session-tdd
-- expert: 500+ LOC, architectural changes, cross-cutting concerns, high risk → three-session-tdd-lite
+${TEST_STRATEGY_GUIDE}
-## Test Strategy Guide
-- test-after: Simple changes with well-understood behavior. Write tests after implementation.
-- tdd-simple: Medium complexity. Write key tests first, implement, then fill coverage.
-- three-session-tdd: Complex stories. Full TDD cycle with separate test-writer and implementer sessions.
-- three-session-tdd-lite: Expert/high-risk stories. Full TDD with additional verifier session.
+${GROUPING_RULES}
 ${
   outputFilePath

package/src/cli/status-features.ts CHANGED Viewed

@@ -85,6 +85,17 @@ async function loadProjectStatusFile(projectDir: string): Promise<NaxStatusFile
 async function getFeatureSummary(featureName: string, featureDir: string): Promise<FeatureSummary> {
   const prdPath = join(featureDir, "prd.json");
+  // Guard: prd.json may not exist (e.g. plan failed before writing it)
+  if (!existsSync(prdPath)) {
+    return {
+      name: featureName,
+      done: 0,
+      failed: 0,
+      pending: 0,
+      total: 0,
+    };
+  }
   // Load PRD for story counts
   const prd = await loadPRD(prdPath);
   const counts = countStories(prd);
@@ -240,6 +251,14 @@ async function displayAllFeatures(projectDir: string): Promise<void> {
 /** Display single feature details */
 async function displayFeatureDetails(featureName: string, featureDir: string): Promise<void> {
   const prdPath = join(featureDir, "prd.json");
+  // Guard: prd.json may not exist (e.g. plan failed or feature just created)
+  if (!existsSync(prdPath)) {
+    console.log(chalk.bold(`\n📊 ${featureName}\n`));
+    console.log(chalk.dim(`No prd.json found. Run: nax plan -f ${featureName} --from <spec>`));
+    return;
+  }
   const prd = await loadPRD(prdPath);
   const counts = countStories(prd);

package/src/config/test-strategy.ts ADDED Viewed

@@ -0,0 +1,70 @@
+/**
+ * Test Strategy — Single Source of Truth
+ *
+ * Defines all valid test strategies, the normalizer, and shared prompt
+ * fragments used by plan.ts and claude-decompose.ts.
+ */
+import type { TestStrategy } from "./schema-types";
+// ─── Re-export type ───────────────────────────────────────────────────────────
+export type { TestStrategy };
+// ─── Valid values ─────────────────────────────────────────────────────────────
+export const VALID_TEST_STRATEGIES: readonly TestStrategy[] = [
+  "test-after",
+  "tdd-simple",
+  "three-session-tdd",
+  "three-session-tdd-lite",
+];
+// ─── Resolver ────────────────────────────────────────────────────────────────
+/**
+ * Validate and normalize a test strategy string.
+ * Returns a valid TestStrategy or falls back to "test-after".
+ */
+export function resolveTestStrategy(raw: string | undefined): TestStrategy {
+  if (!raw) return "test-after";
+  if (VALID_TEST_STRATEGIES.includes(raw as TestStrategy)) return raw as TestStrategy;
+  // Map legacy/typo values
+  if (raw === "tdd") return "tdd-simple";
+  if (raw === "three-session") return "three-session-tdd";
+  if (raw === "tdd-lite") return "three-session-tdd-lite";
+  return "test-after"; // safe fallback
+}
+// ─── Prompt fragments (shared by plan.ts and claude-decompose.ts) ────────────
+export const COMPLEXITY_GUIDE = `## Complexity Classification Guide
+- simple: ≤50 LOC, single-file change, purely additive, no new dependencies → test-after
+- medium: 50–200 LOC, 2–5 files, standard patterns, clear requirements → tdd-simple
+- complex: 200–500 LOC, multiple modules, new abstractions or integrations → three-session-tdd
+- expert: 500+ LOC, architectural changes, cross-cutting concerns, high risk → three-session-tdd-lite
+### Security Override
+Security-critical functions (authentication, cryptography, tokens, sessions, credentials,
+password hashing, access control) must be classified at MINIMUM "medium" complexity
+regardless of LOC count. These require at minimum "tdd-simple" test strategy.`;
+export const TEST_STRATEGY_GUIDE = `## Test Strategy Guide
+- test-after: Simple changes with well-understood behavior. Write tests after implementation.
+- tdd-simple: Medium complexity. Write key tests first, implement, then fill coverage.
+- three-session-tdd: Complex stories. Full TDD cycle with separate test-writer and implementer sessions.
+- three-session-tdd-lite: Expert/high-risk stories. Full TDD with additional verifier session.`;
+export const GROUPING_RULES = `## Grouping Rules
+- Combine small, related tasks into a single "simple" or "medium" story.
+- Do NOT create separate stories for every single file or function unless complex.
+- Do NOT create standalone stories purely for test coverage or testing.
+  Each story's testStrategy already handles testing (tdd-simple writes tests first,
+  three-session-tdd uses separate test-writer session, test-after writes tests after).
+  Only create a dedicated test story for unique integration/E2E test logic that spans
+  multiple stories and cannot be covered by individual story test strategies.
+- Aim for coherent units of value. Maximum recommended stories: 10-15 per feature.`;

package/src/execution/lifecycle/acceptance-loop.ts CHANGED Viewed

@@ -143,6 +143,7 @@ async function executeFixStory(
     hooks: ctx.hooks,
     plugins: ctx.pluginRegistry,
     storyStartTime: new Date().toISOString(),
+    agentGetFn: ctx.agentGetFn,
   };
   const result = await runPipeline(defaultPipeline, fixContext, ctx.eventEmitter);
   logger?.info("acceptance", `Fix story ${story.id} ${result.success ? "passed" : "failed"}`);
@@ -189,6 +190,7 @@ export async function runAcceptanceLoop(ctx: AcceptanceLoopContext): Promise<Acc
       featureDir: ctx.featureDir,
       hooks: ctx.hooks,
       plugins: ctx.pluginRegistry,
+      agentGetFn: ctx.agentGetFn,
     };
     const { acceptanceStage } = await import("../../pipeline/stages/acceptance");

package/src/execution/lifecycle/run-setup.ts CHANGED Viewed

@@ -159,6 +159,10 @@ export async function setupRun(options: RunSetupOptions): Promise<RunSetupResult
     logger?.warn("precheck", "Precheck validations skipped (--skip-precheck)");
   }
+  // Sweep stale ACP sessions from previous crashed runs (safety net)
+  const { sweepStaleFeatureSessions } = await import("../../agents/acp/adapter");
+  await sweepStaleFeatureSessions(workdir, feature).catch(() => {});
   // Acquire lock to prevent concurrent execution
   const lockAcquired = await acquireLock(workdir);
   if (!lockAcquired) {

package/src/execution/parallel-coordinator.ts CHANGED Viewed

@@ -8,7 +8,7 @@ import type { NaxConfig } from "../config";
 import type { LoadedHooksConfig } from "../hooks";
 import { getSafeLogger } from "../logger";
 import type { PipelineEventEmitter } from "../pipeline/events";
-import type { PipelineContext } from "../pipeline/types";
+import type { AgentGetFn } from "../pipeline/types";
 import type { PluginRegistry } from "../plugins/registry";
 import type { PRD, UserStory } from "../prd";
 import { markStoryFailed, markStoryPassed, savePRD } from "../prd";
@@ -108,6 +108,7 @@ export async function executeParallel(
   featureDir: string | undefined,
   parallel: number,
   eventEmitter?: PipelineEventEmitter,
+  agentGetFn?: AgentGetFn,
 ): Promise<{
   storiesCompleted: number;
   totalCost: number;
@@ -152,6 +153,7 @@ export async function executeParallel(
       hooks,
       plugins,
       storyStartTime: new Date().toISOString(),
+      agentGetFn,
     };
     // Create worktrees for all stories in batch

package/src/execution/parallel-executor.ts CHANGED Viewed

@@ -17,6 +17,7 @@ import { fireHook } from "../hooks";
 import { getSafeLogger } from "../logger";
 import type { StoryMetrics } from "../metrics";
 import type { PipelineEventEmitter } from "../pipeline/events";
+import type { AgentGetFn } from "../pipeline/types";
 import type { PluginRegistry } from "../plugins/registry";
 import type { PRD } from "../prd";
 import { countStories, isComplete } from "../prd";
@@ -57,6 +58,7 @@ export interface ParallelExecutorOptions {
   pluginRegistry: PluginRegistry;
   formatterMode: "quiet" | "normal" | "verbose" | "json";
   headless: boolean;
+  agentGetFn?: AgentGetFn;
 }
 export interface RectificationStats {
@@ -158,6 +160,7 @@ export async function runParallelExecution(
       featureDir,
       parallelCount,
       eventEmitter,
+      options.agentGetFn,
     );
     const batchDurationMs = Date.now() - batchStartMs;

package/src/execution/runner-execution.ts CHANGED Viewed

@@ -129,10 +129,24 @@ export async function runExecutionPhase(
   clearLlmCache();
   // PERF-1: Precompute batch plan once from ready stories
-  const batchPlan = options.useBatch ? precomputeBatchPlan(getAllReadyStories(prd), 4) : [];
+  const readyStories = getAllReadyStories(prd);
+  // BUG-068: debug log to diagnose unexpected storyCount in batch routing
+  logger?.debug("routing", "Ready stories for batch routing", {
+    readyCount: readyStories.length,
+    readyIds: readyStories.map((s) => s.id),
+    allStories: prd.userStories.map((s) => ({
+      id: s.id,
+      status: s.status,
+      passes: s.passes,
+      deps: s.dependencies,
+    })),
+  });
+  const batchPlan = options.useBatch ? precomputeBatchPlan(readyStories, 4) : [];
   if (options.useBatch) {
-    await tryLlmBatchRoute(options.config, getAllReadyStories(prd), "routing");
+    await tryLlmBatchRoute(options.config, readyStories, "routing");
   }
   // Parallel Execution Path (when --parallel is set)

package/src/execution/runner.ts CHANGED Viewed

@@ -13,6 +13,7 @@
  * - runner-completion.ts: Acceptance loop, hooks, metrics
  */
+import { sweepFeatureSessions } from "../agents/acp/adapter";
 import { createAgentRegistry } from "../agents/registry";
 import type { NaxConfig } from "../config";
 import type { LoadedHooksConfig } from "../hooks";
@@ -241,6 +242,9 @@ export async function run(options: RunOptions): Promise<RunResult> {
     // Cleanup crash handlers (MEM-1 fix)
     cleanupCrashHandlers();
+    // Sweep any remaining open ACP sessions for this feature
+    await sweepFeatureSessions(workdir, feature).catch(() => {});
     // Execute cleanup operations
     const { cleanupRun } = await import("./lifecycle/run-cleanup");
     await cleanupRun({

package/src/execution/story-context.ts CHANGED Viewed

@@ -175,6 +175,12 @@ export async function buildStoryContextFull(
 export function getAllReadyStories(prd: PRD): UserStory[] {
   const completedIds = new Set(prd.userStories.filter((s) => s.passes || s.status === "skipped").map((s) => s.id));
+  const logger = getSafeLogger();
+  logger?.debug("routing", "getAllReadyStories: completed set", {
+    completedIds: [...completedIds],
+    totalStories: prd.userStories.length,
+  });
   return prd.userStories.filter(
     (s) =>
       !s.passes &&

package/src/prd/schema.ts CHANGED Viewed

@@ -5,6 +5,7 @@
  */
 import type { Complexity, TestStrategy } from "../config";
+import { resolveTestStrategy } from "../config/test-strategy";
 import type { PRD, UserStory } from "./types";
 import { validateStoryId } from "./validate";
@@ -13,12 +14,6 @@ import { validateStoryId } from "./validate";
 // ---------------------------------------------------------------------------
 const VALID_COMPLEXITY: Complexity[] = ["simple", "medium", "complex", "expert"];
-const VALID_TEST_STRATEGIES: TestStrategy[] = [
-  "test-after",
-  "tdd-simple",
-  "three-session-tdd",
-  "three-session-tdd-lite",
-];
 /** Pattern matching ST001 → ST-001 style IDs (prefix letters + digits, no separator) */
 const STORY_ID_NO_SEPARATOR = /^([A-Za-z]+)(\d+)$/;
@@ -140,15 +135,10 @@ function validateStory(raw: unknown, index: number, allIds: Set<string>): UserSt
   }
   // testStrategy — accept from routing.testStrategy or top-level testStrategy
-  // Also map legacy/LLM-hallucinated aliases: tdd-lite → tdd-simple
   const rawTestStrategy = routing.testStrategy ?? s.testStrategy;
-  const STRATEGY_ALIASES: Record<string, TestStrategy> = { "tdd-lite": "three-session-tdd-lite" };
-  const normalizedStrategy =
-    typeof rawTestStrategy === "string" ? (STRATEGY_ALIASES[rawTestStrategy] ?? rawTestStrategy) : rawTestStrategy;
-  const testStrategy: TestStrategy =
-    normalizedStrategy !== undefined && (VALID_TEST_STRATEGIES as unknown[]).includes(normalizedStrategy)
-      ? (normalizedStrategy as TestStrategy)
-      : "tdd-simple";
+  const testStrategy: TestStrategy = resolveTestStrategy(
+    typeof rawTestStrategy === "string" ? rawTestStrategy : undefined,
+  );
   // dependencies
   const rawDeps = s.dependencies;