@nathapp/nax 0.44.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/CHANGELOG.md +19 -0
  2. package/bin/nax.ts +7 -6
  3. package/dist/nax.js +266 -161
  4. package/package.json +1 -1
  5. package/src/agents/acp/adapter.ts +34 -6
  6. package/src/agents/acp/index.ts +0 -2
  7. package/src/agents/acp/parser.ts +57 -104
  8. package/src/agents/acp/spawn-client.ts +2 -1
  9. package/src/agents/{claude.ts → claude/adapter.ts} +15 -12
  10. package/src/agents/{claude-complete.ts → claude/complete.ts} +3 -3
  11. package/src/agents/{cost.ts → claude/cost.ts} +1 -1
  12. package/src/agents/{claude-execution.ts → claude/execution.ts} +5 -5
  13. package/src/agents/claude/index.ts +3 -0
  14. package/src/agents/{claude-interactive.ts → claude/interactive.ts} +4 -4
  15. package/src/agents/{claude-plan.ts → claude/plan.ts} +12 -9
  16. package/src/agents/index.ts +5 -5
  17. package/src/agents/registry.ts +5 -5
  18. package/src/agents/{claude-decompose.ts → shared/decompose.ts} +7 -22
  19. package/src/agents/{model-resolution.ts → shared/model-resolution.ts} +2 -2
  20. package/src/agents/{types-extended.ts → shared/types-extended.ts} +4 -4
  21. package/src/agents/{validation.ts → shared/validation.ts} +2 -2
  22. package/src/agents/{version-detection.ts → shared/version-detection.ts} +3 -3
  23. package/src/agents/types.ts +8 -4
  24. package/src/cli/agents.ts +1 -1
  25. package/src/cli/plan.ts +4 -11
  26. package/src/config/test-strategy.ts +70 -0
  27. package/src/execution/lifecycle/acceptance-loop.ts +2 -0
  28. package/src/execution/parallel-coordinator.ts +3 -1
  29. package/src/execution/parallel-executor.ts +3 -0
  30. package/src/execution/runner-execution.ts +16 -2
  31. package/src/execution/story-context.ts +6 -0
  32. package/src/pipeline/stages/acceptance.ts +5 -8
  33. package/src/pipeline/stages/regression.ts +2 -0
  34. package/src/pipeline/stages/verify.ts +5 -10
  35. package/src/prd/schema.ts +4 -14
  36. package/src/precheck/checks-agents.ts +1 -1
  37. package/src/utils/log-test-output.ts +25 -0
  38. package/src/agents/{adapters/aider.ts → aider/adapter.ts} +0 -0
  39. package/src/agents/{adapters/codex.ts → codex/adapter.ts} +0 -0
  40. package/src/agents/{adapters/gemini.ts → gemini/adapter.ts} +0 -0
  41. package/src/agents/{adapters/opencode.ts → opencode/adapter.ts} +0 -0
@@ -5,8 +5,8 @@
5
5
  * by running `<agent> --version` and parsing the output.
6
6
  */
7
7
 
8
- import { getInstalledAgents } from "./registry";
9
- import type { AgentAdapter } from "./types";
8
+ import { getInstalledAgents } from "../registry";
9
+ import type { AgentAdapter } from "../types";
10
10
 
11
11
  /**
12
12
  * Information about an installed agent including its version
@@ -90,7 +90,7 @@ export async function getAgentVersions(): Promise<AgentVersionInfo[]> {
90
90
  const agentsByName = new Map(agents.map((a) => [a.name, a]));
91
91
 
92
92
  // Import ALL_AGENTS to include non-installed ones
93
- const { ALL_AGENTS } = await import("./registry");
93
+ const { ALL_AGENTS } = await import("../registry");
94
94
 
95
95
  const versions = await Promise.all(
96
96
  ALL_AGENTS.map(async (agent: AgentAdapter): Promise<AgentVersionInfo> => {
@@ -18,7 +18,7 @@ export type {
18
18
  DecomposedStory,
19
19
  PtyHandle,
20
20
  InteractiveRunOptions,
21
- } from "./types-extended";
21
+ } from "./shared/types-extended";
22
22
 
23
23
  /**
24
24
  * Agent execution result returned after running a coding agent.
@@ -165,10 +165,12 @@ export interface AgentAdapter {
165
165
  buildCommand(options: AgentRunOptions): string[];
166
166
 
167
167
  /** Run the agent in plan mode to generate a feature specification. */
168
- plan(options: import("./types-extended").PlanOptions): Promise<import("./types-extended").PlanResult>;
168
+ plan(options: import("./shared/types-extended").PlanOptions): Promise<import("./shared/types-extended").PlanResult>;
169
169
 
170
170
  /** Run the agent in decompose mode to break spec into classified stories. */
171
- decompose(options: import("./types-extended").DecomposeOptions): Promise<import("./types-extended").DecomposeResult>;
171
+ decompose(
172
+ options: import("./shared/types-extended").DecomposeOptions,
173
+ ): Promise<import("./shared/types-extended").DecomposeResult>;
172
174
 
173
175
  /**
174
176
  * Run a one-shot LLM call and return the plain text response.
@@ -181,5 +183,7 @@ export interface AgentAdapter {
181
183
  * This method is optional — only implemented by agents that support
182
184
  * interactive terminal sessions (e.g., Claude Code).
183
185
  */
184
- runInteractive?(options: import("./types-extended").InteractiveRunOptions): import("./types-extended").PtyHandle;
186
+ runInteractive?(
187
+ options: import("./shared/types-extended").InteractiveRunOptions,
188
+ ): import("./shared/types-extended").PtyHandle;
185
189
  }
package/src/cli/agents.ts CHANGED
@@ -5,7 +5,7 @@
5
5
  */
6
6
 
7
7
  import { ALL_AGENTS } from "../agents/registry";
8
- import { getAgentVersion } from "../agents/version-detection";
8
+ import { getAgentVersion } from "../agents/shared/version-detection";
9
9
  import type { NaxConfig } from "../config/schema";
10
10
 
11
11
  /**
package/src/cli/plan.ts CHANGED
@@ -16,6 +16,7 @@ import { scanCodebase } from "../analyze/scanner";
16
16
  import type { CodebaseScan } from "../analyze/types";
17
17
  import type { NaxConfig } from "../config";
18
18
  import { resolvePermissions } from "../config/permissions";
19
+ import { COMPLEXITY_GUIDE, GROUPING_RULES, TEST_STRATEGY_GUIDE } from "../config/test-strategy";
19
20
  import { PidRegistry } from "../execution/pid-registry";
20
21
  import { getLogger } from "../logger";
21
22
  import { validatePlanOutput } from "../prd/schema";
@@ -320,19 +321,11 @@ Generate a JSON object with this exact structure (no markdown, no explanation
320
321
  ]
321
322
  }
322
323
 
323
- ## Complexity Classification Guide
324
+ ${COMPLEXITY_GUIDE}
324
325
 
325
- - simple: ≤50 LOC, single-file change, purely additive, no new dependencies → test-after
326
- - medium: 50–200 LOC, 2–5 files, standard patterns, clear requirements → tdd-simple
327
- - complex: 200–500 LOC, multiple modules, new abstractions or integrations → three-session-tdd
328
- - expert: 500+ LOC, architectural changes, cross-cutting concerns, high risk → three-session-tdd-lite
326
+ ${TEST_STRATEGY_GUIDE}
329
327
 
330
- ## Test Strategy Guide
331
-
332
- - test-after: Simple changes with well-understood behavior. Write tests after implementation.
333
- - tdd-simple: Medium complexity. Write key tests first, implement, then fill coverage.
334
- - three-session-tdd: Complex stories. Full TDD cycle with separate test-writer and implementer sessions.
335
- - three-session-tdd-lite: Expert/high-risk stories. Full TDD with additional verifier session.
328
+ ${GROUPING_RULES}
336
329
 
337
330
  ${
338
331
  outputFilePath
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Test Strategy — Single Source of Truth
3
+ *
4
+ * Defines all valid test strategies, the normalizer, and shared prompt
5
+ * fragments used by plan.ts and claude-decompose.ts.
6
+ */
7
+
8
+ import type { TestStrategy } from "./schema-types";
9
+
10
+ // ─── Re-export type ───────────────────────────────────────────────────────────
11
+
12
+ export type { TestStrategy };
13
+
14
+ // ─── Valid values ─────────────────────────────────────────────────────────────
15
+
16
+ export const VALID_TEST_STRATEGIES: readonly TestStrategy[] = [
17
+ "test-after",
18
+ "tdd-simple",
19
+ "three-session-tdd",
20
+ "three-session-tdd-lite",
21
+ ];
22
+
23
+ // ─── Resolver ────────────────────────────────────────────────────────────────
24
+
25
+ /**
26
+ * Validate and normalize a test strategy string.
27
+ * Returns a valid TestStrategy or falls back to "test-after".
28
+ */
29
+ export function resolveTestStrategy(raw: string | undefined): TestStrategy {
30
+ if (!raw) return "test-after";
31
+ if (VALID_TEST_STRATEGIES.includes(raw as TestStrategy)) return raw as TestStrategy;
32
+ // Map legacy/typo values
33
+ if (raw === "tdd") return "tdd-simple";
34
+ if (raw === "three-session") return "three-session-tdd";
35
+ if (raw === "tdd-lite") return "three-session-tdd-lite";
36
+ return "test-after"; // safe fallback
37
+ }
38
+
39
+ // ─── Prompt fragments (shared by plan.ts and claude-decompose.ts) ────────────
40
+
41
+ export const COMPLEXITY_GUIDE = `## Complexity Classification Guide
42
+
43
+ - simple: ≤50 LOC, single-file change, purely additive, no new dependencies → test-after
44
+ - medium: 50–200 LOC, 2–5 files, standard patterns, clear requirements → tdd-simple
45
+ - complex: 200–500 LOC, multiple modules, new abstractions or integrations → three-session-tdd
46
+ - expert: 500+ LOC, architectural changes, cross-cutting concerns, high risk → three-session-tdd-lite
47
+
48
+ ### Security Override
49
+
50
+ Security-critical functions (authentication, cryptography, tokens, sessions, credentials,
51
+ password hashing, access control) must be classified at MINIMUM "medium" complexity
52
+ regardless of LOC count. These require at minimum "tdd-simple" test strategy.`;
53
+
54
+ export const TEST_STRATEGY_GUIDE = `## Test Strategy Guide
55
+
56
+ - test-after: Simple changes with well-understood behavior. Write tests after implementation.
57
+ - tdd-simple: Medium complexity. Write key tests first, implement, then fill coverage.
58
+ - three-session-tdd: Complex stories. Full TDD cycle with separate test-writer and implementer sessions.
59
+ - three-session-tdd-lite: Expert/high-risk stories. Full TDD with additional verifier session.`;
60
+
61
+ export const GROUPING_RULES = `## Grouping Rules
62
+
63
+ - Combine small, related tasks into a single "simple" or "medium" story.
64
+ - Do NOT create separate stories for every single file or function unless complex.
65
+ - Do NOT create standalone stories purely for test coverage or testing.
66
+ Each story's testStrategy already handles testing (tdd-simple writes tests first,
67
+ three-session-tdd uses separate test-writer session, test-after writes tests after).
68
+ Only create a dedicated test story for unique integration/E2E test logic that spans
69
+ multiple stories and cannot be covered by individual story test strategies.
70
+ - Aim for coherent units of value. Maximum recommended stories: 10-15 per feature.`;
@@ -143,6 +143,7 @@ async function executeFixStory(
143
143
  hooks: ctx.hooks,
144
144
  plugins: ctx.pluginRegistry,
145
145
  storyStartTime: new Date().toISOString(),
146
+ agentGetFn: ctx.agentGetFn,
146
147
  };
147
148
  const result = await runPipeline(defaultPipeline, fixContext, ctx.eventEmitter);
148
149
  logger?.info("acceptance", `Fix story ${story.id} ${result.success ? "passed" : "failed"}`);
@@ -189,6 +190,7 @@ export async function runAcceptanceLoop(ctx: AcceptanceLoopContext): Promise<Acc
189
190
  featureDir: ctx.featureDir,
190
191
  hooks: ctx.hooks,
191
192
  plugins: ctx.pluginRegistry,
193
+ agentGetFn: ctx.agentGetFn,
192
194
  };
193
195
 
194
196
  const { acceptanceStage } = await import("../../pipeline/stages/acceptance");
@@ -8,7 +8,7 @@ import type { NaxConfig } from "../config";
8
8
  import type { LoadedHooksConfig } from "../hooks";
9
9
  import { getSafeLogger } from "../logger";
10
10
  import type { PipelineEventEmitter } from "../pipeline/events";
11
- import type { PipelineContext } from "../pipeline/types";
11
+ import type { AgentGetFn } from "../pipeline/types";
12
12
  import type { PluginRegistry } from "../plugins/registry";
13
13
  import type { PRD, UserStory } from "../prd";
14
14
  import { markStoryFailed, markStoryPassed, savePRD } from "../prd";
@@ -108,6 +108,7 @@ export async function executeParallel(
108
108
  featureDir: string | undefined,
109
109
  parallel: number,
110
110
  eventEmitter?: PipelineEventEmitter,
111
+ agentGetFn?: AgentGetFn,
111
112
  ): Promise<{
112
113
  storiesCompleted: number;
113
114
  totalCost: number;
@@ -152,6 +153,7 @@ export async function executeParallel(
152
153
  hooks,
153
154
  plugins,
154
155
  storyStartTime: new Date().toISOString(),
156
+ agentGetFn,
155
157
  };
156
158
 
157
159
  // Create worktrees for all stories in batch
@@ -17,6 +17,7 @@ import { fireHook } from "../hooks";
17
17
  import { getSafeLogger } from "../logger";
18
18
  import type { StoryMetrics } from "../metrics";
19
19
  import type { PipelineEventEmitter } from "../pipeline/events";
20
+ import type { AgentGetFn } from "../pipeline/types";
20
21
  import type { PluginRegistry } from "../plugins/registry";
21
22
  import type { PRD } from "../prd";
22
23
  import { countStories, isComplete } from "../prd";
@@ -57,6 +58,7 @@ export interface ParallelExecutorOptions {
57
58
  pluginRegistry: PluginRegistry;
58
59
  formatterMode: "quiet" | "normal" | "verbose" | "json";
59
60
  headless: boolean;
61
+ agentGetFn?: AgentGetFn;
60
62
  }
61
63
 
62
64
  export interface RectificationStats {
@@ -158,6 +160,7 @@ export async function runParallelExecution(
158
160
  featureDir,
159
161
  parallelCount,
160
162
  eventEmitter,
163
+ options.agentGetFn,
161
164
  );
162
165
 
163
166
  const batchDurationMs = Date.now() - batchStartMs;
@@ -129,10 +129,24 @@ export async function runExecutionPhase(
129
129
  clearLlmCache();
130
130
 
131
131
  // PERF-1: Precompute batch plan once from ready stories
132
- const batchPlan = options.useBatch ? precomputeBatchPlan(getAllReadyStories(prd), 4) : [];
132
+ const readyStories = getAllReadyStories(prd);
133
+
134
+ // BUG-068: debug log to diagnose unexpected storyCount in batch routing
135
+ logger?.debug("routing", "Ready stories for batch routing", {
136
+ readyCount: readyStories.length,
137
+ readyIds: readyStories.map((s) => s.id),
138
+ allStories: prd.userStories.map((s) => ({
139
+ id: s.id,
140
+ status: s.status,
141
+ passes: s.passes,
142
+ deps: s.dependencies,
143
+ })),
144
+ });
145
+
146
+ const batchPlan = options.useBatch ? precomputeBatchPlan(readyStories, 4) : [];
133
147
 
134
148
  if (options.useBatch) {
135
- await tryLlmBatchRoute(options.config, getAllReadyStories(prd), "routing");
149
+ await tryLlmBatchRoute(options.config, readyStories, "routing");
136
150
  }
137
151
 
138
152
  // Parallel Execution Path (when --parallel is set)
@@ -175,6 +175,12 @@ export async function buildStoryContextFull(
175
175
  export function getAllReadyStories(prd: PRD): UserStory[] {
176
176
  const completedIds = new Set(prd.userStories.filter((s) => s.passes || s.status === "skipped").map((s) => s.id));
177
177
 
178
+ const logger = getSafeLogger();
179
+ logger?.debug("routing", "getAllReadyStories: completed set", {
180
+ completedIds: [...completedIds],
181
+ totalStories: prd.userStories.length,
182
+ });
183
+
178
184
  return prd.userStories.filter(
179
185
  (s) =>
180
186
  !s.passes &&
@@ -27,6 +27,7 @@
27
27
  import path from "node:path";
28
28
  import { getLogger } from "../../logger";
29
29
  import { countStories } from "../../prd";
30
+ import { logTestOutput } from "../../utils/log-test-output";
30
31
  import type { PipelineContext, PipelineStage, StageResult } from "../types";
31
32
 
32
33
  /**
@@ -163,10 +164,8 @@ export const acceptanceStage: PipelineStage = {
163
164
 
164
165
  // Non-zero exit but no AC failures parsed at all — test crashed (syntax error, import failure, etc.)
165
166
  if (failedACs.length === 0 && exitCode !== 0) {
166
- logger.error("acceptance", "Tests errored with no AC failures parsed", {
167
- exitCode,
168
- output,
169
- });
167
+ logger.error("acceptance", "Tests errored with no AC failures parsed", { exitCode });
168
+ logTestOutput(logger, "acceptance", output);
170
169
 
171
170
  ctx.acceptanceFailures = {
172
171
  failedACs: ["AC-ERROR"],
@@ -190,10 +189,8 @@ export const acceptanceStage: PipelineStage = {
190
189
  });
191
190
  }
192
191
 
193
- logger.error("acceptance", "Acceptance tests failed", {
194
- failedACs: actualFailures,
195
- output,
196
- });
192
+ logger.error("acceptance", "Acceptance tests failed", { failedACs: actualFailures });
193
+ logTestOutput(logger, "acceptance", output);
197
194
 
198
195
  // Store failed ACs and test output in context for fix generation
199
196
  ctx.acceptanceFailures = {
@@ -14,6 +14,7 @@
14
14
  */
15
15
 
16
16
  import { getLogger } from "../../logger";
17
+ import { logTestOutput } from "../../utils/log-test-output";
17
18
  import { verificationOrchestrator } from "../../verification/orchestrator";
18
19
  import type { VerifyContext } from "../../verification/orchestrator-types";
19
20
  import { pipelineEventBus } from "../event-bus";
@@ -71,6 +72,7 @@ export const regressionStage: PipelineStage = {
71
72
  storyId: ctx.story.id,
72
73
  failCount: result.failCount,
73
74
  });
75
+ logTestOutput(logger, "regression", result.rawOutput, { storyId: ctx.story.id });
74
76
 
75
77
  pipelineEventBus.emit({
76
78
  type: "regression:detected",
@@ -11,6 +11,7 @@
11
11
 
12
12
  import type { SmartTestRunnerConfig } from "../../config/types";
13
13
  import { getLogger } from "../../logger";
14
+ import { logTestOutput } from "../../utils/log-test-output";
14
15
  import { detectRuntimeCrash } from "../../verification/crash-detector";
15
16
  import type { VerifyStatus } from "../../verification/orchestrator-types";
16
17
  import { regression } from "../../verification/runners";
@@ -173,16 +174,10 @@ export const verifyStage: PipelineStage = {
173
174
  });
174
175
  }
175
176
 
176
- // Log first few lines of output for context
177
- // BUG-037: Changed from .slice(0, 10) to .slice(-20) to show failures, not prechecks
178
- if (result.output && result.status !== "TIMEOUT") {
179
- const outputLines = result.output.split("\n").slice(-20);
180
- if (outputLines.length > 0) {
181
- logger.debug("verify", "Test output preview", {
182
- storyId: ctx.story.id,
183
- output: outputLines.join("\n"),
184
- });
185
- }
177
+ // Log tail of output at debug level for context (ENH-001)
178
+ // BUG-037: Use .slice(-20) to show failures, not prechecks
179
+ if (result.status !== "TIMEOUT") {
180
+ logTestOutput(logger, "verify", result.output, { storyId: ctx.story.id });
186
181
  }
187
182
 
188
183
  return {
package/src/prd/schema.ts CHANGED
@@ -5,6 +5,7 @@
5
5
  */
6
6
 
7
7
  import type { Complexity, TestStrategy } from "../config";
8
+ import { resolveTestStrategy } from "../config/test-strategy";
8
9
  import type { PRD, UserStory } from "./types";
9
10
  import { validateStoryId } from "./validate";
10
11
 
@@ -13,12 +14,6 @@ import { validateStoryId } from "./validate";
13
14
  // ---------------------------------------------------------------------------
14
15
 
15
16
  const VALID_COMPLEXITY: Complexity[] = ["simple", "medium", "complex", "expert"];
16
- const VALID_TEST_STRATEGIES: TestStrategy[] = [
17
- "test-after",
18
- "tdd-simple",
19
- "three-session-tdd",
20
- "three-session-tdd-lite",
21
- ];
22
17
 
23
18
  /** Pattern matching ST001 → ST-001 style IDs (prefix letters + digits, no separator) */
24
19
  const STORY_ID_NO_SEPARATOR = /^([A-Za-z]+)(\d+)$/;
@@ -140,15 +135,10 @@ function validateStory(raw: unknown, index: number, allIds: Set<string>): UserSt
140
135
  }
141
136
 
142
137
  // testStrategy — accept from routing.testStrategy or top-level testStrategy
143
- // Also map legacy/LLM-hallucinated aliases: tdd-lite → tdd-simple
144
138
  const rawTestStrategy = routing.testStrategy ?? s.testStrategy;
145
- const STRATEGY_ALIASES: Record<string, TestStrategy> = { "tdd-lite": "three-session-tdd-lite" };
146
- const normalizedStrategy =
147
- typeof rawTestStrategy === "string" ? (STRATEGY_ALIASES[rawTestStrategy] ?? rawTestStrategy) : rawTestStrategy;
148
- const testStrategy: TestStrategy =
149
- normalizedStrategy !== undefined && (VALID_TEST_STRATEGIES as unknown[]).includes(normalizedStrategy)
150
- ? (normalizedStrategy as TestStrategy)
151
- : "tdd-simple";
139
+ const testStrategy: TestStrategy = resolveTestStrategy(
140
+ typeof rawTestStrategy === "string" ? rawTestStrategy : undefined,
141
+ );
152
142
 
153
143
  // dependencies
154
144
  const rawDeps = s.dependencies;
@@ -5,7 +5,7 @@
5
5
  * and checks health status for each configured agent.
6
6
  */
7
7
 
8
- import { getAgentVersions } from "../agents/version-detection";
8
+ import { getAgentVersions } from "../agents/shared/version-detection";
9
9
  import type { Check } from "./types";
10
10
 
11
11
  /**
@@ -0,0 +1,25 @@
1
+ import type { Logger } from "../logger";
2
+
3
+ /**
4
+ * Log test output consistently across all pipeline stages.
5
+ *
6
+ * Summary (exitCode, storyId) is logged at the caller's level (error/warn).
7
+ * Raw output is logged at debug level only — last `tailLines` lines.
8
+ *
9
+ * `storyId` is optional: works for per-story verify/acceptance AND for
10
+ * deferred runs (deferred acceptance, deferred regression) with no story context.
11
+ */
12
+ export function logTestOutput(
13
+ logger: Logger | null | undefined,
14
+ stage: string,
15
+ output: string | undefined,
16
+ opts: { storyId?: string; tailLines?: number } = {},
17
+ ): void {
18
+ if (!logger || !output) return;
19
+ const tailLines = opts.tailLines ?? 20;
20
+ const lines = output.split("\n").slice(-tailLines).join("\n");
21
+ logger.debug(stage, "Test output (tail)", {
22
+ ...(opts.storyId !== undefined && { storyId: opts.storyId }),
23
+ output: lines,
24
+ });
25
+ }