@nathapp/nax 0.45.0 → 0.46.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/CHANGELOG.md +33 -0
  2. package/bin/nax.ts +7 -6
  3. package/dist/nax.js +340 -202
  4. package/package.json +1 -1
  5. package/src/acceptance/generator.ts +1 -1
  6. package/src/acceptance/types.ts +2 -0
  7. package/src/agents/acp/adapter.ts +34 -6
  8. package/src/agents/acp/cost.ts +5 -75
  9. package/src/agents/acp/index.ts +0 -2
  10. package/src/agents/acp/parser.ts +57 -104
  11. package/src/agents/acp/spawn-client.ts +13 -2
  12. package/src/agents/{claude.ts → claude/adapter.ts} +15 -12
  13. package/src/agents/{claude-complete.ts → claude/complete.ts} +3 -3
  14. package/src/agents/claude/cost.ts +16 -0
  15. package/src/agents/{claude-execution.ts → claude/execution.ts} +17 -6
  16. package/src/agents/claude/index.ts +3 -0
  17. package/src/agents/{claude-interactive.ts → claude/interactive.ts} +4 -4
  18. package/src/agents/{claude-plan.ts → claude/plan.ts} +12 -9
  19. package/src/agents/cost/calculate.ts +154 -0
  20. package/src/agents/cost/index.ts +10 -0
  21. package/src/agents/cost/parse.ts +97 -0
  22. package/src/agents/cost/pricing.ts +59 -0
  23. package/src/agents/cost/types.ts +45 -0
  24. package/src/agents/index.ts +6 -4
  25. package/src/agents/registry.ts +5 -5
  26. package/src/agents/{claude-decompose.ts → shared/decompose.ts} +2 -2
  27. package/src/agents/{model-resolution.ts → shared/model-resolution.ts} +2 -2
  28. package/src/agents/{types-extended.ts → shared/types-extended.ts} +4 -4
  29. package/src/agents/{validation.ts → shared/validation.ts} +2 -2
  30. package/src/agents/{version-detection.ts → shared/version-detection.ts} +3 -3
  31. package/src/agents/types.ts +11 -4
  32. package/src/cli/agents.ts +1 -1
  33. package/src/cli/init.ts +15 -1
  34. package/src/pipeline/stages/acceptance-setup.ts +1 -0
  35. package/src/pipeline/stages/acceptance.ts +5 -8
  36. package/src/pipeline/stages/regression.ts +2 -0
  37. package/src/pipeline/stages/verify.ts +5 -10
  38. package/src/precheck/checks-agents.ts +1 -1
  39. package/src/precheck/checks-git.ts +28 -2
  40. package/src/precheck/checks-warnings.ts +30 -2
  41. package/src/precheck/checks.ts +1 -0
  42. package/src/precheck/index.ts +2 -0
  43. package/src/utils/log-test-output.ts +25 -0
  44. package/src/agents/cost.ts +0 -268
  45. /package/src/agents/{adapters/aider.ts → aider/adapter.ts} +0 -0
  46. /package/src/agents/{adapters/codex.ts → codex/adapter.ts} +0 -0
  47. /package/src/agents/{adapters/gemini.ts → gemini/adapter.ts} +0 -0
  48. /package/src/agents/{adapters/opencode.ts → opencode/adapter.ts} +0 -0
@@ -5,7 +5,7 @@
5
5
  * and checks health status for each configured agent.
6
6
  */
7
7
 
8
- import { getAgentVersions } from "../agents/version-detection";
8
+ import { getAgentVersions } from "../agents/shared/version-detection";
9
9
  import type { Check } from "./types";
10
10
 
11
11
  /**
@@ -32,6 +32,25 @@ export async function checkGitRepoExists(workdir: string): Promise<Check> {
32
32
  };
33
33
  }
34
34
 
35
+ /**
36
+ * nax runtime files that are allowed to be dirty without blocking the precheck.
37
+ * These are written during nax execution and should be gitignored by `nax init`.
38
+ */
39
+ const NAX_RUNTIME_PATTERNS = [
40
+ /^.{2} nax\.lock$/,
41
+ /^.{2} nax\/metrics\.json$/,
42
+ /^.{2} nax\/features\/[^/]+\/status\.json$/,
43
+ /^.{2} nax\/features\/[^/]+\/runs\//,
44
+ /^.{2} nax\/features\/[^/]+\/plan\//,
45
+ /^.{2} nax\/features\/[^/]+\/acp-sessions\.json$/,
46
+ /^.{2} nax\/features\/[^/]+\/interactions\//,
47
+ /^.{2} nax\/features\/[^/]+\/progress\.txt$/,
48
+ /^.{2} nax\/features\/[^/]+\/acceptance-refined\.json$/,
49
+ /^.{2} \.nax-verifier-verdict\.json$/,
50
+ /^.{2} \.nax-pids$/,
51
+ /^.{2} \.nax-wt\//,
52
+ ];
53
+
35
54
  /** Check if working tree is clean. Uses: git status --porcelain */
36
55
  export async function checkWorkingTreeClean(workdir: string): Promise<Check> {
37
56
  const proc = Bun.spawn(["git", "status", "--porcelain"], {
@@ -42,13 +61,20 @@ export async function checkWorkingTreeClean(workdir: string): Promise<Check> {
42
61
 
43
62
  const output = await new Response(proc.stdout).text();
44
63
  const exitCode = await proc.exited;
45
- const passed = exitCode === 0 && output.trim() === "";
64
+
65
+ // Split without trimming the full output — porcelain lines start with status chars
66
+ // including leading spaces (e.g. " M file.ts"). trim() would corrupt the first line.
67
+ const lines = output.trim() === "" ? [] : output.split("\n").filter(Boolean);
68
+ const nonNaxDirtyFiles = lines.filter((line) => !NAX_RUNTIME_PATTERNS.some((pattern) => pattern.test(line)));
69
+ const passed = exitCode === 0 && nonNaxDirtyFiles.length === 0;
46
70
 
47
71
  return {
48
72
  name: "working-tree-clean",
49
73
  tier: "blocker",
50
74
  passed,
51
- message: passed ? "Working tree is clean" : "Uncommitted changes detected",
75
+ message: passed
76
+ ? "Working tree is clean"
77
+ : `Uncommitted changes detected: ${nonNaxDirtyFiles.map((l) => l.slice(3)).join(", ")}`,
52
78
  };
53
79
  }
54
80
 
@@ -6,6 +6,7 @@
6
6
  */
7
7
 
8
8
  import { existsSync } from "node:fs";
9
+ import { isAbsolute } from "node:path";
9
10
  import type { NaxConfig } from "../config";
10
11
  import type { PRD } from "../prd/types";
11
12
  import type { Check } from "./types";
@@ -126,7 +127,7 @@ async function hasPackageScript(workdir: string, name: string): Promise<boolean>
126
127
 
127
128
  /**
128
129
  * Check if .gitignore covers nax runtime files.
129
- * Patterns: nax.lock, runs/, test/tmp/
130
+ * Patterns: nax.lock, runs/, status.json, .nax-pids, .nax-wt/
130
131
  */
131
132
  export async function checkGitignoreCoversNax(workdir: string): Promise<Check> {
132
133
  const gitignorePath = `${workdir}/.gitignore`;
@@ -143,7 +144,14 @@ export async function checkGitignoreCoversNax(workdir: string): Promise<Check> {
143
144
 
144
145
  const file = Bun.file(gitignorePath);
145
146
  const content = await file.text();
146
- const patterns = ["nax.lock", "runs/", "test/tmp/"];
147
+ const patterns = [
148
+ "nax.lock",
149
+ "nax/**/runs/",
150
+ "nax/metrics.json",
151
+ "nax/features/*/status.json",
152
+ ".nax-pids",
153
+ ".nax-wt/",
154
+ ];
147
155
  const missing = patterns.filter((pattern) => !content.includes(pattern));
148
156
  const passed = missing.length === 0;
149
157
 
@@ -191,3 +199,23 @@ export async function checkPromptOverrideFiles(config: NaxConfig, workdir: strin
191
199
 
192
200
  return checks;
193
201
  }
202
+
203
+ /**
204
+ * Check if HOME env is set and is an absolute path.
205
+ * An unexpanded "~" in HOME causes agent spawns to create a literal ~/
206
+ * directory inside the repo cwd instead of resolving to the user home dir.
207
+ */
208
+ export async function checkHomeEnvValid(): Promise<Check> {
209
+ const home = process.env.HOME ?? "";
210
+ const passed = home !== "" && isAbsolute(home);
211
+ return {
212
+ name: "home-env-valid",
213
+ tier: "warning",
214
+ passed,
215
+ message: passed
216
+ ? `HOME env is valid: ${home}`
217
+ : home === ""
218
+ ? "HOME env is not set — agent may write files to unexpected locations"
219
+ : `HOME env is not an absolute path ("${home}") — may cause literal "~" directories in repo`,
220
+ };
221
+ }
@@ -28,6 +28,7 @@ export {
28
28
  checkPendingStories,
29
29
  checkOptionalCommands,
30
30
  checkGitignoreCoversNax,
31
+ checkHomeEnvValid,
31
32
  checkPromptOverrideFiles,
32
33
  } from "./checks-warnings";
33
34
 
@@ -20,6 +20,7 @@ import {
20
20
  checkGitRepoExists,
21
21
  checkGitUserConfigured,
22
22
  checkGitignoreCoversNax,
23
+ checkHomeEnvValid,
23
24
  checkLintCommand,
24
25
  checkMultiAgentHealth,
25
26
  checkOptionalCommands,
@@ -126,6 +127,7 @@ function getEnvironmentWarnings(config: NaxConfig, workdir: string): CheckFn[] {
126
127
  () => checkDiskSpace(),
127
128
  () => checkOptionalCommands(config, workdir),
128
129
  () => checkGitignoreCoversNax(workdir),
130
+ () => checkHomeEnvValid(),
129
131
  () => checkPromptOverrideFiles(config, workdir),
130
132
  () => checkMultiAgentHealth(),
131
133
  ];
@@ -0,0 +1,25 @@
1
+ import type { Logger } from "../logger";
2
+
3
+ /**
4
+ * Log test output consistently across all pipeline stages.
5
+ *
6
+ * Summary (exitCode, storyId) is logged at the caller's level (error/warn).
7
+ * Raw output is logged at debug level only — last `tailLines` lines.
8
+ *
9
+ * `storyId` is optional: works for per-story verify/acceptance AND for
10
+ * deferred runs (deferred acceptance, deferred regression) with no story context.
11
+ */
12
+ export function logTestOutput(
13
+ logger: Logger | null | undefined,
14
+ stage: string,
15
+ output: string | undefined,
16
+ opts: { storyId?: string; tailLines?: number } = {},
17
+ ): void {
18
+ if (!logger || !output) return;
19
+ const tailLines = opts.tailLines ?? 20;
20
+ const lines = output.split("\n").slice(-tailLines).join("\n");
21
+ logger.debug(stage, "Test output (tail)", {
22
+ ...(opts.storyId !== undefined && { storyId: opts.storyId }),
23
+ output: lines,
24
+ });
25
+ }
@@ -1,268 +0,0 @@
1
- /**
2
- * Cost Tracking
3
- *
4
- * Token-based cost estimation for AI coding agents.
5
- * Parses agent output for token usage and calculates costs.
6
- */
7
-
8
- import type { ModelTier } from "../config/schema";
9
-
10
- /** Cost rates per 1M tokens (USD) */
11
- export interface ModelCostRates {
12
- inputPer1M: number;
13
- outputPer1M: number;
14
- }
15
-
16
- /** Token usage data */
17
- export interface TokenUsage {
18
- inputTokens: number;
19
- outputTokens: number;
20
- }
21
-
22
- /** Cost estimate with confidence indicator */
23
- export interface CostEstimate {
24
- cost: number;
25
- confidence: "exact" | "estimated" | "fallback";
26
- }
27
-
28
- /** Model tier cost rates (as of 2025-01) */
29
- export const COST_RATES: Record<ModelTier, ModelCostRates> = {
30
- fast: {
31
- // Haiku 4.5
32
- inputPer1M: 0.8,
33
- outputPer1M: 4.0,
34
- },
35
- balanced: {
36
- // Sonnet 4.5
37
- inputPer1M: 3.0,
38
- outputPer1M: 15.0,
39
- },
40
- powerful: {
41
- // Opus 4
42
- inputPer1M: 15.0,
43
- outputPer1M: 75.0,
44
- },
45
- };
46
-
47
- /**
48
- * Token usage with confidence indicator.
49
- */
50
- export interface TokenUsageWithConfidence {
51
- inputTokens: number;
52
- outputTokens: number;
53
- confidence: "exact" | "estimated";
54
- }
55
-
56
- /**
57
- * Parse Claude Code output for token usage.
58
- *
59
- * Supports multiple formats with varying confidence levels:
60
- * - JSON structured output → "exact" confidence
61
- * - Markdown/plain text patterns → "estimated" confidence
62
- *
63
- * Uses specific regex patterns to reduce false positives.
64
- *
65
- * @param output - Agent stdout + stderr combined
66
- * @returns Token usage with confidence indicator, or null if tokens cannot be parsed
67
- *
68
- * @example
69
- * ```ts
70
- * // JSON format (exact)
71
- * const usage1 = parseTokenUsage('{"usage": {"input_tokens": 1234, "output_tokens": 5678}}');
72
- * // { inputTokens: 1234, outputTokens: 5678, confidence: 'exact' }
73
- *
74
- * // Markdown format (estimated)
75
- * const usage2 = parseTokenUsage('Input tokens: 1234\nOutput tokens: 5678');
76
- * // { inputTokens: 1234, outputTokens: 5678, confidence: 'estimated' }
77
- *
78
- * // Unparseable
79
- * const usage3 = parseTokenUsage('No token data here');
80
- * // null
81
- * ```
82
- */
83
- export function parseTokenUsage(output: string): TokenUsageWithConfidence | null {
84
- // Try JSON format first (most reliable) - confidence: exact
85
- try {
86
- const jsonMatch = output.match(
87
- /\{[^}]*"usage"\s*:\s*\{[^}]*"input_tokens"\s*:\s*(\d+)[^}]*"output_tokens"\s*:\s*(\d+)[^}]*\}[^}]*\}/,
88
- );
89
- if (jsonMatch) {
90
- return {
91
- inputTokens: Number.parseInt(jsonMatch[1], 10),
92
- outputTokens: Number.parseInt(jsonMatch[2], 10),
93
- confidence: "exact",
94
- };
95
- }
96
-
97
- // Try parsing as full JSON object
98
- const lines = output.split("\n");
99
- for (const line of lines) {
100
- if (line.trim().startsWith("{")) {
101
- try {
102
- const parsed = JSON.parse(line);
103
- if (parsed.usage?.input_tokens && parsed.usage?.output_tokens) {
104
- return {
105
- inputTokens: parsed.usage.input_tokens,
106
- outputTokens: parsed.usage.output_tokens,
107
- confidence: "exact",
108
- };
109
- }
110
- } catch {
111
- // Not valid JSON, continue
112
- }
113
- }
114
- }
115
- } catch {
116
- // JSON parsing failed, try regex patterns
117
- }
118
-
119
- // Try specific markdown-style patterns (more specific to reduce false positives)
120
- // Match "Input tokens: 1234" or "input_tokens: 1234" or "INPUT TOKENS: 1234"
121
- // Use word boundary at start, require colon or space after keyword, then digits
122
- // confidence: estimated (regex-based)
123
- const inputMatch = output.match(/\b(?:input|input_tokens)\s*:\s*(\d{2,})|(?:input)\s+(?:tokens?)\s*:\s*(\d{2,})/i);
124
- const outputMatch = output.match(
125
- /\b(?:output|output_tokens)\s*:\s*(\d{2,})|(?:output)\s+(?:tokens?)\s*:\s*(\d{2,})/i,
126
- );
127
-
128
- if (inputMatch && outputMatch) {
129
- // Extract token counts (may be in capture group 1 or 2)
130
- const inputTokens = Number.parseInt(inputMatch[1] || inputMatch[2], 10);
131
- const outputTokens = Number.parseInt(outputMatch[1] || outputMatch[2], 10);
132
-
133
- // Sanity check: reject if tokens seem unreasonably large (> 1M each)
134
- if (inputTokens > 1_000_000 || outputTokens > 1_000_000) {
135
- return null;
136
- }
137
-
138
- return {
139
- inputTokens,
140
- outputTokens,
141
- confidence: "estimated",
142
- };
143
- }
144
-
145
- return null;
146
- }
147
-
148
- /**
149
- * Estimate cost in USD based on token usage.
150
- *
151
- * Calculates total cost using tier-specific rates per 1M tokens.
152
- *
153
- * @param modelTier - Model tier (fast/balanced/powerful)
154
- * @param inputTokens - Number of input tokens consumed
155
- * @param outputTokens - Number of output tokens generated
156
- * @returns Total cost in USD
157
- *
158
- * @example
159
- * ```ts
160
- * const cost = estimateCost("balanced", 10000, 5000);
161
- * // Sonnet 4.5: (10000/1M * $3.00) + (5000/1M * $15.00) = $0.105
162
- * ```
163
- */
164
- export function estimateCost(
165
- modelTier: ModelTier,
166
- inputTokens: number,
167
- outputTokens: number,
168
- customRates?: ModelCostRates,
169
- ): number {
170
- const rates = customRates ?? COST_RATES[modelTier];
171
- const inputCost = (inputTokens / 1_000_000) * rates.inputPer1M;
172
- const outputCost = (outputTokens / 1_000_000) * rates.outputPer1M;
173
- return inputCost + outputCost;
174
- }
175
-
176
- /**
177
- * Estimate cost from agent output by parsing token usage.
178
- *
179
- * Attempts to extract token counts from stdout/stderr, then calculates cost.
180
- * Returns null if tokens cannot be parsed (caller should use fallback estimation).
181
- *
182
- * @param modelTier - Model tier for cost calculation
183
- * @param output - Agent stdout + stderr combined
184
- * @returns Cost estimate with confidence indicator, or null if unparseable
185
- *
186
- * @example
187
- * ```ts
188
- * const estimate = estimateCostFromOutput("balanced", agentOutput);
189
- * if (estimate) {
190
- * console.log(`Cost: $${estimate.cost.toFixed(4)} (${estimate.confidence})`);
191
- * } else {
192
- * // Fall back to duration-based estimation
193
- * }
194
- * ```
195
- */
196
- export function estimateCostFromOutput(modelTier: ModelTier, output: string): CostEstimate | null {
197
- const usage = parseTokenUsage(output);
198
- if (!usage) {
199
- return null;
200
- }
201
- const cost = estimateCost(modelTier, usage.inputTokens, usage.outputTokens);
202
- return {
203
- cost,
204
- confidence: usage.confidence,
205
- };
206
- }
207
-
208
- /**
209
- * Fallback cost estimation based on runtime duration.
210
- *
211
- * Used when token usage cannot be parsed from agent output.
212
- * Provides conservative estimates using per-minute rates.
213
- *
214
- * @param modelTier - Model tier for cost calculation
215
- * @param durationMs - Agent runtime in milliseconds
216
- * @returns Cost estimate with 'fallback' confidence
217
- *
218
- * @example
219
- * ```ts
220
- * const estimate = estimateCostByDuration("balanced", 120000); // 2 minutes
221
- * // { cost: 0.10, confidence: 'fallback' }
222
- * // Sonnet: 2 min * $0.05/min = $0.10
223
- * ```
224
- */
225
- export function estimateCostByDuration(modelTier: ModelTier, durationMs: number): CostEstimate {
226
- const costPerMinute: Record<ModelTier, number> = {
227
- fast: 0.01,
228
- balanced: 0.05,
229
- powerful: 0.15,
230
- };
231
- const minutes = durationMs / 60000;
232
- const cost = minutes * costPerMinute[modelTier];
233
- return {
234
- cost,
235
- confidence: "fallback",
236
- };
237
- }
238
-
239
- /**
240
- * Format cost estimate with confidence indicator for display.
241
- *
242
- * @param estimate - Cost estimate with confidence level
243
- * @returns Formatted cost string with confidence indicator
244
- *
245
- * @example
246
- * ```ts
247
- * formatCostWithConfidence({ cost: 0.12, confidence: 'exact' });
248
- * // "$0.12"
249
- *
250
- * formatCostWithConfidence({ cost: 0.15, confidence: 'estimated' });
251
- * // "~$0.15"
252
- *
253
- * formatCostWithConfidence({ cost: 0.05, confidence: 'fallback' });
254
- * // "~$0.05 (duration-based)"
255
- * ```
256
- */
257
- export function formatCostWithConfidence(estimate: CostEstimate): string {
258
- const formattedCost = `$${estimate.cost.toFixed(2)}`;
259
-
260
- switch (estimate.confidence) {
261
- case "exact":
262
- return formattedCost;
263
- case "estimated":
264
- return `~${formattedCost}`;
265
- case "fallback":
266
- return `~${formattedCost} (duration-based)`;
267
- }
268
- }