@nathapp/nax 0.45.0 → 0.46.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/bin/nax.ts +7 -6
- package/dist/nax.js +340 -202
- package/package.json +1 -1
- package/src/acceptance/generator.ts +1 -1
- package/src/acceptance/types.ts +2 -0
- package/src/agents/acp/adapter.ts +34 -6
- package/src/agents/acp/cost.ts +5 -75
- package/src/agents/acp/index.ts +0 -2
- package/src/agents/acp/parser.ts +57 -104
- package/src/agents/acp/spawn-client.ts +13 -2
- package/src/agents/{claude.ts → claude/adapter.ts} +15 -12
- package/src/agents/{claude-complete.ts → claude/complete.ts} +3 -3
- package/src/agents/claude/cost.ts +16 -0
- package/src/agents/{claude-execution.ts → claude/execution.ts} +17 -6
- package/src/agents/claude/index.ts +3 -0
- package/src/agents/{claude-interactive.ts → claude/interactive.ts} +4 -4
- package/src/agents/{claude-plan.ts → claude/plan.ts} +12 -9
- package/src/agents/cost/calculate.ts +154 -0
- package/src/agents/cost/index.ts +10 -0
- package/src/agents/cost/parse.ts +97 -0
- package/src/agents/cost/pricing.ts +59 -0
- package/src/agents/cost/types.ts +45 -0
- package/src/agents/index.ts +6 -4
- package/src/agents/registry.ts +5 -5
- package/src/agents/{claude-decompose.ts → shared/decompose.ts} +2 -2
- package/src/agents/{model-resolution.ts → shared/model-resolution.ts} +2 -2
- package/src/agents/{types-extended.ts → shared/types-extended.ts} +4 -4
- package/src/agents/{validation.ts → shared/validation.ts} +2 -2
- package/src/agents/{version-detection.ts → shared/version-detection.ts} +3 -3
- package/src/agents/types.ts +11 -4
- package/src/cli/agents.ts +1 -1
- package/src/cli/init.ts +15 -1
- package/src/pipeline/stages/acceptance-setup.ts +1 -0
- package/src/pipeline/stages/acceptance.ts +5 -8
- package/src/pipeline/stages/regression.ts +2 -0
- package/src/pipeline/stages/verify.ts +5 -10
- package/src/precheck/checks-agents.ts +1 -1
- package/src/precheck/checks-git.ts +28 -2
- package/src/precheck/checks-warnings.ts +30 -2
- package/src/precheck/checks.ts +1 -0
- package/src/precheck/index.ts +2 -0
- package/src/utils/log-test-output.ts +25 -0
- package/src/agents/cost.ts +0 -268
- /package/src/agents/{adapters/aider.ts → aider/adapter.ts} +0 -0
- /package/src/agents/{adapters/codex.ts → codex/adapter.ts} +0 -0
- /package/src/agents/{adapters/gemini.ts → gemini/adapter.ts} +0 -0
- /package/src/agents/{adapters/opencode.ts → opencode/adapter.ts} +0 -0
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost calculation functions for all agent adapters.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { ModelTier } from "../../config/schema";
|
|
6
|
+
import { parseTokenUsage } from "./parse";
|
|
7
|
+
import { COST_RATES, MODEL_PRICING } from "./pricing";
|
|
8
|
+
import type { CostEstimate, ModelCostRates, SessionTokenUsage } from "./types";
|
|
9
|
+
|
|
10
|
+
/**
 * Compute the USD cost of a run from its token counts.
 *
 * Rates are expressed per one million tokens. When `customRates` is supplied
 * it is used as-is; otherwise the rates registered for `modelTier` in
 * `COST_RATES` apply.
 *
 * @param modelTier - Tier whose default rates are used when no custom rates are given
 * @param inputTokens - Number of input tokens consumed
 * @param outputTokens - Number of output tokens generated
 * @param customRates - Optional rates that replace the tier defaults
 * @returns Input cost plus output cost, in USD
 *
 * @example
 * ```ts
 * estimateCost("balanced", 1_000_000, 2_000_000, { inputPer1M: 3, outputPer1M: 15 });
 * // (1 * $3.00) + (2 * $15.00) = $33.00
 * ```
 */
export function estimateCost(
  modelTier: ModelTier,
  inputTokens: number,
  outputTokens: number,
  customRates?: ModelCostRates,
): number {
  const tokensPerMillion = 1_000_000;
  const { inputPer1M, outputPer1M } = customRates ?? COST_RATES[modelTier];

  // Scale the counts to "millions of tokens" first, then apply the per-million price.
  const millionsIn = inputTokens / tokensPerMillion;
  const millionsOut = outputTokens / tokensPerMillion;
  return millionsIn * inputPer1M + millionsOut * outputPer1M;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Derive a cost estimate from raw agent output.
 *
 * The output is handed to `parseTokenUsage`; when that yields token counts
 * they are priced with `estimateCost` using the tier's default rates, and the
 * parser's confidence level is carried over unchanged.
 *
 * @param modelTier - Model tier used to price the parsed tokens
 * @param output - Agent stdout + stderr combined
 * @returns The priced estimate, or null when no token usage could be parsed
 */
export function estimateCostFromOutput(modelTier: ModelTier, output: string): CostEstimate | null {
  const parsedUsage = parseTokenUsage(output);

  return parsedUsage
    ? {
        cost: estimateCost(modelTier, parsedUsage.inputTokens, parsedUsage.outputTokens),
        confidence: parsedUsage.confidence,
      }
    : null;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Estimate cost from wall-clock runtime instead of token counts.
 *
 * Intended as the last resort when token usage cannot be parsed from the
 * agent output. Each tier has a flat USD-per-minute rate, and the result is
 * always tagged with 'fallback' confidence.
 *
 * @param modelTier - Model tier selecting the per-minute rate
 * @param durationMs - Agent runtime in milliseconds
 * @returns Cost estimate with 'fallback' confidence
 *
 * @example
 * ```ts
 * estimateCostByDuration("balanced", 120000); // 2 minutes
 * // { cost: 0.10, confidence: 'fallback' }  (2 min * $0.05/min)
 * ```
 */
export function estimateCostByDuration(modelTier: ModelTier, durationMs: number): CostEstimate {
  const msPerMinute = 60000;

  // Flat USD-per-minute rate for each tier.
  const usdPerMinute: Record<ModelTier, number> = { fast: 0.01, balanced: 0.05, powerful: 0.15 };

  return {
    cost: (durationMs / msPerMinute) * usdPerMinute[modelTier],
    confidence: "fallback",
  };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Render a cost estimate as a display string.
 *
 * The amount is always shown with two decimals. Anything that is not an
 * 'exact' figure gets a leading `~`, and duration-based fallbacks are
 * additionally labelled as such.
 *
 * @param estimate - Cost estimate with confidence level
 * @returns Formatted cost string with confidence indicator
 *
 * @example
 * ```ts
 * formatCostWithConfidence({ cost: 0.12, confidence: 'exact' });     // "$0.12"
 * formatCostWithConfidence({ cost: 0.15, confidence: 'estimated' }); // "~$0.15"
 * formatCostWithConfidence({ cost: 0.05, confidence: 'fallback' });  // "~$0.05 (duration-based)"
 * ```
 */
export function formatCostWithConfidence(estimate: CostEstimate): string {
  const { cost, confidence } = estimate;
  const dollars = "$" + cost.toFixed(2);

  switch (confidence) {
    case "exact":
      return dollars;
    case "estimated":
      return "~" + dollars;
    case "fallback":
      return "~" + dollars + " (duration-based)";
  }
}
|
|
120
|
+
|
|
121
|
+
/**
 * Price an ACP session's cumulative token counts in USD.
 *
 * Rates come from `MODEL_PRICING[model]`. When the model has no entry, a fixed
 * default table is used instead ($3 input, $15 output, $0.50 cache read,
 * $2 cache creation — all per 1M tokens). For known models that omit cache
 * rates, cache reads are priced at 10% and cache creation at 33% of the
 * model's input rate. Missing token counts are treated as zero.
 *
 * @param usage - Token counts from cumulative_token_usage
 * @param model - Model identifier (e.g., 'claude-sonnet-4', 'claude-haiku-4-5')
 * @returns Estimated cost in USD
 */
export function estimateCostFromTokenUsage(usage: SessionTokenUsage, model: string): number {
  // $/1M rates applied when the model is not listed in MODEL_PRICING.
  const unknownModelRates = { input: 3, output: 15, cacheRead: 0.5, cacheCreation: 2 };
  const ratesPer1M = MODEL_PRICING[model] || unknownModelRates;

  // Convert a $/1M rate to a $/token rate.
  const perToken = (ratePer1M: number): number => ratePer1M / 1_000_000;

  const inputCost = (usage.input_tokens ?? 0) * perToken(ratesPer1M.input);
  const outputCost = (usage.output_tokens ?? 0) * perToken(ratesPer1M.output);
  const cacheReadCost =
    (usage.cache_read_input_tokens ?? 0) * perToken(ratesPer1M.cacheRead ?? ratesPer1M.input * 0.1);
  const cacheCreationCost =
    (usage.cache_creation_input_tokens ?? 0) * perToken(ratesPer1M.cacheCreation ?? ratesPer1M.input * 0.33);

  return inputCost + outputCost + cacheReadCost + cacheCreationCost;
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export type { ModelCostRates, TokenUsage, CostEstimate, TokenUsageWithConfidence, SessionTokenUsage } from "./types";
|
|
2
|
+
export { COST_RATES, MODEL_PRICING } from "./pricing";
|
|
3
|
+
export { parseTokenUsage } from "./parse";
|
|
4
|
+
export {
|
|
5
|
+
estimateCost,
|
|
6
|
+
estimateCostFromOutput,
|
|
7
|
+
estimateCostByDuration,
|
|
8
|
+
formatCostWithConfidence,
|
|
9
|
+
estimateCostFromTokenUsage,
|
|
10
|
+
} from "./calculate";
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token usage parsing from raw agent output strings.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { TokenUsageWithConfidence } from "./types";
|
|
6
|
+
|
|
7
|
+
/**
 * Check that a value decoded from JSON is usable as a token count:
 * a finite, non-negative number. Zero is a valid count.
 */
function isTokenCount(value: unknown): value is number {
  return typeof value === "number" && Number.isFinite(value) && value >= 0;
}

/**
 * Parse Claude Code output for token usage.
 *
 * Strategies are tried in order; the first one that succeeds wins:
 * 1. An inline `{"usage": {"input_tokens": N, ... "output_tokens": M}}` fragment
 *    anywhere in the output → "exact" confidence.
 * 2. Any line that parses as a JSON object with numeric
 *    `usage.input_tokens` / `usage.output_tokens` (in any key order; a count
 *    of 0 is accepted, non-numeric values are ignored) → "exact" confidence.
 * 3. Markdown/plain text patterns such as "Input tokens: 1234" or
 *    "input_tokens: 1234" (case-insensitive, at least two digits)
 *    → "estimated" confidence. Counts above 1M each are rejected as implausible.
 *
 * @param output - Agent stdout + stderr combined
 * @returns Token usage with confidence indicator, or null if tokens cannot be parsed
 *
 * @example
 * ```ts
 * // JSON format (exact)
 * parseTokenUsage('{"usage": {"input_tokens": 1234, "output_tokens": 5678}}');
 * // { inputTokens: 1234, outputTokens: 5678, confidence: 'exact' }
 *
 * // Markdown format (estimated)
 * parseTokenUsage('Input tokens: 1234\nOutput tokens: 5678');
 * // { inputTokens: 1234, outputTokens: 5678, confidence: 'estimated' }
 *
 * // Unparseable
 * parseTokenUsage('No token data here');
 * // null
 * ```
 */
export function parseTokenUsage(output: string): TokenUsageWithConfidence | null {
  // Strategy 1: inline JSON fragment (input_tokens must precede output_tokens).
  const inlineUsage = output.match(
    /\{[^}]*"usage"\s*:\s*\{[^}]*"input_tokens"\s*:\s*(\d+)[^}]*"output_tokens"\s*:\s*(\d+)[^}]*\}[^}]*\}/,
  );
  if (inlineUsage) {
    return {
      inputTokens: Number.parseInt(inlineUsage[1], 10),
      outputTokens: Number.parseInt(inlineUsage[2], 10),
      confidence: "exact",
    };
  }

  // Strategy 2: whole-line JSON objects (handles any key order).
  for (const line of output.split("\n")) {
    if (!line.trim().startsWith("{")) {
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Not valid JSON, try the next line
      continue;
    }

    const usage = (parsed as { usage?: { input_tokens?: unknown; output_tokens?: unknown } | null }).usage;
    // Validate by type rather than truthiness so that a count of 0 is not discarded.
    if (usage && isTokenCount(usage.input_tokens) && isTokenCount(usage.output_tokens)) {
      return {
        inputTokens: usage.input_tokens,
        outputTokens: usage.output_tokens,
        confidence: "exact",
      };
    }
  }

  // Strategy 3: markdown-style patterns (kept specific to reduce false positives).
  // Matches "input: 1234", "input_tokens: 1234" or "INPUT TOKENS: 1234".
  const inputMatch = output.match(/\b(?:input|input_tokens)\s*:\s*(\d{2,})|(?:input)\s+(?:tokens?)\s*:\s*(\d{2,})/i);
  const outputMatch = output.match(
    /\b(?:output|output_tokens)\s*:\s*(\d{2,})|(?:output)\s+(?:tokens?)\s*:\s*(\d{2,})/i,
  );

  if (inputMatch && outputMatch) {
    // The count is in capture group 1 or 2 depending on which alternative matched.
    const inputTokens = Number.parseInt(inputMatch[1] ?? inputMatch[2], 10);
    const outputTokens = Number.parseInt(outputMatch[1] ?? outputMatch[2], 10);

    // Sanity check: reject if tokens seem unreasonably large (> 1M each)
    if (inputTokens > 1_000_000 || outputTokens > 1_000_000) {
      return null;
    }

    return {
      inputTokens,
      outputTokens,
      confidence: "estimated",
    };
  }

  return null;
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost rate tables for all supported model tiers and specific models.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { ModelTier } from "../../config/schema";
|
|
6
|
+
import type { ModelCostRates } from "./types";
|
|
7
|
+
|
|
8
|
+
/** Default USD rates per 1M tokens for each model tier (as of 2025-01). */
export const COST_RATES: Record<ModelTier, ModelCostRates> = {
  // Haiku 4.5
  fast: { inputPer1M: 0.8, outputPer1M: 4.0 },
  // Sonnet 4.5
  balanced: { inputPer1M: 3.0, outputPer1M: 15.0 },
  // Opus 4
  powerful: { inputPer1M: 15.0, outputPer1M: 75.0 },
};
|
|
26
|
+
|
|
27
|
+
/**
 * Per-model pricing in $/1M tokens, keyed by model identifier.
 *
 * `input` and `output` are always present; `cacheRead` and `cacheCreation`
 * are only listed for models that define explicit cache rates.
 */
export const MODEL_PRICING: Record<
  string,
  { input: number; output: number; cacheRead?: number; cacheCreation?: number }
> = (() => {
  // Each factory returns a fresh object so no two entries share a reference.
  const sonnet = () => ({ input: 3, output: 15 });
  const haiku = () => ({ input: 0.8, output: 4.0, cacheRead: 0.1, cacheCreation: 1.0 });
  const opus = () => ({ input: 15, output: 75 });
  const gemini = () => ({ input: 0.075, output: 0.3 });
  const codex = () => ({ input: 0.02, output: 0.06 });

  return {
    // Anthropic Claude models (short aliases)
    sonnet: sonnet(),
    haiku: haiku(),
    opus: opus(),

    // Anthropic Claude models (full names)
    "claude-sonnet-4": sonnet(),
    "claude-sonnet-4-5": sonnet(),
    "claude-sonnet-4-6": sonnet(),
    "claude-haiku": haiku(),
    "claude-haiku-4-5": haiku(),
    "claude-opus": opus(),
    "claude-opus-4": opus(),
    "claude-opus-4-6": opus(),

    // OpenAI models
    "gpt-4.1": { input: 10, output: 30 },
    "gpt-4": { input: 30, output: 60 },
    "gpt-3.5-turbo": { input: 0.5, output: 1.5 },

    // Google Gemini
    "gemini-2.5-pro": gemini(),
    "gemini-2-pro": gemini(),

    // OpenAI Codex
    codex: codex(),
    "code-davinci-002": codex(),
  };
})();
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost tracking types — shared across all agent adapters.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { ModelTier } from "../../config/schema";
|
|
6
|
+
|
|
7
|
+
export type { ModelTier };
|
|
8
|
+
|
|
9
|
+
/** USD price per one million tokens, split by direction. */
export interface ModelCostRates {
  /** Price of 1M input tokens (USD) */
  inputPer1M: number;
  /** Price of 1M output tokens (USD) */
  outputPer1M: number;
}

/** Input/output token counts in nax's internal camelCase form. */
export interface TokenUsage {
  inputTokens: number;
  outputTokens: number;
}

/** A USD cost together with a label describing how it was obtained. */
export interface CostEstimate {
  /** Cost in USD */
  cost: number;
  /** How the cost was derived */
  confidence: "exact" | "estimated" | "fallback";
}

/** Token counts plus the confidence level attached when they were parsed. */
export interface TokenUsageWithConfidence extends TokenUsage {
  confidence: "exact" | "estimated";
}

/**
 * Token counts reported in an ACP session's cumulative_token_usage field.
 * Field names stay in snake_case to mirror the ACP wire format.
 */
export interface SessionTokenUsage {
  input_tokens: number;
  output_tokens: number;
  /** Cache read tokens — billed at a reduced rate */
  cache_read_input_tokens?: number;
  /** Cache creation tokens — billed at a higher creation rate */
  cache_creation_input_tokens?: number;
}
|
package/src/agents/index.ts
CHANGED
|
@@ -2,15 +2,17 @@ export type { AgentAdapter, AgentCapabilities, AgentResult, AgentRunOptions, Com
|
|
|
2
2
|
export { CompleteError } from "./types";
|
|
3
3
|
export { ClaudeCodeAdapter } from "./claude";
|
|
4
4
|
export { getAllAgentNames, getAgent, getInstalledAgents, checkAgentHealth } from "./registry";
|
|
5
|
-
export type { ModelCostRates, TokenUsage, CostEstimate, TokenUsageWithConfidence } from "./cost";
|
|
5
|
+
export type { ModelCostRates, TokenUsage, CostEstimate, TokenUsageWithConfidence, SessionTokenUsage } from "./cost";
|
|
6
6
|
export {
|
|
7
7
|
COST_RATES,
|
|
8
|
+
MODEL_PRICING,
|
|
8
9
|
parseTokenUsage,
|
|
9
10
|
estimateCost,
|
|
10
11
|
estimateCostFromOutput,
|
|
11
12
|
estimateCostByDuration,
|
|
12
13
|
formatCostWithConfidence,
|
|
14
|
+
estimateCostFromTokenUsage,
|
|
13
15
|
} from "./cost";
|
|
14
|
-
export { validateAgentForTier, validateAgentFeature, describeAgentCapabilities } from "./validation";
|
|
15
|
-
export type { AgentVersionInfo } from "./version-detection";
|
|
16
|
-
export { getAgentVersion, getAgentVersions } from "./version-detection";
|
|
16
|
+
export { validateAgentForTier, validateAgentFeature, describeAgentCapabilities } from "./shared/validation";
|
|
17
|
+
export type { AgentVersionInfo } from "./shared/version-detection";
|
|
18
|
+
export { getAgentVersion, getAgentVersions } from "./shared/version-detection";
|
package/src/agents/registry.ts
CHANGED
|
@@ -7,11 +7,11 @@
|
|
|
7
7
|
import type { NaxConfig } from "../config/schema";
|
|
8
8
|
import { getLogger } from "../logger";
|
|
9
9
|
import { AcpAgentAdapter } from "./acp/adapter";
|
|
10
|
-
import { AiderAdapter } from "./
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
10
|
+
import { AiderAdapter } from "./aider/adapter";
|
|
11
|
+
import { ClaudeCodeAdapter } from "./claude/adapter";
|
|
12
|
+
import { CodexAdapter } from "./codex/adapter";
|
|
13
|
+
import { GeminiAdapter } from "./gemini/adapter";
|
|
14
|
+
import { OpenCodeAdapter } from "./opencode/adapter";
|
|
15
15
|
import type { AgentAdapter } from "./types";
|
|
16
16
|
|
|
17
17
|
/** All known agent adapters */
|
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
* parseDecomposeOutput(), validateComplexity()
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import { COMPLEXITY_GUIDE, GROUPING_RULES, TEST_STRATEGY_GUIDE, resolveTestStrategy } from "
|
|
9
|
-
import type { DecomposeOptions, DecomposeResult, DecomposedStory } from "
|
|
8
|
+
import { COMPLEXITY_GUIDE, GROUPING_RULES, TEST_STRATEGY_GUIDE, resolveTestStrategy } from "../../config/test-strategy";
|
|
9
|
+
import type { DecomposeOptions, DecomposeResult, DecomposedStory } from "../types";
|
|
10
10
|
|
|
11
11
|
/**
|
|
12
12
|
* Build the decompose prompt combining spec content and codebase context.
|
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
* Implementation placeholder — logic to be filled in by the implementer.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
import { resolveModel } from "
|
|
11
|
-
import type { ModelDef, NaxConfig } from "
|
|
10
|
+
import { resolveModel } from "../../config/schema";
|
|
11
|
+
import type { ModelDef, NaxConfig } from "../../config/schema";
|
|
12
12
|
|
|
13
13
|
/**
|
|
14
14
|
* Resolve the balanced model definition from config, with optional adapter default fallback.
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Separated from core types to keep each file under 400 lines.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import type { ModelDef, ModelTier, NaxConfig } from "
|
|
8
|
+
import type { ModelDef, ModelTier, NaxConfig } from "../../config/schema";
|
|
9
9
|
|
|
10
10
|
/**
|
|
11
11
|
* Configuration options for running an agent in plan mode.
|
|
@@ -56,7 +56,7 @@ export interface PlanOptions {
|
|
|
56
56
|
*/
|
|
57
57
|
onAcpSessionCreated?: (sessionName: string) => Promise<void> | void;
|
|
58
58
|
/** PID registry for tracking spawned agent processes — cleanup on crash/SIGTERM */
|
|
59
|
-
pidRegistry?: import("
|
|
59
|
+
pidRegistry?: import("../../execution/pid-registry").PidRegistry;
|
|
60
60
|
}
|
|
61
61
|
|
|
62
62
|
/**
|
|
@@ -117,7 +117,7 @@ export interface DecomposedStory {
|
|
|
117
117
|
/** Implementation risks */
|
|
118
118
|
risks: string[];
|
|
119
119
|
/** Test strategy recommendation from LLM */
|
|
120
|
-
testStrategy?: import("
|
|
120
|
+
testStrategy?: import("../../config/test-strategy").TestStrategy;
|
|
121
121
|
}
|
|
122
122
|
|
|
123
123
|
/**
|
|
@@ -161,4 +161,4 @@ export interface InteractiveRunOptions extends AgentRunOptions {
|
|
|
161
161
|
}
|
|
162
162
|
|
|
163
163
|
// Re-import for the extends clause
|
|
164
|
-
import type { AgentRunOptions } from "
|
|
164
|
+
import type { AgentRunOptions } from "../types";
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
* Runtime validation for agent capabilities and tier compatibility.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import type { ModelTier } from "
|
|
8
|
-
import type { AgentAdapter } from "
|
|
7
|
+
import type { ModelTier } from "../../config/schema";
|
|
8
|
+
import type { AgentAdapter } from "../types";
|
|
9
9
|
|
|
10
10
|
/**
|
|
11
11
|
* Check if an agent supports a given model tier.
|
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
* by running `<agent> --version` and parsing the output.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import { getInstalledAgents } from "
|
|
9
|
-
import type { AgentAdapter } from "
|
|
8
|
+
import { getInstalledAgents } from "../registry";
|
|
9
|
+
import type { AgentAdapter } from "../types";
|
|
10
10
|
|
|
11
11
|
/**
|
|
12
12
|
* Information about an installed agent including its version
|
|
@@ -90,7 +90,7 @@ export async function getAgentVersions(): Promise<AgentVersionInfo[]> {
|
|
|
90
90
|
const agentsByName = new Map(agents.map((a) => [a.name, a]));
|
|
91
91
|
|
|
92
92
|
// Import ALL_AGENTS to include non-installed ones
|
|
93
|
-
const { ALL_AGENTS } = await import("
|
|
93
|
+
const { ALL_AGENTS } = await import("../registry");
|
|
94
94
|
|
|
95
95
|
const versions = await Promise.all(
|
|
96
96
|
ALL_AGENTS.map(async (agent: AgentAdapter): Promise<AgentVersionInfo> => {
|
package/src/agents/types.ts
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
import type { NaxConfig } from "../config";
|
|
10
10
|
import type { ModelDef, ModelTier } from "../config/schema";
|
|
11
|
+
import type { TokenUsage } from "./cost";
|
|
11
12
|
|
|
12
13
|
// Re-export extended types for backward compatibility
|
|
13
14
|
export type {
|
|
@@ -18,7 +19,7 @@ export type {
|
|
|
18
19
|
DecomposedStory,
|
|
19
20
|
PtyHandle,
|
|
20
21
|
InteractiveRunOptions,
|
|
21
|
-
} from "./types-extended";
|
|
22
|
+
} from "./shared/types-extended";
|
|
22
23
|
|
|
23
24
|
/**
|
|
24
25
|
* Agent execution result returned after running a coding agent.
|
|
@@ -38,6 +39,8 @@ export interface AgentResult {
|
|
|
38
39
|
durationMs: number;
|
|
39
40
|
/** Estimated cost for this run (USD) */
|
|
40
41
|
estimatedCost: number;
|
|
42
|
+
/** Token usage for this run (when available) */
|
|
43
|
+
tokenUsage?: TokenUsage;
|
|
41
44
|
/** Process ID of the spawned agent (for cleanup on failure) */
|
|
42
45
|
pid?: number;
|
|
43
46
|
}
|
|
@@ -165,10 +168,12 @@ export interface AgentAdapter {
|
|
|
165
168
|
buildCommand(options: AgentRunOptions): string[];
|
|
166
169
|
|
|
167
170
|
/** Run the agent in plan mode to generate a feature specification. */
|
|
168
|
-
plan(options: import("./types-extended").PlanOptions): Promise<import("./types-extended").PlanResult>;
|
|
171
|
+
plan(options: import("./shared/types-extended").PlanOptions): Promise<import("./shared/types-extended").PlanResult>;
|
|
169
172
|
|
|
170
173
|
/** Run the agent in decompose mode to break spec into classified stories. */
|
|
171
|
-
decompose(
|
|
174
|
+
decompose(
|
|
175
|
+
options: import("./shared/types-extended").DecomposeOptions,
|
|
176
|
+
): Promise<import("./shared/types-extended").DecomposeResult>;
|
|
172
177
|
|
|
173
178
|
/**
|
|
174
179
|
* Run a one-shot LLM call and return the plain text response.
|
|
@@ -181,5 +186,7 @@ export interface AgentAdapter {
|
|
|
181
186
|
* This method is optional — only implemented by agents that support
|
|
182
187
|
* interactive terminal sessions (e.g., Claude Code).
|
|
183
188
|
*/
|
|
184
|
-
runInteractive?(
|
|
189
|
+
runInteractive?(
|
|
190
|
+
options: import("./shared/types-extended").InteractiveRunOptions,
|
|
191
|
+
): import("./shared/types-extended").PtyHandle;
|
|
185
192
|
}
|
package/src/cli/agents.ts
CHANGED
package/src/cli/init.ts
CHANGED
|
@@ -33,7 +33,21 @@ export interface InitProjectOptions {
|
|
|
33
33
|
/**
|
|
34
34
|
* Gitignore entries added by nax init
|
|
35
35
|
*/
|
|
36
|
-
const NAX_GITIGNORE_ENTRIES = [
|
|
36
|
+
const NAX_GITIGNORE_ENTRIES = [
|
|
37
|
+
".nax-verifier-verdict.json",
|
|
38
|
+
"nax.lock",
|
|
39
|
+
"nax/**/runs/",
|
|
40
|
+
"nax/metrics.json",
|
|
41
|
+
"nax/features/*/status.json",
|
|
42
|
+
"nax/features/*/plan/",
|
|
43
|
+
"nax/features/*/acp-sessions.json",
|
|
44
|
+
"nax/features/*/interactions/",
|
|
45
|
+
"nax/features/*/progress.txt",
|
|
46
|
+
"nax/features/*/acceptance-refined.json",
|
|
47
|
+
".nax-pids",
|
|
48
|
+
".nax-wt/",
|
|
49
|
+
"~/",
|
|
50
|
+
];
|
|
37
51
|
|
|
38
52
|
/**
|
|
39
53
|
* Add nax-specific entries to .gitignore if not already present.
|
|
@@ -106,6 +106,7 @@ export const acceptanceSetupStage: PipelineStage = {
|
|
|
106
106
|
const result = await _acceptanceSetupDeps.generate(ctx.prd.userStories, refinedCriteria, {
|
|
107
107
|
featureName: ctx.prd.feature,
|
|
108
108
|
workdir: ctx.workdir,
|
|
109
|
+
featureDir: ctx.featureDir,
|
|
109
110
|
codebaseContext: "",
|
|
110
111
|
modelTier: ctx.config.acceptance.model ?? "fast",
|
|
111
112
|
modelDef: resolveModel(ctx.config.models[ctx.config.acceptance.model ?? "fast"]),
|
|
@@ -27,6 +27,7 @@
|
|
|
27
27
|
import path from "node:path";
|
|
28
28
|
import { getLogger } from "../../logger";
|
|
29
29
|
import { countStories } from "../../prd";
|
|
30
|
+
import { logTestOutput } from "../../utils/log-test-output";
|
|
30
31
|
import type { PipelineContext, PipelineStage, StageResult } from "../types";
|
|
31
32
|
|
|
32
33
|
/**
|
|
@@ -163,10 +164,8 @@ export const acceptanceStage: PipelineStage = {
|
|
|
163
164
|
|
|
164
165
|
// Non-zero exit but no AC failures parsed at all — test crashed (syntax error, import failure, etc.)
|
|
165
166
|
if (failedACs.length === 0 && exitCode !== 0) {
|
|
166
|
-
logger.error("acceptance", "Tests errored with no AC failures parsed", {
|
|
167
|
-
|
|
168
|
-
output,
|
|
169
|
-
});
|
|
167
|
+
logger.error("acceptance", "Tests errored with no AC failures parsed", { exitCode });
|
|
168
|
+
logTestOutput(logger, "acceptance", output);
|
|
170
169
|
|
|
171
170
|
ctx.acceptanceFailures = {
|
|
172
171
|
failedACs: ["AC-ERROR"],
|
|
@@ -190,10 +189,8 @@ export const acceptanceStage: PipelineStage = {
|
|
|
190
189
|
});
|
|
191
190
|
}
|
|
192
191
|
|
|
193
|
-
logger.error("acceptance", "Acceptance tests failed", {
|
|
194
|
-
|
|
195
|
-
output,
|
|
196
|
-
});
|
|
192
|
+
logger.error("acceptance", "Acceptance tests failed", { failedACs: actualFailures });
|
|
193
|
+
logTestOutput(logger, "acceptance", output);
|
|
197
194
|
|
|
198
195
|
// Store failed ACs and test output in context for fix generation
|
|
199
196
|
ctx.acceptanceFailures = {
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
*/
|
|
15
15
|
|
|
16
16
|
import { getLogger } from "../../logger";
|
|
17
|
+
import { logTestOutput } from "../../utils/log-test-output";
|
|
17
18
|
import { verificationOrchestrator } from "../../verification/orchestrator";
|
|
18
19
|
import type { VerifyContext } from "../../verification/orchestrator-types";
|
|
19
20
|
import { pipelineEventBus } from "../event-bus";
|
|
@@ -71,6 +72,7 @@ export const regressionStage: PipelineStage = {
|
|
|
71
72
|
storyId: ctx.story.id,
|
|
72
73
|
failCount: result.failCount,
|
|
73
74
|
});
|
|
75
|
+
logTestOutput(logger, "regression", result.rawOutput, { storyId: ctx.story.id });
|
|
74
76
|
|
|
75
77
|
pipelineEventBus.emit({
|
|
76
78
|
type: "regression:detected",
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
import type { SmartTestRunnerConfig } from "../../config/types";
|
|
13
13
|
import { getLogger } from "../../logger";
|
|
14
|
+
import { logTestOutput } from "../../utils/log-test-output";
|
|
14
15
|
import { detectRuntimeCrash } from "../../verification/crash-detector";
|
|
15
16
|
import type { VerifyStatus } from "../../verification/orchestrator-types";
|
|
16
17
|
import { regression } from "../../verification/runners";
|
|
@@ -173,16 +174,10 @@ export const verifyStage: PipelineStage = {
|
|
|
173
174
|
});
|
|
174
175
|
}
|
|
175
176
|
|
|
176
|
-
// Log
|
|
177
|
-
// BUG-037:
|
|
178
|
-
if (result.
|
|
179
|
-
|
|
180
|
-
if (outputLines.length > 0) {
|
|
181
|
-
logger.debug("verify", "Test output preview", {
|
|
182
|
-
storyId: ctx.story.id,
|
|
183
|
-
output: outputLines.join("\n"),
|
|
184
|
-
});
|
|
185
|
-
}
|
|
177
|
+
// Log tail of output at debug level for context (ENH-001)
|
|
178
|
+
// BUG-037: Use .slice(-20) to show failures, not prechecks
|
|
179
|
+
if (result.status !== "TIMEOUT") {
|
|
180
|
+
logTestOutput(logger, "verify", result.output, { storyId: ctx.story.id });
|
|
186
181
|
}
|
|
187
182
|
|
|
188
183
|
return {
|