@nathapp/nax 0.46.0 → 0.46.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/dist/nax.js +343 -188
- package/package.json +1 -1
- package/src/acceptance/generator.ts +1 -1
- package/src/acceptance/types.ts +2 -0
- package/src/agents/acp/cost.ts +5 -75
- package/src/agents/acp/spawn-client.ts +11 -1
- package/src/agents/claude/cost.ts +12 -264
- package/src/agents/claude/execution.ts +12 -1
- package/src/agents/cost/calculate.ts +154 -0
- package/src/agents/cost/index.ts +10 -0
- package/src/agents/cost/parse.ts +97 -0
- package/src/agents/cost/pricing.ts +59 -0
- package/src/agents/cost/types.ts +45 -0
- package/src/agents/index.ts +4 -2
- package/src/agents/types.ts +3 -0
- package/src/cli/init.ts +15 -1
- package/src/pipeline/stages/acceptance-setup.ts +1 -0
- package/src/pipeline/stages/autofix.ts +112 -25
- package/src/precheck/checks-git.ts +28 -2
- package/src/precheck/checks-warnings.ts +30 -2
- package/src/precheck/checks.ts +1 -0
- package/src/precheck/index.ts +2 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token usage parsing from raw agent output strings.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { TokenUsageWithConfidence } from "./types";
|
|
6
|
+
|
|
7
|
+
/**
 * Parse Claude Code output for token usage.
 *
 * Three strategies are tried in order; the first one that succeeds wins:
 * 1. An inline JSON fragment `{"usage": {"input_tokens": N, "output_tokens": M}}`
 *    found anywhere in the output → "exact" confidence.
 * 2. Any line that is a complete JSON object carrying numeric
 *    `usage.input_tokens` / `usage.output_tokens` (in any key order) → "exact" confidence.
 * 3. Markdown / plain-text patterns such as "Input tokens: 1234" → "estimated" confidence.
 *
 * @param output - Agent stdout + stderr combined
 * @returns Token usage with confidence indicator, or null if tokens cannot be parsed
 *
 * @example
 * ```ts
 * // JSON format (exact)
 * const usage1 = parseTokenUsage('{"usage": {"input_tokens": 1234, "output_tokens": 5678}}');
 * // { inputTokens: 1234, outputTokens: 5678, confidence: 'exact' }
 *
 * // Markdown format (estimated)
 * const usage2 = parseTokenUsage('Input tokens: 1234\nOutput tokens: 5678');
 * // { inputTokens: 1234, outputTokens: 5678, confidence: 'estimated' }
 *
 * // Unparseable
 * const usage3 = parseTokenUsage('No token data here');
 * // null
 * ```
 */
export function parseTokenUsage(output: string): TokenUsageWithConfidence | null {
  // A usable token count is a finite, non-negative number. Zero is valid
  // (the inline-regex strategy below accepts "0" as well).
  const isTokenCount = (value: unknown): value is number =>
    typeof value === "number" && Number.isFinite(value) && value >= 0;

  // Strategy 1: inline JSON fragment (most reliable) - confidence: exact
  const jsonMatch = output.match(
    /\{[^}]*"usage"\s*:\s*\{[^}]*"input_tokens"\s*:\s*(\d+)[^}]*"output_tokens"\s*:\s*(\d+)[^}]*\}[^}]*\}/,
  );
  if (jsonMatch) {
    return {
      inputTokens: Number.parseInt(jsonMatch[1], 10),
      outputTokens: Number.parseInt(jsonMatch[2], 10),
      confidence: "exact",
    };
  }

  // Strategy 2: a whole line that is a JSON object - confidence: exact
  for (const line of output.split("\n")) {
    if (!line.trim().startsWith("{")) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Not valid JSON, try the next line
      continue;
    }

    const usage = (parsed as { usage?: unknown } | null)?.usage;
    if (typeof usage !== "object" || usage === null) continue;

    // Validate the values instead of testing truthiness: a count of 0 must be
    // accepted, and non-numeric values (e.g. "5") must not leak out typed as numbers.
    const { input_tokens: inputTokens, output_tokens: outputTokens } = usage as Record<string, unknown>;
    if (isTokenCount(inputTokens) && isTokenCount(outputTokens)) {
      return { inputTokens, outputTokens, confidence: "exact" };
    }
  }

  // Strategy 3: markdown-style patterns - confidence: estimated (regex-based)
  // Matches "input: 1234", "input_tokens: 1234", "Input tokens: 1234", "INPUT TOKEN: 1234".
  // The keyword must start at a word boundary and the count needs at least two
  // digits, which keeps false positives down.
  const inputMatch = output.match(/\binput(?:_tokens|\s+tokens?)?\s*:\s*(\d{2,})/i);
  const outputMatch = output.match(/\boutput(?:_tokens|\s+tokens?)?\s*:\s*(\d{2,})/i);

  if (inputMatch && outputMatch) {
    const inputTokens = Number.parseInt(inputMatch[1], 10);
    const outputTokens = Number.parseInt(outputMatch[1], 10);

    // Sanity check: reject if tokens seem unreasonably large (> 1M each)
    if (inputTokens > 1_000_000 || outputTokens > 1_000_000) {
      return null;
    }

    return {
      inputTokens,
      outputTokens,
      confidence: "estimated",
    };
  }

  return null;
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost rate tables for all supported model tiers and specific models.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { ModelTier } from "../../config/schema";
|
|
6
|
+
import type { ModelCostRates } from "./types";
|
|
7
|
+
|
|
8
|
+
/** Model tier cost rates (as of 2025-01) */
|
|
9
|
+
export const COST_RATES: Record<ModelTier, ModelCostRates> = {
|
|
10
|
+
fast: {
|
|
11
|
+
// Haiku 4.5
|
|
12
|
+
inputPer1M: 0.8,
|
|
13
|
+
outputPer1M: 4.0,
|
|
14
|
+
},
|
|
15
|
+
balanced: {
|
|
16
|
+
// Sonnet 4.5
|
|
17
|
+
inputPer1M: 3.0,
|
|
18
|
+
outputPer1M: 15.0,
|
|
19
|
+
},
|
|
20
|
+
powerful: {
|
|
21
|
+
// Opus 4
|
|
22
|
+
inputPer1M: 15.0,
|
|
23
|
+
outputPer1M: 75.0,
|
|
24
|
+
},
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
/** Per-model pricing in USD per 1M tokens: { input, output }, plus optional cacheRead / cacheCreation rates */
|
|
28
|
+
export const MODEL_PRICING: Record<
|
|
29
|
+
string,
|
|
30
|
+
{ input: number; output: number; cacheRead?: number; cacheCreation?: number }
|
|
31
|
+
> = {
|
|
32
|
+
// Anthropic Claude models (short aliases)
|
|
33
|
+
sonnet: { input: 3, output: 15 },
|
|
34
|
+
haiku: { input: 0.8, output: 4.0, cacheRead: 0.1, cacheCreation: 1.0 },
|
|
35
|
+
opus: { input: 15, output: 75 },
|
|
36
|
+
|
|
37
|
+
// Anthropic Claude models (full names)
|
|
38
|
+
"claude-sonnet-4": { input: 3, output: 15 },
|
|
39
|
+
"claude-sonnet-4-5": { input: 3, output: 15 },
|
|
40
|
+
"claude-sonnet-4-6": { input: 3, output: 15 },
|
|
41
|
+
"claude-haiku": { input: 0.8, output: 4.0, cacheRead: 0.1, cacheCreation: 1.0 },
|
|
42
|
+
"claude-haiku-4-5": { input: 0.8, output: 4.0, cacheRead: 0.1, cacheCreation: 1.0 },
|
|
43
|
+
"claude-opus": { input: 15, output: 75 },
|
|
44
|
+
"claude-opus-4": { input: 15, output: 75 },
|
|
45
|
+
"claude-opus-4-6": { input: 15, output: 75 },
|
|
46
|
+
|
|
47
|
+
// OpenAI models
|
|
48
|
+
"gpt-4.1": { input: 10, output: 30 },
|
|
49
|
+
"gpt-4": { input: 30, output: 60 },
|
|
50
|
+
"gpt-3.5-turbo": { input: 0.5, output: 1.5 },
|
|
51
|
+
|
|
52
|
+
// Google Gemini
|
|
53
|
+
"gemini-2.5-pro": { input: 0.075, output: 0.3 },
|
|
54
|
+
"gemini-2-pro": { input: 0.075, output: 0.3 },
|
|
55
|
+
|
|
56
|
+
// OpenAI Codex
|
|
57
|
+
codex: { input: 0.02, output: 0.06 },
|
|
58
|
+
"code-davinci-002": { input: 0.02, output: 0.06 },
|
|
59
|
+
};
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost tracking types — shared across all agent adapters.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { ModelTier } from "../../config/schema";
|
|
6
|
+
|
|
7
|
+
export type { ModelTier };
|
|
8
|
+
|
|
9
|
+
/** Cost rates per 1M tokens (USD) */
|
|
10
|
+
export interface ModelCostRates {
|
|
11
|
+
inputPer1M: number;
|
|
12
|
+
outputPer1M: number;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/** Token usage data (camelCase — nax-internal representation) */
|
|
16
|
+
export interface TokenUsage {
|
|
17
|
+
inputTokens: number;
|
|
18
|
+
outputTokens: number;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
/** Cost estimate with confidence indicator */
|
|
22
|
+
export interface CostEstimate {
|
|
23
|
+
cost: number;
|
|
24
|
+
confidence: "exact" | "estimated" | "fallback";
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/** Token usage with confidence indicator */
|
|
28
|
+
export interface TokenUsageWithConfidence {
|
|
29
|
+
inputTokens: number;
|
|
30
|
+
outputTokens: number;
|
|
31
|
+
confidence: "exact" | "estimated";
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Token usage from an ACP session's cumulative_token_usage field.
|
|
36
|
+
* Uses snake_case to match the ACP wire format.
|
|
37
|
+
*/
|
|
38
|
+
export interface SessionTokenUsage {
|
|
39
|
+
input_tokens: number;
|
|
40
|
+
output_tokens: number;
|
|
41
|
+
/** Cache read tokens — billed at a reduced rate */
|
|
42
|
+
cache_read_input_tokens?: number;
|
|
43
|
+
/** Cache creation tokens — billed at a higher creation rate */
|
|
44
|
+
cache_creation_input_tokens?: number;
|
|
45
|
+
}
|
package/src/agents/index.ts
CHANGED
|
@@ -2,15 +2,17 @@ export type { AgentAdapter, AgentCapabilities, AgentResult, AgentRunOptions, Com
|
|
|
2
2
|
export { CompleteError } from "./types";
|
|
3
3
|
export { ClaudeCodeAdapter } from "./claude";
|
|
4
4
|
export { getAllAgentNames, getAgent, getInstalledAgents, checkAgentHealth } from "./registry";
|
|
5
|
-
export type { ModelCostRates, TokenUsage, CostEstimate, TokenUsageWithConfidence } from "./
|
|
5
|
+
export type { ModelCostRates, TokenUsage, CostEstimate, TokenUsageWithConfidence, SessionTokenUsage } from "./cost";
|
|
6
6
|
export {
|
|
7
7
|
COST_RATES,
|
|
8
|
+
MODEL_PRICING,
|
|
8
9
|
parseTokenUsage,
|
|
9
10
|
estimateCost,
|
|
10
11
|
estimateCostFromOutput,
|
|
11
12
|
estimateCostByDuration,
|
|
12
13
|
formatCostWithConfidence,
|
|
13
|
-
|
|
14
|
+
estimateCostFromTokenUsage,
|
|
15
|
+
} from "./cost";
|
|
14
16
|
export { validateAgentForTier, validateAgentFeature, describeAgentCapabilities } from "./shared/validation";
|
|
15
17
|
export type { AgentVersionInfo } from "./shared/version-detection";
|
|
16
18
|
export { getAgentVersion, getAgentVersions } from "./shared/version-detection";
|
package/src/agents/types.ts
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
import type { NaxConfig } from "../config";
|
|
10
10
|
import type { ModelDef, ModelTier } from "../config/schema";
|
|
11
|
+
import type { TokenUsage } from "./cost";
|
|
11
12
|
|
|
12
13
|
// Re-export extended types for backward compatibility
|
|
13
14
|
export type {
|
|
@@ -38,6 +39,8 @@ export interface AgentResult {
|
|
|
38
39
|
durationMs: number;
|
|
39
40
|
/** Estimated cost for this run (USD) */
|
|
40
41
|
estimatedCost: number;
|
|
42
|
+
/** Token usage for this run (when available) */
|
|
43
|
+
tokenUsage?: TokenUsage;
|
|
41
44
|
/** Process ID of the spawned agent (for cleanup on failure) */
|
|
42
45
|
pid?: number;
|
|
43
46
|
}
|
package/src/cli/init.ts
CHANGED
|
@@ -33,7 +33,21 @@ export interface InitProjectOptions {
|
|
|
33
33
|
/**
|
|
34
34
|
* Gitignore entries added by nax init
|
|
35
35
|
*/
|
|
36
|
-
const NAX_GITIGNORE_ENTRIES = [
|
|
36
|
+
const NAX_GITIGNORE_ENTRIES = [
|
|
37
|
+
".nax-verifier-verdict.json",
|
|
38
|
+
"nax.lock",
|
|
39
|
+
"nax/**/runs/",
|
|
40
|
+
"nax/metrics.json",
|
|
41
|
+
"nax/features/*/status.json",
|
|
42
|
+
"nax/features/*/plan/",
|
|
43
|
+
"nax/features/*/acp-sessions.json",
|
|
44
|
+
"nax/features/*/interactions/",
|
|
45
|
+
"nax/features/*/progress.txt",
|
|
46
|
+
"nax/features/*/acceptance-refined.json",
|
|
47
|
+
".nax-pids",
|
|
48
|
+
".nax-wt/",
|
|
49
|
+
"~/",
|
|
50
|
+
];
|
|
37
51
|
|
|
38
52
|
/**
|
|
39
53
|
* Add nax-specific entries to .gitignore if not already present.
|
|
@@ -106,6 +106,7 @@ export const acceptanceSetupStage: PipelineStage = {
|
|
|
106
106
|
const result = await _acceptanceSetupDeps.generate(ctx.prd.userStories, refinedCriteria, {
|
|
107
107
|
featureName: ctx.prd.feature,
|
|
108
108
|
workdir: ctx.workdir,
|
|
109
|
+
featureDir: ctx.featureDir,
|
|
109
110
|
codebaseContext: "",
|
|
110
111
|
modelTier: ctx.config.acceptance.model ?? "fast",
|
|
111
112
|
modelDef: resolveModel(ctx.config.models[ctx.config.acceptance.model ?? "fast"]),
|
|
@@ -3,7 +3,11 @@
|
|
|
3
3
|
* Autofix Stage (ADR-005, Phase 2)
|
|
4
4
|
*
|
|
5
5
|
* Runs after a failed review stage. Attempts to fix quality issues
|
|
6
|
-
* automatically
|
|
6
|
+
* automatically before escalating:
|
|
7
|
+
*
|
|
8
|
+
* Phase 1 — Mechanical fix: runs lintFix / formatFix commands (if configured)
|
|
9
|
+
* Phase 2 — Agent rectification: spawns an agent session with the review error
|
|
10
|
+
* output as context (reuses the pattern from rectification-loop.ts)
|
|
7
11
|
*
|
|
8
12
|
* Language-agnostic: uses quality.commands.lintFix / formatFix from config.
|
|
9
13
|
* No hardcoded tool names.
|
|
@@ -12,10 +16,15 @@
|
|
|
12
16
|
*
|
|
13
17
|
* Returns:
|
|
14
18
|
* - `retry` fromStage:"review" — autofix resolved the failures
|
|
15
|
-
* - `escalate` — max attempts exhausted or
|
|
19
|
+
* - `escalate` — max attempts exhausted or agent unavailable
|
|
16
20
|
*/
|
|
17
21
|
|
|
22
|
+
import { getAgent } from "../../agents";
|
|
23
|
+
import { resolveModel } from "../../config";
|
|
24
|
+
import { resolvePermissions } from "../../config/permissions";
|
|
18
25
|
import { getLogger } from "../../logger";
|
|
26
|
+
import type { UserStory } from "../../prd";
|
|
27
|
+
import type { ReviewCheckResult } from "../../review/types";
|
|
19
28
|
import { pipelineEventBus } from "../event-bus";
|
|
20
29
|
import type { PipelineContext, PipelineStage, StageResult } from "../types";
|
|
21
30
|
|
|
@@ -45,18 +54,8 @@ export const autofixStage: PipelineStage = {
|
|
|
45
54
|
const lintFixCmd = ctx.config.quality.commands.lintFix;
|
|
46
55
|
const formatFixCmd = ctx.config.quality.commands.formatFix;
|
|
47
56
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
return { action: "escalate", reason: "Review failed and no autofix commands configured" };
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
const maxAttempts = ctx.config.quality.autofix?.maxAttempts ?? 2;
|
|
54
|
-
let fixed = false;
|
|
55
|
-
|
|
56
|
-
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
57
|
-
logger.info("autofix", `Autofix attempt ${attempt}/${maxAttempts}`, { storyId: ctx.story.id });
|
|
58
|
-
|
|
59
|
-
// Step 1: lint fix
|
|
57
|
+
// Phase 1: Mechanical fix (if commands are configured)
|
|
58
|
+
if (lintFixCmd || formatFixCmd) {
|
|
60
59
|
if (lintFixCmd) {
|
|
61
60
|
pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: lintFixCmd });
|
|
62
61
|
const lintResult = await _autofixDeps.runCommand(lintFixCmd, ctx.workdir);
|
|
@@ -69,7 +68,6 @@ export const autofixStage: PipelineStage = {
|
|
|
69
68
|
}
|
|
70
69
|
}
|
|
71
70
|
|
|
72
|
-
// Step 2: format fix
|
|
73
71
|
if (formatFixCmd) {
|
|
74
72
|
pipelineEventBus.emit({ type: "autofix:started", storyId: ctx.story.id, command: formatFixCmd });
|
|
75
73
|
const fmtResult = await _autofixDeps.runCommand(formatFixCmd, ctx.workdir);
|
|
@@ -82,22 +80,21 @@ export const autofixStage: PipelineStage = {
|
|
|
82
80
|
}
|
|
83
81
|
}
|
|
84
82
|
|
|
85
|
-
// Re-run review to check if fixed
|
|
86
83
|
const recheckPassed = await _autofixDeps.recheckReview(ctx);
|
|
87
84
|
pipelineEventBus.emit({ type: "autofix:completed", storyId: ctx.story.id, fixed: recheckPassed });
|
|
88
85
|
|
|
89
86
|
if (recheckPassed) {
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
}
|
|
94
|
-
fixed = true;
|
|
95
|
-
break;
|
|
87
|
+
if (ctx.reviewResult) ctx.reviewResult = { ...ctx.reviewResult, success: true };
|
|
88
|
+
logger.info("autofix", "Mechanical autofix succeeded — retrying review", { storyId: ctx.story.id });
|
|
89
|
+
return { action: "retry", fromStage: "review" };
|
|
96
90
|
}
|
|
97
91
|
}
|
|
98
92
|
|
|
99
|
-
|
|
100
|
-
|
|
93
|
+
// Phase 2: Agent rectification — spawn agent with review error context
|
|
94
|
+
const agentFixed = await _autofixDeps.runAgentRectification(ctx);
|
|
95
|
+
if (agentFixed) {
|
|
96
|
+
if (ctx.reviewResult) ctx.reviewResult = { ...ctx.reviewResult, success: true };
|
|
97
|
+
logger.info("autofix", "Agent rectification succeeded — retrying review", { storyId: ctx.story.id });
|
|
101
98
|
return { action: "retry", fromStage: "review" };
|
|
102
99
|
}
|
|
103
100
|
|
|
@@ -134,7 +131,97 @@ async function recheckReview(ctx: PipelineContext): Promise<boolean> {
|
|
|
134
131
|
return result.action === "continue";
|
|
135
132
|
}
|
|
136
133
|
|
|
134
|
+
/**
 * Return the review checks on `ctx.reviewResult` whose `success` flag is falsy.
 * Yields an empty array when there is no review result or no check list.
 */
function collectFailedChecks(ctx: PipelineContext): ReviewCheckResult[] {
  const reviewChecks = ctx.reviewResult?.checks;
  if (reviewChecks == null) {
    return [];
  }
  return reviewChecks.filter((entry) => !entry.success);
}
|
|
137
|
+
|
|
138
|
+
/** Length of the longest consecutive run of backticks in `text` (0 when there is none). */
function longestBacktickRun(text: string): number {
  let longest = 0;
  for (const run of text.match(/`+/g) ?? []) {
    longest = Math.max(longest, run.length);
  }
  return longest;
}

/**
 * Build the prompt handed to the agent that repairs failed review checks.
 *
 * Each failed check becomes a "## <check> errors (exit code N)" section with the
 * check output inside a fenced code block, followed by fixed instructions.
 *
 * @param failedChecks - Review checks that did not succeed
 * @param story - Story being worked on; its title and id are embedded in the prompt
 * @returns The complete prompt text
 */
export function buildReviewRectificationPrompt(failedChecks: ReviewCheckResult[], story: UserStory): string {
  const errors = failedChecks
    .map((c) => {
      const text = String(c.output);
      // The fence must be longer than any backtick run inside the output,
      // otherwise output containing ``` would close the block early and the
      // rest of the error text would spill into the prompt as markdown.
      const fence = "`".repeat(Math.max(3, longestBacktickRun(text) + 1));
      return `## ${c.check} errors (exit code ${c.exitCode})\n${fence}\n${text}\n${fence}`;
    })
    .join("\n\n");

  return `You are fixing lint/typecheck errors from a code review.

Story: ${story.title} (${story.id})

The following quality checks failed after implementation:

${errors}

Fix ALL errors listed above. Do NOT change test files or test behavior.
Do NOT add new features — only fix the quality check errors.
Commit your fixes when done.`;
}
|
|
155
|
+
|
|
156
|
+
/**
 * Phase 2 of autofix: ask the configured default agent to repair failed review checks.
 *
 * Runs up to `quality.autofix.maxAttempts` rounds (default 2). Each round looks up
 * the agent, sends it a prompt built from the currently failing checks, then
 * re-runs the review via `_autofixDeps.recheckReview`.
 *
 * @returns true as soon as a recheck passes; false when there are no failed
 *          checks to work on, the agent cannot be found, or all rounds are used up
 */
async function runAgentRectification(ctx: PipelineContext): Promise<boolean> {
  const log = getLogger();
  const attemptLimit = ctx.config.quality.autofix?.maxAttempts ?? 2;
  let pendingChecks = collectFailedChecks(ctx);

  if (pendingChecks.length === 0) {
    log.debug("autofix", "No failed checks found — skipping agent rectification", { storyId: ctx.story.id });
    return false;
  }

  log.info("autofix", "Starting agent rectification for review failures", {
    storyId: ctx.story.id,
    failedChecks: pendingChecks.map((c) => c.check),
    maxAttempts: attemptLimit,
  });

  // An agent lookup function on the context takes precedence over the registry lookup.
  const resolveAgent = ctx.agentGetFn ?? getAgent;

  for (let round = 1; round <= attemptLimit; round += 1) {
    log.info("autofix", `Agent rectification attempt ${round}/${attemptLimit}`, { storyId: ctx.story.id });

    const agent = resolveAgent(ctx.config.autoMode.defaultAgent);
    if (!agent) {
      log.error("autofix", "Agent not found — cannot run agent rectification", { storyId: ctx.story.id });
      return false;
    }

    const prompt = buildReviewRectificationPrompt(pendingChecks, ctx.story);
    // Tier preference: the story's routed tier, then the first escalation tier, then "balanced".
    const modelTier = ctx.story.routing?.modelTier ?? ctx.config.autoMode.escalation.tierOrder[0]?.tier ?? "balanced";
    const modelDef = resolveModel(ctx.config.models[modelTier]);

    await agent.run({
      prompt,
      workdir: ctx.workdir,
      modelTier,
      modelDef,
      timeoutSeconds: ctx.config.execution.sessionTimeoutSeconds,
      dangerouslySkipPermissions: resolvePermissions(ctx.config, "rectification").skipPermissions,
      pipelineStage: "rectification",
      config: ctx.config,
      maxInteractionTurns: ctx.config.agent?.maxInteractionTurns,
      storyId: ctx.story.id,
      sessionRole: "implementer",
    });

    if (await _autofixDeps.recheckReview(ctx)) {
      log.info("autofix", `[OK] Agent rectification succeeded on attempt ${round}`, {
        storyId: ctx.story.id,
      });
      return true;
    }

    // Use the latest failures for the next prompt; keep the previous list if
    // the context reports none.
    const stillFailing = collectFailedChecks(ctx);
    if (stillFailing.length > 0) {
      pendingChecks = stillFailing;
    }

    log.warn("autofix", `Agent rectification still failing after attempt ${round}`, {
      storyId: ctx.story.id,
    });
  }

  log.warn("autofix", "Agent rectification exhausted", { storyId: ctx.story.id });
  return false;
}
|
|
223
|
+
|
|
137
224
|
/**
|
|
138
225
|
* Injectable deps for testing.
|
|
139
226
|
*/
|
|
140
|
-
export const _autofixDeps = { runCommand, recheckReview };
|
|
227
|
+
export const _autofixDeps = { runCommand, recheckReview, runAgentRectification };
|
|
@@ -32,6 +32,25 @@ export async function checkGitRepoExists(workdir: string): Promise<Check> {
|
|
|
32
32
|
};
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
+
/**
|
|
36
|
+
* nax runtime files that are allowed to be dirty without blocking the precheck.
|
|
37
|
+
* These are written during nax execution and should be gitignored by `nax init`.
|
|
38
|
+
*/
|
|
39
|
+
const NAX_RUNTIME_PATTERNS = [
|
|
40
|
+
/^.{2} nax\.lock$/,
|
|
41
|
+
/^.{2} nax\/metrics\.json$/,
|
|
42
|
+
/^.{2} nax\/features\/[^/]+\/status\.json$/,
|
|
43
|
+
/^.{2} nax\/features\/[^/]+\/runs\//,
|
|
44
|
+
/^.{2} nax\/features\/[^/]+\/plan\//,
|
|
45
|
+
/^.{2} nax\/features\/[^/]+\/acp-sessions\.json$/,
|
|
46
|
+
/^.{2} nax\/features\/[^/]+\/interactions\//,
|
|
47
|
+
/^.{2} nax\/features\/[^/]+\/progress\.txt$/,
|
|
48
|
+
/^.{2} nax\/features\/[^/]+\/acceptance-refined\.json$/,
|
|
49
|
+
/^.{2} \.nax-verifier-verdict\.json$/,
|
|
50
|
+
/^.{2} \.nax-pids$/,
|
|
51
|
+
/^.{2} \.nax-wt\//,
|
|
52
|
+
];
|
|
53
|
+
|
|
35
54
|
/** Check if working tree is clean. Uses: git status --porcelain */
|
|
36
55
|
export async function checkWorkingTreeClean(workdir: string): Promise<Check> {
|
|
37
56
|
const proc = Bun.spawn(["git", "status", "--porcelain"], {
|
|
@@ -42,13 +61,20 @@ export async function checkWorkingTreeClean(workdir: string): Promise<Check> {
|
|
|
42
61
|
|
|
43
62
|
const output = await new Response(proc.stdout).text();
|
|
44
63
|
const exitCode = await proc.exited;
|
|
45
|
-
|
|
64
|
+
|
|
65
|
+
// Split without trimming the full output — porcelain lines start with status chars
|
|
66
|
+
// including leading spaces (e.g. " M file.ts"). trim() would corrupt the first line.
|
|
67
|
+
const lines = output.trim() === "" ? [] : output.split("\n").filter(Boolean);
|
|
68
|
+
const nonNaxDirtyFiles = lines.filter((line) => !NAX_RUNTIME_PATTERNS.some((pattern) => pattern.test(line)));
|
|
69
|
+
const passed = exitCode === 0 && nonNaxDirtyFiles.length === 0;
|
|
46
70
|
|
|
47
71
|
return {
|
|
48
72
|
name: "working-tree-clean",
|
|
49
73
|
tier: "blocker",
|
|
50
74
|
passed,
|
|
51
|
-
message: passed
|
|
75
|
+
message: passed
|
|
76
|
+
? "Working tree is clean"
|
|
77
|
+
: `Uncommitted changes detected: ${nonNaxDirtyFiles.map((l) => l.slice(3)).join(", ")}`,
|
|
52
78
|
};
|
|
53
79
|
}
|
|
54
80
|
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import { existsSync } from "node:fs";
|
|
9
|
+
import { isAbsolute } from "node:path";
|
|
9
10
|
import type { NaxConfig } from "../config";
|
|
10
11
|
import type { PRD } from "../prd/types";
|
|
11
12
|
import type { Check } from "./types";
|
|
@@ -126,7 +127,7 @@ async function hasPackageScript(workdir: string, name: string): Promise<boolean>
|
|
|
126
127
|
|
|
127
128
|
/**
|
|
128
129
|
* Check if .gitignore covers nax runtime files.
|
|
129
|
-
* Patterns: nax.lock, runs/,
|
|
130
|
+
* Patterns: nax.lock, nax/**/runs/, nax/metrics.json, nax/features/*/status.json, .nax-pids, .nax-wt/
|
|
130
131
|
*/
|
|
131
132
|
export async function checkGitignoreCoversNax(workdir: string): Promise<Check> {
|
|
132
133
|
const gitignorePath = `${workdir}/.gitignore`;
|
|
@@ -143,7 +144,14 @@ export async function checkGitignoreCoversNax(workdir: string): Promise<Check> {
|
|
|
143
144
|
|
|
144
145
|
const file = Bun.file(gitignorePath);
|
|
145
146
|
const content = await file.text();
|
|
146
|
-
const patterns = [
|
|
147
|
+
const patterns = [
|
|
148
|
+
"nax.lock",
|
|
149
|
+
"nax/**/runs/",
|
|
150
|
+
"nax/metrics.json",
|
|
151
|
+
"nax/features/*/status.json",
|
|
152
|
+
".nax-pids",
|
|
153
|
+
".nax-wt/",
|
|
154
|
+
];
|
|
147
155
|
const missing = patterns.filter((pattern) => !content.includes(pattern));
|
|
148
156
|
const passed = missing.length === 0;
|
|
149
157
|
|
|
@@ -191,3 +199,23 @@ export async function checkPromptOverrideFiles(config: NaxConfig, workdir: strin
|
|
|
191
199
|
|
|
192
200
|
return checks;
|
|
193
201
|
}
|
|
202
|
+
|
|
203
|
+
/**
 * Warn when the HOME environment variable is missing or not an absolute path.
 * An unexpanded "~" in HOME causes agent spawns to create a literal ~/
 * directory inside the repo cwd instead of resolving to the user home dir.
 *
 * @returns A "warning"-tier check named "home-env-valid"
 */
export async function checkHomeEnvValid(): Promise<Check> {
  const home = process.env.HOME ?? "";

  let passed = false;
  let message: string;
  if (home === "") {
    message = "HOME env is not set — agent may write files to unexpected locations";
  } else if (!isAbsolute(home)) {
    message = `HOME env is not an absolute path ("${home}") — may cause literal "~" directories in repo`;
  } else {
    passed = true;
    message = `HOME env is valid: ${home}`;
  }

  return {
    name: "home-env-valid",
    tier: "warning",
    passed,
    message,
  };
}
|
package/src/precheck/checks.ts
CHANGED
package/src/precheck/index.ts
CHANGED
|
@@ -20,6 +20,7 @@ import {
|
|
|
20
20
|
checkGitRepoExists,
|
|
21
21
|
checkGitUserConfigured,
|
|
22
22
|
checkGitignoreCoversNax,
|
|
23
|
+
checkHomeEnvValid,
|
|
23
24
|
checkLintCommand,
|
|
24
25
|
checkMultiAgentHealth,
|
|
25
26
|
checkOptionalCommands,
|
|
@@ -126,6 +127,7 @@ function getEnvironmentWarnings(config: NaxConfig, workdir: string): CheckFn[] {
|
|
|
126
127
|
() => checkDiskSpace(),
|
|
127
128
|
() => checkOptionalCommands(config, workdir),
|
|
128
129
|
() => checkGitignoreCoversNax(workdir),
|
|
130
|
+
() => checkHomeEnvValid(),
|
|
129
131
|
() => checkPromptOverrideFiles(config, workdir),
|
|
130
132
|
() => checkMultiAgentHealth(),
|
|
131
133
|
];
|