@nathapp/nax 0.35.0 → 0.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/nax.ts +18 -9
- package/dist/nax.js +1064 -560
- package/package.json +1 -1
- package/src/agents/adapters/aider.ts +135 -0
- package/src/agents/adapters/gemini.ts +177 -0
- package/src/agents/adapters/opencode.ts +106 -0
- package/src/agents/index.ts +2 -0
- package/src/agents/registry.ts +6 -2
- package/src/agents/version-detection.ts +109 -0
- package/src/cli/agents.ts +87 -0
- package/src/cli/config.ts +28 -14
- package/src/cli/generate.ts +1 -1
- package/src/cli/index.ts +1 -0
- package/src/context/generator.ts +4 -0
- package/src/context/generators/codex.ts +28 -0
- package/src/context/generators/gemini.ts +28 -0
- package/src/context/types.ts +1 -1
- package/src/pipeline/stages/execution.ts +2 -39
- package/src/pipeline/stages/routing.ts +8 -2
- package/src/precheck/checks-agents.ts +63 -0
- package/src/precheck/checks.ts +3 -0
- package/src/precheck/index.ts +2 -0
- package/src/tdd/rectification-gate.ts +2 -46
- package/src/tdd/session-runner.ts +2 -49
- package/src/tdd/verdict.ts +135 -8
- package/src/utils/git.ts +49 -0
package/src/cli/config.ts
CHANGED
|
@@ -23,19 +23,27 @@ const FIELD_DESCRIPTIONS: Record<string, string> = {
|
|
|
23
23
|
"models.powerful": "Powerful model for complex tasks (e.g., opus)",
|
|
24
24
|
|
|
25
25
|
// Auto mode
|
|
26
|
-
autoMode:
|
|
26
|
+
autoMode:
|
|
27
|
+
"Auto mode configuration for agent orchestration. Enables multi-agent routing with model tier selection per task complexity and escalation on failures.",
|
|
27
28
|
"autoMode.enabled": "Enable automatic agent selection and escalation",
|
|
28
|
-
"autoMode.defaultAgent":
|
|
29
|
-
|
|
30
|
-
"autoMode.
|
|
31
|
-
|
|
32
|
-
"autoMode.complexityRouting
|
|
33
|
-
|
|
34
|
-
"autoMode.complexityRouting.
|
|
35
|
-
"autoMode.
|
|
29
|
+
"autoMode.defaultAgent":
|
|
30
|
+
"Default agent to use when no specific agent is requested. Examples: 'claude' (Claude Code), 'codex' (GitHub Copilot), 'opencode' (OpenCode). The agent handles the main coding tasks.",
|
|
31
|
+
"autoMode.fallbackOrder":
|
|
32
|
+
'Fallback order for agent selection when the primary agent is rate-limited, unavailable, or fails. Tries each agent in sequence until one succeeds. Example: ["claude", "codex", "opencode"] means try Claude first, then Copilot, then OpenCode.',
|
|
33
|
+
"autoMode.complexityRouting":
|
|
34
|
+
"Model tier routing rules mapped to story complexity levels. Determines which model (fast/balanced/powerful) to use based on task complexity: simple → fast, medium → balanced, complex → powerful, expert → powerful.",
|
|
35
|
+
"autoMode.complexityRouting.simple": "Model tier for simple tasks (low complexity, straightforward changes)",
|
|
36
|
+
"autoMode.complexityRouting.medium": "Model tier for medium tasks (moderate complexity, multi-file changes)",
|
|
37
|
+
"autoMode.complexityRouting.complex": "Model tier for complex tasks (high complexity, architectural decisions)",
|
|
38
|
+
"autoMode.complexityRouting.expert":
|
|
39
|
+
"Model tier for expert tasks (highest complexity, novel problems, design patterns)",
|
|
40
|
+
"autoMode.escalation":
|
|
41
|
+
"Escalation settings for failed stories. When a story fails after max attempts at current tier, escalate to the next tier in tierOrder. Enables progressive use of more powerful models.",
|
|
36
42
|
"autoMode.escalation.enabled": "Enable tier escalation on failure",
|
|
37
|
-
"autoMode.escalation.tierOrder":
|
|
38
|
-
|
|
43
|
+
"autoMode.escalation.tierOrder":
|
|
44
|
+
'Ordered tier escalation chain with per-tier attempt budgets. Format: [{"tier": "fast", "attempts": 2}, {"tier": "balanced", "attempts": 2}, {"tier": "powerful", "attempts": 1}]. Allows each tier to attempt fixes before escalating to the next.',
|
|
45
|
+
"autoMode.escalation.escalateEntireBatch":
|
|
46
|
+
"When enabled, escalate all stories in a batch if one fails. When disabled, only the failing story escalates (allows parallel attempts at different tiers).",
|
|
39
47
|
|
|
40
48
|
// Routing
|
|
41
49
|
routing: "Model routing strategy configuration",
|
|
@@ -528,9 +536,15 @@ function displayConfigWithDescriptions(
|
|
|
528
536
|
|
|
529
537
|
// Display description comment if available
|
|
530
538
|
if (description) {
|
|
531
|
-
// Include path for
|
|
532
|
-
|
|
533
|
-
const
|
|
539
|
+
// Include path for direct subsections of key configuration sections
|
|
540
|
+
// (to improve clarity of important configs like multi-agent setup)
|
|
541
|
+
const pathParts = currentPathStr.split(".");
|
|
542
|
+
// Only show path for 2-level paths (e.g., "autoMode.enabled", "models.fast")
|
|
543
|
+
// to keep deeply nested descriptions concise
|
|
544
|
+
const isDirectSubsection = pathParts.length === 2;
|
|
545
|
+
const isKeySection = ["prompts", "autoMode", "models", "routing"].includes(pathParts[0]);
|
|
546
|
+
const shouldIncludePath = isKeySection && isDirectSubsection;
|
|
547
|
+
const comment = shouldIncludePath ? `${currentPathStr}: ${description}` : description;
|
|
534
548
|
console.log(`${indentStr}# ${comment}`);
|
|
535
549
|
}
|
|
536
550
|
|
package/src/cli/generate.ts
CHANGED
|
@@ -26,7 +26,7 @@ export interface GenerateCommandOptions {
|
|
|
26
26
|
noAutoInject?: boolean;
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
const VALID_AGENTS: AgentType[] = ["claude", "opencode", "cursor", "windsurf", "aider"];
|
|
29
|
+
const VALID_AGENTS: AgentType[] = ["claude", "codex", "opencode", "cursor", "windsurf", "aider", "gemini"];
|
|
30
30
|
|
|
31
31
|
/**
|
|
32
32
|
* `nax generate` command handler.
|
package/src/cli/index.ts
CHANGED
package/src/context/generator.ts
CHANGED
|
@@ -11,7 +11,9 @@ import type { NaxConfig } from "../config";
|
|
|
11
11
|
import { validateFilePath } from "../config/path-security";
|
|
12
12
|
import { aiderGenerator } from "./generators/aider";
|
|
13
13
|
import { claudeGenerator } from "./generators/claude";
|
|
14
|
+
import { codexGenerator } from "./generators/codex";
|
|
14
15
|
import { cursorGenerator } from "./generators/cursor";
|
|
16
|
+
import { geminiGenerator } from "./generators/gemini";
|
|
15
17
|
import { opencodeGenerator } from "./generators/opencode";
|
|
16
18
|
import { windsurfGenerator } from "./generators/windsurf";
|
|
17
19
|
import { buildProjectMetadata } from "./injector";
|
|
@@ -20,10 +22,12 @@ import type { AgentContextGenerator, AgentType, ContextContent, GeneratorMap } f
|
|
|
20
22
|
/** Generator registry */
|
|
21
23
|
const GENERATORS: GeneratorMap = {
|
|
22
24
|
claude: claudeGenerator,
|
|
25
|
+
codex: codexGenerator,
|
|
23
26
|
opencode: opencodeGenerator,
|
|
24
27
|
cursor: cursorGenerator,
|
|
25
28
|
windsurf: windsurfGenerator,
|
|
26
29
|
aider: aiderGenerator,
|
|
30
|
+
gemini: geminiGenerator,
|
|
27
31
|
};
|
|
28
32
|
|
|
29
33
|
/** Generation result for a single agent */
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Codex Config Generator (v0.16.1)
|
|
3
|
+
*
|
|
4
|
+
* Generates codex.md from nax/context.md + auto-injected metadata.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { formatMetadataSection } from "../injector";
|
|
8
|
+
import type { AgentContextGenerator, ContextContent } from "../types";
|
|
9
|
+
|
|
10
|
+
function generateCodexConfig(context: ContextContent): string {
|
|
11
|
+
const header = `# Codex Instructions
|
|
12
|
+
|
|
13
|
+
This file is auto-generated from \`nax/context.md\`.
|
|
14
|
+
DO NOT EDIT MANUALLY — run \`nax generate\` to regenerate.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
`;
|
|
19
|
+
|
|
20
|
+
const metaSection = context.metadata ? formatMetadataSection(context.metadata) : "";
|
|
21
|
+
return header + metaSection + context.markdown;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export const codexGenerator: AgentContextGenerator = {
|
|
25
|
+
name: "codex",
|
|
26
|
+
outputFile: "codex.md",
|
|
27
|
+
generate: generateCodexConfig,
|
|
28
|
+
};
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini CLI Config Generator (v0.16.1)
|
|
3
|
+
*
|
|
4
|
+
* Generates GEMINI.md from nax/context.md + auto-injected metadata.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { formatMetadataSection } from "../injector";
|
|
8
|
+
import type { AgentContextGenerator, ContextContent } from "../types";
|
|
9
|
+
|
|
10
|
+
function generateGeminiConfig(context: ContextContent): string {
|
|
11
|
+
const header = `# Gemini CLI Context
|
|
12
|
+
|
|
13
|
+
This file is auto-generated from \`nax/context.md\`.
|
|
14
|
+
DO NOT EDIT MANUALLY — run \`nax generate\` to regenerate.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
`;
|
|
19
|
+
|
|
20
|
+
const metaSection = context.metadata ? formatMetadataSection(context.metadata) : "";
|
|
21
|
+
return header + metaSection + context.markdown;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export const geminiGenerator: AgentContextGenerator = {
|
|
25
|
+
name: "gemini",
|
|
26
|
+
outputFile: "GEMINI.md",
|
|
27
|
+
generate: generateGeminiConfig,
|
|
28
|
+
};
|
package/src/context/types.ts
CHANGED
|
@@ -40,7 +40,7 @@ export interface AgentContextGenerator {
|
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
/** All available generator types */
|
|
43
|
-
export type AgentType = "claude" | "opencode" | "cursor" | "windsurf" | "aider";
|
|
43
|
+
export type AgentType = "claude" | "codex" | "opencode" | "cursor" | "windsurf" | "aider" | "gemini";
|
|
44
44
|
|
|
45
45
|
/** Generator registry map */
|
|
46
46
|
export type GeneratorMap = Record<AgentType, AgentContextGenerator>;
|
|
@@ -36,7 +36,7 @@ import { checkMergeConflict, checkStoryAmbiguity, isTriggerEnabled } from "../..
|
|
|
36
36
|
import { getLogger } from "../../logger";
|
|
37
37
|
import type { FailureCategory } from "../../tdd";
|
|
38
38
|
import { runThreeSessionTdd } from "../../tdd";
|
|
39
|
-
import { detectMergeConflict } from "../../utils/git";
|
|
39
|
+
import { autoCommitIfDirty, detectMergeConflict } from "../../utils/git";
|
|
40
40
|
import type { PipelineContext, PipelineStage, StageResult } from "../types";
|
|
41
41
|
|
|
42
42
|
/**
|
|
@@ -200,7 +200,7 @@ export const executionStage: PipelineStage = {
|
|
|
200
200
|
ctx.agentResult = result;
|
|
201
201
|
|
|
202
202
|
// BUG-058: Auto-commit if agent left uncommitted changes (single-session/test-after)
|
|
203
|
-
await autoCommitIfDirty(ctx.workdir, "single-session", ctx.story.id);
|
|
203
|
+
await autoCommitIfDirty(ctx.workdir, "execution", "single-session", ctx.story.id);
|
|
204
204
|
|
|
205
205
|
// merge-conflict trigger: detect CONFLICT markers in agent output
|
|
206
206
|
const combinedOutput = (result.output ?? "") + (result.stderr ?? "");
|
|
@@ -270,40 +270,3 @@ export const _executionDeps = {
|
|
|
270
270
|
isAmbiguousOutput,
|
|
271
271
|
checkStoryAmbiguity,
|
|
272
272
|
};
|
|
273
|
-
|
|
274
|
-
/**
|
|
275
|
-
* BUG-058: Auto-commit safety net for single-session/test-after.
|
|
276
|
-
* Mirrors the same function in tdd/session-runner.ts for three-session TDD.
|
|
277
|
-
*/
|
|
278
|
-
async function autoCommitIfDirty(workdir: string, role: string, storyId: string): Promise<void> {
|
|
279
|
-
try {
|
|
280
|
-
const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
|
|
281
|
-
cwd: workdir,
|
|
282
|
-
stdout: "pipe",
|
|
283
|
-
stderr: "pipe",
|
|
284
|
-
});
|
|
285
|
-
const statusOutput = await new Response(statusProc.stdout).text();
|
|
286
|
-
await statusProc.exited;
|
|
287
|
-
|
|
288
|
-
if (!statusOutput.trim()) return;
|
|
289
|
-
|
|
290
|
-
const logger = getLogger();
|
|
291
|
-
logger.warn("execution", `Agent did not commit after ${role} session — auto-committing`, {
|
|
292
|
-
role,
|
|
293
|
-
storyId,
|
|
294
|
-
dirtyFiles: statusOutput.trim().split("\n").length,
|
|
295
|
-
});
|
|
296
|
-
|
|
297
|
-
const addProc = Bun.spawn(["git", "add", "-A"], { cwd: workdir, stdout: "pipe", stderr: "pipe" });
|
|
298
|
-
await addProc.exited;
|
|
299
|
-
|
|
300
|
-
const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
|
|
301
|
-
cwd: workdir,
|
|
302
|
-
stdout: "pipe",
|
|
303
|
-
stderr: "pipe",
|
|
304
|
-
});
|
|
305
|
-
await commitProc.exited;
|
|
306
|
-
} catch {
|
|
307
|
-
// Silently ignore — auto-commit is best-effort
|
|
308
|
-
}
|
|
309
|
-
}
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
* ```
|
|
26
26
|
*/
|
|
27
27
|
|
|
28
|
+
import { getAgent } from "../../agents/registry";
|
|
28
29
|
import type { NaxConfig } from "../../config";
|
|
29
30
|
import { isGreenfieldStory } from "../../context/greenfield";
|
|
30
31
|
import { applyDecomposition } from "../../decompose/apply";
|
|
@@ -68,6 +69,10 @@ export const routingStage: PipelineStage = {
|
|
|
68
69
|
async execute(ctx: PipelineContext): Promise<StageResult> {
|
|
69
70
|
const logger = getLogger();
|
|
70
71
|
|
|
72
|
+
// Resolve agent adapter for LLM routing (shared with execution)
|
|
73
|
+
const agentName = ctx.config.execution?.agent ?? "claude";
|
|
74
|
+
const adapter = _routingDeps.getAgent(agentName);
|
|
75
|
+
|
|
71
76
|
// Staleness detection (RRP-003):
|
|
72
77
|
// - story.routing absent → cache miss (no prior routing)
|
|
73
78
|
// - story.routing + no contentHash → legacy cache hit (manual / pre-RRP-003 routing, honor as-is)
|
|
@@ -87,7 +92,7 @@ export const routingStage: PipelineStage = {
|
|
|
87
92
|
|
|
88
93
|
if (isCacheHit) {
|
|
89
94
|
// Cache hit: legacy routing (no contentHash) or matching contentHash — use cached values
|
|
90
|
-
routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
|
|
95
|
+
routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config, adapter }, ctx.workdir, ctx.plugins);
|
|
91
96
|
// Override with cached values only when they are actually set
|
|
92
97
|
if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
|
|
93
98
|
// BUG-062: Only honor stored testStrategy for legacy/manual routing (no contentHash).
|
|
@@ -106,7 +111,7 @@ export const routingStage: PipelineStage = {
|
|
|
106
111
|
}
|
|
107
112
|
} else {
|
|
108
113
|
// Cache miss: no routing, or contentHash present but mismatched — fresh classification
|
|
109
|
-
routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
|
|
114
|
+
routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config, adapter }, ctx.workdir, ctx.plugins);
|
|
110
115
|
// currentHash already computed if a mismatch was detected; compute now if starting fresh
|
|
111
116
|
currentHash = currentHash ?? _routingDeps.computeStoryContentHash(ctx.story);
|
|
112
117
|
ctx.story.routing = {
|
|
@@ -223,4 +228,5 @@ export const _routingDeps = {
|
|
|
223
228
|
applyDecomposition,
|
|
224
229
|
runDecompose,
|
|
225
230
|
checkStoryOversized,
|
|
231
|
+
getAgent,
|
|
226
232
|
};
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Precheck for multi-agent health
|
|
3
|
+
*
|
|
4
|
+
* Detects installed agents, reports version information,
|
|
5
|
+
* and checks health status for each configured agent.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { getAgentVersions } from "../agents/version-detection";
|
|
9
|
+
import type { Check } from "./types";
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Check multi-agent health: installed agents and their versions
|
|
13
|
+
*
|
|
14
|
+
* This is a Tier 2 warning check. Reports which agents are available
|
|
15
|
+
* and their versions, but doesn't fail if no agents are installed
|
|
16
|
+
* (since the main configured agent is checked in Tier 1).
|
|
17
|
+
*/
|
|
18
|
+
export async function checkMultiAgentHealth(): Promise<Check> {
|
|
19
|
+
try {
|
|
20
|
+
const versions = await getAgentVersions();
|
|
21
|
+
|
|
22
|
+
// Separate installed from not installed
|
|
23
|
+
const installed = versions.filter((v) => v.installed);
|
|
24
|
+
const notInstalled = versions.filter((v) => !v.installed);
|
|
25
|
+
|
|
26
|
+
// Build message with agent status
|
|
27
|
+
const lines: string[] = [];
|
|
28
|
+
|
|
29
|
+
if (installed.length > 0) {
|
|
30
|
+
lines.push(`Installed agents (${installed.length}):`);
|
|
31
|
+
for (const agent of installed) {
|
|
32
|
+
const versionStr = agent.version ? ` v${agent.version}` : " (version unknown)";
|
|
33
|
+
lines.push(` • ${agent.displayName}${versionStr}`);
|
|
34
|
+
}
|
|
35
|
+
} else {
|
|
36
|
+
lines.push("No additional agents detected (using default configured agent)");
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
if (notInstalled.length > 0) {
|
|
40
|
+
lines.push(`\nAvailable but not installed (${notInstalled.length}):`);
|
|
41
|
+
for (const agent of notInstalled) {
|
|
42
|
+
lines.push(` • ${agent.displayName}`);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
const message = lines.join("\n");
|
|
47
|
+
|
|
48
|
+
return {
|
|
49
|
+
name: "multi-agent-health",
|
|
50
|
+
tier: "warning",
|
|
51
|
+
passed: true, // Always pass - this is informational
|
|
52
|
+
message,
|
|
53
|
+
};
|
|
54
|
+
} catch (error) {
|
|
55
|
+
// If version detection fails, still pass but report error
|
|
56
|
+
return {
|
|
57
|
+
name: "multi-agent-health",
|
|
58
|
+
tier: "warning",
|
|
59
|
+
passed: true,
|
|
60
|
+
message: `Agent detection: ${error instanceof Error ? error.message : "Unknown error"}`,
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
}
|
package/src/precheck/checks.ts
CHANGED
package/src/precheck/index.ts
CHANGED
|
@@ -17,6 +17,7 @@ import {
|
|
|
17
17
|
checkGitUserConfigured,
|
|
18
18
|
checkGitignoreCoversNax,
|
|
19
19
|
checkLintCommand,
|
|
20
|
+
checkMultiAgentHealth,
|
|
20
21
|
checkOptionalCommands,
|
|
21
22
|
checkPRDValid,
|
|
22
23
|
checkPendingStories,
|
|
@@ -144,6 +145,7 @@ export async function runPrecheck(
|
|
|
144
145
|
() => checkOptionalCommands(config, workdir),
|
|
145
146
|
() => checkGitignoreCoversNax(workdir),
|
|
146
147
|
() => checkPromptOverrideFiles(config, workdir),
|
|
148
|
+
() => checkMultiAgentHealth(),
|
|
147
149
|
];
|
|
148
150
|
|
|
149
151
|
for (const checkFn of tier2Checks) {
|
|
@@ -11,7 +11,7 @@ import type { ModelTier, NaxConfig } from "../config";
|
|
|
11
11
|
import { resolveModel } from "../config";
|
|
12
12
|
import type { getLogger } from "../logger";
|
|
13
13
|
import type { UserStory } from "../prd";
|
|
14
|
-
import { captureGitRef } from "../utils/git";
|
|
14
|
+
import { autoCommitIfDirty, captureGitRef } from "../utils/git";
|
|
15
15
|
import {
|
|
16
16
|
type RectificationState,
|
|
17
17
|
executeWithTimeout,
|
|
@@ -178,7 +178,7 @@ async function runRectificationLoop(
|
|
|
178
178
|
|
|
179
179
|
// BUG-063: Auto-commit after rectification agent — prevents uncommitted changes
|
|
180
180
|
// from leaking into verifier/review stages. Same pattern as session-runner.ts.
|
|
181
|
-
await autoCommitIfDirty(workdir, "rectification", story.id
|
|
181
|
+
await autoCommitIfDirty(workdir, "tdd", "rectification", story.id);
|
|
182
182
|
|
|
183
183
|
const rectifyIsolation = lite ? undefined : await verifyImplementerIsolation(workdir, rectifyBeforeRef);
|
|
184
184
|
|
|
@@ -231,47 +231,3 @@ async function runRectificationLoop(
|
|
|
231
231
|
logger.info("tdd", "Full suite gate passed", { storyId: story.id });
|
|
232
232
|
return true;
|
|
233
233
|
}
|
|
234
|
-
|
|
235
|
-
/**
|
|
236
|
-
* BUG-063: Auto-commit safety net for rectification agent sessions.
|
|
237
|
-
*
|
|
238
|
-
* Rectification runs agent.run() directly (not via runTddSession), so it
|
|
239
|
-
* needs its own auto-commit. Without this, uncommitted changes from
|
|
240
|
-
* rectification leak into verifier/review stages causing spurious failures.
|
|
241
|
-
*/
|
|
242
|
-
async function autoCommitIfDirty(
|
|
243
|
-
workdir: string,
|
|
244
|
-
role: string,
|
|
245
|
-
storyId: string,
|
|
246
|
-
logger: ReturnType<typeof getLogger>,
|
|
247
|
-
): Promise<void> {
|
|
248
|
-
try {
|
|
249
|
-
const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
|
|
250
|
-
cwd: workdir,
|
|
251
|
-
stdout: "pipe",
|
|
252
|
-
stderr: "pipe",
|
|
253
|
-
});
|
|
254
|
-
const statusOutput = await new Response(statusProc.stdout).text();
|
|
255
|
-
await statusProc.exited;
|
|
256
|
-
|
|
257
|
-
if (!statusOutput.trim()) return;
|
|
258
|
-
|
|
259
|
-
logger.warn("tdd", `Agent did not commit after ${role} session — auto-committing`, {
|
|
260
|
-
role,
|
|
261
|
-
storyId,
|
|
262
|
-
dirtyFiles: statusOutput.trim().split("\n").length,
|
|
263
|
-
});
|
|
264
|
-
|
|
265
|
-
const addProc = Bun.spawn(["git", "add", "-A"], { cwd: workdir, stdout: "pipe", stderr: "pipe" });
|
|
266
|
-
await addProc.exited;
|
|
267
|
-
|
|
268
|
-
const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
|
|
269
|
-
cwd: workdir,
|
|
270
|
-
stdout: "pipe",
|
|
271
|
-
stderr: "pipe",
|
|
272
|
-
});
|
|
273
|
-
await commitProc.exited;
|
|
274
|
-
} catch {
|
|
275
|
-
// Silently ignore — auto-commit is best-effort
|
|
276
|
-
}
|
|
277
|
-
}
|
|
@@ -10,6 +10,7 @@ import { resolveModel } from "../config";
|
|
|
10
10
|
import { getLogger } from "../logger";
|
|
11
11
|
import type { UserStory } from "../prd";
|
|
12
12
|
import { PromptBuilder } from "../prompts";
|
|
13
|
+
import { autoCommitIfDirty } from "../utils/git";
|
|
13
14
|
import { cleanupProcessTree } from "./cleanup";
|
|
14
15
|
import { getChangedFiles, verifyImplementerIsolation, verifyTestWriterIsolation } from "./isolation";
|
|
15
16
|
import type { IsolationCheck } from "./types";
|
|
@@ -146,7 +147,7 @@ export async function runTddSession(
|
|
|
146
147
|
}
|
|
147
148
|
|
|
148
149
|
// BUG-058: Auto-commit if agent left uncommitted changes
|
|
149
|
-
await autoCommitIfDirty(workdir, role, story.id);
|
|
150
|
+
await autoCommitIfDirty(workdir, "tdd", role, story.id);
|
|
150
151
|
|
|
151
152
|
// Check isolation based on role and skipIsolation flag.
|
|
152
153
|
let isolation: IsolationCheck | undefined;
|
|
@@ -200,51 +201,3 @@ export async function runTddSession(
|
|
|
200
201
|
estimatedCost: result.estimatedCost,
|
|
201
202
|
};
|
|
202
203
|
}
|
|
203
|
-
|
|
204
|
-
/**
|
|
205
|
-
* BUG-058: Auto-commit safety net.
|
|
206
|
-
*
|
|
207
|
-
* If the agent left uncommitted changes, stage and commit them automatically.
|
|
208
|
-
* This prevents the review stage from failing with "uncommitted changes" errors.
|
|
209
|
-
* Only triggers when the agent forgot — if tree is clean, this is a no-op.
|
|
210
|
-
*/
|
|
211
|
-
async function autoCommitIfDirty(workdir: string, role: string, storyId: string): Promise<void> {
|
|
212
|
-
const logger = getLogger();
|
|
213
|
-
|
|
214
|
-
// Check if working tree is dirty
|
|
215
|
-
try {
|
|
216
|
-
const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
|
|
217
|
-
cwd: workdir,
|
|
218
|
-
stdout: "pipe",
|
|
219
|
-
stderr: "pipe",
|
|
220
|
-
});
|
|
221
|
-
const statusOutput = await new Response(statusProc.stdout).text();
|
|
222
|
-
await statusProc.exited;
|
|
223
|
-
|
|
224
|
-
if (!statusOutput.trim()) return; // Clean tree, nothing to do
|
|
225
|
-
|
|
226
|
-
logger.warn("tdd", `Agent did not commit after ${role} session — auto-committing`, {
|
|
227
|
-
role,
|
|
228
|
-
storyId,
|
|
229
|
-
dirtyFiles: statusOutput.trim().split("\n").length,
|
|
230
|
-
});
|
|
231
|
-
|
|
232
|
-
// Stage all changes
|
|
233
|
-
const addProc = Bun.spawn(["git", "add", "-A"], {
|
|
234
|
-
cwd: workdir,
|
|
235
|
-
stdout: "pipe",
|
|
236
|
-
stderr: "pipe",
|
|
237
|
-
});
|
|
238
|
-
await addProc.exited;
|
|
239
|
-
|
|
240
|
-
// Commit with descriptive message
|
|
241
|
-
const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
|
|
242
|
-
cwd: workdir,
|
|
243
|
-
stdout: "pipe",
|
|
244
|
-
stderr: "pipe",
|
|
245
|
-
});
|
|
246
|
-
await commitProc.exited;
|
|
247
|
-
} catch {
|
|
248
|
-
// Silently ignore — auto-commit is best-effort
|
|
249
|
-
}
|
|
250
|
-
}
|
package/src/tdd/verdict.ts
CHANGED
|
@@ -117,14 +117,127 @@ function isValidVerdict(obj: unknown): obj is VerifierVerdict {
|
|
|
117
117
|
return true;
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
+
/**
|
|
121
|
+
* Coerce a free-form verdict object into the expected VerifierVerdict schema.
|
|
122
|
+
* Maps common agent-improvised patterns (verdict:"PASS", verification_summary, etc.)
|
|
123
|
+
* to the structured format. Returns null if too malformed to coerce.
|
|
124
|
+
*/
|
|
125
|
+
export function coerceVerdict(obj: Record<string, unknown>): VerifierVerdict | null {
|
|
126
|
+
try {
|
|
127
|
+
// Determine approval status
|
|
128
|
+
const verdictStr = String(obj.verdict ?? "").toUpperCase();
|
|
129
|
+
const approved = verdictStr === "PASS" || verdictStr === "APPROVED" || obj.approved === true;
|
|
130
|
+
|
|
131
|
+
// Parse test results from verification_summary or top-level
|
|
132
|
+
let passCount = 0;
|
|
133
|
+
let failCount = 0;
|
|
134
|
+
let allPassing = approved;
|
|
135
|
+
const summary = obj.verification_summary as Record<string, unknown> | undefined;
|
|
136
|
+
if (summary?.test_results && typeof summary.test_results === "string") {
|
|
137
|
+
// Parse "45/45 PASS" or "42/45 PASS" patterns
|
|
138
|
+
const match = (summary.test_results as string).match(/(\d+)\/(\d+)/);
|
|
139
|
+
if (match) {
|
|
140
|
+
passCount = Number.parseInt(match[1], 10);
|
|
141
|
+
const total = Number.parseInt(match[2], 10);
|
|
142
|
+
failCount = total - passCount;
|
|
143
|
+
allPassing = failCount === 0;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
// Also check top-level tests object (partial schema compliance)
|
|
147
|
+
if (obj.tests && typeof obj.tests === "object") {
|
|
148
|
+
const t = obj.tests as Record<string, unknown>;
|
|
149
|
+
if (typeof t.passCount === "number") passCount = t.passCount;
|
|
150
|
+
if (typeof t.failCount === "number") failCount = t.failCount;
|
|
151
|
+
if (typeof t.allPassing === "boolean") allPassing = t.allPassing;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
// Parse acceptance criteria from acceptance_criteria_review or acceptanceCriteria
|
|
155
|
+
const criteria: Array<{ criterion: string; met: boolean; note?: string }> = [];
|
|
156
|
+
let allMet = approved;
|
|
157
|
+
const acReview = obj.acceptance_criteria_review as Record<string, unknown> | undefined;
|
|
158
|
+
if (acReview) {
|
|
159
|
+
for (const [key, val] of Object.entries(acReview)) {
|
|
160
|
+
if (key.startsWith("criterion") && val && typeof val === "object") {
|
|
161
|
+
const c = val as Record<string, unknown>;
|
|
162
|
+
const met = String(c.status ?? "").toUpperCase() === "SATISFIED" || c.met === true;
|
|
163
|
+
criteria.push({
|
|
164
|
+
criterion: String(c.name ?? c.criterion ?? key),
|
|
165
|
+
met,
|
|
166
|
+
note: c.evidence ? String(c.evidence).slice(0, 200) : undefined,
|
|
167
|
+
});
|
|
168
|
+
if (!met) allMet = false;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
// Also check top-level acceptanceCriteria
|
|
173
|
+
if (obj.acceptanceCriteria && typeof obj.acceptanceCriteria === "object") {
|
|
174
|
+
const ac = obj.acceptanceCriteria as Record<string, unknown>;
|
|
175
|
+
if (typeof ac.allMet === "boolean") allMet = ac.allMet;
|
|
176
|
+
if (Array.isArray(ac.criteria)) {
|
|
177
|
+
for (const c of ac.criteria) {
|
|
178
|
+
if (c && typeof c === "object") {
|
|
179
|
+
criteria.push(c as { criterion: string; met: boolean; note?: string });
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
// Parse summary AC count like "4/4 SATISFIED"
|
|
185
|
+
if (criteria.length === 0 && summary?.acceptance_criteria && typeof summary.acceptance_criteria === "string") {
|
|
186
|
+
const acMatch = (summary.acceptance_criteria as string).match(/(\d+)\/(\d+)/);
|
|
187
|
+
if (acMatch) {
|
|
188
|
+
const met = Number.parseInt(acMatch[1], 10);
|
|
189
|
+
const total = Number.parseInt(acMatch[2], 10);
|
|
190
|
+
allMet = met === total;
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// Parse quality
|
|
195
|
+
let rating: "good" | "acceptable" | "poor" = "acceptable";
|
|
196
|
+
const qualityStr = summary?.code_quality
|
|
197
|
+
? String(summary.code_quality).toLowerCase()
|
|
198
|
+
: obj.quality && typeof obj.quality === "object"
|
|
199
|
+
? String((obj.quality as Record<string, unknown>).rating ?? "acceptable").toLowerCase()
|
|
200
|
+
: "acceptable";
|
|
201
|
+
if (qualityStr === "high" || qualityStr === "good") rating = "good";
|
|
202
|
+
else if (qualityStr === "low" || qualityStr === "poor") rating = "poor";
|
|
203
|
+
|
|
204
|
+
// Build coerced verdict
|
|
205
|
+
return {
|
|
206
|
+
version: 1,
|
|
207
|
+
approved,
|
|
208
|
+
tests: { allPassing, passCount, failCount },
|
|
209
|
+
testModifications: {
|
|
210
|
+
detected: false,
|
|
211
|
+
files: [],
|
|
212
|
+
legitimate: true,
|
|
213
|
+
reasoning: "Not assessed in free-form verdict",
|
|
214
|
+
},
|
|
215
|
+
acceptanceCriteria: { allMet, criteria },
|
|
216
|
+
quality: { rating, issues: [] },
|
|
217
|
+
fixes: Array.isArray(obj.fixes) ? (obj.fixes as string[]) : [],
|
|
218
|
+
reasoning:
|
|
219
|
+
typeof obj.reasoning === "string"
|
|
220
|
+
? obj.reasoning
|
|
221
|
+
: typeof obj.overall_status === "string"
|
|
222
|
+
? (obj.overall_status as string)
|
|
223
|
+
: summary?.overall_status
|
|
224
|
+
? String(summary.overall_status)
|
|
225
|
+
: `Coerced from free-form verdict: ${verdictStr}`,
|
|
226
|
+
};
|
|
227
|
+
} catch {
|
|
228
|
+
return null;
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
120
232
|
/**
|
|
121
233
|
* Read the verifier verdict file from the workdir.
|
|
122
234
|
*
|
|
123
235
|
* Returns the parsed VerifierVerdict when the file exists and is valid.
|
|
236
|
+
* Attempts tolerant coercion if the file doesn't match the strict schema.
|
|
124
237
|
* Returns null if:
|
|
125
238
|
* - File does not exist
|
|
126
239
|
* - File is not valid JSON
|
|
127
|
-
* - Required fields are missing
|
|
240
|
+
* - Required fields are missing and coercion fails
|
|
128
241
|
*
|
|
129
242
|
* Never throws.
|
|
130
243
|
*/
|
|
@@ -150,15 +263,29 @@ export async function readVerdict(workdir: string): Promise<VerifierVerdict | nu
|
|
|
150
263
|
return null;
|
|
151
264
|
}
|
|
152
265
|
|
|
153
|
-
if (
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
266
|
+
if (isValidVerdict(parsed)) {
|
|
267
|
+
return parsed;
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
// Strict validation failed — attempt tolerant coercion
|
|
271
|
+
if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
|
|
272
|
+
const coerced = coerceVerdict(parsed as Record<string, unknown>);
|
|
273
|
+
if (coerced) {
|
|
274
|
+
logger.info("tdd", "Coerced free-form verdict to structured format", {
|
|
275
|
+
path: verdictPath,
|
|
276
|
+
approved: coerced.approved,
|
|
277
|
+
passCount: coerced.tests.passCount,
|
|
278
|
+
failCount: coerced.tests.failCount,
|
|
279
|
+
});
|
|
280
|
+
return coerced;
|
|
281
|
+
}
|
|
159
282
|
}
|
|
160
283
|
|
|
161
|
-
|
|
284
|
+
logger.warn("tdd", "Verifier verdict file missing required fields and coercion failed — ignoring", {
|
|
285
|
+
path: verdictPath,
|
|
286
|
+
content: JSON.stringify(parsed).slice(0, 500),
|
|
287
|
+
});
|
|
288
|
+
return null;
|
|
162
289
|
} catch (err) {
|
|
163
290
|
logger.warn("tdd", "Failed to read verifier verdict file — ignoring", {
|
|
164
291
|
path: verdictPath,
|