@nathapp/nax 0.35.0 → 0.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli/config.ts CHANGED
@@ -23,19 +23,27 @@ const FIELD_DESCRIPTIONS: Record<string, string> = {
23
23
  "models.powerful": "Powerful model for complex tasks (e.g., opus)",
24
24
 
25
25
  // Auto mode
26
- autoMode: "Auto mode configuration for agent orchestration",
26
+ autoMode:
27
+ "Auto mode configuration for agent orchestration. Enables multi-agent routing with model tier selection per task complexity and escalation on failures.",
27
28
  "autoMode.enabled": "Enable automatic agent selection and escalation",
28
- "autoMode.defaultAgent": "Default agent to use (e.g., claude, codex)",
29
- "autoMode.fallbackOrder": "Fallback order when agent is rate-limited",
30
- "autoMode.complexityRouting": "Model tier per complexity level",
31
- "autoMode.complexityRouting.simple": "Model tier for simple tasks",
32
- "autoMode.complexityRouting.medium": "Model tier for medium tasks",
33
- "autoMode.complexityRouting.complex": "Model tier for complex tasks",
34
- "autoMode.complexityRouting.expert": "Model tier for expert tasks",
35
- "autoMode.escalation": "Escalation settings for failed stories",
29
+ "autoMode.defaultAgent":
30
+ "Default agent to use when no specific agent is requested. Examples: 'claude' (Claude Code), 'codex' (GitHub Copilot), 'opencode' (OpenCode). The agent handles the main coding tasks.",
31
+ "autoMode.fallbackOrder":
32
+ 'Fallback order for agent selection when the primary agent is rate-limited, unavailable, or fails. Tries each agent in sequence until one succeeds. Example: ["claude", "codex", "opencode"] means try Claude first, then Copilot, then OpenCode.',
33
+ "autoMode.complexityRouting":
34
+ "Model tier routing rules mapped to story complexity levels. Determines which model (fast/balanced/powerful) to use based on task complexity: simple → fast, medium → balanced, complex → powerful, expert → powerful.",
35
+ "autoMode.complexityRouting.simple": "Model tier for simple tasks (low complexity, straightforward changes)",
36
+ "autoMode.complexityRouting.medium": "Model tier for medium tasks (moderate complexity, multi-file changes)",
37
+ "autoMode.complexityRouting.complex": "Model tier for complex tasks (high complexity, architectural decisions)",
38
+ "autoMode.complexityRouting.expert":
39
+ "Model tier for expert tasks (highest complexity, novel problems, design patterns)",
40
+ "autoMode.escalation":
41
+ "Escalation settings for failed stories. When a story fails after max attempts at current tier, escalate to the next tier in tierOrder. Enables progressive use of more powerful models.",
36
42
  "autoMode.escalation.enabled": "Enable tier escalation on failure",
37
- "autoMode.escalation.tierOrder": "Ordered tier escalation with per-tier attempt budgets",
38
- "autoMode.escalation.escalateEntireBatch": "Escalate all stories in batch when one fails",
43
+ "autoMode.escalation.tierOrder":
44
+ 'Ordered tier escalation chain with per-tier attempt budgets. Format: [{"tier": "fast", "attempts": 2}, {"tier": "balanced", "attempts": 2}, {"tier": "powerful", "attempts": 1}]. Allows each tier to attempt fixes before escalating to the next.',
45
+ "autoMode.escalation.escalateEntireBatch":
46
+ "When enabled, escalate all stories in a batch if one fails. When disabled, only the failing story escalates (allows parallel attempts at different tiers).",
39
47
 
40
48
  // Routing
41
49
  routing: "Model routing strategy configuration",
@@ -528,9 +536,15 @@ function displayConfigWithDescriptions(
528
536
 
529
537
  // Display description comment if available
530
538
  if (description) {
531
- // Include path for prompts section (where tests expect "prompts.overrides" to appear)
532
- const isPromptsSubSection = currentPathStr.startsWith("prompts.");
533
- const comment = isPromptsSubSection ? `${currentPathStr}: ${description}` : description;
539
+ // Include path for direct subsections of key configuration sections
540
+ // (to improve clarity of important configs like multi-agent setup)
541
+ const pathParts = currentPathStr.split(".");
542
+ // Only show path for 2-level paths (e.g., "autoMode.enabled", "models.fast")
543
+ // to keep deeply nested descriptions concise
544
+ const isDirectSubsection = pathParts.length === 2;
545
+ const isKeySection = ["prompts", "autoMode", "models", "routing"].includes(pathParts[0]);
546
+ const shouldIncludePath = isKeySection && isDirectSubsection;
547
+ const comment = shouldIncludePath ? `${currentPathStr}: ${description}` : description;
534
548
  console.log(`${indentStr}# ${comment}`);
535
549
  }
536
550
 
@@ -26,7 +26,7 @@ export interface GenerateCommandOptions {
26
26
  noAutoInject?: boolean;
27
27
  }
28
28
 
29
- const VALID_AGENTS: AgentType[] = ["claude", "opencode", "cursor", "windsurf", "aider"];
29
+ const VALID_AGENTS: AgentType[] = ["claude", "codex", "opencode", "cursor", "windsurf", "aider", "gemini"];
30
30
 
31
31
  /**
32
32
  * `nax generate` command handler.
package/src/cli/index.ts CHANGED
@@ -37,3 +37,4 @@ export {
37
37
  } from "./interact";
38
38
  export { generateCommand, type GenerateCommandOptions } from "./generate";
39
39
  export { configCommand, type ConfigCommandOptions } from "./config";
40
+ export { agentsListCommand } from "./agents";
@@ -11,7 +11,9 @@ import type { NaxConfig } from "../config";
11
11
  import { validateFilePath } from "../config/path-security";
12
12
  import { aiderGenerator } from "./generators/aider";
13
13
  import { claudeGenerator } from "./generators/claude";
14
+ import { codexGenerator } from "./generators/codex";
14
15
  import { cursorGenerator } from "./generators/cursor";
16
+ import { geminiGenerator } from "./generators/gemini";
15
17
  import { opencodeGenerator } from "./generators/opencode";
16
18
  import { windsurfGenerator } from "./generators/windsurf";
17
19
  import { buildProjectMetadata } from "./injector";
@@ -20,10 +22,12 @@ import type { AgentContextGenerator, AgentType, ContextContent, GeneratorMap } f
20
22
  /** Generator registry */
21
23
  const GENERATORS: GeneratorMap = {
22
24
  claude: claudeGenerator,
25
+ codex: codexGenerator,
23
26
  opencode: opencodeGenerator,
24
27
  cursor: cursorGenerator,
25
28
  windsurf: windsurfGenerator,
26
29
  aider: aiderGenerator,
30
+ gemini: geminiGenerator,
27
31
  };
28
32
 
29
33
  /** Generation result for a single agent */
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Codex Config Generator (v0.16.1)
3
+ *
4
+ * Generates codex.md from nax/context.md + auto-injected metadata.
5
+ */
6
+
7
+ import { formatMetadataSection } from "../injector";
8
+ import type { AgentContextGenerator, ContextContent } from "../types";
9
+
10
+ function generateCodexConfig(context: ContextContent): string {
11
+ const header = `# Codex Instructions
12
+
13
+ This file is auto-generated from \`nax/context.md\`.
14
+ DO NOT EDIT MANUALLY — run \`nax generate\` to regenerate.
15
+
16
+ ---
17
+
18
+ `;
19
+
20
+ const metaSection = context.metadata ? formatMetadataSection(context.metadata) : "";
21
+ return header + metaSection + context.markdown;
22
+ }
23
+
24
+ export const codexGenerator: AgentContextGenerator = {
25
+ name: "codex",
26
+ outputFile: "codex.md",
27
+ generate: generateCodexConfig,
28
+ };
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Gemini CLI Config Generator (v0.16.1)
3
+ *
4
+ * Generates GEMINI.md from nax/context.md + auto-injected metadata.
5
+ */
6
+
7
+ import { formatMetadataSection } from "../injector";
8
+ import type { AgentContextGenerator, ContextContent } from "../types";
9
+
10
+ function generateGeminiConfig(context: ContextContent): string {
11
+ const header = `# Gemini CLI Context
12
+
13
+ This file is auto-generated from \`nax/context.md\`.
14
+ DO NOT EDIT MANUALLY — run \`nax generate\` to regenerate.
15
+
16
+ ---
17
+
18
+ `;
19
+
20
+ const metaSection = context.metadata ? formatMetadataSection(context.metadata) : "";
21
+ return header + metaSection + context.markdown;
22
+ }
23
+
24
+ export const geminiGenerator: AgentContextGenerator = {
25
+ name: "gemini",
26
+ outputFile: "GEMINI.md",
27
+ generate: generateGeminiConfig,
28
+ };
@@ -40,7 +40,7 @@ export interface AgentContextGenerator {
40
40
  }
41
41
 
42
42
  /** All available generator types */
43
- export type AgentType = "claude" | "opencode" | "cursor" | "windsurf" | "aider";
43
+ export type AgentType = "claude" | "codex" | "opencode" | "cursor" | "windsurf" | "aider" | "gemini";
44
44
 
45
45
  /** Generator registry map */
46
46
  export type GeneratorMap = Record<AgentType, AgentContextGenerator>;
@@ -36,7 +36,7 @@ import { checkMergeConflict, checkStoryAmbiguity, isTriggerEnabled } from "../..
36
36
  import { getLogger } from "../../logger";
37
37
  import type { FailureCategory } from "../../tdd";
38
38
  import { runThreeSessionTdd } from "../../tdd";
39
- import { detectMergeConflict } from "../../utils/git";
39
+ import { autoCommitIfDirty, detectMergeConflict } from "../../utils/git";
40
40
  import type { PipelineContext, PipelineStage, StageResult } from "../types";
41
41
 
42
42
  /**
@@ -200,7 +200,7 @@ export const executionStage: PipelineStage = {
200
200
  ctx.agentResult = result;
201
201
 
202
202
  // BUG-058: Auto-commit if agent left uncommitted changes (single-session/test-after)
203
- await autoCommitIfDirty(ctx.workdir, "single-session", ctx.story.id);
203
+ await autoCommitIfDirty(ctx.workdir, "execution", "single-session", ctx.story.id);
204
204
 
205
205
  // merge-conflict trigger: detect CONFLICT markers in agent output
206
206
  const combinedOutput = (result.output ?? "") + (result.stderr ?? "");
@@ -270,40 +270,3 @@ export const _executionDeps = {
270
270
  isAmbiguousOutput,
271
271
  checkStoryAmbiguity,
272
272
  };
273
-
274
- /**
275
- * BUG-058: Auto-commit safety net for single-session/test-after.
276
- * Mirrors the same function in tdd/session-runner.ts for three-session TDD.
277
- */
278
- async function autoCommitIfDirty(workdir: string, role: string, storyId: string): Promise<void> {
279
- try {
280
- const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
281
- cwd: workdir,
282
- stdout: "pipe",
283
- stderr: "pipe",
284
- });
285
- const statusOutput = await new Response(statusProc.stdout).text();
286
- await statusProc.exited;
287
-
288
- if (!statusOutput.trim()) return;
289
-
290
- const logger = getLogger();
291
- logger.warn("execution", `Agent did not commit after ${role} session — auto-committing`, {
292
- role,
293
- storyId,
294
- dirtyFiles: statusOutput.trim().split("\n").length,
295
- });
296
-
297
- const addProc = Bun.spawn(["git", "add", "-A"], { cwd: workdir, stdout: "pipe", stderr: "pipe" });
298
- await addProc.exited;
299
-
300
- const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
301
- cwd: workdir,
302
- stdout: "pipe",
303
- stderr: "pipe",
304
- });
305
- await commitProc.exited;
306
- } catch {
307
- // Silently ignore — auto-commit is best-effort
308
- }
309
- }
@@ -25,6 +25,7 @@
25
25
  * ```
26
26
  */
27
27
 
28
+ import { getAgent } from "../../agents/registry";
28
29
  import type { NaxConfig } from "../../config";
29
30
  import { isGreenfieldStory } from "../../context/greenfield";
30
31
  import { applyDecomposition } from "../../decompose/apply";
@@ -68,6 +69,10 @@ export const routingStage: PipelineStage = {
68
69
  async execute(ctx: PipelineContext): Promise<StageResult> {
69
70
  const logger = getLogger();
70
71
 
72
+ // Resolve agent adapter for LLM routing (shared with execution)
73
+ const agentName = ctx.config.execution?.agent ?? "claude";
74
+ const adapter = _routingDeps.getAgent(agentName);
75
+
71
76
  // Staleness detection (RRP-003):
72
77
  // - story.routing absent → cache miss (no prior routing)
73
78
  // - story.routing + no contentHash → legacy cache hit (manual / pre-RRP-003 routing, honor as-is)
@@ -87,7 +92,7 @@ export const routingStage: PipelineStage = {
87
92
 
88
93
  if (isCacheHit) {
89
94
  // Cache hit: legacy routing (no contentHash) or matching contentHash — use cached values
90
- routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
95
+ routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config, adapter }, ctx.workdir, ctx.plugins);
91
96
  // Override with cached values only when they are actually set
92
97
  if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
93
98
  // BUG-062: Only honor stored testStrategy for legacy/manual routing (no contentHash).
@@ -106,7 +111,7 @@ export const routingStage: PipelineStage = {
106
111
  }
107
112
  } else {
108
113
  // Cache miss: no routing, or contentHash present but mismatched — fresh classification
109
- routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
114
+ routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config, adapter }, ctx.workdir, ctx.plugins);
110
115
  // currentHash already computed if a mismatch was detected; compute now if starting fresh
111
116
  currentHash = currentHash ?? _routingDeps.computeStoryContentHash(ctx.story);
112
117
  ctx.story.routing = {
@@ -223,4 +228,5 @@ export const _routingDeps = {
223
228
  applyDecomposition,
224
229
  runDecompose,
225
230
  checkStoryOversized,
231
+ getAgent,
226
232
  };
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Precheck for multi-agent health
3
+ *
4
+ * Detects which agents are installed and reports their versions.
5
+ * Informational only: the resulting check always passes.
6
+ */
7
+
8
+ import { getAgentVersions } from "../agents/version-detection";
9
+ import type { Check } from "./types";
10
+
11
+ /**
12
+ * Check multi-agent health: installed agents and their versions
13
+ *
14
+ * This is a Tier 2 warning check. Reports which agents are available
15
+ * and their versions, but doesn't fail if no agents are installed
16
+ * (since the main configured agent is checked in Tier 1).
17
+ */
18
+ export async function checkMultiAgentHealth(): Promise<Check> {
19
+ try {
20
+ const versions = await getAgentVersions();
21
+
22
+ // Separate installed from not installed
23
+ const installed = versions.filter((v) => v.installed);
24
+ const notInstalled = versions.filter((v) => !v.installed);
25
+
26
+ // Build message with agent status
27
+ const lines: string[] = [];
28
+
29
+ if (installed.length > 0) {
30
+ lines.push(`Installed agents (${installed.length}):`);
31
+ for (const agent of installed) {
32
+ const versionStr = agent.version ? ` v${agent.version}` : " (version unknown)";
33
+ lines.push(` • ${agent.displayName}${versionStr}`);
34
+ }
35
+ } else {
36
+ lines.push("No additional agents detected (using default configured agent)");
37
+ }
38
+
39
+ if (notInstalled.length > 0) {
40
+ lines.push(`\nAvailable but not installed (${notInstalled.length}):`);
41
+ for (const agent of notInstalled) {
42
+ lines.push(` • ${agent.displayName}`);
43
+ }
44
+ }
45
+
46
+ const message = lines.join("\n");
47
+
48
+ return {
49
+ name: "multi-agent-health",
50
+ tier: "warning",
51
+ passed: true, // Always pass - this is informational
52
+ message,
53
+ };
54
+ } catch (error) {
55
+ // If version detection fails, still pass but report error
56
+ return {
57
+ name: "multi-agent-health",
58
+ tier: "warning",
59
+ passed: true,
60
+ message: `Agent detection: ${error instanceof Error ? error.message : "Unknown error"}`,
61
+ };
62
+ }
63
+ }
@@ -30,3 +30,6 @@ export {
30
30
  checkGitignoreCoversNax,
31
31
  checkPromptOverrideFiles,
32
32
  } from "./checks-warnings";
33
+
34
+ // Agent checks
35
+ export { checkMultiAgentHealth } from "./checks-agents";
@@ -17,6 +17,7 @@ import {
17
17
  checkGitUserConfigured,
18
18
  checkGitignoreCoversNax,
19
19
  checkLintCommand,
20
+ checkMultiAgentHealth,
20
21
  checkOptionalCommands,
21
22
  checkPRDValid,
22
23
  checkPendingStories,
@@ -144,6 +145,7 @@ export async function runPrecheck(
144
145
  () => checkOptionalCommands(config, workdir),
145
146
  () => checkGitignoreCoversNax(workdir),
146
147
  () => checkPromptOverrideFiles(config, workdir),
148
+ () => checkMultiAgentHealth(),
147
149
  ];
148
150
 
149
151
  for (const checkFn of tier2Checks) {
@@ -11,7 +11,7 @@ import type { ModelTier, NaxConfig } from "../config";
11
11
  import { resolveModel } from "../config";
12
12
  import type { getLogger } from "../logger";
13
13
  import type { UserStory } from "../prd";
14
- import { captureGitRef } from "../utils/git";
14
+ import { autoCommitIfDirty, captureGitRef } from "../utils/git";
15
15
  import {
16
16
  type RectificationState,
17
17
  executeWithTimeout,
@@ -178,7 +178,7 @@ async function runRectificationLoop(
178
178
 
179
179
  // BUG-063: Auto-commit after rectification agent — prevents uncommitted changes
180
180
  // from leaking into verifier/review stages. Same pattern as session-runner.ts.
181
- await autoCommitIfDirty(workdir, "rectification", story.id, logger);
181
+ await autoCommitIfDirty(workdir, "tdd", "rectification", story.id);
182
182
 
183
183
  const rectifyIsolation = lite ? undefined : await verifyImplementerIsolation(workdir, rectifyBeforeRef);
184
184
 
@@ -231,47 +231,3 @@ async function runRectificationLoop(
231
231
  logger.info("tdd", "Full suite gate passed", { storyId: story.id });
232
232
  return true;
233
233
  }
234
-
235
- /**
236
- * BUG-063: Auto-commit safety net for rectification agent sessions.
237
- *
238
- * Rectification runs agent.run() directly (not via runTddSession), so it
239
- * needs its own auto-commit. Without this, uncommitted changes from
240
- * rectification leak into verifier/review stages causing spurious failures.
241
- */
242
- async function autoCommitIfDirty(
243
- workdir: string,
244
- role: string,
245
- storyId: string,
246
- logger: ReturnType<typeof getLogger>,
247
- ): Promise<void> {
248
- try {
249
- const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
250
- cwd: workdir,
251
- stdout: "pipe",
252
- stderr: "pipe",
253
- });
254
- const statusOutput = await new Response(statusProc.stdout).text();
255
- await statusProc.exited;
256
-
257
- if (!statusOutput.trim()) return;
258
-
259
- logger.warn("tdd", `Agent did not commit after ${role} session — auto-committing`, {
260
- role,
261
- storyId,
262
- dirtyFiles: statusOutput.trim().split("\n").length,
263
- });
264
-
265
- const addProc = Bun.spawn(["git", "add", "-A"], { cwd: workdir, stdout: "pipe", stderr: "pipe" });
266
- await addProc.exited;
267
-
268
- const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
269
- cwd: workdir,
270
- stdout: "pipe",
271
- stderr: "pipe",
272
- });
273
- await commitProc.exited;
274
- } catch {
275
- // Silently ignore — auto-commit is best-effort
276
- }
277
- }
@@ -10,6 +10,7 @@ import { resolveModel } from "../config";
10
10
  import { getLogger } from "../logger";
11
11
  import type { UserStory } from "../prd";
12
12
  import { PromptBuilder } from "../prompts";
13
+ import { autoCommitIfDirty } from "../utils/git";
13
14
  import { cleanupProcessTree } from "./cleanup";
14
15
  import { getChangedFiles, verifyImplementerIsolation, verifyTestWriterIsolation } from "./isolation";
15
16
  import type { IsolationCheck } from "./types";
@@ -146,7 +147,7 @@ export async function runTddSession(
146
147
  }
147
148
 
148
149
  // BUG-058: Auto-commit if agent left uncommitted changes
149
- await autoCommitIfDirty(workdir, role, story.id);
150
+ await autoCommitIfDirty(workdir, "tdd", role, story.id);
150
151
 
151
152
  // Check isolation based on role and skipIsolation flag.
152
153
  let isolation: IsolationCheck | undefined;
@@ -200,51 +201,3 @@ export async function runTddSession(
200
201
  estimatedCost: result.estimatedCost,
201
202
  };
202
203
  }
203
-
204
- /**
205
- * BUG-058: Auto-commit safety net.
206
- *
207
- * If the agent left uncommitted changes, stage and commit them automatically.
208
- * This prevents the review stage from failing with "uncommitted changes" errors.
209
- * Only triggers when the agent forgot — if tree is clean, this is a no-op.
210
- */
211
- async function autoCommitIfDirty(workdir: string, role: string, storyId: string): Promise<void> {
212
- const logger = getLogger();
213
-
214
- // Check if working tree is dirty
215
- try {
216
- const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
217
- cwd: workdir,
218
- stdout: "pipe",
219
- stderr: "pipe",
220
- });
221
- const statusOutput = await new Response(statusProc.stdout).text();
222
- await statusProc.exited;
223
-
224
- if (!statusOutput.trim()) return; // Clean tree, nothing to do
225
-
226
- logger.warn("tdd", `Agent did not commit after ${role} session — auto-committing`, {
227
- role,
228
- storyId,
229
- dirtyFiles: statusOutput.trim().split("\n").length,
230
- });
231
-
232
- // Stage all changes
233
- const addProc = Bun.spawn(["git", "add", "-A"], {
234
- cwd: workdir,
235
- stdout: "pipe",
236
- stderr: "pipe",
237
- });
238
- await addProc.exited;
239
-
240
- // Commit with descriptive message
241
- const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
242
- cwd: workdir,
243
- stdout: "pipe",
244
- stderr: "pipe",
245
- });
246
- await commitProc.exited;
247
- } catch {
248
- // Silently ignore — auto-commit is best-effort
249
- }
250
- }
@@ -117,14 +117,127 @@ function isValidVerdict(obj: unknown): obj is VerifierVerdict {
117
117
  return true;
118
118
  }
119
119
 
120
+ /**
121
+ * Coerce a free-form verdict object into the expected VerifierVerdict schema.
122
+ * Maps common agent-improvised patterns (verdict:"PASS", verification_summary, etc.)
123
+ * to the structured format. Absent fields get defaults (test and criteria status follow the approval flag); returns null only if coercion throws.
124
+ */
125
+ export function coerceVerdict(obj: Record<string, unknown>): VerifierVerdict | null {
126
+ try {
127
+ // Determine approval status
128
+ const verdictStr = String(obj.verdict ?? "").toUpperCase();
129
+ const approved = verdictStr === "PASS" || verdictStr === "APPROVED" || obj.approved === true;
130
+
131
+ // Parse test results from verification_summary or top-level
132
+ let passCount = 0;
133
+ let failCount = 0;
134
+ let allPassing = approved;
135
+ const summary = obj.verification_summary as Record<string, unknown> | undefined;
136
+ if (summary?.test_results && typeof summary.test_results === "string") {
137
+ // Parse "45/45 PASS" or "42/45 PASS" patterns
138
+ const match = (summary.test_results as string).match(/(\d+)\/(\d+)/);
139
+ if (match) {
140
+ passCount = Number.parseInt(match[1], 10);
141
+ const total = Number.parseInt(match[2], 10);
142
+ failCount = total - passCount;
143
+ allPassing = failCount === 0;
144
+ }
145
+ }
146
+ // Also check top-level tests object (partial schema compliance)
147
+ if (obj.tests && typeof obj.tests === "object") {
148
+ const t = obj.tests as Record<string, unknown>;
149
+ if (typeof t.passCount === "number") passCount = t.passCount;
150
+ if (typeof t.failCount === "number") failCount = t.failCount;
151
+ if (typeof t.allPassing === "boolean") allPassing = t.allPassing;
152
+ }
153
+
154
+ // Parse acceptance criteria from acceptance_criteria_review or acceptanceCriteria
155
+ const criteria: Array<{ criterion: string; met: boolean; note?: string }> = [];
156
+ let allMet = approved;
157
+ const acReview = obj.acceptance_criteria_review as Record<string, unknown> | undefined;
158
+ if (acReview) {
159
+ for (const [key, val] of Object.entries(acReview)) {
160
+ if (key.startsWith("criterion") && val && typeof val === "object") {
161
+ const c = val as Record<string, unknown>;
162
+ const met = String(c.status ?? "").toUpperCase() === "SATISFIED" || c.met === true;
163
+ criteria.push({
164
+ criterion: String(c.name ?? c.criterion ?? key),
165
+ met,
166
+ note: c.evidence ? String(c.evidence).slice(0, 200) : undefined,
167
+ });
168
+ if (!met) allMet = false;
169
+ }
170
+ }
171
+ }
172
+ // Also check top-level acceptanceCriteria
173
+ if (obj.acceptanceCriteria && typeof obj.acceptanceCriteria === "object") {
174
+ const ac = obj.acceptanceCriteria as Record<string, unknown>;
175
+ if (typeof ac.allMet === "boolean") allMet = ac.allMet;
176
+ if (Array.isArray(ac.criteria)) {
177
+ for (const c of ac.criteria) {
178
+ if (c && typeof c === "object") {
179
+ criteria.push(c as { criterion: string; met: boolean; note?: string });
180
+ }
181
+ }
182
+ }
183
+ }
184
+ // Parse summary AC count like "4/4 SATISFIED"
185
+ if (criteria.length === 0 && summary?.acceptance_criteria && typeof summary.acceptance_criteria === "string") {
186
+ const acMatch = (summary.acceptance_criteria as string).match(/(\d+)\/(\d+)/);
187
+ if (acMatch) {
188
+ const met = Number.parseInt(acMatch[1], 10);
189
+ const total = Number.parseInt(acMatch[2], 10);
190
+ allMet = met === total;
191
+ }
192
+ }
193
+
194
+ // Parse quality
195
+ let rating: "good" | "acceptable" | "poor" = "acceptable";
196
+ const qualityStr = summary?.code_quality
197
+ ? String(summary.code_quality).toLowerCase()
198
+ : obj.quality && typeof obj.quality === "object"
199
+ ? String((obj.quality as Record<string, unknown>).rating ?? "acceptable").toLowerCase()
200
+ : "acceptable";
201
+ if (qualityStr === "high" || qualityStr === "good") rating = "good";
202
+ else if (qualityStr === "low" || qualityStr === "poor") rating = "poor";
203
+
204
+ // Build coerced verdict
205
+ return {
206
+ version: 1,
207
+ approved,
208
+ tests: { allPassing, passCount, failCount },
209
+ testModifications: {
210
+ detected: false,
211
+ files: [],
212
+ legitimate: true,
213
+ reasoning: "Not assessed in free-form verdict",
214
+ },
215
+ acceptanceCriteria: { allMet, criteria },
216
+ quality: { rating, issues: [] },
217
+ fixes: Array.isArray(obj.fixes) ? (obj.fixes as string[]) : [],
218
+ reasoning:
219
+ typeof obj.reasoning === "string"
220
+ ? obj.reasoning
221
+ : typeof obj.overall_status === "string"
222
+ ? (obj.overall_status as string)
223
+ : summary?.overall_status
224
+ ? String(summary.overall_status)
225
+ : `Coerced from free-form verdict: ${verdictStr}`,
226
+ };
227
+ } catch {
228
+ return null;
229
+ }
230
+ }
231
+
120
232
  /**
121
233
  * Read the verifier verdict file from the workdir.
122
234
  *
123
235
  * Returns the parsed VerifierVerdict when the file exists and is valid.
236
+ * Attempts tolerant coercion if the file doesn't match the strict schema.
124
237
  * Returns null if:
125
238
  * - File does not exist
126
239
  * - File is not valid JSON
127
- * - Required fields are missing or invalid
240
+ * - Required fields are missing and coercion fails
128
241
  *
129
242
  * Never throws.
130
243
  */
@@ -150,15 +263,29 @@ export async function readVerdict(workdir: string): Promise<VerifierVerdict | nu
150
263
  return null;
151
264
  }
152
265
 
153
- if (!isValidVerdict(parsed)) {
154
- logger.warn("tdd", "Verifier verdict file missing required fields — ignoring", {
155
- path: verdictPath,
156
- content: JSON.stringify(parsed).slice(0, 500),
157
- });
158
- return null;
266
+ if (isValidVerdict(parsed)) {
267
+ return parsed;
268
+ }
269
+
270
+ // Strict validation failed — attempt tolerant coercion
271
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
272
+ const coerced = coerceVerdict(parsed as Record<string, unknown>);
273
+ if (coerced) {
274
+ logger.info("tdd", "Coerced free-form verdict to structured format", {
275
+ path: verdictPath,
276
+ approved: coerced.approved,
277
+ passCount: coerced.tests.passCount,
278
+ failCount: coerced.tests.failCount,
279
+ });
280
+ return coerced;
281
+ }
159
282
  }
160
283
 
161
- return parsed;
284
+ logger.warn("tdd", "Verifier verdict file missing required fields and coercion failed — ignoring", {
285
+ path: verdictPath,
286
+ content: JSON.stringify(parsed).slice(0, 500),
287
+ });
288
+ return null;
162
289
  } catch (err) {
163
290
  logger.warn("tdd", "Failed to read verifier verdict file — ignoring", {
164
291
  path: verdictPath,