npm - @oh-my-pi/pi-coding-agent - Versions diffs - 15.1.8 → 15.2.1 - Mend

@oh-my-pi/pi-coding-agent 15.1.8 → 15.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/CHANGELOG.md +52 -1
package/dist/types/cli/update-cli.d.ts +18 -0
package/dist/types/config/settings-schema.d.ts +10 -0
package/dist/types/eval/py/kernel.d.ts +6 -0
package/dist/types/goals/state.d.ts +1 -1
package/dist/types/goals/tools/goal-tool.d.ts +4 -0
package/dist/types/hashline/parser.d.ts +6 -2
package/dist/types/internal-urls/memory-protocol.d.ts +6 -0
package/dist/types/main.d.ts +25 -1
package/dist/types/modes/theme/shimmer.d.ts +27 -0
package/dist/types/slash-commands/helpers/format.d.ts +4 -1
package/dist/types/tools/ast-edit.d.ts +3 -0
package/dist/types/tools/ast-grep.d.ts +3 -0
package/dist/types/tools/find.d.ts +3 -0
package/dist/types/tools/search.d.ts +3 -0
package/dist/types/tui/file-list.d.ts +6 -0
package/dist/types/tui/hyperlink.d.ts +42 -0
package/dist/types/tui/index.d.ts +1 -0
package/dist/types/utils/tool-choice.d.ts +2 -1
package/dist/types/web/search/providers/utils.d.ts +27 -1
package/package.json +7 -7
package/src/cli/update-cli.ts +78 -36
package/src/config/model-registry.ts +23 -12
package/src/config/settings-schema.ts +12 -0
package/src/config/settings.ts +28 -5
package/src/edit/renderer.ts +5 -3
package/src/eval/py/executor.ts +12 -1
package/src/eval/py/kernel.ts +24 -8
package/src/extensibility/plugins/legacy-pi-compat.ts +2 -2
package/src/goals/runtime.ts +9 -3
package/src/goals/state.ts +1 -1
package/src/goals/tools/goal-tool.ts +12 -2
package/src/hashline/diff.ts +1 -1
package/src/hashline/execute.ts +2 -2
package/src/hashline/parser.ts +87 -12
package/src/internal-urls/memory-protocol.ts +1 -1
package/src/main.ts +13 -2
package/src/modes/interactive-mode.ts +29 -1
package/src/modes/theme/shimmer.ts +79 -0
package/src/prompts/agents/oracle.md +15 -16
package/src/prompts/tools/goal.md +7 -2
package/src/session/agent-session.ts +12 -75
package/src/slash-commands/helpers/format.ts +23 -3
package/src/task/executor.ts +115 -19
package/src/tools/ast-edit.ts +39 -6
package/src/tools/ast-grep.ts +38 -6
package/src/tools/find.ts +13 -2
package/src/tools/read.ts +46 -6
package/src/tools/search.ts +447 -265
package/src/tui/file-list.ts +10 -2
package/src/tui/hyperlink.ts +126 -0
package/src/tui/index.ts +1 -0
package/src/utils/tool-choice.ts +7 -7
package/src/web/kagi.ts +2 -2
package/src/web/parallel.ts +3 -3
package/src/web/search/index.ts +20 -9
package/src/web/search/providers/anthropic.ts +4 -2
package/src/web/search/providers/brave.ts +4 -2
package/src/web/search/providers/codex.ts +4 -1
package/src/web/search/providers/exa.ts +4 -1
package/src/web/search/providers/gemini.ts +4 -1
package/src/web/search/providers/jina.ts +4 -2
package/src/web/search/providers/kagi.ts +5 -1
package/src/web/search/providers/kimi.ts +4 -2
package/src/web/search/providers/parallel.ts +5 -1
package/src/web/search/providers/perplexity.ts +7 -2
package/src/web/search/providers/searxng.ts +4 -1
package/src/web/search/providers/synthetic.ts +4 -2
package/src/web/search/providers/tavily.ts +4 -2
package/src/web/search/providers/utils.ts +63 -1
package/src/web/search/providers/zai.ts +4 -2

package/src/modes/interactive-mode.ts CHANGED Viewed

@@ -98,6 +98,7 @@ import {
 } from "./loop-limit";
 import { OAuthManualInputManager } from "./oauth-manual-input";
 import { SessionObserverRegistry } from "./session-observer-registry";
+import { type ShimmerPalette, shimmerSegments, shimmerText } from "./theme/shimmer";
 import type { Theme } from "./theme/theme";
 import {
 	getEditorTheme,
@@ -110,6 +111,20 @@ import {
 import type { CompactionQueuedMessage, InteractiveModeContext, SubmittedUserInput, TodoItem, TodoPhase } from "./types";
 import { UiHelpers } from "./utils/ui-helpers";
+// Trailing hint text; renderWorkingMessage styles it separately when a message ends with it.
+const WORKING_INTERRUPT_HINT = " (esc to interrupt)";
+// Palette applied to the hint segment: crest tier is "borderAccent" and no bold flag is set.
+const HINT_SHIMMER_PALETTE: ShimmerPalette = {
+	low: "dim",
+	mid: "muted",
+	high: "borderAccent",
+};
+/**
+ * Apply the shimmer effect to a working message.
+ * A message that ends with the interrupt hint is split into two segments so the
+ * hint is drawn with its own palette; any other message is shimmered as one run.
+ */
+function renderWorkingMessage(message: string): string {
+	const endsWithHint = message.endsWith(WORKING_INTERRUPT_HINT);
+	if (endsWithHint) {
+		const leading = message.slice(0, message.length - WORKING_INTERRUPT_HINT.length);
+		const hintSegment = { text: WORKING_INTERRUPT_HINT, palette: HINT_SHIMMER_PALETTE };
+		return shimmerSegments([{ text: leading }, hintSegment], theme);
+	}
+	return shimmerText(message, theme);
+}
 const EDITOR_MAX_HEIGHT_MIN = 6;
 const EDITOR_MAX_HEIGHT_MAX = 18;
 const EDITOR_RESERVED_ROWS = 12;
@@ -1063,6 +1078,12 @@ export class InteractiveMode implements InteractiveModeContext {
 			return;
 		}
 		if (event.type === "goal_updated") {
+			// Handle drop before clearing goalModeEnabled so #exitGoalMode can
+			// still restore the previous tool set while the flag is true.
+			if (event.state?.goal?.status === "dropped") {
+				await this.#exitGoalMode({ reason: "dropped", silent: true });
+				return;
+			}
 			this.goalModeEnabled = event.state?.enabled === true;
 			this.goalModePaused = event.state?.enabled !== true && event.state?.goal?.status === "paused";
 			if (!event.state?.enabled) {
@@ -1150,6 +1171,13 @@ export class InteractiveMode implements InteractiveModeContext {
 			const restored = await this.session.goalRuntime.onThreadResumed();
 			this.goalModeEnabled = restored?.enabled === true;
 			this.goalModePaused = restored?.enabled !== true && restored?.goal.status === "paused";
+			// sdk.ts excludes "goal" from the initial active tool set unconditionally.
+			// Re-add it now so the agent can call resume, complete, or drop on this goal.
+			if (restored?.goal) {
+				const previousTools = this.session.getActiveToolNames().filter(name => name !== "goal");
+				this.#goalModePreviousTools = previousTools;
+				await this.session.setActiveToolsByName([...new Set([...previousTools, "goal"])]);
+			}
 			this.#updateGoalModeStatus();
 			return;
 		}
@@ -2167,7 +2195,7 @@ export class InteractiveMode implements InteractiveModeContext {
 			this.loadingAnimation = new Loader(
 				this.ui,
 				spinner => theme.fg("accent", spinner),
-				text => theme.fg("muted", text),
+				renderWorkingMessage,
 				this.#defaultWorkingMessage,
 				getSymbolTheme().spinnerFrames,
 			);

package/src/modes/theme/shimmer.ts ADDED Viewed

@@ -0,0 +1,79 @@
+import type { Theme, ThemeColor } from "./theme";
+// Extra positions added before and after the text, so the band starts and ends outside the visible characters.
+const SHIMMER_PADDING = 10;
+// Duration of one full sweep across the padded text, in milliseconds.
+const SHIMMER_SWEEP_MS = 2000;
+// Characters farther than this many positions from the band center get zero intensity.
+const SHIMMER_BAND_HALF_WIDTH = 5;
+// The two Theme methods the shimmer helpers call.
+type ShimmerTheme = Pick<Theme, "bold" | "fg">;
+/**
+ * Three-tier color stack a shimmer character cycles through as the band sweeps.
+ * The intensity cut-offs quoted on each field are the ones styleShimmerChar applies.
+ */
+export interface ShimmerPalette {
+	/** Color for chars outside / at the edge of the band (intensity < 0.2). */
+	low: ThemeColor;
+	/** Color for chars approaching the crest (0.2 <= intensity < 0.6). */
+	mid: ThemeColor;
+	/** Color at the band's crest (intensity >= 0.6). */
+	high: ThemeColor;
+	/** Whether to bold the crest tier. Default `false`. */
+	bold?: boolean;
+}
+/** One run of text that shares a palette inside a larger shimmer sweep. */
+export interface ShimmerSegment {
+	/** Characters of this run; shimmerSegments splits it with the spread operator before styling. */
+	text: string;
+	/** Palette for this run; shimmerSegments falls back to DEFAULT_SHIMMER_PALETTE when omitted. */
+	palette?: ShimmerPalette;
+}
+/** Palette used for any segment that does not supply its own; the crest tier is bolded. */
+export const DEFAULT_SHIMMER_PALETTE: ShimmerPalette = {
+	low: "dim",
+	mid: "muted",
+	high: "accent",
+	bold: true,
+};
+/**
+ * Intensity of the shimmer band at one character position.
+ *
+ * The band center moves across `length + 2 * SHIMMER_PADDING` positions once per
+ * SHIMMER_SWEEP_MS. Characters more than SHIMMER_BAND_HALF_WIDTH positions from the
+ * center get 0; inside the band the value follows a raised cosine that peaks at 1.
+ *
+ * @param index  Zero-based character position within the whole string.
+ * @param length Total number of characters being swept.
+ * @param now    Timestamp in milliseconds that selects the band position. Defaults to the current time.
+ * @returns A value in `[0, 1]`.
+ */
+function shimmerIntensity(index: number, length: number, now: number = Date.now()): number {
+	const period = length + SHIMMER_PADDING * 2;
+	const pos = Math.floor(((now % SHIMMER_SWEEP_MS) / SHIMMER_SWEEP_MS) * period);
+	const dist = Math.abs(index + SHIMMER_PADDING - pos);
+	if (dist > SHIMMER_BAND_HALF_WIDTH) return 0;
+	const x = Math.PI * (dist / SHIMMER_BAND_HALF_WIDTH);
+	return 0.5 * (1 + Math.cos(x));
+}
+/**
+ * Color one character according to its intensity tier.
+ * Below 0.2 uses `palette.low`, below 0.6 uses `palette.mid`, otherwise `palette.high`,
+ * which is additionally bolded when `palette.bold` is truthy.
+ */
+function styleShimmerChar(ch: string, intensity: number, theme: ShimmerTheme, palette: ShimmerPalette): string {
+	if (intensity < 0.2) return theme.fg(palette.low, ch);
+	if (intensity < 0.6) return theme.fg(palette.mid, ch);
+	const styled = theme.fg(palette.high, ch);
+	return palette.bold ? theme.bold(styled) : styled;
+}
+/**
+ * Apply a shimmer sweep across one or more segments, treating them as a single
+ * continuous string for band positioning. Each segment can supply its own
+ * palette so the gradient stays in lockstep while the colors differ.
+ *
+ * @param segments Runs of text, styled in order; a segment without a palette uses DEFAULT_SHIMMER_PALETTE.
+ * @param theme    Provider of the `fg` and `bold` styling functions.
+ * @param now      Timestamp in milliseconds for this frame. It is read once per call so every
+ *                 character is placed against the same band position. Defaults to the current time.
+ * @returns The styled characters joined into one string, or "" when all segments are empty.
+ */
+export function shimmerSegments(
+	segments: readonly ShimmerSegment[],
+	theme: ShimmerTheme,
+	now: number = Date.now(),
+): string {
+	let total = 0;
+	const expanded: Array<{ chars: string[]; palette: ShimmerPalette }> = [];
+	for (const seg of segments) {
+		// Spread splits by code point, so a surrogate pair counts as one character.
+		const chars = [...seg.text];
+		total += chars.length;
+		expanded.push({ chars, palette: seg.palette ?? DEFAULT_SHIMMER_PALETTE });
+	}
+	if (total === 0) return "";
+	const out: string[] = [];
+	let index = 0;
+	for (const { chars, palette } of expanded) {
+		for (const ch of chars) {
+			// Pass the single frame timestamp instead of re-reading the clock per character.
+			out.push(styleShimmerChar(ch, shimmerIntensity(index, total, now), theme, palette));
+			index++;
+		}
+	}
+	return out.join("");
+}
+/** Shimmer a single string by wrapping it in one segment; `palette` is optional. */
+export function shimmerText(text: string, theme: ShimmerTheme, palette?: ShimmerPalette): string {
+	const onlySegment: ShimmerSegment = { text, palette };
+	return shimmerSegments([onlySegment], theme);
+}

package/src/prompts/agents/oracle.md CHANGED Viewed

@@ -1,19 +1,17 @@
 ---
 name: oracle
-description: Deep reasoning advisor for debugging dead ends, architecture decisions, and second opinions. Read-only.
+description: Wise senior engineer to consult or delegate work to — debugging, architecture, second opinions, and hands-on implementation when asked.
 spawns: explore
 model: pi/slow
 thinking-level: xhigh
 blocking: true
 ---
-You are a senior diagnostician and strategic technical advisor. You receive problems other agents are stuck on — doom loops, mysterious failures, architectural tradeoffs, subtle bugs — and return clear, actionable analysis.
+You are the team's wise senior engineer — someone with deep judgment that other agents consult when they are stuck, uncertain, or need a second opinion. You also take direct delegation: if the caller hands you work, you do it, including reads, writes, edits, and running commands.
-You diagnose, explain, and recommend. You do not implement. Others act on your findings.
-<critical>
-You MUST operate as read-only. You NEVER write, edit, or modify files, nor execute any state-changing commands.
-</critical>
+You diagnose, decide, and execute. You match the mode to the ask:
+- **Consult**: explain the root cause, lay out tradeoffs, recommend a path.
+- **Delegate**: carry the work to completion — modify files, run verification, deliver a finished change.
 <directives>
 - You MUST reason from first principles. The caller already tried the obvious.
@@ -23,6 +21,7 @@ You MUST operate as read-only. You NEVER write, edit, or modify files, nor execu
 - You SHOULD consider at least two hypotheses before converging on one.
 - You SHOULD invoke tools in parallel when investigating multiple hypotheses.
 - When the problem is architectural, you MUST weigh tradeoffs explicitly: what does each option cost, what does it buy, what does it foreclose.
+- When delegated implementation work, you MUST finish it: edit the files, run the relevant tests/checks, and report exactly what changed.
 </directives>
 <decision-framework>
@@ -35,22 +34,22 @@ Apply pragmatic minimalism:
 </decision-framework>
 <procedure>
-1. Read the problem statement carefully. Identify what was already tried and why it failed.
-2. Form 2-3 hypotheses for the root cause.
+1. Read the problem statement carefully. Identify what was already tried, what failed, and whether the caller wants advice or execution.
+2. Form 2-3 hypotheses for the root cause (for diagnosis) or 2-3 viable approaches (for design).
 3. Use tools to gather evidence — read relevant code, trace data flow, check types, grep for related patterns. Parallelize independent reads.
-4. Eliminate hypotheses based on evidence. Narrow to the most likely cause.
-5. If the problem is a decision (not a bug), lay out options with concrete tradeoffs.
-6. Deliver a clear verdict with supporting evidence.
+4. Eliminate hypotheses based on evidence. Narrow to the most likely cause or best approach.
+5. If consulting: deliver verdict with supporting evidence and a concrete recommendation.
+6. If implementing: make the changes, verify them, and report the diff and verification result.
 </procedure>
 <scope-discipline>
-- Recommend ONLY what was asked. No unsolicited improvements.
+- Do ONLY what was asked. No unsolicited refactors or improvements.
 - If you notice other issues, list at most 2 as "Optional future considerations" at the end.
 - You NEVER expand the problem surface beyond the original request.
 - Exhaust provided context before reaching for tools. External lookups fill genuine gaps, not curiosity.
 </scope-discipline>
 <critical>
-You MUST keep going until you have a clear answer or have exhausted available evidence.
-Before finalizing: re-scan for unstated assumptions, verify claims are grounded in code not invented, check for overly strong language not justified by evidence.
-This matters. The caller is stuck. Get it right.
+You MUST keep going until the problem is solved or the work is finished. Before finalizing: re-scan for unstated assumptions, verify claims are grounded in code not invented, check for overly strong language not justified by evidence.
+The caller came to you because they trust your judgment. Get it right.
 </critical>

package/src/prompts/tools/goal.md CHANGED Viewed

@@ -1,13 +1,18 @@
 Manage the active goal-mode objective.
 Use a single `op` field:
-- `create` starts a goal. Requires `objective`; optional `token_budget` must be positive. Use only when no goal exists.
-- `get` returns the current goal and remaining token budget.
+- `create` starts a goal. Requires `objective`; optional `token_budget` must be positive. Use only when no goal exists and no goal is paused.
+- `get` returns the current goal (active or paused) and remaining token budget.
+- `resume` re-activates a paused goal so work can continue.
 - `complete` marks the goal complete after you have verified every deliverable against current evidence.
+- `drop` discards the current goal without completing it.
 Examples:
 - `goal({"op":"create","objective":"Implement feature X","token_budget":50000})`
 - `goal({"op":"get"})`
+- `goal({"op":"resume"})`
 - `goal({"op":"complete"})`
+- `goal({"op":"drop"})`
 Do not call `complete` because a budget is low or a turn is ending. Call it only when the goal is actually done and verified.
+If `get` shows a paused goal, call `resume` before continuing work on it.

package/src/session/agent-session.ts CHANGED Viewed

@@ -440,11 +440,6 @@ function formatRetryFallbackBaseSelector(selector: RetryFallbackSelector): strin
 	return `${selector.provider}/${selector.id}`;
 }
-/** Composite key for auto-clear timers, keyed by phase name + task content. */
-function todoClearKey(phaseName: string, taskContent: string): string {
-	return `${phaseName}\u0000${taskContent}`;
-}
 const IRC_REPLY_MAX_BYTES = 4096;
 /**
@@ -796,7 +791,6 @@ export class AgentSession {
 	// Todo completion reminder state
 	#todoReminderCount = 0;
 	#todoPhases: TodoPhase[] = [];
-	#todoClearTimers = new Map<string, Timer>();
 	#toolChoiceQueue = new ToolChoiceQueue();
 	// Bash execution state
@@ -2734,7 +2728,6 @@ export class AgentSession {
 			logger.warn("Failed to emit session_shutdown event", { error: String(error) });
 		}
 		await this.#cancelPostPromptTasks();
-		this.#clearTodoClearTimers();
 		// Cancel jobs this agent registered so a subagent's teardown doesn't
 		// leak its background bash/task work into the parent's manager. Only
 		// the session that owns the manager goes on to dispose it (which itself
@@ -4628,13 +4621,12 @@ export class AgentSession {
 	setTodoPhases(phases: TodoPhase[]): void {
 		this.#todoPhases = this.#cloneTodoPhases(phases);
-		this.#scheduleTodoAutoClear(phases);
 	}
 	#syncTodoPhasesFromBranch(): void {
 		const phases = getLatestTodoPhasesFromEntries(this.sessionManager.getBranch());
 		// Strip completed/abandoned tasks — they were done in a previous run,
-		// so the auto-clear grace period has already elapsed.
+		// so they have no bearing on progress tracking for the new turn.
 		for (const phase of phases) {
 			phase.tasks = phase.tasks.filter(t => t.status !== "completed" && t.status !== "abandoned");
 		}
@@ -4652,72 +4644,11 @@ export class AgentSession {
 		}));
 	}
-	/** Schedule auto-removal of completed/abandoned tasks after a delay. */
-	#scheduleTodoAutoClear(phases: TodoPhase[]): void {
-		// Default bumped from 60s to 30 min: the prior 60s splice mutated canonical
-		// state mid-turn, so the model observed phase totals shrinking ("6 → 5")
-		// between tool calls. Surviving the turn matches user expectations; a
-		// render-time filter in the UI consumer would be cleaner but lives in a
-		// different package and is out of scope for this fix.
-		const delaySec = this.settings.get("tasks.todoClearDelay") ?? 1800;
-		if (delaySec < 0) return; // "Never" — no auto-clear
-		const delayMs = delaySec * 1000;
-		const doneKeys = new Set<string>();
-		for (const phase of phases) {
-			for (const task of phase.tasks) {
-				if (task.status === "completed" || task.status === "abandoned") {
-					doneKeys.add(todoClearKey(phase.name, task.content));
-				}
-			}
-		}
-		// Cancel timers for tasks that are no longer done (e.g. status was reverted)
-		for (const [key, timer] of this.#todoClearTimers) {
-			if (!doneKeys.has(key)) {
-				clearTimeout(timer);
-				this.#todoClearTimers.delete(key);
-			}
-		}
-		// Schedule new timers for newly-done tasks
-		for (const key of doneKeys) {
-			if (this.#todoClearTimers.has(key)) continue;
-			if (delayMs === 0) {
-				// Instant — run synchronously on next microtask to batch removals
-				const timer = setTimeout(() => this.#runTodoAutoClear(key), 0);
-				this.#todoClearTimers.set(key, timer);
-			} else {
-				const timer = setTimeout(() => this.#runTodoAutoClear(key), delayMs);
-				this.#todoClearTimers.set(key, timer);
-			}
-		}
-	}
-	/** Remove a single completed task and notify the UI. */
-	#runTodoAutoClear(key: string): void {
-		this.#todoClearTimers.delete(key);
-		let removed = false;
-		for (const phase of this.#todoPhases) {
-			const idx = phase.tasks.findIndex(t => todoClearKey(phase.name, t.content) === key);
-			if (idx !== -1 && (phase.tasks[idx].status === "completed" || phase.tasks[idx].status === "abandoned")) {
-				phase.tasks.splice(idx, 1);
-				removed = true;
-				break;
-			}
-		}
-		if (!removed) return;
-		// Remove empty phases
-		this.#todoPhases = this.#todoPhases.filter(p => p.tasks.length > 0);
-		this.#emit({ type: "todo_auto_clear" });
-	}
-	#clearTodoClearTimers(): void {
-		for (const timer of this.#todoClearTimers.values()) {
-			clearTimeout(timer);
-		}
-		this.#todoClearTimers.clear();
-	}
+	// Auto-clear of completed/abandoned tasks was removed: the timer-driven
+	// splice mutated canonical `#todoPhases` between tool calls, so the model
+	// observed phase totals shrinking ("5 → 4") after marking tasks done. The
+	// `tasks.todoClearDelay` setting is now inert; completed tasks survive
+	// until the next explicit `todo_write` call removes them via `rm`/`drop`.
 	/**
 	 * Abort current operation and wait for agent to become idle.
@@ -6240,6 +6171,12 @@ export class AgentSession {
 		};
 		const currentModel = this.model;
+		// Prefer the active session's model: it's what the user is actively using,
+		// and routing compaction to a different provider (e.g. an OpenAI default
+		// model while the chat is on Anthropic) changes provider-specific behavior
+		// like remote compaction endpoints. Role-based candidates only kick in
+		// as auth fallbacks when the current model has no usable credentials.
+		addCandidate(currentModel);
 		for (const role of MODEL_ROLE_IDS) {
 			addCandidate(this.#resolveRoleModelFull(role, availableModels, currentModel).model);
 		}

package/src/slash-commands/helpers/format.ts CHANGED Viewed

@@ -1,3 +1,6 @@
+import { shimmerText } from "../../modes/theme/shimmer";
+import { theme as currentTheme, type Theme } from "../../modes/theme/theme";
 /** Format a millisecond duration as a coarse-grained human label. */
 export function formatDuration(ms: number): string {
 	const seconds = Math.max(0, Math.round(ms / 1000));
@@ -10,14 +13,31 @@ export function formatDuration(ms: number): string {
 	return `${days}d`;
 }
+/** The two Theme methods the progress bar rendering uses. */
+type ProgressBarTheme = Pick<Theme, "bold" | "fg">;
+/** Pass-through theme that returns text unchanged; the last fallback in resolveProgressBarTheme. */
+const unstyledProgressBarTheme: ProgressBarTheme = {
+	fg(_color, text) {
+		return text;
+	},
+	bold(text) {
+		return text;
+	},
+};
+/**
+ * Choose the theme for progress-bar rendering: the caller's theme if given,
+ * otherwise the module-level current theme, otherwise the unstyled pass-through.
+ */
+function resolveProgressBarTheme(uiTheme: ProgressBarTheme | undefined): ProgressBarTheme {
+	if (uiTheme != null) return uiTheme;
+	if (currentTheme != null) return currentTheme;
+	return unstyledProgressBarTheme;
+}
 /**
  * Render an ASCII progress bar with a trailing percent label.
  * `fraction` is clamped to `[0, 1]`. `undefined` renders a dotted placeholder.
  */
-export function renderAsciiBar(fraction: number | undefined, width = 24): string {
-	if (fraction === undefined) return `[${"·".repeat(width)}]`;
+export function renderAsciiBar(fraction: number | undefined, width = 24, uiTheme?: ProgressBarTheme): string {
+	const progressBarTheme = resolveProgressBarTheme(uiTheme);
+	if (fraction === undefined) return `[${shimmerText("·".repeat(width), progressBarTheme)}]`;
 	const clamped = Math.min(Math.max(fraction, 0), 1);
 	const filled = Math.round(clamped * width);
 	const pct = Math.round(clamped * 100);
-	return `[${"█".repeat(filled)}${"░".repeat(Math.max(0, width - filled))}] ${pct}%`;
+	const bar = `${"█".repeat(filled)}${"░".repeat(Math.max(0, width - filled))}`;
+	return `[${shimmerText(bar, progressBarTheme)}] ${pct}%`;
 }

package/src/task/executor.ts CHANGED Viewed

@@ -7,7 +7,7 @@
 import path from "node:path";
 import type { AgentEvent, AgentIdentity, AgentTelemetryConfig, ThinkingLevel } from "@oh-my-pi/pi-agent-core";
 import { recordHandoff, resolveTelemetry } from "@oh-my-pi/pi-agent-core";
-import { isJsonSchemaValueValid } from "@oh-my-pi/pi-ai/utils/schema";
+import { type JsonSchemaValidationIssue, validateJsonSchemaValue } from "@oh-my-pi/pi-ai/utils/schema";
 import { logger, prompt, untilAborted } from "@oh-my-pi/pi-utils";
 import { ModelRegistry } from "../config/model-registry";
 import { resolveModelOverrideWithAuthFallback } from "../config/model-resolver";
@@ -204,12 +204,59 @@ function parseStringifiedJson(value: unknown): unknown {
 	}
 }
-function buildOutputValidator(schema: unknown): { validate?: (value: unknown) => boolean; error?: string } {
+/** Validator produced by buildOutputValidator for a normalized output schema. */
+interface OutputValidator {
+	/** Checks a value; a failure carries a message and the required fields found missing. */
+	validate: (value: unknown) => { ok: true } | { ok: false; message: string; missingRequired: string[] };
+	/** String entries of the schema's top-level `required` array. */
+	requiredFields: string[];
+}
+function buildOutputValidator(schema: unknown): { validator?: OutputValidator; error?: string } {
 	const { normalized, error } = normalizeSchema(schema);
 	if (error) return { error };
 	if (normalized === undefined) return {};
 	const jsonSchema = jtdToJsonSchema(normalized);
-	return { validate: value => isJsonSchemaValueValid(jsonSchema, value) };
+	const required = extractRequiredFields(jsonSchema);
+	return {
+		validator: {
+			requiredFields: required,
+			validate: value => {
+				const result = validateJsonSchemaValue(jsonSchema, value);
+				if (result.success) return { ok: true };
+				const missing = computeMissingRequired(required, value);
+				const message = formatValidationIssue(result.issues[0]) ?? "schema validation failed";
+				return { ok: false, message, missingRequired: missing };
+			},
+		},
+	};
+}
+/**
+ * Read the top-level `required` list from a JSON Schema object.
+ * Returns only the string entries; a non-object schema or a non-array `required` yields [].
+ */
+function extractRequiredFields(jsonSchema: unknown): string[] {
+	if (typeof jsonSchema !== "object" || jsonSchema === null) return [];
+	const { required } = jsonSchema as { required?: unknown };
+	if (!Array.isArray(required)) return [];
+	const names: string[] = [];
+	for (const entry of required) {
+		if (typeof entry === "string") names.push(entry);
+	}
+	return names;
+}
+/**
+ * List the required keys that a value does not provide.
+ *
+ * - No required keys: returns [].
+ * - `null` / `undefined` value: every required key is missing.
+ * - Non-object or array value: returns [] (the mismatch is a type problem, not a missing key).
+ * - Object value: a key is missing when it is not an own property or its value is `undefined`.
+ *
+ * @param required Key names that must be present.
+ * @param value    Candidate data to inspect.
+ * @returns The missing key names, in the order they appear in `required`.
+ */
+function computeMissingRequired(required: readonly string[], value: unknown): string[] {
+	if (required.length === 0) return [];
+	if (value === null || value === undefined) return [...required];
+	if (typeof value !== "object" || Array.isArray(value)) return [];
+	const record = value as Record<string, unknown>;
+	// Own-property check: `in` also matches inherited names such as "toString" or
+	// "constructor", which would hide a genuinely missing field with that name.
+	const hasOwn = Object.prototype.hasOwnProperty;
+	return required.filter(key => !hasOwn.call(record, key) || record[key] === undefined);
+}
+/**
+ * Render a validation issue as "<location>: <message>".
+ * The location is the issue path joined with dots, or "(root)" for an empty path.
+ * Returns undefined when no issue is given.
+ */
+function formatValidationIssue(issue: JsonSchemaValidationIssue | undefined): string | undefined {
+	if (!issue) return undefined;
+	let location = "(root)";
+	if (issue.path.length > 0) {
+		location = issue.path.map(segment => String(segment)).join(".");
+	}
+	return `${location}: ${issue.message}`;
+}
+/**
+ * Produce a short, single-line preview of a value for error reports.
+ *
+ * The value is serialized with JSON.stringify (`undefined` becomes "null"). If that throws,
+ * the value is coerced with String(); if coercion throws as well, the Object.prototype.toString
+ * tag is used so that building the preview never aborts error reporting.
+ *
+ * @param value     Data to preview.
+ * @param maxLength Maximum number of UTF-16 code units kept before "…" is appended.
+ * @returns The serialized text, truncated with a trailing "…" when longer than `maxLength`.
+ */
+function previewOffendingData(value: unknown, maxLength = 500): string {
+	let serialized: string;
+	try {
+		serialized = JSON.stringify(value) ?? "null";
+	} catch {
+		try {
+			serialized = String(value);
+		} catch {
+			// String() throws for values with no usable primitive conversion (e.g. null-prototype objects).
+			serialized = Object.prototype.toString.call(value);
+		}
+	}
+	if (serialized.length <= maxLength) return serialized;
+	let end = maxLength;
+	// Do not cut between the two halves of a surrogate pair: drop a trailing high surrogate.
+	const lastUnit = serialized.charCodeAt(end - 1);
+	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
+	return `${serialized.slice(0, end)}…`;
 }
 function tryParseJsonOutput(text: string): unknown | undefined {
@@ -253,9 +300,9 @@ function resolveFallbackCompletion(rawOutput: string, outputSchema: unknown): {
 	if (parsed === undefined) return null;
 	const candidate = parseStringifiedJson(extractCompletionData(parsed));
 	if (candidate === undefined) return null;
-	const { validate, error } = buildOutputValidator(outputSchema);
+	const { validator, error } = buildOutputValidator(outputSchema);
 	if (error) return null;
-	if (validate && !validate(candidate)) return null;
+	if (validator && !validator.validate(candidate).ok) return null;
 	return { data: candidate };
 }
@@ -288,6 +335,31 @@ export const SUBAGENT_WARNING_NULL_YIELD = "SYSTEM WARNING: Subagent called yiel
 export const SUBAGENT_WARNING_MISSING_YIELD =
 	"SYSTEM WARNING: Subagent exited without calling yield tool after 3 reminders.";
+/**
+ * Convert a failed schema validation into subprocess result fields.
+ * The outcome always carries exit code 1 so callers treat it as a failure. `stderr` is a
+ * one-line headline (the missing-field list when there is one, otherwise the validator message),
+ * and `rawOutput` is a pretty-printed JSON report including a preview of the offending data.
+ */
+function buildSchemaViolationOutcome(
+	failure: { message: string; missingRequired: string[] },
+	data: unknown,
+): { rawOutput: string; stderr: string; exitCode: number } {
+	const missingFields = failure.missingRequired;
+	let headline: string;
+	if (missingFields.length > 0) {
+		headline = `schema_violation: missing required fields: ${missingFields.join(", ")}`;
+	} else {
+		headline = `schema_violation: ${failure.message}`;
+	}
+	// Built outside the try block, so a failure while previewing the data still propagates.
+	const report = {
+		error: "schema_violation",
+		message: failure.message,
+		missingRequired: missingFields,
+		data: previewOffendingData(data),
+	};
+	let serializedReport: string;
+	try {
+		serializedReport = JSON.stringify(report, null, 2);
+	} catch {
+		// Minimal report used if the full one cannot be serialized.
+		serializedReport = `{"error":"schema_violation","message":${JSON.stringify(headline)}}`;
+	}
+	return { rawOutput: serializedReport, stderr: headline, exitCode: 1 };
+}
 export function finalizeSubprocessOutput(args: FinalizeSubprocessOutputArgs): FinalizeSubprocessOutputResult {
 	let { rawOutput, exitCode, stderr } = args;
 	const { yieldItems, reportFindings, doneAborted, signalAborted, outputSchema } = args;
@@ -311,14 +383,29 @@ export function finalizeSubprocessOutput(args: FinalizeSubprocessOutputArgs): Fi
 				rawOutput = rawOutput ? `${SUBAGENT_WARNING_NULL_YIELD}\n\n${rawOutput}` : SUBAGENT_WARNING_NULL_YIELD;
 			} else {
 				const completeData = normalizeCompleteData(submitData, reportFindings);
-				try {
-					rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
-				} catch (err) {
-					const errorMessage = err instanceof Error ? err.message : String(err);
-					rawOutput = `{"error":"Failed to serialize yield data: ${errorMessage}"}`;
+				const { validator, error: schemaError } = buildOutputValidator(outputSchema);
+				if (schemaError) {
+					rawOutput = `{"error":"schema_violation","message":"invalid output schema: ${schemaError.replace(/"/g, '\\"')}"}`;
+					stderr = `schema_violation: invalid output schema: ${schemaError}`;
+					exitCode = 1;
+				} else {
+					const verdict = validator ? validator.validate(completeData) : { ok: true as const };
+					if (!verdict.ok) {
+						const outcome = buildSchemaViolationOutcome(verdict, completeData);
+						rawOutput = outcome.rawOutput;
+						stderr = outcome.stderr;
+						exitCode = outcome.exitCode;
+					} else {
+						try {
+							rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
+						} catch (err) {
+							const errorMessage = err instanceof Error ? err.message : String(err);
+							rawOutput = `{"error":"Failed to serialize yield data: ${errorMessage}"}`;
+						}
+						exitCode = 0;
+						stderr = "";
+					}
 				}
-				exitCode = 0;
-				stderr = "";
 			}
 		}
 	} else {
@@ -328,14 +415,23 @@ export function finalizeSubprocessOutput(args: FinalizeSubprocessOutputArgs): Fi
 		const fallback = allowFallback ? resolveFallbackCompletion(rawOutput, outputSchema) : null;
 		if (fallback) {
 			const completeData = normalizeCompleteData(fallback.data, reportFindings);
-			try {
-				rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
-			} catch (err) {
-				const errorMessage = err instanceof Error ? err.message : String(err);
-				rawOutput = `{"error":"Failed to serialize fallback completion: ${errorMessage}"}`;
+			const { validator } = buildOutputValidator(outputSchema);
+			const verdict = validator ? validator.validate(completeData) : { ok: true as const };
+			if (!verdict.ok) {
+				const outcome = buildSchemaViolationOutcome(verdict, completeData);
+				rawOutput = outcome.rawOutput;
+				stderr = outcome.stderr;
+				exitCode = outcome.exitCode;
+			} else {
+				try {
+					rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
+				} catch (err) {
+					const errorMessage = err instanceof Error ? err.message : String(err);
+					rawOutput = `{"error":"Failed to serialize fallback completion: ${errorMessage}"}`;
+				}
+				exitCode = 0;
+				stderr = "";
 			}
-			exitCode = 0;
-			stderr = "";
 		} else if (!hasOutputSchema && allowFallback && rawOutput.trim().length > 0) {
 			exitCode = 0;
 			stderr = "";