npm - @oh-my-pi/pi-coding-agent - Versions diffs - 16.0.2 → 16.0.3 - Mend

@oh-my-pi/pi-coding-agent 16.0.2 → 16.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88)

package/CHANGELOG.md +45 -0
package/README.md +0 -1
package/dist/cli.js +217 -276
package/dist/types/advisor/advise-tool.d.ts +30 -1
package/dist/types/commands/install.d.ts +1 -1
package/dist/types/config/model-resolver.d.ts +8 -0
package/dist/types/config/settings-schema.d.ts +0 -10
package/dist/types/eval/js/shared/runtime.d.ts +1 -0
package/dist/types/eval/js/worker-core.d.ts +1 -0
package/dist/types/extensibility/extensions/loader.d.ts +2 -2
package/dist/types/goals/runtime.d.ts +0 -1
package/dist/types/mcp/tool-bridge.d.ts +3 -0
package/dist/types/modes/components/custom-editor.d.ts +14 -4
package/dist/types/modes/controllers/command-controller.d.ts +1 -1
package/dist/types/modes/interactive-mode.d.ts +1 -1
package/dist/types/modes/setup-wizard/wizard-overlay.d.ts +3 -2
package/dist/types/modes/theme/mermaid-cache.d.ts +18 -1
package/dist/types/modes/types.d.ts +1 -1
package/dist/types/registry/agent-lifecycle.d.ts +16 -1
package/dist/types/sdk.d.ts +8 -0
package/dist/types/session/agent-session.d.ts +20 -8
package/dist/types/session/session-dump-format.d.ts +8 -2
package/dist/types/session/session-entries.d.ts +4 -0
package/dist/types/session/session-history-format.d.ts +2 -0
package/dist/types/session/session-manager.d.ts +22 -0
package/dist/types/stt/downloader.d.ts +5 -5
package/dist/types/task/executor.d.ts +6 -0
package/dist/types/task/persisted-revive.d.ts +36 -0
package/dist/types/tiny/models.d.ts +8 -0
package/dist/types/tools/builtin-names.d.ts +1 -1
package/dist/types/tools/index.d.ts +0 -1
package/package.json +12 -12
package/src/advisor/__tests__/advisor.test.ts +150 -50
package/src/advisor/advise-tool.ts +48 -6
package/src/advisor/runtime.ts +10 -3
package/src/auto-thinking/classifier.ts +12 -3
package/src/cli.ts +2 -2
package/src/commands/install.ts +3 -3
package/src/config/model-resolver.ts +28 -11
package/src/config/settings-schema.ts +0 -11
package/src/eval/agent-bridge.ts +2 -0
package/src/eval/js/context-manager.ts +2 -1
package/src/eval/js/shared/runtime.ts +189 -15
package/src/eval/js/worker-core.ts +19 -0
package/src/export/html/index.ts +1 -1
package/src/export/html/tool-views.generated.js +34 -35
package/src/extensibility/extensions/loader.ts +21 -9
package/src/goals/runtime.ts +1 -23
package/src/internal-urls/docs-index.generated.ts +4 -6
package/src/main.ts +20 -0
package/src/mcp/render.ts +11 -1
package/src/mcp/tool-bridge.ts +3 -0
package/src/modes/components/custom-editor.test.ts +63 -18
package/src/modes/components/custom-editor.ts +63 -15
package/src/modes/controllers/command-controller.ts +2 -2
package/src/modes/controllers/input-controller.ts +15 -9
package/src/modes/controllers/selector-controller.ts +13 -8
package/src/modes/controllers/tan-command-controller.ts +1 -0
package/src/modes/interactive-mode.ts +4 -2
package/src/modes/setup-wizard/wizard-overlay.ts +26 -4
package/src/modes/theme/mermaid-cache.ts +74 -11
package/src/modes/theme/theme.ts +14 -1
package/src/modes/types.ts +1 -1
package/src/prompts/system/system-prompt.md +2 -1
package/src/registry/agent-lifecycle.ts +60 -8
package/src/sdk.ts +20 -26
package/src/session/agent-session.ts +246 -78
package/src/session/artifacts.ts +19 -1
package/src/session/session-dump-format.ts +167 -23
package/src/session/session-entries.ts +4 -0
package/src/session/session-history-format.ts +37 -3
package/src/session/session-manager.ts +94 -4
package/src/slash-commands/builtin-registry.ts +4 -7
package/src/stt/asr-client.ts +6 -0
package/src/stt/downloader.ts +13 -6
package/src/stt/stt-controller.ts +52 -11
package/src/task/executor.ts +18 -2
package/src/task/index.ts +2 -2
package/src/task/persisted-revive.ts +128 -0
package/src/tiny/models.ts +10 -0
package/src/tiny/worker.ts +4 -3
package/src/tools/builtin-names.ts +0 -1
package/src/tools/index.ts +0 -4
package/src/tools/output-meta.ts +17 -3
package/src/utils/title-generator.ts +4 -4
package/dist/types/tools/render-mermaid.d.ts +0 -38
package/src/prompts/tools/render-mermaid.md +0 -9
package/src/tools/render-mermaid.ts +0 -69

package/src/advisor/__tests__/advisor.test.ts CHANGED Viewed

@@ -2,17 +2,18 @@ import { describe, expect, it, vi } from "bun:test";
 import type { AgentMessage } from "@oh-my-pi/pi-agent-core";
 import { createAdvisorMessageCard } from "../../modes/components/advisor-message";
 import { getThemeByName } from "../../modes/theme/theme";
-import { formatSessionDumpText } from "../../session/session-dump-format";
 import { formatSessionHistoryMarkdown } from "../../session/session-history-format";
 import { YieldQueue } from "../../session/yield-queue";
 import {
 	ADVISOR_READONLY_TOOL_NAMES,
 	AdviseTool,
 	type AdvisorAgent,
+	type AdvisorNote,
 	AdvisorRuntime,
 	type AdvisorRuntimeHost,
 	formatAdvisorBatchContent,
 	isInterruptingSeverity,
+	resolveAdvisorDeliveryChannel,
 } from "..";
 describe("advisor", () => {
@@ -52,7 +53,7 @@ describe("advisor", () => {
 				},
 				scheduleIdleFlush: () => {},
 			});
-			yq.register<{ note: string; severity?: "nit" | "concern" | "blocker" }>("advisor", {
+			yq.register<AdvisorNote>("advisor", {
 				build: entries =>
 					entries.length === 0
 						? null
@@ -62,9 +63,7 @@ describe("advisor", () => {
 								display: true,
 								attribution: "agent",
 								timestamp: Date.now(),
-								content:
-									"Advisor (a senior reviewer watching your work — weigh it, don't blindly obey):\n" +
-									entries.map(e => `- ${e.severity ? `[${e.severity}] ` : ""}${e.note}`).join("\n"),
+								content: formatAdvisorBatchContent(entries),
 							} as AgentMessage),
 			});
@@ -77,8 +76,9 @@ describe("advisor", () => {
 			expect(msg.role).toBe("custom");
 			expect(msg.customType).toBe("advisor");
 			expect(msg.display).toBe(true);
-			expect(msg.content).toContain("[blocker] second note");
-			expect(msg.content).toContain("- first note");
+			expect(msg.content).toContain("second note");
+			expect(msg.content).toContain('severity="blocker"');
+			expect(msg.content).toContain("first note");
 		});
 		it("skipIdleFlush prevents idle scheduling", () => {
@@ -124,15 +124,21 @@ describe("advisor", () => {
 			expect(isInterruptingSeverity(undefined)).toBe(false);
 		});
-		it("formats a batch with the advisor prefix and severity-tagged bullets", () => {
+		it("wraps each note in an advisory tag with severity as an attribute and escapes the body", () => {
 			const content = formatAdvisorBatchContent([
 				{ note: "first note" },
-				{ note: "second note", severity: "blocker" },
+				{ note: "second <note> & more", severity: "blocker" },
 			]);
-			const lines = content.split("\n");
-			expect(lines[0]).toContain("senior reviewer");
-			expect(lines[1]).toBe("- first note");
-			expect(lines[2]).toBe("- [blocker] second note");
+			// No-severity note: bare advisory tag (no severity attribute).
+			expect(content).toMatch(/<advisory guidance="[^"]*">\nfirst note\n<\/advisory>/);
+			// Severity rides an attribute, not an inline `[blocker]` tag or a bullet.
+			expect(content).toMatch(/<advisory severity="blocker" guidance="[^"]*">/);
+			expect(content).not.toContain("[blocker]");
+			expect(content).not.toContain("- first note");
+			// XML-significant characters in the body are escaped so they can't break the tag.
+			expect(content).toContain("second &lt;note&gt; &amp; more");
+			// Exactly one severity attribute (only the blocker note carries one).
+			expect(content.split('severity="').length - 1).toBe(1);
 		});
 	});
@@ -279,6 +285,56 @@ describe("advisor", () => {
 			expect(promptInputs[0]).not.toContain("note");
 		});
+		it("renders the watched delta with a heading, watched-role labels, and no inner ## headings", () => {
+			const promptInputs: string[] = [];
+			const agent = makeAgent(promptInputs);
+			const messages: AgentMessage[] = [
+				{ role: "user", content: "do the thing", timestamp: 1 } as AgentMessage,
+				{
+					role: "assistant",
+					content: [{ type: "toolCall", id: "a", name: "read", arguments: { path: "x.ts" } }],
+					timestamp: 2,
+				} as unknown as AgentMessage,
+				{
+					role: "toolResult",
+					toolCallId: "a",
+					toolName: "read",
+					content: [{ type: "text", text: "ok" }],
+					isError: false,
+					timestamp: 3,
+				} as AgentMessage,
+				{
+					role: "assistant",
+					content: [{ type: "toolCall", id: "b", name: "search", arguments: { pattern: "y" } }],
+					timestamp: 4,
+				} as unknown as AgentMessage,
+				{
+					role: "toolResult",
+					toolCallId: "b",
+					toolName: "search",
+					content: [{ type: "text", text: "ok" }],
+					isError: false,
+					timestamp: 5,
+				} as AgentMessage,
+			];
+			const host: AdvisorRuntimeHost = {
+				snapshotMessages: () => messages,
+				enqueueAdvice: () => {},
+			};
+			const runtime = new AdvisorRuntime(agent, host);
+			runtime.onTurnEnd();
+			expect(promptInputs).toHaveLength(1);
+			const prompt = promptInputs[0];
+			expect(prompt).toContain("### Session update");
+			expect(prompt).toContain("**user**:");
+			expect(prompt).toContain("**agent**:");
+			// Inner role headings would collide with the advisor's own turns in the dump.
+			expect(prompt).not.toContain("## assistant");
+			expect(prompt).not.toContain("## user");
+			// Consecutive assistant tool-call messages collapse under a single label.
+			expect(prompt.split("**agent**:").length - 1).toBe(1);
+		});
 		it("handles compaction shrink without prompting", () => {
 			const promptInputs: string[] = [];
 			const agent = makeAgent(promptInputs);
@@ -584,47 +640,91 @@ describe("advisor", () => {
 			expect(text).toContain("truncated.");
 		});
 	});
-	describe("formatSessionDumpText raw thinking", () => {
-		it("does not nest literal thinking envelopes", () => {
-			const md = formatSessionDumpText({
-				messages: [
-					{
-						role: "assistant",
-						content: [
-							{
-								type: "thinking",
-								thinking: "<thinking>\nCheck logs before accepting container health.\n</thinking>",
-							},
-						],
-						timestamp: Date.now(),
-					} as AgentMessage,
-				],
-				thinkingLevel: "high",
-			});
-			expect(md).toContain("Assistant: <thinking>\nCheck logs before accepting container health.\n</thinking>");
-			expect(md).not.toContain("<thinking>\n<thinking>");
+	// Regression: the advisor must not withhold interrupting advice from a turn
+	// that is actively streaming again after a user interrupt. The latch only
+	// guards auto-resume of a stopped/idle run; parking a note mid-stream stranded
+	// it (the agent never heard it) and dumped the backlog as one burst at the next
+	// user prompt. See the 7-concern same-instant burst in session 019ed1dd.
+	//
+	// `streaming` here means the live agent-CORE loop (agent.state.isStreaming) —
+	// NOT session `isStreaming`, which also counts `#promptInFlightCount` during
+	// post-turn unwind. Only a running core loop consumes a steer; in the unwind
+	// window (`streaming: false`) a suppressed note must `preserve`, never `steer`,
+	// or it strands and #drainStrandedQueuedMessages auto-resumes it. Do not swap
+	// the call site back to session `isStreaming`.
+	describe("resolveAdvisorDeliveryChannel", () => {
+		it("routes a non-interrupting nit to the aside queue regardless of state", () => {
+			expect(
+				resolveAdvisorDeliveryChannel({
+					severity: "nit",
+					autoResumeSuppressed: true,
+					streaming: true,
+					aborting: true,
+				}),
+			).toBe("aside");
+			expect(
+				resolveAdvisorDeliveryChannel({
+					severity: undefined,
+					autoResumeSuppressed: false,
+					streaming: false,
+					aborting: false,
+				}),
+			).toBe("aside");
 		});
-		it("unwraps sibling literal thinking envelopes independently", () => {
-			const md = formatSessionDumpText({
-				messages: [
-					{
-						role: "assistant",
-						content: [
-							{ type: "thinking", thinking: "<thinking>\nfirst\n</thinking>" },
-							{ type: "toolCall", id: "tc-1", name: "read", arguments: { path: "file.ts" } },
-							{ type: "thinking", thinking: "<thinking>\nsecond\n</thinking>" },
-						],
-						timestamp: Date.now(),
-					} as AgentMessage,
-				],
-				tools: [{ name: "read", description: "Read a file", parameters: { type: "object" } }],
-				thinkingLevel: "high",
-			});
+		it("steers concern/blocker when no user interrupt is in effect", () => {
+			for (const severity of ["concern", "blocker"] as const) {
+				for (const streaming of [true, false]) {
+					expect(
+						resolveAdvisorDeliveryChannel({
+							severity,
+							autoResumeSuppressed: false,
+							streaming,
+							aborting: false,
+						}),
+					).toBe("steer");
+				}
+			}
+		});
+		it("preserves an interrupting note while suppressed AND idle (no auto-resume of a stopped run)", () => {
+			for (const severity of ["concern", "blocker"] as const) {
+				expect(
+					resolveAdvisorDeliveryChannel({
+						severity,
+						autoResumeSuppressed: true,
+						streaming: false,
+						aborting: false,
+					}),
+				).toBe("preserve");
+			}
+		});
+		it("preserves an interrupting note while suppressed AND aborting, even though the turn still reports streaming", () => {
+			// Mid-abort teardown: steering would land after #extractQueuedAdvisorCards
+			// and could auto-resume on the stranded steer. Keep parking it.
+			expect(
+				resolveAdvisorDeliveryChannel({
+					severity: "blocker",
+					autoResumeSuppressed: true,
+					streaming: true,
+					aborting: true,
+				}),
+			).toBe("preserve");
+		});
-			expect(md).toContain("Assistant: <thinking>\nfirst\nsecond\n</thinking>");
-			expect(md).not.toContain("first\n</thinking>\n<thinking>\nsecond");
+		it("steers an interrupting note while suppressed once a turn is streaming again and not aborting (the fix)", () => {
+			for (const severity of ["concern", "blocker"] as const) {
+				expect(
+					resolveAdvisorDeliveryChannel({
+						severity,
+						autoResumeSuppressed: true,
+						streaming: true,
+						aborting: false,
+					}),
+				).toBe("steer");
+			}
 		});
 	});
 });

package/src/advisor/advise-tool.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@oh-my-pi/pi-agent-core";
+import { escapeXmlText } from "@oh-my-pi/pi-utils";
 import { z } from "zod/v4";
 import adviseDescription from "../prompts/advisor/advise-tool.md" with { type: "text" };
@@ -33,15 +34,26 @@ export interface AdvisorMessageDetails {
 }
 /**
- * Prose framing prepended to every batched advisor message. Kept here so the
- * non-interrupting YieldQueue dispatcher and the interrupting steer path build
- * byte-identical content.
+ * Behavioral framing for the watched agent — advice, not orders. Carried as a
+ * tag attribute (rather than a prose header) so the rendered agent-facing output
+ * stays a clean `<advisory>` block. The primary agent's system prompt never
+ * mentions advisories, so this is its only cue for how to treat them.
  */
-const ADVISOR_BATCH_PREFIX = "Advisor (a senior reviewer watching your work — weigh it, don't blindly obey):";
+const ADVISOR_GUIDANCE = "weigh, don't blindly obey";
-/** Render one advisor card body from a batch of notes (prefix + one bullet per note). */
+/**
+ * Render a batch of advisor notes as the agent-facing message body: one
+ * `<advisory>` element per note, severity as an attribute. Shared by the
+ * non-interrupting YieldQueue dispatcher and the interrupting steer path so both
+ * build byte-identical content.
+ */
 export function formatAdvisorBatchContent(notes: readonly AdvisorNote[]): string {
-	return `${ADVISOR_BATCH_PREFIX}\n${notes.map(n => `- ${n.severity ? `[${n.severity}] ` : ""}${n.note}`).join("\n")}`;
+	return notes
+		.map(n => {
+			const severity = n.severity ? ` severity="${n.severity}"` : "";
+			return `<advisory${severity} guidance="${ADVISOR_GUIDANCE}">\n${escapeXmlText(n.note)}\n</advisory>`;
+		})
+		.join("\n");
 }
 /**
@@ -54,6 +66,36 @@ export function isInterruptingSeverity(severity: AdvisorSeverity | undefined): b
 	return severity === "concern" || severity === "blocker";
 }
+/** How an advisor note is routed to the primary. */
+export type AdvisorDeliveryChannel = "aside" | "steer" | "preserve";
+/**
+ * Decide how one advisor note reaches the primary agent.
+ *
+ * - A non-interrupting `nit` always rides the non-interrupting aside queue.
+ * - An interrupting `concern`/`blocker` is normally steered into the agent: into
+ *   the live turn while one is streaming, or (when idle) a triggered turn so the
+ *   advice is acted on immediately.
+ * - After a deliberate user interrupt (`autoResumeSuppressed`) the advisor must
+ *   not auto-resume the stopped run. While the agent is idle — or still tearing
+ *   the interrupted turn down (`aborting`) — the note is preserved as a visible
+ *   card instead of restarting the run. But once a turn is actively streaming
+ *   again (a resume the user already drove), steering the note in does NOT
+ *   auto-resume anything, so it is delivered live. Parking it during an active
+ *   run instead strands it (it never reaches the running agent) and the withheld
+ *   notes dump as one burst at the next user prompt — the bug this guards.
+ */
+export function resolveAdvisorDeliveryChannel(opts: {
+	severity: AdvisorSeverity | undefined;
+	autoResumeSuppressed: boolean;
+	streaming: boolean;
+	aborting: boolean;
+}): AdvisorDeliveryChannel {
+	if (!isInterruptingSeverity(opts.severity)) return "aside";
+	if (opts.autoResumeSuppressed && (opts.aborting || !opts.streaming)) return "preserve";
+	return "steer";
+}
 /**
  * Side-effect-free investigation tools handed to the advisor agent so it can
  * inspect the workspace before weighing in. Names match the primary session's

package/src/advisor/runtime.ts CHANGED Viewed

@@ -157,8 +157,13 @@ export class AdvisorRuntime {
 			.filter(m => !(m.role === "custom" && (m as { customType?: string }).customType === "advisor"));
 		this.#lastCount = all.length;
 		if (delta.length === 0) return null;
-		const md = formatSessionHistoryMarkdown(delta, { includeThinking: true, includeToolIntent: true });
-		return md.trim() ? md : null;
+		const md = formatSessionHistoryMarkdown(delta, {
+			includeThinking: true,
+			includeToolIntent: true,
+			watchedRoles: true,
+		});
+		if (!md.trim()) return null;
+		return `### Session update\n\n${md}`;
 	}
 	#notifyWaiters(): void {
@@ -182,7 +187,9 @@ export class AdvisorRuntime {
 		try {
 			while (!this.disposed && this.#pending.length) {
 				const popped = this.#pending.splice(0);
-				const candidateBatch = popped.map(b => b.text).join("\n\n---\n\n");
+				// Each delta already opens with a `### Session update` heading, so
+				// join with a blank line rather than a `---` rule.
+				const candidateBatch = popped.map(b => b.text).join("\n\n");
 				const turnsCovered = popped.reduce((sum, b) => sum + b.turns, 0);
 				const incomingTokens = estimateTokens({
 					role: "user",

package/src/auto-thinking/classifier.ts CHANGED Viewed

@@ -22,7 +22,11 @@ import type { Settings } from "../config/settings";
 import difficultySystemPrompt from "../prompts/system/auto-thinking-difficulty.md" with { type: "text" };
 import difficultyLocalPrompt from "../prompts/system/auto-thinking-difficulty-local.md" with { type: "text" };
 import { clampAutoThinkingEffort } from "../thinking";
-import { isTinyMemoryLocalModelKey, ONLINE_AUTO_THINKING_MODEL_KEY } from "../tiny/models";
+import {
+	isTinyMemoryLocalModelKey,
+	isTinyMemoryReasoningModelKey,
+	ONLINE_AUTO_THINKING_MODEL_KEY,
+} from "../tiny/models";
 import { tinyModelClient } from "../tiny/title-client";
 const DIFFICULTY_SYSTEM_PROMPT = prompt.render(difficultySystemPrompt);
@@ -31,8 +35,10 @@ const DIFFICULTY_SYSTEM_PROMPT = prompt.render(difficultySystemPrompt);
 const MAX_INPUT_CHARS = 6000;
 const HEAD_CHARS = 4000;
 const TAIL_CHARS = 2000;
-/** The answer is a single word; keep budgets tiny for non-reasoning backends. */
+/** The online answer is a single word; keep budgets tiny for non-reasoning backends. */
 const ANSWER_MAX_TOKENS = 8;
+/** Local classifiers occasionally need more room for chat-template boilerplate. */
+const LOCAL_ANSWER_MAX_TOKENS = 16;
 /**
  * Reasoning backends ignore `disableReasoning` on some providers, so reserve
  * enough output room for the keyword to still land after unavoidable thinking.
@@ -107,9 +113,12 @@ async function classifyLocal(input: string, modelKey: string, deps: ClassifyDiff
 	if (!isTinyMemoryLocalModelKey(modelKey)) {
 		throw new Error(`auto-thinking: unsupported local classifier model: ${modelKey}`);
 	}
+	const maxTokens = isTinyMemoryReasoningModelKey(modelKey)
+		? Math.max(LOCAL_ANSWER_MAX_TOKENS, REASONING_SAFE_MAX_TOKENS)
+		: LOCAL_ANSWER_MAX_TOKENS;
 	const builtPrompt = prompt.render(difficultyLocalPrompt, { prompt: input });
 	const text = await tinyModelClient.complete(modelKey, builtPrompt, {
-		maxTokens: ANSWER_MAX_TOKENS,
+		maxTokens,
 		signal: deps.signal,
 	});
 	if (!text) {

package/src/cli.ts CHANGED Viewed

@@ -109,8 +109,8 @@ async function runWorkerEntrypoint(arg: string | undefined): Promise<boolean> {
 		// this dispatch completes — so anything the parent posted right after
 		// spawning (the smoke ping, the first parse request) would be dropped.
 		// Park early events and replay them once the module's handler is live.
-		// (The tab/eval workers are immune: `parentPort.on("message")` queues
-		// until a listener attaches.)
+		// Worker-thread entries using `parentPort` need the same sync-prefix
+		// buffering; the tab/eval cases install that inbox below before import.
 		const scope = globalThis as unknown as { onmessage: ((event: MessageEvent) => void) | null };
 		const pending: MessageEvent[] = [];
 		const buffer = (event: MessageEvent): void => {

package/src/commands/install.ts CHANGED Viewed

@@ -28,13 +28,13 @@ import { initTheme } from "../modes/theme/theme";
  * Heuristic used to decide whether `omp install <target>` should `link` a
  * local directory or `install` a remote spec. Exported for tests.
  */
-export function looksLikeLocalPath(target: string): boolean {
+export function looksLikeLocalPath(target: string, cwd?: string): boolean {
 	if (target.startsWith(".") || target.startsWith("/") || target.startsWith("~")) return true;
 	// Windows drive prefix (e.g. `C:\foo`).
 	if (/^[a-zA-Z]:[\\/]/.test(target)) return true;
-	// Bare names that happen to exist as a local directory.
+	// Bare names that happen to exist as a local directory (relative to `cwd`).
 	try {
-		return existsSync(path.resolve(target));
+		return existsSync(cwd ? path.resolve(cwd, target) : path.resolve(target));
 	} catch {
 		return false;
 	}

package/src/config/model-resolver.ts CHANGED Viewed

@@ -34,17 +34,36 @@ import { isAuthenticated, kNoAuth, type ModelRegistry } from "./model-registry";
 import { MODEL_ROLE_IDS, type ModelRole } from "./model-roles";
 import type { Settings } from "./settings";
+function isKnownProvider(provider: string): provider is KnownProvider {
+	return provider in DEFAULT_MODEL_PER_PROVIDER;
+}
 /**
- * Pick the first available model matching a known provider's default id
- * (catalog table order), falling back to the first available model.
+ * Pick the first provider-default model in availability order.
+ *
+ * If multiple providers expose that same default id, rank only that shared-id
+ * group by canonical provider priority so native/OAuth transports beat mirrors
+ * without changing unrelated provider fallback precedence.
  */
-function pickDefaultAvailableModel(availableModels: Model<Api>[]): Model<Api> | undefined {
-	for (const provider of Object.keys(DEFAULT_MODEL_PER_PROVIDER) as KnownProvider[]) {
-		const defaultId = DEFAULT_MODEL_PER_PROVIDER[provider];
-		const match = availableModels.find(m => m.provider === provider && m.id === defaultId);
-		if (match) return match;
-	}
-	return availableModels[0];
+export function pickDefaultAvailableModel(availableModels: Model<Api>[]): Model<Api> | undefined {
+	const firstDefault = availableModels.find(
+		model => isKnownProvider(model.provider) && DEFAULT_MODEL_PER_PROVIDER[model.provider] === model.id,
+	);
+	if (!firstDefault) return availableModels[0];
+	const providerPriority = buildModelProviderPriorityRank();
+	const sharedDefaultMatches = availableModels.filter(
+		model =>
+			model.id === firstDefault.id &&
+			isKnownProvider(model.provider) &&
+			DEFAULT_MODEL_PER_PROVIDER[model.provider] === model.id,
+	);
+	return [...sharedDefaultMatches].sort((a, b) => {
+		const aRank = providerPriority.get(a.provider.toLowerCase()) ?? Number.POSITIVE_INFINITY;
+		const bRank = providerPriority.get(b.provider.toLowerCase()) ?? Number.POSITIVE_INFINITY;
+		if (aRank !== bRank) return aRank - bRank;
+		return availableModels.indexOf(a) - availableModels.indexOf(b);
+	})[0];
 }
 export interface ScopedModel {
@@ -464,12 +483,10 @@ function matchModel(
 	context: ModelPreferenceContext,
 	options?: { modelRegistry?: CanonicalModelRegistry },
 ): Model<Api> | undefined {
-	// Explicit provider/model selectors always bypass canonical coalescing.
 	const exactRefMatch = findExactModelReferenceMatch(modelPattern, availableModels);
 	if (exactRefMatch) {
 		return exactRefMatch;
 	}
 	// Exact canonical ids coalesce provider variants before bare-id matching.
 	const exactCanonicalMatch = findExactCanonicalModelMatch(modelPattern, availableModels, options?.modelRegistry);
 	if (exactCanonicalMatch) {

package/src/config/settings-schema.ts CHANGED Viewed

@@ -3070,17 +3070,6 @@ export const SETTINGS_SCHEMA = {
 	// Optional tools
-	"renderMermaid.enabled": {
-		type: "boolean",
-		default: false,
-		ui: {
-			tab: "tools",
-			group: "Available Tools",
-			label: "Render Mermaid",
-			description: "Enable the render_mermaid tool for Mermaid-to-ASCII rendering",
-		},
-	},
 	"debug.enabled": {
 		type: "boolean",
 		default: true,

package/src/eval/agent-bridge.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import { resolveAgentModelPatterns } from "../config/model-resolver";
 import type { LocalProtocolOptions } from "../internal-urls";
 import { MCPManager } from "../mcp/manager";
 import subagentUserPromptTemplate from "../prompts/system/subagent-user-prompt.md" with { type: "text" };
+import { MAIN_AGENT_ID } from "../registry/agent-registry";
 import * as taskDiscovery from "../task/discovery";
 import * as taskExecutor from "../task/executor";
 import { AgentOutputManager } from "../task/output-manager";
@@ -288,6 +289,7 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
 			parentHindsightSessionState: options.session.getHindsightSessionState?.(),
 			parentMnemopiSessionState: options.session.getMnemopiSessionState?.(),
 			parentTelemetry: options.session.getTelemetry?.(),
+			parentAgentId: options.session.getAgentId?.() ?? MAIN_AGENT_ID,
 			// Deliberately omit parentEvalSessionId: the parent's Python kernel is
 			// blocked on this bridge call, so sharing the eval session would deadlock
 			// (subagent queues behind the parent's in-flight execution, parent waits

package/src/eval/js/context-manager.ts CHANGED Viewed

@@ -564,7 +564,7 @@ function spawnInlineWorker(): WorkerHandle {
 		},
 		close: () => {},
 	};
-	new WorkerCore(workerTransport);
+	const core = new WorkerCore(workerTransport);
 	return {
 		mode: "inline",
 		send: msg =>
@@ -600,6 +600,7 @@ function spawnInlineWorker(): WorkerHandle {
 		async terminate() {
 			hostListeners.clear();
 			workerListeners.clear();
+			core.dispose();
 		},
 	};
 }