npm - @oh-my-pi/pi-coding-agent - Versions diffs - 15.10.0 → 15.10.2 - Mend

@oh-my-pi/pi-coding-agent 15.10.0 → 15.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (238)

package/CHANGELOG.md +142 -1
package/dist/types/cli/dry-balance-cli.d.ts +15 -1
package/dist/types/cli/startup-cwd.d.ts +2 -0
package/dist/types/commands/launch.d.ts +3 -0
package/dist/types/commit/analysis/conventional.d.ts +2 -2
package/dist/types/commit/analysis/summary.d.ts +2 -2
package/dist/types/commit/changelog/generate.d.ts +2 -2
package/dist/types/commit/changelog/index.d.ts +2 -2
package/dist/types/commit/map-reduce/index.d.ts +3 -3
package/dist/types/commit/map-reduce/map-phase.d.ts +2 -2
package/dist/types/commit/map-reduce/reduce-phase.d.ts +2 -2
package/dist/types/commit/model-selection.d.ts +10 -4
package/dist/types/config/api-key-resolver.d.ts +34 -0
package/dist/types/config/keybindings.d.ts +2 -2
package/dist/types/config/model-provider-priority.d.ts +1 -0
package/dist/types/config/model-registry.d.ts +17 -1
package/dist/types/config/model-resolver.d.ts +4 -1
package/dist/types/config/settings-schema.d.ts +9 -0
package/dist/types/config/settings.d.ts +7 -2
package/dist/types/dap/config.d.ts +14 -1
package/dist/types/dap/types.d.ts +10 -0
package/dist/types/debug/report-bundle.d.ts +3 -0
package/dist/types/edit/file-snapshot-store.d.ts +18 -10
package/dist/types/eval/py/__tests__/prelude.test.d.ts +1 -0
package/dist/types/extensibility/extensions/types.d.ts +4 -1
package/dist/types/lsp/client.d.ts +10 -0
package/dist/types/lsp/utils.d.ts +3 -2
package/dist/types/main.d.ts +3 -9
package/dist/types/mcp/tool-bridge.d.ts +2 -0
package/dist/types/modes/components/chat-block.d.ts +64 -0
package/dist/types/modes/components/custom-editor.d.ts +4 -1
package/dist/types/modes/components/overlay-box.d.ts +17 -0
package/dist/types/modes/components/plan-review-overlay.d.ts +59 -0
package/dist/types/modes/components/plan-toc.d.ts +41 -0
package/dist/types/modes/components/read-tool-group.d.ts +2 -0
package/dist/types/modes/components/status-line.d.ts +2 -0
package/dist/types/modes/components/transcript-container.d.ts +11 -0
package/dist/types/modes/controllers/command-controller.d.ts +1 -0
package/dist/types/modes/controllers/event-controller.d.ts +17 -1
package/dist/types/modes/controllers/extension-ui-controller.d.ts +0 -1
package/dist/types/modes/controllers/input-controller.d.ts +1 -1
package/dist/types/modes/controllers/streaming-reveal.d.ts +22 -0
package/dist/types/modes/controllers/tan-command-controller.d.ts +6 -0
package/dist/types/modes/interactive-mode.d.ts +16 -5
package/dist/types/modes/magic-keywords.d.ts +1 -1
package/dist/types/modes/markdown-prose.d.ts +1 -1
package/dist/types/modes/theme/theme.d.ts +1 -1
package/dist/types/modes/types.d.ts +21 -5
package/dist/types/modes/utils/copy-targets.d.ts +21 -1
package/dist/types/modes/workflow.d.ts +3 -3
package/dist/types/plan-mode/approved-plan.d.ts +27 -8
package/dist/types/plan-mode/plan-protection.d.ts +4 -4
package/dist/types/sdk.d.ts +2 -0
package/dist/types/session/agent-session.d.ts +21 -0
package/dist/types/session/auth-storage.d.ts +1 -1
package/dist/types/session/messages.d.ts +12 -0
package/dist/types/session/session-manager.d.ts +8 -3
package/dist/types/slash-commands/types.d.ts +4 -6
package/dist/types/task/executor.d.ts +17 -0
package/dist/types/task/index.d.ts +1 -0
package/dist/types/task/render.d.ts +3 -2
package/dist/types/tools/archive-reader.d.ts +5 -0
package/dist/types/tools/ast-edit.d.ts +3 -0
package/dist/types/tools/ast-grep.d.ts +3 -0
package/dist/types/tools/bash.d.ts +1 -0
package/dist/types/tools/eval.d.ts +8 -0
package/dist/types/tools/find.d.ts +8 -4
package/dist/types/tools/gh-cache-invalidation.d.ts +6 -0
package/dist/types/tools/github-cache.d.ts +12 -0
package/dist/types/tools/grouped-file-output.d.ts +95 -12
package/dist/types/tools/memory-render.d.ts +4 -1
package/dist/types/tools/path-utils.d.ts +8 -0
package/dist/types/tools/plan-mode-guard.d.ts +8 -9
package/dist/types/tools/render-utils.d.ts +5 -9
package/dist/types/tools/search.d.ts +6 -2
package/dist/types/tools/sqlite-reader.d.ts +1 -0
package/dist/types/tools/todo.d.ts +3 -2
package/dist/types/tools/write.d.ts +3 -0
package/dist/types/tools/yield.d.ts +8 -0
package/dist/types/tui/output-block.d.ts +16 -4
package/dist/types/tui/status-line.d.ts +3 -0
package/dist/types/utils/enhanced-paste.d.ts +20 -0
package/dist/types/web/search/providers/kimi.d.ts +1 -1
package/package.json +9 -9
package/src/auto-thinking/classifier.ts +5 -1
package/src/cli/args.ts +3 -1
package/src/cli/dry-balance-cli.ts +54 -21
package/src/cli/gallery-cli.ts +4 -1
package/src/cli/gallery-fixtures/misc.ts +29 -0
package/src/cli/startup-cwd.ts +68 -0
package/src/commands/launch.ts +3 -0
package/src/commit/analysis/conventional.ts +2 -2
package/src/commit/analysis/summary.ts +2 -2
package/src/commit/changelog/generate.ts +2 -2
package/src/commit/changelog/index.ts +2 -2
package/src/commit/map-reduce/index.ts +3 -3
package/src/commit/map-reduce/map-phase.ts +2 -2
package/src/commit/map-reduce/reduce-phase.ts +2 -2
package/src/commit/model-selection.ts +36 -11
package/src/commit/pipeline.ts +4 -4
package/src/config/api-key-resolver.ts +58 -0
package/src/config/model-provider-priority.ts +55 -0
package/src/config/model-registry.ts +29 -24
package/src/config/model-resolver.ts +39 -7
package/src/config/settings-schema.ts +10 -0
package/src/config/settings.ts +106 -43
package/src/dap/config.ts +41 -2
package/src/dap/defaults.json +1 -0
package/src/dap/session.ts +1 -0
package/src/dap/types.ts +10 -0
package/src/debug/index.ts +47 -53
package/src/debug/raw-sse-buffer.ts +7 -4
package/src/debug/report-bundle.ts +9 -0
package/src/edit/file-snapshot-store.ts +33 -1
package/src/edit/hashline/filesystem.ts +2 -1
package/src/edit/renderer.ts +82 -78
package/src/eval/__tests__/llm-bridge.test.ts +110 -31
package/src/eval/js/context-manager.ts +32 -15
package/src/eval/llm-bridge.ts +22 -6
package/src/eval/py/__tests__/prelude.test.ts +19 -0
package/src/eval/py/executor.ts +23 -11
package/src/eval/py/prelude.py +1 -1
package/src/extensibility/extensions/types.ts +10 -1
package/src/goals/tools/goal-tool.ts +36 -26
package/src/internal-urls/docs-index.generated.ts +8 -8
package/src/lsp/client.ts +23 -11
package/src/lsp/config.ts +11 -1
package/src/lsp/index.ts +61 -9
package/src/lsp/utils.ts +3 -2
package/src/main.ts +100 -72
package/src/mcp/tool-bridge.ts +2 -0
package/src/memories/index.ts +14 -7
package/src/mnemopi/backend.ts +5 -1
package/src/modes/acp/acp-agent.ts +33 -26
package/src/modes/components/assistant-message.ts +2 -9
package/src/modes/components/chat-block.ts +111 -0
package/src/modes/components/copy-selector.ts +1 -44
package/src/modes/components/custom-editor.ts +164 -109
package/src/modes/components/custom-message.ts +1 -3
package/src/modes/components/execution-shared.ts +1 -2
package/src/modes/components/hook-message.ts +1 -3
package/src/modes/components/model-selector.ts +59 -13
package/src/modes/components/oauth-selector.ts +33 -7
package/src/modes/components/overlay-box.ts +108 -0
package/src/modes/components/plan-review-overlay.ts +799 -0
package/src/modes/components/plan-toc.ts +138 -0
package/src/modes/components/read-tool-group.ts +20 -4
package/src/modes/components/skill-message.ts +0 -1
package/src/modes/components/status-line.ts +19 -4
package/src/modes/components/tips.txt +2 -1
package/src/modes/components/todo-reminder.ts +0 -2
package/src/modes/components/tool-execution.ts +68 -88
package/src/modes/components/transcript-container.ts +84 -24
package/src/modes/components/user-message.ts +2 -3
package/src/modes/controllers/command-controller-shared.ts +7 -6
package/src/modes/controllers/command-controller.ts +57 -55
package/src/modes/controllers/event-controller.ts +67 -40
package/src/modes/controllers/extension-ui-controller.ts +10 -73
package/src/modes/controllers/input-controller.ts +170 -126
package/src/modes/controllers/mcp-command-controller.ts +69 -60
package/src/modes/controllers/selector-controller.ts +23 -25
package/src/modes/controllers/streaming-reveal.ts +212 -0
package/src/modes/controllers/tan-command-controller.ts +173 -0
package/src/modes/interactive-mode.ts +274 -112
package/src/modes/magic-keywords.ts +1 -1
package/src/modes/markdown-prose.ts +1 -1
package/src/modes/setup-wizard/wizard-overlay.ts +1 -1
package/src/modes/theme/shimmer.ts +20 -9
package/src/modes/theme/theme-schema.json +1 -1
package/src/modes/theme/theme.ts +8 -4
package/src/modes/types.ts +21 -7
package/src/modes/utils/copy-targets.ts +133 -27
package/src/modes/utils/ui-helpers.ts +44 -46
package/src/modes/workflow.ts +10 -10
package/src/plan-mode/approved-plan.ts +66 -43
package/src/plan-mode/plan-protection.ts +4 -4
package/src/prompts/system/background-tan-dispatch.md +8 -0
package/src/prompts/system/plan-mode-active.md +67 -58
package/src/prompts/system/plan-mode-approved.md +1 -1
package/src/prompts/system/workflow-notice.md +1 -1
package/src/prompts/tools/bash.md +9 -0
package/src/prompts/tools/browser.md +1 -1
package/src/prompts/tools/eval.md +2 -1
package/src/prompts/tools/read.md +2 -2
package/src/sdk.ts +37 -46
package/src/session/agent-session.ts +119 -18
package/src/session/auth-storage.ts +2 -0
package/src/session/messages.ts +26 -0
package/src/session/session-manager.ts +109 -28
package/src/slash-commands/builtin-registry.ts +36 -9
package/src/slash-commands/types.ts +4 -6
package/src/task/executor.ts +76 -38
package/src/task/index.ts +4 -0
package/src/task/render.ts +211 -147
package/src/tools/archive-reader.ts +64 -0
package/src/tools/ask.ts +119 -164
package/src/tools/ast-edit.ts +98 -71
package/src/tools/ast-grep.ts +37 -43
package/src/tools/bash.ts +57 -6
package/src/tools/browser/tab-supervisor.ts +13 -1
package/src/tools/browser/tab-worker.ts +33 -4
package/src/tools/debug.ts +20 -8
package/src/tools/eval.ts +13 -2
package/src/tools/fetch.ts +297 -7
package/src/tools/find.ts +51 -30
package/src/tools/gh-cache-invalidation.ts +200 -0
package/src/tools/gh-renderer.ts +81 -42
package/src/tools/github-cache.ts +25 -0
package/src/tools/grouped-file-output.ts +272 -48
package/src/tools/image-gen.ts +150 -103
package/src/tools/inspect-image-renderer.ts +63 -41
package/src/tools/inspect-image.ts +10 -3
package/src/tools/job.ts +3 -4
package/src/tools/memory-render.ts +4 -1
package/src/tools/path-utils.ts +28 -2
package/src/tools/plan-mode-guard.ts +66 -39
package/src/tools/read.ts +48 -28
package/src/tools/render-utils.ts +21 -37
package/src/tools/resolve.ts +14 -0
package/src/tools/search-tool-bm25.ts +36 -23
package/src/tools/search.ts +118 -81
package/src/tools/sqlite-reader.ts +9 -12
package/src/tools/todo.ts +118 -52
package/src/tools/write.ts +83 -64
package/src/tools/yield.ts +10 -1
package/src/tui/output-block.ts +60 -13
package/src/tui/status-line.ts +5 -1
package/src/utils/commit-message-generator.ts +11 -3
package/src/utils/enhanced-paste.ts +230 -0
package/src/utils/title-generator.ts +2 -1
package/src/web/search/providers/anthropic.ts +25 -19
package/src/web/search/providers/codex.ts +37 -8
package/src/web/search/providers/exa.ts +11 -3
package/src/web/search/providers/kimi.ts +28 -17
package/src/web/search/providers/parallel.ts +35 -24
package/src/web/search/providers/synthetic.ts +8 -6
package/src/web/search/providers/tavily.ts +9 -8
package/src/web/search/providers/zai.ts +8 -6

package/src/eval/__tests__/llm-bridge.test.ts CHANGED

@@ -4,6 +4,7 @@ import type { Api, AssistantMessage, Model } from "@oh-my-pi/pi-ai";
 import * as ai from "@oh-my-pi/pi-ai";
 import { Effort } from "@oh-my-pi/pi-ai";
 import { TempDir } from "@oh-my-pi/pi-utils";
+import { $ } from "bun";
 import type { ModelRegistry } from "../../config/model-registry";
 import { Settings } from "../../config/settings";
 import type { ToolSession } from "../../tools";
@@ -13,7 +14,7 @@ import { IdleTimeout } from "../idle-timeout";
 import { disposeAllVmContexts } from "../js/context-manager";
 import { executeJs } from "../js/executor";
 import { runEvalLlm } from "../llm-bridge";
-import { disposeAllKernelSessions, executePython } from "../py/executor";
+import { disposeAllKernelSessions, type PythonResult } from "../py/executor";
 function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
 	return {
@@ -57,6 +58,7 @@ function makeSession(opts: SessionOptions = {}): ToolSession {
 	const modelRegistry = {
 		getAvailable: () => opts.available ?? [SMOL, DEFAULT, SLOW],
 		getApiKey: async () => (opts.apiKey === undefined ? "test-key" : opts.apiKey),
+		resolver: () => async () => (opts.apiKey === undefined ? "test-key" : opts.apiKey),
 	} as unknown as ModelRegistry;
 	return {
 		settings,
@@ -96,6 +98,77 @@ function assistant(opts: {
 	};
 }
+async function runPythonLlmInSubprocess(options: { structured: boolean; tempDir: TempDir }): Promise<PythonResult> {
+	const repoRoot = path.resolve(import.meta.dir, "../../../..");
+	const scriptPath = path.join(options.tempDir.path(), "run-python-llm.ts");
+	const resultPath = path.join(options.tempDir.path(), "python-llm-result.json");
+	const aiPath = path.resolve(import.meta.dir, "../../../../ai/src/index.ts");
+	const executorPath = path.resolve(import.meta.dir, "../py/executor.ts");
+	const settingsPath = path.resolve(import.meta.dir, "../../config/settings.ts");
+	const code = options.structured
+		? 'import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))'
+		: 'print(llm("hi", model="smol"))';
+	const responseContent = options.structured
+		? '[{ type: "toolCall", id: "tc-1", name: "respond", arguments: { ok: true } }]'
+		: '[{ type: "text", text: "hello from python" }]';
+	await Bun.write(
+		scriptPath,
+		`
+import { vi } from "bun:test";
+import * as ai from ${JSON.stringify(aiPath)};
+import { executePython } from ${JSON.stringify(executorPath)};
+import { Settings } from ${JSON.stringify(settingsPath)};
+const SMOL = {
+	id: "smol",
+	name: "smol",
+	api: "openai-responses",
+	provider: "p",
+	baseUrl: "https://example.test/v1",
+	reasoning: false,
+	input: ["text"],
+	cost: { input: 1, output: 1, cacheRead: 0, cacheWrite: 1 },
+	contextWindow: 128000,
+	maxTokens: 4096,
+};
+const settings = Settings.isolated({ "async.enabled": false, "task.isolation.mode": "none" });
+settings.setModelRole("smol", "p/smol");
+settings.setModelRole("slow", "p/slow");
+const session = {
+	settings,
+	modelRegistry: {
+		getAvailable: () => [SMOL],
+		getApiKey: async () => "test-key",
+		resolver: () => async () => "test-key",
+	},
+	getActiveModelString: () => "p/smol",
+};
+vi.spyOn(ai, "completeSimple").mockResolvedValue({
+	role: "assistant",
+	api: "openai-responses",
+	provider: "p",
+	model: "smol",
+	stopReason: "stop",
+	content: ${responseContent},
+});
+const result = await executePython(${JSON.stringify(code)}, {
+	cwd: ${JSON.stringify(options.tempDir.path())},
+	sessionId: ${JSON.stringify(`py-llm:${options.structured ? "struct" : "plain"}`)},
+	sessionFile: ${JSON.stringify(path.join(options.tempDir.path(), "session.jsonl"))},
+	toolSession: session,
+	kernelMode: "per-call",
+});
+await Bun.write(${JSON.stringify(resultPath)}, JSON.stringify(result));
+process.exit(0);
+`,
+	);
+	const child = await $`bun ${scriptPath}`.cwd(repoRoot).quiet().nothrow();
+	const stdout = child.stdout.toString();
+	const stderr = child.stderr.toString();
+	if (child.exitCode !== 0) throw new Error(stderr || stdout || `Python llm subprocess exited with ${child.exitCode}`);
+	return (await Bun.file(resultPath).json()) as PythonResult;
+}
 describe("runEvalLlm", () => {
 	afterEach(() => {
 		vi.restoreAllMocks();
@@ -133,6 +206,26 @@ describe("runEvalLlm", () => {
 		expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
 	});
+	it("supplies a non-empty systemPrompt when system is omitted (codex 'Instructions are required' guard)", async () => {
+		// The openai-codex Responses transformer drops `instructions` when no
+		// system prompt is provided, and the remote endpoint then 400s with
+		// "Instructions are required". runEvalLlm must always carry a non-empty
+		// systemPrompt so `llm("…")` without a `system` argument works.
+		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
+		await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
+		const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
+		expect(ctx.systemPrompt).toBeDefined();
+		expect(ctx.systemPrompt?.length).toBeGreaterThan(0);
+		expect(ctx.systemPrompt?.[0]).toMatch(/.+/);
+	});
+	it("honors an explicit system prompt instead of overriding it", async () => {
+		const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
+		await runEvalLlm({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
+		const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
+		expect(ctx.systemPrompt).toEqual(["Be terse."]);
+	});
 	it("forces a respond tool call and returns its arguments in structured mode", async () => {
 		const spy = vi
 			.spyOn(ai, "completeSimple")
@@ -290,38 +383,24 @@ describe("llm() through eval runtimes", () => {
 	});
 	it("exposes llm() in the Python runtime", async () => {
-		using tempDir = TempDir.createSync("@omp-eval-llm-py-");
-		const sessionFile = path.join(tempDir.path(), "session.jsonl");
-		const sessionId = `py-llm:${crypto.randomUUID()}`;
-		vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from python" }));
-		const result = await executePython('print(llm("hi", model="smol"))', {
-			cwd: tempDir.path(),
-			sessionId,
-			sessionFile,
-			toolSession: makeSession(),
-		});
-		expect(result.exitCode).toBe(0);
-		expect(result.output.trim()).toBe("hello from python");
+		const tempDir = TempDir.createSync("@omp-eval-llm-py-");
+		try {
+			const result = await runPythonLlmInSubprocess({ structured: false, tempDir });
+			expect(result.exitCode).toBe(0);
+			expect(result.output.trim()).toBe("hello from python");
+		} finally {
+			tempDir.removeSync();
+		}
 	});
 	it("parses structured llm() output in the Python runtime", async () => {
-		using tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
-		const sessionFile = path.join(tempDir.path(), "session.jsonl");
-		const sessionId = `py-llm-struct:${crypto.randomUUID()}`;
-		vi.spyOn(ai, "completeSimple").mockResolvedValue(
-			assistant({ toolCall: { name: "respond", arguments: { ok: true } } }),
-		);
-		const result = await executePython('import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))', {
-			cwd: tempDir.path(),
-			sessionId,
-			sessionFile,
-			toolSession: makeSession(),
-		});
-		expect(result.exitCode).toBe(0);
-		expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
+		const tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
+		try {
+			const result = await runPythonLlmInSubprocess({ structured: true, tempDir });
+			expect(result.exitCode).toBe(0);
+			expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
+		} finally {
+			tempDir.removeSync();
+		}
 	});
 });

package/src/eval/js/context-manager.ts CHANGED

@@ -52,8 +52,14 @@ interface JsSession {
 const sessions = new Map<string, JsSession>();
 const startingSessions = new Map<string, Promise<JsSession>>();
-const resettingSessions = new Set<string>();
-const READY_TIMEOUT_MS_DEFAULT = 5_000;
+const resettingSessions = new Map<string, Promise<void>>();
+// Worker startup (module-graph import + WorkerCore construction) is infrastructure
+// cost, not user compute. Floor it independently of Bun's 5s default per-test timeout
+// so a slow cold-start under load isn't aborted mid-init — terminating a still-
+// initializing Bun worker triggers the same kind of terminate-race that motivates
+// avoiding `vm.runInContext` (see shared/indirect-eval.ts), here surfacing as a
+// SIGILL/SIGSEGV. Callers that pass a larger per-cell budget still dominate.
+const WORKER_INIT_TIMEOUT_MS = 15_000;
 export async function executeInVmContext(options: {
 	sessionKey: string;
@@ -67,17 +73,28 @@ export async function executeInVmContext(options: {
 	runState: VmRunState;
 }): Promise<{ value: unknown }> {
 	if (options.reset) {
-		if (resettingSessions.has(options.sessionKey)) {
-			throw new ToolError("JS context reset already in progress");
-		}
-		resettingSessions.add(options.sessionKey);
-		try {
-			await resetVmContext(options.sessionKey);
-		} finally {
-			resettingSessions.delete(options.sessionKey);
+		// Coalesce concurrent resets: an existing in-flight reset already
+		// produces a fresh context, so a follow-up `reset: true` cell should
+		// just wait for it rather than failing the user-visible call.
+		const inFlight = resettingSessions.get(options.sessionKey);
+		if (inFlight) await inFlight.catch(() => undefined);
+		else {
+			const resetPromise = resetVmContext(options.sessionKey);
+			resettingSessions.set(
+				options.sessionKey,
+				resetPromise.then(() => undefined),
+			);
+			try {
+				await resetPromise;
+			} finally {
+				resettingSessions.delete(options.sessionKey);
+			}
 		}
-	} else if (resettingSessions.has(options.sessionKey)) {
-		throw new ToolError("JS context reset in progress");
+	} else {
+		// Internal coordination: wait for any in-flight reset to settle and
+		// then run on the freshly-rebuilt context.
+		const inFlight = resettingSessions.get(options.sessionKey);
+		if (inFlight) await inFlight.catch(() => undefined);
 	}
 	const session = await acquireSession(
 		options.sessionKey,
@@ -191,9 +208,9 @@ async function acquireSession(sessionKey: string, snapshot: SessionSnapshot, tim
 			handleSessionMessage(session, msg);
 		});
 		try {
-			// Cold-start can exceed 5s on slow hosts. Let the caller's per-cell timeout dominate so
-			// users can grant more headroom when they raise `timeout` on a cell.
-			const readyTimeoutMs = Math.max(READY_TIMEOUT_MS_DEFAULT, timeoutMs ?? 0);
+			// Init headroom is the fixed infrastructure floor; the caller's per-cell timeout
+			// dominates when larger so users can grant more by raising `timeout` on a cell.
+			const readyTimeoutMs = Math.max(WORKER_INIT_TIMEOUT_MS, timeoutMs ?? 0);
 			await raceWithTimeout(readyPromise, readyTimeoutMs, "Timed out initializing JS eval worker");
 			worker.send({ type: "init", snapshot });
 			sessions.set(sessionKey, session);

package/src/eval/llm-bridge.ts CHANGED

@@ -15,7 +15,13 @@ import { instrumentedCompleteSimple, resolveTelemetry } from "@oh-my-pi/pi-agent
 import { type Api, Effort, getSupportedEfforts, type Model, type Tool } from "@oh-my-pi/pi-ai";
 import * as z from "zod/v4";
 import { extractTextContent, extractToolCall, parseJsonPayload } from "../commit/utils";
-import { expandRoleAlias, formatModelString, resolveModelFromString } from "../config/model-resolver";
+import {
+	expandRoleAlias,
+	formatModelString,
+	getModelMatchPreferences,
+	resolveModelFromString,
+} from "../config/model-resolver";
 import type { ToolSession } from "../tools";
 import { ToolError } from "../tools/tool-errors";
 import { withBridgeTimeoutPause } from "./bridge-timeout";
@@ -64,7 +70,7 @@ function resolveTierModel(tier: LlmTier, session: ToolSession): Model<Api> | und
 	const available = modelRegistry.getAvailable();
 	if (available.length === 0) return undefined;
-	const matchPreferences = { usageOrder: session.settings.getStorage()?.getModelUsageOrder() };
+	const matchPreferences = getModelMatchPreferences(session.settings);
 	const resolve = (pattern: string | undefined): Model<Api> | undefined => {
 		if (!pattern) return undefined;
 		const expanded = expandRoleAlias(pattern, session.settings);
@@ -112,8 +118,9 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
 		);
 	}
-	const apiKey = await options.session.modelRegistry?.getApiKey(model);
-	if (!apiKey) {
+	const registry = options.session.modelRegistry;
+	const apiKey = await registry?.getApiKey(model);
+	if (!registry || !apiKey) {
 		throw new ToolError(
 			`llm() has no API key for ${formatModelString(model)}. Configure credentials for this provider or choose another tier.`,
 		);
@@ -132,18 +139,27 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
 	const telemetry = resolveTelemetry(options.session.getTelemetry?.(), options.session.getSessionId?.() ?? undefined);
+	// Some providers (notably openai-codex) require a non-empty `instructions`
+	// field on every Responses request and 400 with "Instructions are required"
+	// when it is missing. Fall back to a minimal default so `llm(prompt)` works
+	// without forcing every caller to pass a `system` prompt.
+	const systemPrompt = system ? [system] : ["You are a helpful assistant."];
 	// Suspend eval timeout accounting while the model request owns control. The
 	// timeout clock restarts once the bridge returns to the cell runtime.
 	const response = await withBridgeTimeoutPause(options.emitStatus, () =>
 		instrumentedCompleteSimple(
 			model,
 			{
-				systemPrompt: system ? [system] : undefined,
+				systemPrompt,
 				messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }],
 				tools,
 			},
 			{
-				apiKey,
+				apiKey: registry.resolver(model.provider, {
+					sessionId: options.session.getSessionId?.() ?? undefined,
+					baseUrl: model.baseUrl,
+				}),
 				signal: options.signal,
 				reasoning: reasoningForTier(tier, model),
 				toolChoice: schema ? { type: "tool", name: STRUCTURED_TOOL_NAME } : undefined,

package/src/eval/py/__tests__/prelude.test.ts ADDED

@@ -0,0 +1,19 @@
+import { describe, expect, it } from "bun:test";
+import { PYTHON_PRELUDE } from "../prelude";
+describe("python prelude", () => {
+	it("exposes read(path, offset?, limit?) with positional optional args", () => {
+		// The eval docs advertise `read(path, offset?=1, limit?=None)`. A
+		// keyword-only signature (`def read(path, *, offset=1, limit=None)`)
+		// makes `read("file", 10)` raise `TypeError: read() takes 1 positional
+		// argument but 2 were given`, which agents in the wild repeatedly hit.
+		// Lock the contract so the helper accepts both positional and keyword
+		// forms.
+		const match = PYTHON_PRELUDE.match(/def\s+read\(([^)]+)\)/);
+		expect(match).not.toBeNull();
+		const signature = match?.[1] ?? "";
+		expect(signature).not.toContain("*,");
+		expect(signature).toContain("offset");
+		expect(signature).toContain("limit");
+	});
+});

package/src/eval/py/executor.ts CHANGED

@@ -126,7 +126,7 @@ interface PythonSession {
 const sessions = new Map<string, PythonSession>();
 const startingSessions = new Map<string, Promise<PythonSession>>();
-const resettingSessions = new Set<string>();
+const resettingSessions = new Map<string, Promise<void>>();
 function normalizeSessionCwd(cwd: string): string {
 	return path.resolve(cwd);
@@ -611,17 +611,29 @@ async function executeOnSession(code: string, cwd: string, options: PythonExecut
 		options.bridgeSessionId = sessionId;
 	}
 	if (options.reset) {
-		if (resettingSessions.has(sessionKey)) {
-			throw new Error("Python kernel reset already in progress");
-		}
-		resettingSessions.add(sessionKey);
-		try {
-			await resetSession(sessionKey);
-		} finally {
-			resettingSessions.delete(sessionKey);
+		// Coalesce concurrent resets: if another reset is in flight for this
+		// session, await it instead of throwing — the caller's intent ("start
+		// from a clean kernel") is satisfied once that reset settles.
+		const inFlight = resettingSessions.get(sessionKey);
+		if (inFlight) await inFlight.catch(() => undefined);
+		else {
+			const resetPromise = resetSession(sessionKey);
+			resettingSessions.set(
+				sessionKey,
+				resetPromise.then(() => undefined),
+			);
+			try {
+				await resetPromise;
+			} finally {
+				resettingSessions.delete(sessionKey);
+			}
 		}
-	} else if (resettingSessions.has(sessionKey)) {
-		throw new Error("Python kernel reset in progress");
+	} else {
+		// A reset already in progress is an internal coordination state, not a
+		// user-visible failure. Wait for it to clear, then proceed with the
+		// requested execution on the freshly-restarted kernel.
+		const inFlight = resettingSessions.get(sessionKey);
+		if (inFlight) await inFlight.catch(() => undefined);
 	}
 	const session = await acquireSession(sessionKey, sessionId, cwd, options);
 	if (options.signal?.aborted) {

package/src/eval/py/prelude.py CHANGED

@@ -53,7 +53,7 @@ if "__omp_prelude_loaded__" not in globals():
         _emit_status("env", key=key, value=val, action="get")
         return val
-    def read(path: str | Path, *, offset: int = 1, limit: int | None = None) -> str:
+    def read(path: str | Path, offset: int = 1, limit: int | None = None) -> str:
         """Read file contents. offset/limit are 1-indexed line numbers."""
         p = Path(path)
         data = p.read_text(encoding="utf-8")

package/src/extensibility/extensions/types.ts CHANGED

@@ -7,7 +7,13 @@
  * - Register commands, keyboard shortcuts, and CLI flags
  * - Interact with the user via UI primitives
  */
-import type { AgentMessage, AgentToolResult, AgentToolUpdateCallback, ThinkingLevel } from "@oh-my-pi/pi-agent-core";
+import type {
+	AgentMessage,
+	AgentToolResult,
+	AgentToolUpdateCallback,
+	ThinkingLevel,
+	ToolApproval,
+} from "@oh-my-pi/pi-agent-core";
 import type { CompactionResult } from "@oh-my-pi/pi-agent-core/compaction";
 import type {
 	Api,
@@ -392,6 +398,9 @@ export interface ToolDefinition<TParams extends TSchema = TSchema, TDetails = un
 	defaultInactive?: boolean;
 	/** If true, tool may stage deferred changes that require explicit resolve/discard. */
 	deferrable?: boolean;
+	/** Tool approval tier. Defaults to `"exec"` when omitted.
+	 *  `"read"`: read-only operations. `"write"`: mutations. `"exec"`: code execution. */
+	approval?: ToolApproval;
 	/** MCP server name for discovery/search metadata when this tool fronts an MCP server. */
 	mcpServerName?: string;
 	/** Original MCP tool name for discovery/search metadata. */

package/src/goals/tools/goal-tool.ts CHANGED

@@ -10,7 +10,7 @@ import { formatDuration } from "../../slash-commands/helpers/format";
 import type { ToolSession } from "../../tools";
 import { formatErrorDetail, TRUNCATE_LENGTHS } from "../../tools/render-utils";
 import { ToolError } from "../../tools/tool-errors";
-import { renderStatusLine, truncateToWidth } from "../../tui";
+import { framedBlock, renderStatusLine, truncateToWidth } from "../../tui";
 import { completionBudgetReport, remainingTokens } from "../runtime";
 import type { Goal, GoalStatus, GoalToolDetails } from "../state";
@@ -173,8 +173,7 @@ export const goalToolRenderer = {
 		if (args.op === "create" && args.token_budget !== undefined) {
 			meta.push(`budget ${formatNumber(args.token_budget)}`);
 		}
-		const text = renderStatusLine({ icon: "pending", title: "Goal", description, meta }, uiTheme);
-		return new Text(text, 0, 0);
+		return new Text(renderStatusLine({ icon: "pending", title: "Goal", description, meta }, uiTheme), 0, 0);
 	},
 	renderResult(
@@ -190,51 +189,62 @@ export const goalToolRenderer = {
 		if (result.isError) {
 			const header = renderStatusLine({ icon: "error", title: "Goal", description }, uiTheme);
-			const body = formatErrorDetail(fallbackText || "Goal tool failed", uiTheme);
-			return new Text([header, body].join("\n"), 0, 0);
+			return framedBlock(uiTheme, width => ({
+				header,
+				sections: [{ lines: formatErrorDetail(fallbackText || "Goal tool failed", uiTheme).split("\n") }],
+				state: "error",
+				borderColor: "error",
+				width,
+			}));
 		}
 		const goal = details?.goal ?? null;
 		if (!goal) {
-			const header = renderStatusLine({ icon: "warning", title: "Goal", description }, uiTheme);
-			const body = uiTheme.fg("muted", "No active goal.");
-			return new Text([header, body].join("\n"), 0, 0);
+			return new Text(
+				renderStatusLine({ icon: "warning", title: "Goal", description, meta: ["no active goal"] }, uiTheme),
+				0,
+				0,
+			);
 		}
-		const lines: string[] = [];
-		lines.push(
-			renderStatusLine(
-				{
-					icon: "success",
-					title: "Goal",
-					description,
-					badge: { label: goal.status, color: goalBadgeColor(goal.status) },
-				},
-				uiTheme,
-			),
+		const header = renderStatusLine(
+			{
+				icon: "success",
+				title: "Goal",
+				description,
+				badge: { label: goal.status, color: goalBadgeColor(goal.status) },
+			},
+			uiTheme,
 		);
+		const lines: string[] = [];
 		const objectiveText = truncateToWidth(goal.objective.trim(), TRUNCATE_LENGTHS.LONG);
-		lines.push(`  ${uiTheme.italic(uiTheme.fg("muted", `"${objectiveText}"`))}`);
+		lines.push(uiTheme.italic(uiTheme.fg("muted", `"${objectiveText}"`)));
 		const used = formatNumber(goal.tokensUsed);
 		const tokensLine =
 			goal.tokenBudget !== undefined
 				? `${used} / ${formatNumber(goal.tokenBudget)} tokens (${formatNumber(Math.max(0, goal.tokenBudget - goal.tokensUsed))} left)`
 				: `${used} tokens`;
-		lines.push(`  ${uiTheme.fg("dim", tokensLine)}`);
+		const metaParts = [tokensLine];
 		if (goal.timeUsedSeconds > 0) {
-			lines.push(`  ${uiTheme.fg("dim", `${formatDuration(goal.timeUsedSeconds * 1000)} elapsed`)}`);
+			metaParts.push(`${formatDuration(goal.timeUsedSeconds * 1000)} elapsed`);
 		}
+		lines.push(uiTheme.fg("dim", metaParts.join(" · ")));
 		const report = details?.completionBudgetReport;
+		const sections: Array<{ label?: string; lines: string[] }> = [{ lines }];
 		if (report) {
-			lines.push("");
-			lines.push(uiTheme.italic(uiTheme.fg("muted", report)));
+			sections.push({ label: "Report", lines: report.split("\n").map(line => uiTheme.fg("muted", line)) });
 		}
-		return new Text(lines.join("\n"), 0, 0);
+		return framedBlock(uiTheme, width => ({
+			header,
+			sections,
+			state: "success",
+			borderColor: "borderMuted",
+			width,
+		}));
 	},
 	mergeCallAndResult: true,