npm - @oh-my-pi/pi-coding-agent - Versions diffs - 14.5.12 → 14.5.14 - Mend

@oh-my-pi/pi-coding-agent 14.5.12 → 14.5.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112)

package/CHANGELOG.md +45 -0
package/package.json +18 -10
package/src/cli/jupyter-cli.ts +1 -1
package/src/commit/pipeline.ts +4 -3
package/src/config/model-equivalence.ts +49 -16
package/src/config/model-registry.ts +100 -25
package/src/config/model-resolver.ts +29 -15
package/src/config/settings-schema.ts +20 -6
package/src/config/settings.ts +9 -8
package/src/config.ts +18 -6
package/src/eval/backend.ts +43 -0
package/src/eval/eval.lark +43 -0
package/src/eval/index.ts +5 -0
package/src/eval/js/context-manager.ts +717 -0
package/src/eval/js/executor.ts +131 -0
package/src/eval/js/index.ts +46 -0
package/src/eval/js/prelude.ts +2 -0
package/src/eval/js/prelude.txt +84 -0
package/src/eval/js/tool-bridge.ts +124 -0
package/src/eval/parse.ts +337 -0
package/src/{ipy → eval/py}/executor.ts +2 -180
package/src/{ipy → eval/py}/gateway-coordinator.ts +2 -2
package/src/eval/py/index.ts +58 -0
package/src/{ipy → eval/py}/kernel.ts +9 -45
package/src/{ipy → eval/py}/prelude.py +39 -227
package/src/eval/types.ts +48 -0
package/src/export/html/template.generated.ts +1 -1
package/src/export/html/template.js +8 -10
package/src/extensibility/extensions/types.ts +2 -3
package/src/internal-urls/docs-index.generated.ts +5 -5
package/src/lsp/client.ts +9 -0
package/src/lsp/index.ts +395 -0
package/src/lsp/types.ts +15 -4
package/src/main.ts +35 -14
package/src/mcp/manager.ts +22 -0
package/src/mcp/oauth-flow.ts +1 -1
package/src/memories/index.ts +1 -1
package/src/modes/acp/acp-event-mapper.ts +1 -1
package/src/modes/components/{python-execution.ts → eval-execution.ts} +11 -4
package/src/modes/components/login-dialog.ts +1 -1
package/src/modes/components/oauth-selector.ts +2 -1
package/src/modes/components/tool-execution.ts +3 -4
package/src/modes/controllers/command-controller.ts +28 -8
package/src/modes/controllers/input-controller.ts +4 -4
package/src/modes/controllers/selector-controller.ts +2 -1
package/src/modes/interactive-mode.ts +4 -5
package/src/modes/rpc/rpc-client.ts +9 -0
package/src/modes/rpc/rpc-mode.ts +6 -0
package/src/modes/rpc/rpc-types.ts +9 -0
package/src/modes/types.ts +3 -3
package/src/modes/utils/ui-helpers.ts +2 -2
package/src/prompts/system/system-prompt.md +3 -3
package/src/prompts/tools/eval.md +92 -0
package/src/prompts/tools/lsp.md +7 -3
package/src/sdk.ts +64 -35
package/src/session/agent-session.ts +152 -46
package/src/session/messages.ts +1 -1
package/src/slash-commands/builtin-registry.ts +1 -1
package/src/system-prompt.ts +34 -66
package/src/task/agents.ts +4 -5
package/src/task/executor.ts +5 -9
package/src/tools/archive-reader.ts +9 -3
package/src/tools/browser/launch.ts +22 -0
package/src/tools/browser/readable.ts +11 -6
package/src/tools/browser/registry.ts +25 -244
package/src/tools/browser/render.ts +1 -1
package/src/tools/browser/tab-protocol.ts +101 -0
package/src/tools/browser/tab-supervisor.ts +429 -0
package/src/tools/browser/tab-worker-entry.ts +21 -0
package/src/tools/browser/tab-worker.ts +1006 -0
package/src/tools/browser.ts +17 -32
package/src/tools/checkpoint.ts +2 -2
package/src/tools/{python.ts → eval.ts} +324 -315
package/src/tools/exit-plan-mode.ts +1 -1
package/src/tools/image-gen.ts +2 -2
package/src/tools/index.ts +62 -100
package/src/tools/read.ts +0 -6
package/src/tools/recipe/runners/pkg.ts +34 -32
package/src/tools/renderers.ts +2 -2
package/src/tools/resolve.ts +7 -2
package/src/tools/todo-write.ts +0 -1
package/src/tools/tool-timeouts.ts +2 -2
package/src/tools/write.ts +8 -1
package/src/utils/markit.ts +15 -7
package/src/utils/tools-manager.ts +5 -5
package/src/web/scrapers/crossref.ts +3 -3
package/src/web/scrapers/devto.ts +1 -1
package/src/web/scrapers/discourse.ts +5 -5
package/src/web/scrapers/firefox-addons.ts +1 -1
package/src/web/scrapers/flathub.ts +2 -2
package/src/web/scrapers/gitlab.ts +1 -1
package/src/web/scrapers/go-pkg.ts +2 -2
package/src/web/scrapers/jetbrains-marketplace.ts +1 -1
package/src/web/scrapers/mastodon.ts +9 -9
package/src/web/scrapers/mdn.ts +11 -7
package/src/web/scrapers/pub-dev.ts +1 -1
package/src/web/scrapers/rawg.ts +3 -3
package/src/web/scrapers/readthedocs.ts +1 -1
package/src/web/scrapers/spdx.ts +1 -1
package/src/web/scrapers/stackoverflow.ts +2 -2
package/src/web/scrapers/types.ts +53 -39
package/src/web/scrapers/w3c.ts +1 -1
package/src/web/search/index.ts +5 -5
package/src/web/search/provider.ts +121 -39
package/src/web/search/providers/gemini.ts +4 -4
package/src/web/search/render.ts +2 -2
package/src/ipy/modules.ts +0 -144
package/src/prompts/tools/python.md +0 -57
package/src/tools/browser/vm.ts +0 -792
package/src/{ipy → eval/py}/cancellation.ts +0 -0
package/src/{ipy → eval/py}/prelude.ts +0 -0
package/src/{ipy → eval/py}/runtime.ts +0 -0

package/src/sdk.ts CHANGED Viewed

@@ -36,6 +36,7 @@ import { CursorExecHandlers } from "./cursor";
 import "./discovery";
 import { resolveConfigValue } from "./config/resolve-config-value";
 import { initializeWithSettings } from "./discovery";
+import { disposeAllKernelSessions, disposeKernelSessionsByOwner } from "./eval/py/executor";
 import { TtsrManager } from "./export/ttsr";
 import {
 	type CustomCommandsLoadResult,
@@ -73,7 +74,6 @@ import {
 	RuleProtocolHandler,
 	SkillProtocolHandler,
 } from "./internal-urls";
-import { disposeAllKernelSessions, disposeKernelSessionsByOwner } from "./ipy/executor";
 import { LSP_STARTUP_EVENT_CHANNEL, type LspStartupEvent } from "./lsp/startup-events";
 import { discoverAndLoadMCPTools, type MCPManager, type MCPToolsLoadResult } from "./mcp";
 import {
@@ -99,6 +99,8 @@ import { SessionManager } from "./session/session-manager";
 import { closeAllConnections } from "./ssh/connection-manager";
 import { unmountAll } from "./ssh/sshfs-mount";
 import {
+	type AgentsMdSearch,
+	buildAgentsMdSearch,
 	buildSystemPrompt as buildSystemPromptInternal,
 	buildSystemPromptToolMetadata,
 	loadProjectContextFiles as loadContextFilesInternal,
@@ -111,13 +113,13 @@ import {
 	createTools,
 	discoverStartupLspServers,
 	EditTool,
+	EvalTool,
 	FindTool,
 	getSearchTools,
 	HIDDEN_TOOLS,
 	isSearchProviderPreference,
 	type LspStartupServerInfo,
 	loadSshTool,
-	PythonTool,
 	ReadTool,
 	ResolveTool,
 	renderSearchToolBm25Description,
@@ -204,9 +206,6 @@ export interface CreateAgentSessionOptions {
 	enableLsp?: boolean;
 	/** Skip Python kernel availability check and prelude warmup */
 	skipPythonPreflight?: boolean;
-	/** Force Python prelude warmup even when test env would normally skip it */
-	forcePythonWarmup?: boolean;
 	/** Tool names explicitly requested (enables disabled-by-default tools) */
 	toolNames?: string[];
@@ -275,10 +274,10 @@ export {
 	BUILTIN_TOOLS,
 	createTools,
 	EditTool,
+	EvalTool,
 	FindTool,
 	HIDDEN_TOOLS,
 	loadSshTool,
-	PythonTool,
 	ReadTool,
 	ResolveTool,
 	SearchTool,
@@ -301,7 +300,6 @@ function getDefaultAgentDir(): string {
  */
 export async function discoverAuthStorage(agentDir: string = getDefaultAgentDir()): Promise<AuthStorage> {
 	const dbPath = getAgentDbPath(agentDir);
-	logger.debug("discoverAuthStorage", { agentDir, dbPath });
 	const storage = await AuthStorage.create(dbPath, { configValueResolver: resolveConfigValue });
 	await storage.reload();
@@ -430,6 +428,9 @@ function isCustomTool(tool: CustomTool | ToolDefinition): tool is CustomTool {
 const TOOL_DEFINITION_MARKER = Symbol("__isToolDefinition");
+/** Matches the truncation applied to per-server instructions inside `rebuildSystemPrompt`. */
+const MAX_MCP_INSTRUCTIONS_LENGTH = 4000;
 let sshCleanupRegistered = false;
 async function cleanupSshResources(): Promise<void> {
@@ -667,17 +668,40 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 	const modelRegistry = options.modelRegistry ?? new ModelRegistry(authStorage);
 	const settings = options.settings ?? (await logger.time("settings", Settings.init, { cwd, agentDir }));
-	logger.time("initializeWithSettings");
-	initializeWithSettings(settings);
+	logger.time("initializeWithSettings", initializeWithSettings, settings);
 	if (!options.modelRegistry) {
 		modelRegistry.refreshInBackground();
 	}
+	// Kick off AGENTS.md filesystem search in parallel — it is the slowest piece of buildSystemPrompt
+	// (~200ms on large repos) and only needs `cwd`, so it can overlap with everything that follows.
+	const agentsMdSearchPromise: Promise<AgentsMdSearch> = logger.time("buildAgentsMdSearch", buildAgentsMdSearch, cwd);
+	agentsMdSearchPromise.catch(() => {});
+	// Independent discoveries that depend only on cwd/agentDir — kicked off in parallel and awaited
+	// at their respective consumer sites. Their work can overlap with model resolution, secret loading,
+	// session-context build, tool creation, MCP discovery, and extension discovery.
+	const contextFilesPromise = options.contextFiles
+		? Promise.resolve(options.contextFiles)
+		: logger.time("discoverContextFiles", discoverContextFiles, cwd, agentDir);
+	contextFilesPromise.catch(() => {});
+	const promptTemplatesPromise = options.promptTemplates
+		? Promise.resolve(options.promptTemplates)
+		: logger.time("discoverPromptTemplates", discoverPromptTemplates, cwd, agentDir);
+	promptTemplatesPromise.catch(() => {});
+	const slashCommandsPromise = options.slashCommands
+		? Promise.resolve(options.slashCommands)
+		: logger.time("discoverSlashCommands", discoverSlashCommands, cwd);
+	slashCommandsPromise.catch(() => {});
 	const skillsSettings = settings.getGroup("skills");
 	const disabledExtensionIds = settings.get("disabledExtensions") ?? [];
 	const discoveredSkillsPromise =
 		options.skills === undefined
-			? discoverSkills(cwd, agentDir, { ...skillsSettings, disabledExtensions: disabledExtensionIds })
+			? logger.time("discoverSkills", discoverSkills, cwd, agentDir, {
+					...skillsSettings,
+					disabledExtensions: disabledExtensionIds,
+				})
 			: undefined;
+	discoveredSkillsPromise?.catch(() => {});
 	// Initialize provider preferences from settings
 	const webSearchProvider = settings.get("providers.webSearch");
@@ -814,10 +838,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 		skills = options.skills;
 		skillWarnings = [];
 	} else {
-		const discovered = await logger.time(
-			"discoverSkills",
-			() => discoveredSkillsPromise ?? Promise.resolve({ skills: [], warnings: [] }),
-		);
+		const discovered = await (discoveredSkillsPromise ?? Promise.resolve({ skills: [], warnings: [] }));
 		skills = discovered.skills;
 		skillWarnings = discovered.warnings;
 	}
@@ -851,10 +872,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 		return { ttsrManager, rulebookRules, alwaysApplyRules };
 	});
-	const contextFiles = await logger.time(
-		"discoverContextFiles",
-		async () => options.contextFiles ?? (await discoverContextFiles(cwd, agentDir)),
-	);
+	const contextFiles = await contextFilesPromise;
 	let agent: Agent;
 	let session!: AgentSession;
@@ -917,7 +935,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 	const resolvedAgentId = options.agentId ?? options.parentTaskPrefix ?? MAIN_AGENT_ID;
 	const resolvedAgentDisplayName =
 		options.agentDisplayName ?? ((options.taskDepth ?? 0) > 0 || options.parentTaskPrefix ? "sub" : "main");
-	const pythonKernelOwnerId = `agent-session:${Snowflake.next()}`;
+	const evalKernelOwnerId = `agent-session:${Snowflake.next()}`;
 	try {
 		const getActiveModelString = (): string | undefined => {
@@ -937,7 +955,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 				return !requestedToolNames || requestedToolNames.includes("edit");
 			},
 			skipPythonPreflight: options.skipPythonPreflight,
-			forcePythonWarmup: options.forcePythonWarmup,
 			contextFiles,
 			skills,
 			eventBus,
@@ -945,12 +962,13 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			requireYieldTool: options.requireYieldTool,
 			taskDepth: options.taskDepth ?? 0,
 			getSessionFile: () => sessionManager.getSessionFile() ?? null,
-			getPythonKernelOwnerId: () => pythonKernelOwnerId,
-			assertPythonExecutionAllowed: () => session?.assertPythonExecutionAllowed(),
-			trackPythonExecution: (execution, abortController) =>
-				session ? session.trackPythonExecution(execution, abortController) : execution,
+			getEvalKernelOwnerId: () => evalKernelOwnerId,
+			assertEvalExecutionAllowed: () => session?.assertEvalExecutionAllowed(),
+			trackEvalExecution: (execution, abortController) =>
+				session ? session.trackEvalExecution(execution, abortController) : execution,
 			getSessionId: () => sessionManager.getSessionId?.() ?? null,
 			getAgentId: () => resolvedAgentId,
+			getToolByName: name => session?.getToolByName(name),
 			agentRegistry,
 			getSessionSpawns: () => options.spawns ?? "*",
 			getModelString: () => (hasExplicitModel && model ? formatModelString(model) : undefined),
@@ -1322,7 +1340,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			const serverInstructions = mcpManager?.getServerInstructions();
 			let appendPrompt: string | undefined = memoryInstructions ?? undefined;
 			if (serverInstructions && serverInstructions.size > 0) {
-				const MAX_INSTRUCTIONS_LENGTH = 4000;
 				const parts: string[] = [];
 				if (appendPrompt) parts.push(appendPrompt);
 				parts.push(
@@ -1330,8 +1347,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 				);
 				for (const [srvName, srvInstructions] of serverInstructions) {
 					const truncated =
-						srvInstructions.length > MAX_INSTRUCTIONS_LENGTH
-							? `${srvInstructions.slice(0, MAX_INSTRUCTIONS_LENGTH)}\n[truncated]`
+						srvInstructions.length > MAX_MCP_INSTRUCTIONS_LENGTH
+							? `${srvInstructions.slice(0, MAX_MCP_INSTRUCTIONS_LENGTH)}\n[truncated]`
 							: srvInstructions;
 					parts.push(`### ${srvName}\n${truncated}`);
 				}
@@ -1353,6 +1370,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 				mcpDiscoveryServerSummaries: discoverableMCPSummary.servers.map(formatDiscoverableMCPToolServerSummary),
 				eagerTasks,
 				secretsEnabled,
+				agentsMdSearch: agentsMdSearchPromise,
 			});
 			if (options.systemPrompt === undefined) {
@@ -1376,6 +1394,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 					mcpDiscoveryServerSummaries: discoverableMCPSummary.servers.map(formatDiscoverableMCPToolServerSummary),
 					eagerTasks,
 					secretsEnabled,
+					agentsMdSearch: agentsMdSearchPromise,
 				});
 			}
 			return options.systemPrompt(defaultPrompt);
@@ -1446,13 +1465,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 		const systemPrompt = await logger.time("buildSystemPrompt", rebuildSystemPrompt, initialToolNames, toolRegistry);
-		const promptTemplates =
-			options.promptTemplates ??
-			(await logger.time("discoverPromptTemplates", discoverPromptTemplates, cwd, agentDir));
+		const promptTemplates = await promptTemplatesPromise;
 		toolSession.promptTemplates = promptTemplates;
-		const slashCommands =
-			options.slashCommands ?? (await logger.time("discoverSlashCommands", discoverSlashCommands, cwd));
+		const slashCommands = await slashCommandsPromise;
 		// Create convertToLlm wrapper that filters images if blockImages is enabled (defense-in-depth)
 		const convertToLlmWithBlockImages = (messages: AgentMessage[]): Message[] => {
@@ -1596,7 +1612,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			thinkingLevel,
 			sessionManager,
 			settings,
-			pythonKernelOwnerId,
+			evalKernelOwnerId,
 			scopedModels: options.scopedModels,
 			promptTemplates,
 			slashCommands,
@@ -1612,6 +1628,20 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			onResponse,
 			convertToLlm: convertToLlmFinal,
 			rebuildSystemPrompt,
+			getMcpServerInstructions: mcpManager
+				? () => {
+						const raw = mcpManager.getServerInstructions();
+						if (!raw || raw.size === 0) return raw;
+						const out = new Map<string, string>();
+						for (const [name, text] of raw) {
+							out.set(
+								name,
+								text.length > MAX_MCP_INSTRUCTIONS_LENGTH ? text.slice(0, MAX_MCP_INSTRUCTIONS_LENGTH) : text,
+							);
+						}
+						return out;
+					}
+				: undefined,
 			mcpDiscoveryEnabled,
 			initialSelectedMCPToolNames,
 			defaultSelectedMCPToolNames,
@@ -1765,7 +1795,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			});
 		}
-		logger.time("createAgentSession:return");
 		return {
 			session,
 			extensionsResult,
@@ -1780,7 +1809,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			if (hasSession) {
 				await session.dispose();
 			} else {
-				await disposeKernelSessionsByOwner(pythonKernelOwnerId);
+				await disposeKernelSessionsByOwner(evalKernelOwnerId);
 			}
 		} catch (cleanupError) {
 			logger.warn("Failed to clean up createAgentSession resources after startup error", {

package/src/session/agent-session.ts CHANGED Viewed

@@ -68,6 +68,11 @@ import {
 import { expandPromptTemplate, type PromptTemplate } from "../config/prompt-templates";
 import type { Settings, SkillsSettings } from "../config/settings";
 import { normalizeDiff, normalizeToLF, ParseError, previewPatch, stripBom } from "../edit";
+import {
+	disposeKernelSessionsByOwner,
+	executePython as executePythonCommand,
+	type PythonResult,
+} from "../eval/py/executor";
 import { type BashResult, executeBash as executeBashCommand } from "../exec/bash-executor";
 import { exportSessionToHtml } from "../export/html";
 import type { TtsrManager, TtsrMatchContext } from "../export/ttsr";
@@ -98,11 +103,6 @@ import type { HookCommandContext } from "../extensibility/hooks/types";
 import type { Skill, SkillWarning } from "../extensibility/skills";
 import { expandSlashCommand, type FileSlashCommand } from "../extensibility/slash-commands";
 import { type LocalProtocolOptions, resolveLocalUrlToPath } from "../internal-urls";
-import {
-	disposeKernelSessionsByOwner,
-	executePython as executePythonCommand,
-	type PythonResult,
-} from "../ipy/executor";
 import {
 	buildDiscoverableMCPSearchIndex,
 	collectDiscoverableMCPTools,
@@ -244,6 +244,13 @@ export interface AgentSessionConfig {
 	convertToLlm?: (messages: AgentMessage[]) => Message[] | Promise<Message[]>;
 	/** System prompt builder that can consider tool availability */
 	rebuildSystemPrompt?: (toolNames: string[], tools: Map<string, AgentTool>) => Promise<string>;
+	/**
+	 * Optional accessor for live MCP server instructions. Read by the session's
+	 * `rebuildSystemPrompt`-skip optimization to detect server-side instruction
+	 * changes (e.g. an MCP server upgrade) that would otherwise pass the tool-set
+	 * signature comparison and silently keep a stale prompt cached.
+	 */
+	getMcpServerInstructions?: () => Map<string, string> | undefined;
 	/** Enable hidden-by-default MCP tool discovery for this session. */
 	mcpDiscoveryEnabled?: boolean;
 	/** MCP tool names to activate for the current session when discovery mode is enabled. */
@@ -259,7 +266,7 @@ export interface AgentSessionConfig {
 	/** Secret obfuscator for deobfuscating streaming edit content */
 	obfuscator?: SecretObfuscator;
 	/** Logical owner for retained Python kernels created by this session. */
-	pythonKernelOwnerId?: string;
+	evalKernelOwnerId?: string;
 	/** Agent identity (registry id like "0-Main" or "3-Alice") used for IRC routing. */
 	agentId?: string;
 	/** Shared agent registry (for forwarding IRC observations to the main session UI). */
@@ -474,11 +481,11 @@ export class AgentSession {
 	#pendingBashMessages: BashExecutionMessage[] = [];
 	// Python execution state
-	#pythonAbortControllers = new Set<AbortController>();
-	#pythonKernelOwnerId: string;
+	#evalAbortControllers = new Set<AbortController>();
+	#evalKernelOwnerId: string;
 	#pendingPythonMessages: PythonExecutionMessage[] = [];
-	#activePythonExecutions = new Set<Promise<unknown>>();
-	#pythonExecutionDisposing = false;
+	#activeEvalExecutions = new Set<Promise<unknown>>();
+	#evalExecutionDisposing = false;
 	// Background-channel IRC exchanges queued while the recipient was streaming.
 	// Drained into history (via emitExternalEvent) once the recipient becomes idle.
@@ -511,7 +518,15 @@ export class AgentSession {
 	#onResponse: SimpleStreamOptions["onResponse"] | undefined;
 	#convertToLlm: (messages: AgentMessage[]) => Message[] | Promise<Message[]>;
 	#rebuildSystemPrompt: ((toolNames: string[], tools: Map<string, AgentTool>) => Promise<string>) | undefined;
+	#getMcpServerInstructions: (() => Map<string, string> | undefined) | undefined;
 	#baseSystemPrompt: string;
+	/**
+	 * Signature of the (toolNames, tool descriptions) tuple passed to the most
+	 * recent successful `rebuildSystemPrompt` call. Used to skip redundant rebuilds
+	 * when MCP servers reconnect without changing their tool definitions, which is
+	 * the dominant cause of prompt-cache invalidation in long sessions.
+	 */
+	#lastAppliedToolSignature: string | undefined;
 	#mcpDiscoveryEnabled = false;
 	#discoverableMCPTools = new Map<string, DiscoverableMCPTool>();
 	#discoverableMCPSearchIndex: DiscoverableMCPSearchIndex | null = null;
@@ -577,7 +592,7 @@ export class AgentSession {
 		this.settings = config.settings;
 		this.#startPowerAssertion();
 		this.#asyncJobManager = config.asyncJobManager;
-		this.#pythonKernelOwnerId = config.pythonKernelOwnerId ?? `agent-session:${Snowflake.next()}`;
+		this.#evalKernelOwnerId = config.evalKernelOwnerId ?? `agent-session:${Snowflake.next()}`;
 		this.#scopedModels = config.scopedModels ?? [];
 		this.#thinkingLevel = config.thinkingLevel;
 		this.#promptTemplates = config.promptTemplates ?? [];
@@ -595,6 +610,7 @@ export class AgentSession {
 		this.#onResponse = config.onResponse;
 		this.#convertToLlm = config.convertToLlm ?? convertToLlm;
 		this.#rebuildSystemPrompt = config.rebuildSystemPrompt;
+		this.#getMcpServerInstructions = config.getMcpServerInstructions;
 		this.#baseSystemPrompt = this.agent.state.systemPrompt;
 		this.#mcpDiscoveryEnabled = config.mcpDiscoveryEnabled ?? false;
 		this.#setDiscoverableMCPTools(this.#collectDiscoverableMCPToolsFromRegistry());
@@ -1938,7 +1954,7 @@ export class AgentSession {
 	 * Call this when completely done with the session.
 	 */
 	async dispose(): Promise<void> {
-		this.#pythonExecutionDisposing = true;
+		this.#evalExecutionDisposing = true;
 		try {
 			if (this.#extensionRunner?.hasHandlers("session_shutdown")) {
 				await this.#extensionRunner.emit({ type: "session_shutdown" });
@@ -1953,13 +1969,13 @@ export class AgentSession {
 		if (drained === false && deliveryState) {
 			logger.warn("Async job completion deliveries still pending during dispose", { ...deliveryState });
 		}
-		const pythonExecutionsSettled = await this.#preparePythonExecutionsForDispose();
+		const pythonExecutionsSettled = await this.#prepareEvalExecutionsForDispose();
 		if (!pythonExecutionsSettled) {
 			logger.warn(
 				"Detaching retained Python kernel ownership during dispose while Python execution is still active",
 			);
 		}
-		await disposeKernelSessionsByOwner(this.#pythonKernelOwnerId);
+		await disposeKernelSessionsByOwner(this.#evalKernelOwnerId);
 		this.#stopPowerAssertion();
 		await this.sessionManager.close();
 		this.#closeAllProviderSessions("dispose");
@@ -2211,10 +2227,18 @@ export class AgentSession {
 		}
 		this.agent.setTools(tools);
-		// Rebuild base system prompt with new tool set
+		// Rebuild base system prompt with new tool set, but only when the tool set
+		// actually changed. MCP servers can reconnect at arbitrary times and call
+		// `refreshMCPTools` -> `#applyActiveToolsByName` even though the resulting
+		// tool list is byte-identical. Skipping the rebuild keeps the system prompt
+		// stable, which is required for Anthropic prompt caching to keep hitting.
 		if (this.#rebuildSystemPrompt) {
-			this.#baseSystemPrompt = await this.#rebuildSystemPrompt(validToolNames, this.#toolRegistry);
-			this.agent.setSystemPrompt(this.#baseSystemPrompt);
+			const signature = this.#computeAppliedToolSignature(validToolNames, tools);
+			if (signature !== this.#lastAppliedToolSignature) {
+				this.#baseSystemPrompt = await this.#rebuildSystemPrompt(validToolNames, this.#toolRegistry);
+				this.agent.setSystemPrompt(this.#baseSystemPrompt);
+				this.#lastAppliedToolSignature = signature;
+			}
 		}
 		if (options?.persistMCPSelection !== false) {
 			this.#persistSelectedMCPToolNamesIfChanged(previousSelectedMCPToolNames);
@@ -2256,6 +2280,86 @@ export class AgentSession {
 		const activeToolNames = this.getActiveToolNames();
 		this.#baseSystemPrompt = await this.#rebuildSystemPrompt(activeToolNames, this.#toolRegistry);
 		this.agent.setSystemPrompt(this.#baseSystemPrompt);
+		// Refresh the cached signature so a subsequent `#applyActiveToolsByName` with
+		// the same tool set does not re-rebuild on top of the explicit refresh we
+		// just performed (and conversely, a different set forces a fresh rebuild).
+		const activeTools = activeToolNames
+			.map(name => this.#toolRegistry.get(name))
+			.filter((tool): tool is AgentTool => tool != null);
+		this.#lastAppliedToolSignature = this.#computeAppliedToolSignature(activeToolNames, activeTools);
+	}
+	/**
+	 * Compose a stable signature for the inputs that `rebuildSystemPrompt` reads.
+	 * Identical signatures mean every input listed as covered below is unchanged,
+	 * so the rebuild can be skipped. Inputs listed as NOT covered are outside this
+	 * guarantee and must be handled by an explicit `refreshBaseSystemPrompt()`.
+	 *
+	 * The signature covers:
+	 *   1. Active tool names in order (the prompt renders them in this order).
+	 *   2. Active tool labels, descriptions, and wire-visible names — all are
+	 *      rendered into the prompt body (see `system-prompt.md` `{{label}}: \`{{name}}\``
+	 *      and `toolPromptNames` in `buildSystemPrompt`). The wire name comes from
+	 *      `tool.customWireName` and overrides the internal name on the model wire
+	 *      (e.g. `edit` exposes itself as `apply_patch` to GPT-5 in apply_patch mode);
+	 *      a stale wire name would desync prompt guidance from actual tool routing.
+	 *   3. When MCP discovery is on, every registry tool's name+label+description+
+	 *      customWireName, since `rebuildSystemPrompt` summarizes discoverable MCP
+	 *      tools that are not in the active set.
+	 *   4. MCP server instructions text (per server), since `rebuildSystemPrompt`
+	 *      embeds these in the appended prompt under "## MCP Server Instructions".
+	 *      A server upgrade can change instructions while keeping tools identical.
+	 *
+	 * Settings-driven tool metadata is covered automatically: built-in tools that
+	 * depend on settings expose `description`/`label` via getters (see `TaskTool`,
+	 * `SearchToolBm25Tool`, `EditTool`), and the signature reads them live on every
+	 * call - so a settings flip that mutates the rendered string changes the signature
+	 * the next time `#applyActiveToolsByName` runs. Do not refactor `describeTool` to
+	 * cache per-tool strings without preserving this property.
+	 *
+	 * Inputs NOT covered: tool input schemas; memory instructions read from disk;
+	 * and SDK-init-time closure constants in `sdk.ts` (`repeatToolDescriptions`,
+	 * `eagerTasks`, `intentField`, `mcpDiscoveryEnabled`, `secretsEnabled`). The
+	 * closure-captured ones cannot change at runtime regardless of skip behavior.
+	 * For everything else, callers must explicitly call `refreshBaseSystemPrompt()`
+	 * after side-effecting changes; see e.g. the memory hooks and
+	 * `#syncEditToolModeAfterModelChange`.
+	 *
+	 * The current calendar date IS covered (appended as a segment) because
+	 * `buildSystemPrompt` injects it into the prompt body (`Today is '{{date}}'`).
+	 * Without this, a session spanning midnight with only tool-stable MCP
+	 * reconnects would keep yesterday's date indefinitely.
+	 *
+	 * Encoding: every record is serialized with `JSON.stringify` rather than joined
+	 * with ad-hoc separators. Labels, descriptions and server instructions are free
+	 * text and may contain any separator character; JSON quoting keeps field
+	 * boundaries unambiguous so two different inputs cannot alias to one signature.
+	 * The value is only ever compared against `#lastAppliedToolSignature` from the
+	 * same process, so its exact shape is not part of any persisted format.
+	 *
+	 * @param toolNames Active tool names, in the order they are rendered.
+	 * @param tools Active tool objects whose label/description/wire name are read live.
+	 * @returns An opaque string; equal strings mean the covered inputs are equal.
+	 */
+	#computeAppliedToolSignature(toolNames: string[], tools: AgentTool[]): string {
+		// One JSON tuple per tool. Missing optional fields are normalized to "" so
+		// `undefined` and "" compare equal, matching how they render in the prompt.
+		const describeTool = (tool: AgentTool): string =>
+			JSON.stringify([tool.name, tool.label ?? "", tool.description ?? "", tool.customWireName ?? ""]);
+
+		// Order-preserving: any reorder of the active set must produce a different
+		// signature so the rebuild fires and the new tool list reaches the API.
+		const activeEntries = tools.map(describeTool);
+
+		const registryEntries: string[] = [];
+		if (this.#mcpDiscoveryEnabled) {
+			// Registry iteration order is not load-bearing for the prompt content, so we
+			// sort to keep the signature insensitive to incidental insertion order.
+			for (const tool of this.#toolRegistry.values()) {
+				registryEntries.push(describeTool(tool));
+			}
+			registryEntries.sort();
+		}
+
+		const instructionEntries: string[] = [];
+		const serverInstructions = this.#getMcpServerInstructions?.();
+		if (serverInstructions && serverInstructions.size > 0) {
+			// Sort by encoded (server, text) pair so transport flap order does not
+			// perturb the signature.
+			for (const [server, instructions] of serverInstructions) {
+				instructionEntries.push(JSON.stringify([server, instructions]));
+			}
+			instructionEntries.sort();
+		}
+
+		// NOTE(review): `toISOString()` yields the UTC calendar date. Confirm that
+		// `buildSystemPrompt` renders `{{date}}` in the same time zone; if it uses
+		// local time, the rollover here happens at a different moment than the
+		// prompt's date actually changes.
+		const date = new Date().toISOString().slice(0, 10);
+
+		return JSON.stringify([toolNames, activeEntries, registryEntries, instructionEntries, date]);
+	}
 	/**
@@ -3423,7 +3527,7 @@ export class AgentSession {
 		this.abortCompaction();
 		this.abortHandoff();
 		this.abortBash();
-		this.abortPython();
+		this.abortEval();
 		const postPromptDrain = this.#cancelPostPromptTasks();
 		this.agent.abort();
 		await postPromptDrain;
@@ -4248,9 +4352,10 @@ export class AgentSession {
 			}
 			// Start a new session
+			const previousSessionFile = this.sessionFile;
 			await this.sessionManager.flush();
 			this.#asyncJobManager?.cancelAll();
-			await this.sessionManager.newSession();
+			await this.sessionManager.newSession(previousSessionFile ? { parentSession: previousSessionFile } : undefined);
 			this.agent.reset();
 			this.agent.sessionId = this.sessionManager.getSessionId();
 			this.#steeringMessages = [];
@@ -4262,6 +4367,7 @@ export class AgentSession {
 			// Inject the handoff document as a custom message
 			const handoffContent = `<handoff-context>\n${handoffText}\n</handoff-context>\n\nThe above is a handoff document from a previous session. Use this context to continue the work seamlessly.`;
 			this.sessionManager.appendCustomMessageEntry("handoff", handoffContent, true, undefined, "agent");
+			await this.sessionManager.ensureOnDisk();
 			let savedPath: string | undefined;
 			if (options?.autoTriggered && this.settings.get("compaction.handoffSaveToDisk")) {
 				const artifactsDir = this.sessionManager.getArtifactsDir();
@@ -5895,7 +6001,7 @@ export class AgentSession {
 	/**
 	 * Execute Python code in the shared kernel.
-	 * Uses the same kernel session as the agent's Python tool, allowing collaborative editing.
+	 * Uses the same kernel session as eval's Python backend, allowing collaborative editing.
 	 * @param code The Python code to execute
 	 * @param onChunk Optional streaming callback for output
 	 * @param options.excludeFromContext If true, execution won't be sent to LLM ($$ prefix)
@@ -5907,7 +6013,7 @@ export class AgentSession {
 	): Promise<PythonResult> {
 		const excludeFromContext = options?.excludeFromContext === true;
 		const cwd = this.sessionManager.getCwd();
-		this.assertPythonExecutionAllowed();
+		this.assertEvalExecutionAllowed();
 		const abortController = new AbortController();
 		const execution = (async (): Promise<PythonResult> => {
@@ -5918,20 +6024,20 @@ export class AgentSession {
 					excludeFromContext,
 					cwd,
 				});
-				this.assertPythonExecutionAllowed();
+				this.assertEvalExecutionAllowed();
 				if (hookResult?.result) {
 					this.recordPythonResult(code, hookResult.result, options);
 					return hookResult.result;
 				}
 			}
-			// Use the same session ID as the Python tool for kernel sharing
+			// Use the same session ID as eval's Python backend for kernel sharing
 			const sessionFile = this.sessionManager.getSessionFile();
 			const sessionId = sessionFile ? `session:${sessionFile}:cwd:${cwd}` : `cwd:${cwd}`;
 			const result = await executePythonCommand(code, {
 				cwd,
 				sessionId,
-				kernelOwnerId: this.#pythonKernelOwnerId,
+				kernelOwnerId: this.#evalKernelOwnerId,
 				kernelMode: this.settings.get("python.kernelMode"),
 				useSharedGateway: this.settings.get("python.sharedGateway"),
 				onChunk,
@@ -5940,11 +6046,11 @@ export class AgentSession {
 			this.recordPythonResult(code, result, options);
 			return result;
 		})();
-		return await this.trackPythonExecution(execution, abortController);
+		return await this.trackEvalExecution(execution, abortController);
 	}
-	assertPythonExecutionAllowed(): void {
-		if (this.#pythonExecutionDisposing) {
+	assertEvalExecutionAllowed(): void {
+		if (this.#evalExecutionDisposing) {
 			throw new Error("Python execution is unavailable while session disposal is in progress");
 		}
 	}
@@ -5952,17 +6058,17 @@ export class AgentSession {
 	/**
 	 * Track Python work started outside AgentSession.executePython so dispose can await and abort it too.
 	 */
-	trackPythonExecution<T>(execution: Promise<T>, abortController: AbortController): Promise<T> {
-		this.#pythonAbortControllers.add(abortController);
-		this.#activePythonExecutions.add(execution);
+	trackEvalExecution<T>(execution: Promise<T>, abortController: AbortController): Promise<T> {
+		this.#evalAbortControllers.add(abortController);
+		this.#activeEvalExecutions.add(execution);
 		void execution.then(
 			() => {
-				this.#pythonAbortControllers.delete(abortController);
-				this.#activePythonExecutions.delete(execution);
+				this.#evalAbortControllers.delete(abortController);
+				this.#activeEvalExecutions.delete(execution);
 			},
 			() => {
-				this.#pythonAbortControllers.delete(abortController);
-				this.#activePythonExecutions.delete(execution);
+				this.#evalAbortControllers.delete(abortController);
+				this.#activeEvalExecutions.delete(execution);
 			},
 		);
 		return execution;
@@ -5997,35 +6103,35 @@ export class AgentSession {
 	/**
 	 * Cancel running Python execution.
 	 */
-	abortPython(): void {
-		for (const abortController of this.#pythonAbortControllers) {
+	abortEval(): void {
+		for (const abortController of this.#evalAbortControllers) {
 			abortController.abort();
 		}
 	}
-	async #waitForPythonExecutionsToSettle(timeoutMs: number): Promise<boolean> {
+	async #waitForEvalExecutionsToSettle(timeoutMs: number): Promise<boolean> {
 		const deadline = Date.now() + timeoutMs;
-		while (this.#activePythonExecutions.size > 0) {
+		while (this.#activeEvalExecutions.size > 0) {
 			const remainingMs = deadline - Date.now();
 			if (remainingMs <= 0) {
 				return false;
 			}
 			const settled = await Promise.race([
-				Promise.allSettled(Array.from(this.#activePythonExecutions)).then(() => true),
+				Promise.allSettled(Array.from(this.#activeEvalExecutions)).then(() => true),
 				Bun.sleep(remainingMs).then(() => false),
 			]);
-			if (!settled && this.#activePythonExecutions.size > 0) {
+			if (!settled && this.#activeEvalExecutions.size > 0) {
 				return false;
 			}
 		}
 		return true;
 	}
-	async #preparePythonExecutionsForDispose(): Promise<boolean> {
-		if (!(await this.#waitForPythonExecutionsToSettle(3_000))) {
+	async #prepareEvalExecutionsForDispose(): Promise<boolean> {
+		if (!(await this.#waitForEvalExecutionsToSettle(3_000))) {
 			logger.warn("Aborting active Python execution during dispose before retained kernel cleanup");
-			this.abortPython();
-			if (!(await this.#waitForPythonExecutionsToSettle(1_000))) {
+			this.abortEval();
+			if (!(await this.#waitForEvalExecutionsToSettle(1_000))) {
 				logger.warn(
 					"Python execution is still active after dispose aborted all active runs; retained kernel ownership will still be detached",
 				);
@@ -6036,8 +6142,8 @@ export class AgentSession {
 	}
 	/** Whether a Python execution is currently running */
-	get isPythonRunning(): boolean {
-		return this.#pythonAbortControllers.size > 0;
+	get isEvalRunning(): boolean {
+		return this.#evalAbortControllers.size > 0;
 	}
 	/** Whether there are pending Python messages waiting to be flushed */

package/src/session/messages.ts CHANGED Viewed

@@ -59,7 +59,7 @@ export interface BashExecutionMessage {
 /**
  * Message type for user-initiated Python executions via the $ command.
- * Shares the same kernel session as the agent's Python tool.
+ * Shares the same kernel session as eval's Python backend.
  */
 export interface PythonExecutionMessage {
 	role: "pythonExecution";