@oh-my-pi/pi-coding-agent 14.5.12 → 14.5.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/CHANGELOG.md +45 -0
  2. package/package.json +18 -10
  3. package/src/cli/jupyter-cli.ts +1 -1
  4. package/src/commit/pipeline.ts +4 -3
  5. package/src/config/model-equivalence.ts +49 -16
  6. package/src/config/model-registry.ts +100 -25
  7. package/src/config/model-resolver.ts +29 -15
  8. package/src/config/settings-schema.ts +20 -6
  9. package/src/config/settings.ts +9 -8
  10. package/src/config.ts +18 -6
  11. package/src/eval/backend.ts +43 -0
  12. package/src/eval/eval.lark +43 -0
  13. package/src/eval/index.ts +5 -0
  14. package/src/eval/js/context-manager.ts +717 -0
  15. package/src/eval/js/executor.ts +131 -0
  16. package/src/eval/js/index.ts +46 -0
  17. package/src/eval/js/prelude.ts +2 -0
  18. package/src/eval/js/prelude.txt +84 -0
  19. package/src/eval/js/tool-bridge.ts +124 -0
  20. package/src/eval/parse.ts +337 -0
  21. package/src/{ipy → eval/py}/executor.ts +2 -180
  22. package/src/{ipy → eval/py}/gateway-coordinator.ts +2 -2
  23. package/src/eval/py/index.ts +58 -0
  24. package/src/{ipy → eval/py}/kernel.ts +9 -45
  25. package/src/{ipy → eval/py}/prelude.py +39 -227
  26. package/src/eval/types.ts +48 -0
  27. package/src/export/html/template.generated.ts +1 -1
  28. package/src/export/html/template.js +8 -10
  29. package/src/extensibility/extensions/types.ts +2 -3
  30. package/src/internal-urls/docs-index.generated.ts +5 -5
  31. package/src/lsp/client.ts +9 -0
  32. package/src/lsp/index.ts +395 -0
  33. package/src/lsp/types.ts +15 -4
  34. package/src/main.ts +35 -14
  35. package/src/mcp/manager.ts +22 -0
  36. package/src/mcp/oauth-flow.ts +1 -1
  37. package/src/memories/index.ts +1 -1
  38. package/src/modes/acp/acp-event-mapper.ts +1 -1
  39. package/src/modes/components/{python-execution.ts → eval-execution.ts} +11 -4
  40. package/src/modes/components/login-dialog.ts +1 -1
  41. package/src/modes/components/oauth-selector.ts +2 -1
  42. package/src/modes/components/tool-execution.ts +3 -4
  43. package/src/modes/controllers/command-controller.ts +28 -8
  44. package/src/modes/controllers/input-controller.ts +4 -4
  45. package/src/modes/controllers/selector-controller.ts +2 -1
  46. package/src/modes/interactive-mode.ts +4 -5
  47. package/src/modes/rpc/rpc-client.ts +9 -0
  48. package/src/modes/rpc/rpc-mode.ts +6 -0
  49. package/src/modes/rpc/rpc-types.ts +9 -0
  50. package/src/modes/types.ts +3 -3
  51. package/src/modes/utils/ui-helpers.ts +2 -2
  52. package/src/prompts/system/system-prompt.md +3 -3
  53. package/src/prompts/tools/eval.md +92 -0
  54. package/src/prompts/tools/lsp.md +7 -3
  55. package/src/sdk.ts +64 -35
  56. package/src/session/agent-session.ts +152 -46
  57. package/src/session/messages.ts +1 -1
  58. package/src/slash-commands/builtin-registry.ts +1 -1
  59. package/src/system-prompt.ts +34 -66
  60. package/src/task/agents.ts +4 -5
  61. package/src/task/executor.ts +5 -9
  62. package/src/tools/archive-reader.ts +9 -3
  63. package/src/tools/browser/launch.ts +22 -0
  64. package/src/tools/browser/readable.ts +11 -6
  65. package/src/tools/browser/registry.ts +25 -244
  66. package/src/tools/browser/render.ts +1 -1
  67. package/src/tools/browser/tab-protocol.ts +101 -0
  68. package/src/tools/browser/tab-supervisor.ts +429 -0
  69. package/src/tools/browser/tab-worker-entry.ts +21 -0
  70. package/src/tools/browser/tab-worker.ts +1006 -0
  71. package/src/tools/browser.ts +17 -32
  72. package/src/tools/checkpoint.ts +2 -2
  73. package/src/tools/{python.ts → eval.ts} +324 -315
  74. package/src/tools/exit-plan-mode.ts +1 -1
  75. package/src/tools/image-gen.ts +2 -2
  76. package/src/tools/index.ts +62 -100
  77. package/src/tools/read.ts +0 -6
  78. package/src/tools/recipe/runners/pkg.ts +34 -32
  79. package/src/tools/renderers.ts +2 -2
  80. package/src/tools/resolve.ts +7 -2
  81. package/src/tools/todo-write.ts +0 -1
  82. package/src/tools/tool-timeouts.ts +2 -2
  83. package/src/tools/write.ts +8 -1
  84. package/src/utils/markit.ts +15 -7
  85. package/src/utils/tools-manager.ts +5 -5
  86. package/src/web/scrapers/crossref.ts +3 -3
  87. package/src/web/scrapers/devto.ts +1 -1
  88. package/src/web/scrapers/discourse.ts +5 -5
  89. package/src/web/scrapers/firefox-addons.ts +1 -1
  90. package/src/web/scrapers/flathub.ts +2 -2
  91. package/src/web/scrapers/gitlab.ts +1 -1
  92. package/src/web/scrapers/go-pkg.ts +2 -2
  93. package/src/web/scrapers/jetbrains-marketplace.ts +1 -1
  94. package/src/web/scrapers/mastodon.ts +9 -9
  95. package/src/web/scrapers/mdn.ts +11 -7
  96. package/src/web/scrapers/pub-dev.ts +1 -1
  97. package/src/web/scrapers/rawg.ts +3 -3
  98. package/src/web/scrapers/readthedocs.ts +1 -1
  99. package/src/web/scrapers/spdx.ts +1 -1
  100. package/src/web/scrapers/stackoverflow.ts +2 -2
  101. package/src/web/scrapers/types.ts +53 -39
  102. package/src/web/scrapers/w3c.ts +1 -1
  103. package/src/web/search/index.ts +5 -5
  104. package/src/web/search/provider.ts +121 -39
  105. package/src/web/search/providers/gemini.ts +4 -4
  106. package/src/web/search/render.ts +2 -2
  107. package/src/ipy/modules.ts +0 -144
  108. package/src/prompts/tools/python.md +0 -57
  109. package/src/tools/browser/vm.ts +0 -792
  110. /package/src/{ipy → eval/py}/cancellation.ts +0 -0
  111. /package/src/{ipy → eval/py}/prelude.ts +0 -0
  112. /package/src/{ipy → eval/py}/runtime.ts +0 -0
package/src/sdk.ts CHANGED
@@ -36,6 +36,7 @@ import { CursorExecHandlers } from "./cursor";
36
36
  import "./discovery";
37
37
  import { resolveConfigValue } from "./config/resolve-config-value";
38
38
  import { initializeWithSettings } from "./discovery";
39
+ import { disposeAllKernelSessions, disposeKernelSessionsByOwner } from "./eval/py/executor";
39
40
  import { TtsrManager } from "./export/ttsr";
40
41
  import {
41
42
  type CustomCommandsLoadResult,
@@ -73,7 +74,6 @@ import {
73
74
  RuleProtocolHandler,
74
75
  SkillProtocolHandler,
75
76
  } from "./internal-urls";
76
- import { disposeAllKernelSessions, disposeKernelSessionsByOwner } from "./ipy/executor";
77
77
  import { LSP_STARTUP_EVENT_CHANNEL, type LspStartupEvent } from "./lsp/startup-events";
78
78
  import { discoverAndLoadMCPTools, type MCPManager, type MCPToolsLoadResult } from "./mcp";
79
79
  import {
@@ -99,6 +99,8 @@ import { SessionManager } from "./session/session-manager";
99
99
  import { closeAllConnections } from "./ssh/connection-manager";
100
100
  import { unmountAll } from "./ssh/sshfs-mount";
101
101
  import {
102
+ type AgentsMdSearch,
103
+ buildAgentsMdSearch,
102
104
  buildSystemPrompt as buildSystemPromptInternal,
103
105
  buildSystemPromptToolMetadata,
104
106
  loadProjectContextFiles as loadContextFilesInternal,
@@ -111,13 +113,13 @@ import {
111
113
  createTools,
112
114
  discoverStartupLspServers,
113
115
  EditTool,
116
+ EvalTool,
114
117
  FindTool,
115
118
  getSearchTools,
116
119
  HIDDEN_TOOLS,
117
120
  isSearchProviderPreference,
118
121
  type LspStartupServerInfo,
119
122
  loadSshTool,
120
- PythonTool,
121
123
  ReadTool,
122
124
  ResolveTool,
123
125
  renderSearchToolBm25Description,
@@ -204,9 +206,6 @@ export interface CreateAgentSessionOptions {
204
206
  enableLsp?: boolean;
205
207
  /** Skip Python kernel availability check and prelude warmup */
206
208
  skipPythonPreflight?: boolean;
207
- /** Force Python prelude warmup even when test env would normally skip it */
208
- forcePythonWarmup?: boolean;
209
-
210
209
  /** Tool names explicitly requested (enables disabled-by-default tools) */
211
210
  toolNames?: string[];
212
211
 
@@ -275,10 +274,10 @@ export {
275
274
  BUILTIN_TOOLS,
276
275
  createTools,
277
276
  EditTool,
277
+ EvalTool,
278
278
  FindTool,
279
279
  HIDDEN_TOOLS,
280
280
  loadSshTool,
281
- PythonTool,
282
281
  ReadTool,
283
282
  ResolveTool,
284
283
  SearchTool,
@@ -301,7 +300,6 @@ function getDefaultAgentDir(): string {
301
300
  */
302
301
  export async function discoverAuthStorage(agentDir: string = getDefaultAgentDir()): Promise<AuthStorage> {
303
302
  const dbPath = getAgentDbPath(agentDir);
304
- logger.debug("discoverAuthStorage", { agentDir, dbPath });
305
303
 
306
304
  const storage = await AuthStorage.create(dbPath, { configValueResolver: resolveConfigValue });
307
305
  await storage.reload();
@@ -430,6 +428,9 @@ function isCustomTool(tool: CustomTool | ToolDefinition): tool is CustomTool {
430
428
 
431
429
  const TOOL_DEFINITION_MARKER = Symbol("__isToolDefinition");
432
430
 
431
+ /** Matches the truncation applied to per-server instructions inside `rebuildSystemPrompt`. */
432
+ const MAX_MCP_INSTRUCTIONS_LENGTH = 4000;
433
+
433
434
  let sshCleanupRegistered = false;
434
435
 
435
436
  async function cleanupSshResources(): Promise<void> {
@@ -667,17 +668,40 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
667
668
  const modelRegistry = options.modelRegistry ?? new ModelRegistry(authStorage);
668
669
 
669
670
  const settings = options.settings ?? (await logger.time("settings", Settings.init, { cwd, agentDir }));
670
- logger.time("initializeWithSettings");
671
- initializeWithSettings(settings);
671
+ logger.time("initializeWithSettings", initializeWithSettings, settings);
672
672
  if (!options.modelRegistry) {
673
673
  modelRegistry.refreshInBackground();
674
674
  }
675
+ // Kick off AGENTS.md filesystem search in parallel — it is the slowest piece of buildSystemPrompt
676
+ // (~200ms on large repos) and only needs `cwd`, so it can overlap with everything that follows.
677
+ const agentsMdSearchPromise: Promise<AgentsMdSearch> = logger.time("buildAgentsMdSearch", buildAgentsMdSearch, cwd);
678
+ agentsMdSearchPromise.catch(() => {});
679
+
680
+ // Independent discoveries that depend only on cwd/agentDir — kicked off in parallel and awaited
681
+ // at their respective consumer sites. Their work can overlap with model resolution, secret loading,
682
+ // session-context build, tool creation, MCP discovery, and extension discovery.
683
+ const contextFilesPromise = options.contextFiles
684
+ ? Promise.resolve(options.contextFiles)
685
+ : logger.time("discoverContextFiles", discoverContextFiles, cwd, agentDir);
686
+ contextFilesPromise.catch(() => {});
687
+ const promptTemplatesPromise = options.promptTemplates
688
+ ? Promise.resolve(options.promptTemplates)
689
+ : logger.time("discoverPromptTemplates", discoverPromptTemplates, cwd, agentDir);
690
+ promptTemplatesPromise.catch(() => {});
691
+ const slashCommandsPromise = options.slashCommands
692
+ ? Promise.resolve(options.slashCommands)
693
+ : logger.time("discoverSlashCommands", discoverSlashCommands, cwd);
694
+ slashCommandsPromise.catch(() => {});
675
695
  const skillsSettings = settings.getGroup("skills");
676
696
  const disabledExtensionIds = settings.get("disabledExtensions") ?? [];
677
697
  const discoveredSkillsPromise =
678
698
  options.skills === undefined
679
- ? discoverSkills(cwd, agentDir, { ...skillsSettings, disabledExtensions: disabledExtensionIds })
699
+ ? logger.time("discoverSkills", discoverSkills, cwd, agentDir, {
700
+ ...skillsSettings,
701
+ disabledExtensions: disabledExtensionIds,
702
+ })
680
703
  : undefined;
704
+ discoveredSkillsPromise?.catch(() => {});
681
705
 
682
706
  // Initialize provider preferences from settings
683
707
  const webSearchProvider = settings.get("providers.webSearch");
@@ -814,10 +838,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
814
838
  skills = options.skills;
815
839
  skillWarnings = [];
816
840
  } else {
817
- const discovered = await logger.time(
818
- "discoverSkills",
819
- () => discoveredSkillsPromise ?? Promise.resolve({ skills: [], warnings: [] }),
820
- );
841
+ const discovered = await (discoveredSkillsPromise ?? Promise.resolve({ skills: [], warnings: [] }));
821
842
  skills = discovered.skills;
822
843
  skillWarnings = discovered.warnings;
823
844
  }
@@ -851,10 +872,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
851
872
  return { ttsrManager, rulebookRules, alwaysApplyRules };
852
873
  });
853
874
 
854
- const contextFiles = await logger.time(
855
- "discoverContextFiles",
856
- async () => options.contextFiles ?? (await discoverContextFiles(cwd, agentDir)),
857
- );
875
+ const contextFiles = await contextFilesPromise;
858
876
 
859
877
  let agent: Agent;
860
878
  let session!: AgentSession;
@@ -917,7 +935,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
917
935
  const resolvedAgentId = options.agentId ?? options.parentTaskPrefix ?? MAIN_AGENT_ID;
918
936
  const resolvedAgentDisplayName =
919
937
  options.agentDisplayName ?? ((options.taskDepth ?? 0) > 0 || options.parentTaskPrefix ? "sub" : "main");
920
- const pythonKernelOwnerId = `agent-session:${Snowflake.next()}`;
938
+ const evalKernelOwnerId = `agent-session:${Snowflake.next()}`;
921
939
 
922
940
  try {
923
941
  const getActiveModelString = (): string | undefined => {
@@ -937,7 +955,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
937
955
  return !requestedToolNames || requestedToolNames.includes("edit");
938
956
  },
939
957
  skipPythonPreflight: options.skipPythonPreflight,
940
- forcePythonWarmup: options.forcePythonWarmup,
941
958
  contextFiles,
942
959
  skills,
943
960
  eventBus,
@@ -945,12 +962,13 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
945
962
  requireYieldTool: options.requireYieldTool,
946
963
  taskDepth: options.taskDepth ?? 0,
947
964
  getSessionFile: () => sessionManager.getSessionFile() ?? null,
948
- getPythonKernelOwnerId: () => pythonKernelOwnerId,
949
- assertPythonExecutionAllowed: () => session?.assertPythonExecutionAllowed(),
950
- trackPythonExecution: (execution, abortController) =>
951
- session ? session.trackPythonExecution(execution, abortController) : execution,
965
+ getEvalKernelOwnerId: () => evalKernelOwnerId,
966
+ assertEvalExecutionAllowed: () => session?.assertEvalExecutionAllowed(),
967
+ trackEvalExecution: (execution, abortController) =>
968
+ session ? session.trackEvalExecution(execution, abortController) : execution,
952
969
  getSessionId: () => sessionManager.getSessionId?.() ?? null,
953
970
  getAgentId: () => resolvedAgentId,
971
+ getToolByName: name => session?.getToolByName(name),
954
972
  agentRegistry,
955
973
  getSessionSpawns: () => options.spawns ?? "*",
956
974
  getModelString: () => (hasExplicitModel && model ? formatModelString(model) : undefined),
@@ -1322,7 +1340,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1322
1340
  const serverInstructions = mcpManager?.getServerInstructions();
1323
1341
  let appendPrompt: string | undefined = memoryInstructions ?? undefined;
1324
1342
  if (serverInstructions && serverInstructions.size > 0) {
1325
- const MAX_INSTRUCTIONS_LENGTH = 4000;
1326
1343
  const parts: string[] = [];
1327
1344
  if (appendPrompt) parts.push(appendPrompt);
1328
1345
  parts.push(
@@ -1330,8 +1347,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1330
1347
  );
1331
1348
  for (const [srvName, srvInstructions] of serverInstructions) {
1332
1349
  const truncated =
1333
- srvInstructions.length > MAX_INSTRUCTIONS_LENGTH
1334
- ? `${srvInstructions.slice(0, MAX_INSTRUCTIONS_LENGTH)}\n[truncated]`
1350
+ srvInstructions.length > MAX_MCP_INSTRUCTIONS_LENGTH
1351
+ ? `${srvInstructions.slice(0, MAX_MCP_INSTRUCTIONS_LENGTH)}\n[truncated]`
1335
1352
  : srvInstructions;
1336
1353
  parts.push(`### ${srvName}\n${truncated}`);
1337
1354
  }
@@ -1353,6 +1370,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1353
1370
  mcpDiscoveryServerSummaries: discoverableMCPSummary.servers.map(formatDiscoverableMCPToolServerSummary),
1354
1371
  eagerTasks,
1355
1372
  secretsEnabled,
1373
+ agentsMdSearch: agentsMdSearchPromise,
1356
1374
  });
1357
1375
 
1358
1376
  if (options.systemPrompt === undefined) {
@@ -1376,6 +1394,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1376
1394
  mcpDiscoveryServerSummaries: discoverableMCPSummary.servers.map(formatDiscoverableMCPToolServerSummary),
1377
1395
  eagerTasks,
1378
1396
  secretsEnabled,
1397
+ agentsMdSearch: agentsMdSearchPromise,
1379
1398
  });
1380
1399
  }
1381
1400
  return options.systemPrompt(defaultPrompt);
@@ -1446,13 +1465,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1446
1465
 
1447
1466
  const systemPrompt = await logger.time("buildSystemPrompt", rebuildSystemPrompt, initialToolNames, toolRegistry);
1448
1467
 
1449
- const promptTemplates =
1450
- options.promptTemplates ??
1451
- (await logger.time("discoverPromptTemplates", discoverPromptTemplates, cwd, agentDir));
1468
+ const promptTemplates = await promptTemplatesPromise;
1452
1469
  toolSession.promptTemplates = promptTemplates;
1453
1470
 
1454
- const slashCommands =
1455
- options.slashCommands ?? (await logger.time("discoverSlashCommands", discoverSlashCommands, cwd));
1471
+ const slashCommands = await slashCommandsPromise;
1456
1472
 
1457
1473
  // Create convertToLlm wrapper that filters images if blockImages is enabled (defense-in-depth)
1458
1474
  const convertToLlmWithBlockImages = (messages: AgentMessage[]): Message[] => {
@@ -1596,7 +1612,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1596
1612
  thinkingLevel,
1597
1613
  sessionManager,
1598
1614
  settings,
1599
- pythonKernelOwnerId,
1615
+ evalKernelOwnerId,
1600
1616
  scopedModels: options.scopedModels,
1601
1617
  promptTemplates,
1602
1618
  slashCommands,
@@ -1612,6 +1628,20 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1612
1628
  onResponse,
1613
1629
  convertToLlm: convertToLlmFinal,
1614
1630
  rebuildSystemPrompt,
1631
+ getMcpServerInstructions: mcpManager
1632
+ ? () => {
1633
+ const raw = mcpManager.getServerInstructions();
1634
+ if (!raw || raw.size === 0) return raw;
1635
+ const out = new Map<string, string>();
1636
+ for (const [name, text] of raw) {
1637
+ out.set(
1638
+ name,
1639
+ text.length > MAX_MCP_INSTRUCTIONS_LENGTH ? text.slice(0, MAX_MCP_INSTRUCTIONS_LENGTH) : text,
1640
+ );
1641
+ }
1642
+ return out;
1643
+ }
1644
+ : undefined,
1615
1645
  mcpDiscoveryEnabled,
1616
1646
  initialSelectedMCPToolNames,
1617
1647
  defaultSelectedMCPToolNames,
@@ -1765,7 +1795,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1765
1795
  });
1766
1796
  }
1767
1797
 
1768
- logger.time("createAgentSession:return");
1769
1798
  return {
1770
1799
  session,
1771
1800
  extensionsResult,
@@ -1780,7 +1809,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1780
1809
  if (hasSession) {
1781
1810
  await session.dispose();
1782
1811
  } else {
1783
- await disposeKernelSessionsByOwner(pythonKernelOwnerId);
1812
+ await disposeKernelSessionsByOwner(evalKernelOwnerId);
1784
1813
  }
1785
1814
  } catch (cleanupError) {
1786
1815
  logger.warn("Failed to clean up createAgentSession resources after startup error", {
package/src/session/agent-session.ts CHANGED
@@ -68,6 +68,11 @@ import {
68
68
  import { expandPromptTemplate, type PromptTemplate } from "../config/prompt-templates";
69
69
  import type { Settings, SkillsSettings } from "../config/settings";
70
70
  import { normalizeDiff, normalizeToLF, ParseError, previewPatch, stripBom } from "../edit";
71
+ import {
72
+ disposeKernelSessionsByOwner,
73
+ executePython as executePythonCommand,
74
+ type PythonResult,
75
+ } from "../eval/py/executor";
71
76
  import { type BashResult, executeBash as executeBashCommand } from "../exec/bash-executor";
72
77
  import { exportSessionToHtml } from "../export/html";
73
78
  import type { TtsrManager, TtsrMatchContext } from "../export/ttsr";
@@ -98,11 +103,6 @@ import type { HookCommandContext } from "../extensibility/hooks/types";
98
103
  import type { Skill, SkillWarning } from "../extensibility/skills";
99
104
  import { expandSlashCommand, type FileSlashCommand } from "../extensibility/slash-commands";
100
105
  import { type LocalProtocolOptions, resolveLocalUrlToPath } from "../internal-urls";
101
- import {
102
- disposeKernelSessionsByOwner,
103
- executePython as executePythonCommand,
104
- type PythonResult,
105
- } from "../ipy/executor";
106
106
  import {
107
107
  buildDiscoverableMCPSearchIndex,
108
108
  collectDiscoverableMCPTools,
@@ -244,6 +244,13 @@ export interface AgentSessionConfig {
244
244
  convertToLlm?: (messages: AgentMessage[]) => Message[] | Promise<Message[]>;
245
245
  /** System prompt builder that can consider tool availability */
246
246
  rebuildSystemPrompt?: (toolNames: string[], tools: Map<string, AgentTool>) => Promise<string>;
247
+ /**
248
+ * Optional accessor for live MCP server instructions. Read by the session's
249
+ * `rebuildSystemPrompt`-skip optimization to detect server-side instruction
250
+ * changes (e.g. an MCP server upgrade) that would otherwise pass the tool-set
251
+ * signature comparison and silently keep a stale prompt cached.
252
+ */
253
+ getMcpServerInstructions?: () => Map<string, string> | undefined;
247
254
  /** Enable hidden-by-default MCP tool discovery for this session. */
248
255
  mcpDiscoveryEnabled?: boolean;
249
256
  /** MCP tool names to activate for the current session when discovery mode is enabled. */
@@ -259,7 +266,7 @@ export interface AgentSessionConfig {
259
266
  /** Secret obfuscator for deobfuscating streaming edit content */
260
267
  obfuscator?: SecretObfuscator;
261
268
  /** Logical owner for retained Python kernels created by this session. */
262
- pythonKernelOwnerId?: string;
269
+ evalKernelOwnerId?: string;
263
270
  /** Agent identity (registry id like "0-Main" or "3-Alice") used for IRC routing. */
264
271
  agentId?: string;
265
272
  /** Shared agent registry (for forwarding IRC observations to the main session UI). */
@@ -474,11 +481,11 @@ export class AgentSession {
474
481
  #pendingBashMessages: BashExecutionMessage[] = [];
475
482
 
476
483
  // Python execution state
477
- #pythonAbortControllers = new Set<AbortController>();
478
- #pythonKernelOwnerId: string;
484
+ #evalAbortControllers = new Set<AbortController>();
485
+ #evalKernelOwnerId: string;
479
486
  #pendingPythonMessages: PythonExecutionMessage[] = [];
480
- #activePythonExecutions = new Set<Promise<unknown>>();
481
- #pythonExecutionDisposing = false;
487
+ #activeEvalExecutions = new Set<Promise<unknown>>();
488
+ #evalExecutionDisposing = false;
482
489
 
483
490
  // Background-channel IRC exchanges queued while the recipient was streaming.
484
491
  // Drained into history (via emitExternalEvent) once the recipient becomes idle.
@@ -511,7 +518,15 @@ export class AgentSession {
511
518
  #onResponse: SimpleStreamOptions["onResponse"] | undefined;
512
519
  #convertToLlm: (messages: AgentMessage[]) => Message[] | Promise<Message[]>;
513
520
  #rebuildSystemPrompt: ((toolNames: string[], tools: Map<string, AgentTool>) => Promise<string>) | undefined;
521
+ #getMcpServerInstructions: (() => Map<string, string> | undefined) | undefined;
514
522
  #baseSystemPrompt: string;
523
+ /**
524
+ * Signature of the (toolNames, tool descriptions) tuple passed to the most
525
+ * recent successful `rebuildSystemPrompt` call. Used to skip redundant rebuilds
526
+ * when MCP servers reconnect without changing their tool definitions, which is
527
+ * the dominant cause of prompt-cache invalidation in long sessions.
528
+ */
529
+ #lastAppliedToolSignature: string | undefined;
515
530
  #mcpDiscoveryEnabled = false;
516
531
  #discoverableMCPTools = new Map<string, DiscoverableMCPTool>();
517
532
  #discoverableMCPSearchIndex: DiscoverableMCPSearchIndex | null = null;
@@ -577,7 +592,7 @@ export class AgentSession {
577
592
  this.settings = config.settings;
578
593
  this.#startPowerAssertion();
579
594
  this.#asyncJobManager = config.asyncJobManager;
580
- this.#pythonKernelOwnerId = config.pythonKernelOwnerId ?? `agent-session:${Snowflake.next()}`;
595
+ this.#evalKernelOwnerId = config.evalKernelOwnerId ?? `agent-session:${Snowflake.next()}`;
581
596
  this.#scopedModels = config.scopedModels ?? [];
582
597
  this.#thinkingLevel = config.thinkingLevel;
583
598
  this.#promptTemplates = config.promptTemplates ?? [];
@@ -595,6 +610,7 @@ export class AgentSession {
595
610
  this.#onResponse = config.onResponse;
596
611
  this.#convertToLlm = config.convertToLlm ?? convertToLlm;
597
612
  this.#rebuildSystemPrompt = config.rebuildSystemPrompt;
613
+ this.#getMcpServerInstructions = config.getMcpServerInstructions;
598
614
  this.#baseSystemPrompt = this.agent.state.systemPrompt;
599
615
  this.#mcpDiscoveryEnabled = config.mcpDiscoveryEnabled ?? false;
600
616
  this.#setDiscoverableMCPTools(this.#collectDiscoverableMCPToolsFromRegistry());
@@ -1938,7 +1954,7 @@ export class AgentSession {
1938
1954
  * Call this when completely done with the session.
1939
1955
  */
1940
1956
  async dispose(): Promise<void> {
1941
- this.#pythonExecutionDisposing = true;
1957
+ this.#evalExecutionDisposing = true;
1942
1958
  try {
1943
1959
  if (this.#extensionRunner?.hasHandlers("session_shutdown")) {
1944
1960
  await this.#extensionRunner.emit({ type: "session_shutdown" });
@@ -1953,13 +1969,13 @@ export class AgentSession {
1953
1969
  if (drained === false && deliveryState) {
1954
1970
  logger.warn("Async job completion deliveries still pending during dispose", { ...deliveryState });
1955
1971
  }
1956
- const pythonExecutionsSettled = await this.#preparePythonExecutionsForDispose();
1972
+ const pythonExecutionsSettled = await this.#prepareEvalExecutionsForDispose();
1957
1973
  if (!pythonExecutionsSettled) {
1958
1974
  logger.warn(
1959
1975
  "Detaching retained Python kernel ownership during dispose while Python execution is still active",
1960
1976
  );
1961
1977
  }
1962
- await disposeKernelSessionsByOwner(this.#pythonKernelOwnerId);
1978
+ await disposeKernelSessionsByOwner(this.#evalKernelOwnerId);
1963
1979
  this.#stopPowerAssertion();
1964
1980
  await this.sessionManager.close();
1965
1981
  this.#closeAllProviderSessions("dispose");
@@ -2211,10 +2227,18 @@ export class AgentSession {
2211
2227
  }
2212
2228
  this.agent.setTools(tools);
2213
2229
 
2214
- // Rebuild base system prompt with new tool set
2230
+ // Rebuild base system prompt with new tool set, but only when the tool set
2231
+ // actually changed. MCP servers can reconnect at arbitrary times and call
2232
+ // `refreshMCPTools` -> `#applyActiveToolsByName` even though the resulting
2233
+ // tool list is byte-identical. Skipping the rebuild keeps the system prompt
2234
+ // stable, which is required for Anthropic prompt caching to keep hitting.
2215
2235
  if (this.#rebuildSystemPrompt) {
2216
- this.#baseSystemPrompt = await this.#rebuildSystemPrompt(validToolNames, this.#toolRegistry);
2217
- this.agent.setSystemPrompt(this.#baseSystemPrompt);
2236
+ const signature = this.#computeAppliedToolSignature(validToolNames, tools);
2237
+ if (signature !== this.#lastAppliedToolSignature) {
2238
+ this.#baseSystemPrompt = await this.#rebuildSystemPrompt(validToolNames, this.#toolRegistry);
2239
+ this.agent.setSystemPrompt(this.#baseSystemPrompt);
2240
+ this.#lastAppliedToolSignature = signature;
2241
+ }
2218
2242
  }
2219
2243
  if (options?.persistMCPSelection !== false) {
2220
2244
  this.#persistSelectedMCPToolNamesIfChanged(previousSelectedMCPToolNames);
@@ -2256,6 +2280,86 @@ export class AgentSession {
2256
2280
  const activeToolNames = this.getActiveToolNames();
2257
2281
  this.#baseSystemPrompt = await this.#rebuildSystemPrompt(activeToolNames, this.#toolRegistry);
2258
2282
  this.agent.setSystemPrompt(this.#baseSystemPrompt);
2283
+ // Refresh the cached signature so a subsequent `#applyActiveToolsByName` with
2284
+ // the same tool set does not re-rebuild on top of the explicit refresh we
2285
+ // just performed (and conversely, a different set forces a fresh rebuild).
2286
+ const activeTools = activeToolNames
2287
+ .map(name => this.#toolRegistry.get(name))
2288
+ .filter((tool): tool is AgentTool => tool != null);
2289
+ this.#lastAppliedToolSignature = this.#computeAppliedToolSignature(activeToolNames, activeTools);
2290
+ }
2291
+
2292
+ /**
2293
+ * Compose a stable signature for the inputs that `rebuildSystemPrompt` reads.
2294
+ * Two calls producing identical signatures are guaranteed to produce identical
2295
+ * system prompt bytes, so the rebuild can be skipped.
2296
+ *
2297
+ * The signature covers:
2298
+ * 1. Active tool names in order (the prompt renders them in this order).
2299
+ * 2. Active tool labels, descriptions, and wire-visible names — all are
2300
+ * rendered into the prompt body (see `system-prompt.md` `{{label}}: \`{{name}}\``
2301
+ * and `toolPromptNames` in `buildSystemPrompt`). The wire name comes from
2302
+ * `tool.customWireName` and overrides the internal name on the model wire
2303
+ * (e.g. `edit` exposes itself as `apply_patch` to GPT-5 in apply_patch mode);
2304
+ * a stale wire name would desync prompt guidance from actual tool routing.
2305
+ * 3. When MCP discovery is on, every registry tool's name+label+description+
2306
+ * customWireName, since `rebuildSystemPrompt` summarizes discoverable MCP
2307
+ * tools that are not in the active set.
2308
+ * 4. MCP server instructions text (per server), since `rebuildSystemPrompt`
2309
+ * embeds these in the appended prompt under "## MCP Server Instructions".
2310
+ * A server upgrade can change instructions while keeping tools identical.
2311
+ *
2312
+ * Settings-driven tool metadata is covered automatically: built-in tools that
2313
+ * depend on settings expose `description`/`label` via getters (see `TaskTool`,
2314
+ * `SearchToolBm25Tool`, `EditTool`), and the signature reads them live on every
2315
+ * call - so a settings flip that mutates the rendered string differs the signature
2316
+ * the next time `#applyActiveToolsByName` runs. Do not refactor `describeTool` to
2317
+ * cache per-tool strings without preserving this property.
2318
+ *
2319
+ * Inputs NOT covered: tool input schemas; memory instructions read from disk;
2320
+ * and SDK-init-time closure constants in `sdk.ts` (`repeatToolDescriptions`,
2321
+ * `eagerTasks`, `intentField`, `mcpDiscoveryEnabled`, `secretsEnabled`). The
2322
+ * closure-captured ones cannot change at runtime regardless of skip behavior.
2323
+ * For everything else, callers must explicitly call `refreshBaseSystemPrompt()`
2324
+ * after side-effecting changes; see e.g. the memory hooks and
2325
+ * `#syncEditToolModeAfterModelChange`.
2326
+ *
2327
+ * The current calendar date IS covered (appended as a segment) because
2328
+ * `buildSystemPrompt` injects it into the prompt body (`Today is '{{date}}'`).
2329
+ * Without this, a session spanning midnight with only tool-stable MCP
2330
+ * reconnects would keep yesterday's date indefinitely.
2331
+ */
2332
+ #computeAppliedToolSignature(toolNames: string[], tools: AgentTool[]): string {
2333
+ // Order-preserving join: any reorder must produce a different signature so
2334
+ // the rebuild fires and the new tool list reaches the API.
2335
+ const nameSegment = toolNames.join("\u0001");
2336
+ const describeTool = (tool: AgentTool): string =>
2337
+ `${tool.name}=${tool.label ?? ""}|${tool.description ?? ""}|${tool.customWireName ?? ""}`;
2338
+ const descriptionSegment = tools.map(describeTool).join("\u0002");
2339
+ let registrySegment = "";
2340
+ if (this.#mcpDiscoveryEnabled) {
2341
+ // Registry iteration order is not load-bearing for the prompt content, so we
2342
+ // sort to keep the signature insensitive to incidental insertion order.
2343
+ const entries: string[] = [];
2344
+ for (const tool of this.#toolRegistry.values()) {
2345
+ entries.push(describeTool(tool));
2346
+ }
2347
+ entries.sort();
2348
+ registrySegment = entries.join("\u0004");
2349
+ }
2350
+ let instructionsSegment = "";
2351
+ const serverInstructions = this.#getMcpServerInstructions?.();
2352
+ if (serverInstructions && serverInstructions.size > 0) {
2353
+ // Sort by server name so transport flap order does not perturb the signature.
2354
+ const entries: string[] = [];
2355
+ for (const [server, instructions] of serverInstructions) {
2356
+ entries.push(`${server}=${instructions}`);
2357
+ }
2358
+ entries.sort();
2359
+ instructionsSegment = entries.join("\u0006");
2360
+ }
2361
+ const date = new Date().toISOString().slice(0, 10);
2362
+ return `${nameSegment}\u0003${descriptionSegment}\u0005${registrySegment}\u0007${instructionsSegment}|${date}`;
2259
2363
  }
2260
2364
 
2261
2365
  /**
@@ -3423,7 +3527,7 @@ export class AgentSession {
3423
3527
  this.abortCompaction();
3424
3528
  this.abortHandoff();
3425
3529
  this.abortBash();
3426
- this.abortPython();
3530
+ this.abortEval();
3427
3531
  const postPromptDrain = this.#cancelPostPromptTasks();
3428
3532
  this.agent.abort();
3429
3533
  await postPromptDrain;
@@ -4248,9 +4352,10 @@ export class AgentSession {
4248
4352
  }
4249
4353
 
4250
4354
  // Start a new session
4355
+ const previousSessionFile = this.sessionFile;
4251
4356
  await this.sessionManager.flush();
4252
4357
  this.#asyncJobManager?.cancelAll();
4253
- await this.sessionManager.newSession();
4358
+ await this.sessionManager.newSession(previousSessionFile ? { parentSession: previousSessionFile } : undefined);
4254
4359
  this.agent.reset();
4255
4360
  this.agent.sessionId = this.sessionManager.getSessionId();
4256
4361
  this.#steeringMessages = [];
@@ -4262,6 +4367,7 @@ export class AgentSession {
4262
4367
  // Inject the handoff document as a custom message
4263
4368
  const handoffContent = `<handoff-context>\n${handoffText}\n</handoff-context>\n\nThe above is a handoff document from a previous session. Use this context to continue the work seamlessly.`;
4264
4369
  this.sessionManager.appendCustomMessageEntry("handoff", handoffContent, true, undefined, "agent");
4370
+ await this.sessionManager.ensureOnDisk();
4265
4371
  let savedPath: string | undefined;
4266
4372
  if (options?.autoTriggered && this.settings.get("compaction.handoffSaveToDisk")) {
4267
4373
  const artifactsDir = this.sessionManager.getArtifactsDir();
@@ -5895,7 +6001,7 @@ export class AgentSession {
5895
6001
 
5896
6002
  /**
5897
6003
  * Execute Python code in the shared kernel.
5898
- * Uses the same kernel session as the agent's Python tool, allowing collaborative editing.
6004
+ * Uses the same kernel session as eval's Python backend, allowing collaborative editing.
5899
6005
  * @param code The Python code to execute
5900
6006
  * @param onChunk Optional streaming callback for output
5901
6007
  * @param options.excludeFromContext If true, execution won't be sent to LLM ($$ prefix)
@@ -5907,7 +6013,7 @@ export class AgentSession {
5907
6013
  ): Promise<PythonResult> {
5908
6014
  const excludeFromContext = options?.excludeFromContext === true;
5909
6015
  const cwd = this.sessionManager.getCwd();
5910
- this.assertPythonExecutionAllowed();
6016
+ this.assertEvalExecutionAllowed();
5911
6017
 
5912
6018
  const abortController = new AbortController();
5913
6019
  const execution = (async (): Promise<PythonResult> => {
@@ -5918,20 +6024,20 @@ export class AgentSession {
5918
6024
  excludeFromContext,
5919
6025
  cwd,
5920
6026
  });
5921
- this.assertPythonExecutionAllowed();
6027
+ this.assertEvalExecutionAllowed();
5922
6028
  if (hookResult?.result) {
5923
6029
  this.recordPythonResult(code, hookResult.result, options);
5924
6030
  return hookResult.result;
5925
6031
  }
5926
6032
  }
5927
6033
 
5928
- // Use the same session ID as the Python tool for kernel sharing
6034
+ // Use the same session ID as eval's Python backend for kernel sharing
5929
6035
  const sessionFile = this.sessionManager.getSessionFile();
5930
6036
  const sessionId = sessionFile ? `session:${sessionFile}:cwd:${cwd}` : `cwd:${cwd}`;
5931
6037
  const result = await executePythonCommand(code, {
5932
6038
  cwd,
5933
6039
  sessionId,
5934
- kernelOwnerId: this.#pythonKernelOwnerId,
6040
+ kernelOwnerId: this.#evalKernelOwnerId,
5935
6041
  kernelMode: this.settings.get("python.kernelMode"),
5936
6042
  useSharedGateway: this.settings.get("python.sharedGateway"),
5937
6043
  onChunk,
@@ -5940,11 +6046,11 @@ export class AgentSession {
5940
6046
  this.recordPythonResult(code, result, options);
5941
6047
  return result;
5942
6048
  })();
5943
- return await this.trackPythonExecution(execution, abortController);
6049
+ return await this.trackEvalExecution(execution, abortController);
5944
6050
  }
5945
6051
 
5946
- assertPythonExecutionAllowed(): void {
5947
- if (this.#pythonExecutionDisposing) {
6052
+ assertEvalExecutionAllowed(): void {
6053
+ if (this.#evalExecutionDisposing) {
5948
6054
  throw new Error("Python execution is unavailable while session disposal is in progress");
5949
6055
  }
5950
6056
  }
@@ -5952,17 +6058,17 @@ export class AgentSession {
5952
6058
  /**
5953
6059
  * Track Python work started outside AgentSession.executePython so dispose can await and abort it too.
5954
6060
  */
5955
- trackPythonExecution<T>(execution: Promise<T>, abortController: AbortController): Promise<T> {
5956
- this.#pythonAbortControllers.add(abortController);
5957
- this.#activePythonExecutions.add(execution);
6061
+ trackEvalExecution<T>(execution: Promise<T>, abortController: AbortController): Promise<T> {
6062
+ this.#evalAbortControllers.add(abortController);
6063
+ this.#activeEvalExecutions.add(execution);
5958
6064
  void execution.then(
5959
6065
  () => {
5960
- this.#pythonAbortControllers.delete(abortController);
5961
- this.#activePythonExecutions.delete(execution);
6066
+ this.#evalAbortControllers.delete(abortController);
6067
+ this.#activeEvalExecutions.delete(execution);
5962
6068
  },
5963
6069
  () => {
5964
- this.#pythonAbortControllers.delete(abortController);
5965
- this.#activePythonExecutions.delete(execution);
6070
+ this.#evalAbortControllers.delete(abortController);
6071
+ this.#activeEvalExecutions.delete(execution);
5966
6072
  },
5967
6073
  );
5968
6074
  return execution;
@@ -5997,35 +6103,35 @@ export class AgentSession {
5997
6103
  /**
5998
6104
  * Cancel running Python execution.
5999
6105
  */
6000
- abortPython(): void {
6001
- for (const abortController of this.#pythonAbortControllers) {
6106
+ abortEval(): void {
6107
+ for (const abortController of this.#evalAbortControllers) {
6002
6108
  abortController.abort();
6003
6109
  }
6004
6110
  }
6005
6111
 
6006
- async #waitForPythonExecutionsToSettle(timeoutMs: number): Promise<boolean> {
6112
+ async #waitForEvalExecutionsToSettle(timeoutMs: number): Promise<boolean> {
6007
6113
  const deadline = Date.now() + timeoutMs;
6008
- while (this.#activePythonExecutions.size > 0) {
6114
+ while (this.#activeEvalExecutions.size > 0) {
6009
6115
  const remainingMs = deadline - Date.now();
6010
6116
  if (remainingMs <= 0) {
6011
6117
  return false;
6012
6118
  }
6013
6119
  const settled = await Promise.race([
6014
- Promise.allSettled(Array.from(this.#activePythonExecutions)).then(() => true),
6120
+ Promise.allSettled(Array.from(this.#activeEvalExecutions)).then(() => true),
6015
6121
  Bun.sleep(remainingMs).then(() => false),
6016
6122
  ]);
6017
- if (!settled && this.#activePythonExecutions.size > 0) {
6123
+ if (!settled && this.#activeEvalExecutions.size > 0) {
6018
6124
  return false;
6019
6125
  }
6020
6126
  }
6021
6127
  return true;
6022
6128
  }
6023
6129
 
6024
- async #preparePythonExecutionsForDispose(): Promise<boolean> {
6025
- if (!(await this.#waitForPythonExecutionsToSettle(3_000))) {
6130
+ async #prepareEvalExecutionsForDispose(): Promise<boolean> {
6131
+ if (!(await this.#waitForEvalExecutionsToSettle(3_000))) {
6026
6132
  logger.warn("Aborting active Python execution during dispose before retained kernel cleanup");
6027
- this.abortPython();
6028
- if (!(await this.#waitForPythonExecutionsToSettle(1_000))) {
6133
+ this.abortEval();
6134
+ if (!(await this.#waitForEvalExecutionsToSettle(1_000))) {
6029
6135
  logger.warn(
6030
6136
  "Python execution is still active after dispose aborted all active runs; retained kernel ownership will still be detached",
6031
6137
  );
@@ -6036,8 +6142,8 @@ export class AgentSession {
6036
6142
  }
6037
6143
 
6038
6144
  /** Whether a Python execution is currently running */
6039
- get isPythonRunning(): boolean {
6040
- return this.#pythonAbortControllers.size > 0;
6145
+ get isEvalRunning(): boolean {
6146
+ return this.#evalAbortControllers.size > 0;
6041
6147
  }
6042
6148
 
6043
6149
  /** Whether there are pending Python messages waiting to be flushed */
@@ -59,7 +59,7 @@ export interface BashExecutionMessage {
59
59
 
60
60
  /**
61
61
  * Message type for user-initiated Python executions via the $ command.
62
- * Shares the same kernel session as the agent's Python tool.
62
+ * Shares the same kernel session as eval's Python backend.
63
63
  */
64
64
  export interface PythonExecutionMessage {
65
65
  role: "pythonExecution";