@oh-my-pi/pi-coding-agent 14.5.12 → 14.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/CHANGELOG.md +32 -0
  2. package/package.json +18 -10
  3. package/src/cli/jupyter-cli.ts +1 -1
  4. package/src/config/model-equivalence.ts +49 -16
  5. package/src/config/model-registry.ts +100 -25
  6. package/src/config/model-resolver.ts +29 -15
  7. package/src/config/settings-schema.ts +20 -6
  8. package/src/config/settings.ts +9 -8
  9. package/src/config.ts +9 -0
  10. package/src/eval/backend.ts +43 -0
  11. package/src/eval/eval.lark +43 -0
  12. package/src/eval/index.ts +5 -0
  13. package/src/eval/js/context-manager.ts +717 -0
  14. package/src/eval/js/executor.ts +131 -0
  15. package/src/eval/js/index.ts +46 -0
  16. package/src/eval/js/prelude.ts +2 -0
  17. package/src/eval/js/prelude.txt +84 -0
  18. package/src/eval/js/tool-bridge.ts +124 -0
  19. package/src/eval/parse.ts +337 -0
  20. package/src/{ipy → eval/py}/executor.ts +2 -180
  21. package/src/{ipy → eval/py}/gateway-coordinator.ts +2 -2
  22. package/src/eval/py/index.ts +58 -0
  23. package/src/{ipy → eval/py}/kernel.ts +5 -41
  24. package/src/{ipy → eval/py}/prelude.py +39 -227
  25. package/src/eval/types.ts +48 -0
  26. package/src/export/html/template.generated.ts +1 -1
  27. package/src/export/html/template.js +8 -10
  28. package/src/extensibility/extensions/types.ts +2 -3
  29. package/src/internal-urls/docs-index.generated.ts +5 -5
  30. package/src/lsp/client.ts +9 -0
  31. package/src/lsp/index.ts +395 -0
  32. package/src/lsp/types.ts +15 -4
  33. package/src/main.ts +25 -14
  34. package/src/mcp/oauth-flow.ts +1 -1
  35. package/src/memories/index.ts +1 -1
  36. package/src/modes/acp/acp-event-mapper.ts +1 -1
  37. package/src/modes/components/{python-execution.ts → eval-execution.ts} +11 -4
  38. package/src/modes/components/login-dialog.ts +1 -1
  39. package/src/modes/components/oauth-selector.ts +2 -1
  40. package/src/modes/components/tool-execution.ts +3 -4
  41. package/src/modes/controllers/command-controller.ts +28 -8
  42. package/src/modes/controllers/input-controller.ts +4 -4
  43. package/src/modes/controllers/selector-controller.ts +2 -1
  44. package/src/modes/interactive-mode.ts +4 -5
  45. package/src/modes/types.ts +3 -3
  46. package/src/modes/utils/ui-helpers.ts +2 -2
  47. package/src/prompts/system/system-prompt.md +3 -3
  48. package/src/prompts/tools/eval.md +92 -0
  49. package/src/prompts/tools/lsp.md +7 -3
  50. package/src/sdk.ts +45 -31
  51. package/src/session/agent-session.ts +42 -42
  52. package/src/session/messages.ts +1 -1
  53. package/src/slash-commands/builtin-registry.ts +1 -1
  54. package/src/system-prompt.ts +34 -66
  55. package/src/task/executor.ts +5 -9
  56. package/src/tools/browser/launch.ts +22 -0
  57. package/src/tools/browser/registry.ts +25 -244
  58. package/src/tools/browser/render.ts +1 -1
  59. package/src/tools/browser/tab-protocol.ts +101 -0
  60. package/src/tools/browser/tab-supervisor.ts +429 -0
  61. package/src/tools/browser/tab-worker-entry.ts +21 -0
  62. package/src/tools/browser/tab-worker.ts +1006 -0
  63. package/src/tools/browser.ts +12 -29
  64. package/src/tools/checkpoint.ts +2 -2
  65. package/src/tools/{python.ts → eval.ts} +324 -315
  66. package/src/tools/exit-plan-mode.ts +1 -1
  67. package/src/tools/index.ts +62 -100
  68. package/src/tools/read.ts +0 -6
  69. package/src/tools/recipe/runners/pkg.ts +34 -32
  70. package/src/tools/renderers.ts +2 -2
  71. package/src/tools/resolve.ts +7 -2
  72. package/src/tools/todo-write.ts +0 -1
  73. package/src/tools/tool-timeouts.ts +2 -2
  74. package/src/utils/markit.ts +15 -7
  75. package/src/utils/tools-manager.ts +5 -5
  76. package/src/web/search/index.ts +5 -5
  77. package/src/web/search/provider.ts +121 -39
  78. package/src/web/search/providers/gemini.ts +2 -2
  79. package/src/web/search/render.ts +2 -2
  80. package/src/ipy/modules.ts +0 -144
  81. package/src/prompts/tools/python.md +0 -57
  82. package/src/tools/browser/vm.ts +0 -792
  83. /package/src/{ipy → eval/py}/cancellation.ts +0 -0
  84. /package/src/{ipy → eval/py}/prelude.ts +0 -0
  85. /package/src/{ipy → eval/py}/runtime.ts +0 -0
@@ -14,14 +14,14 @@ import { formatDuration, Snowflake, setProjectDir } from "@oh-my-pi/pi-utils";
14
14
  import { $ } from "bun";
15
15
  import { reset as resetCapabilities } from "../../capability";
16
16
  import { clearClaudePluginRootsCache } from "../../discovery/helpers";
17
+ import { getGatewayStatus } from "../../eval/py/gateway-coordinator";
17
18
  import { loadCustomShare } from "../../export/custom-share";
18
19
  import type { CompactOptions } from "../../extensibility/extensions/types";
19
- import { getGatewayStatus } from "../../ipy/gateway-coordinator";
20
20
  import { buildMemoryToolDeveloperInstructions, clearMemoryData, enqueueMemoryConsolidation } from "../../memories";
21
21
  import { BashExecutionComponent } from "../../modes/components/bash-execution";
22
22
  import { BorderedLoader } from "../../modes/components/bordered-loader";
23
23
  import { DynamicBorder } from "../../modes/components/dynamic-border";
24
- import { PythonExecutionComponent } from "../../modes/components/python-execution";
24
+ import { EvalExecutionComponent } from "../../modes/components/eval-execution";
25
25
  import { getMarkdownTheme, getSymbolTheme, theme } from "../../modes/theme/theme";
26
26
  import type { InteractiveModeContext } from "../../modes/types";
27
27
  import { computeContextBreakdown, renderContextUsage } from "../../modes/utils/context-usage";
@@ -285,9 +285,26 @@ export class CommandController {
285
285
  this.#doCopy(combined, `Copied ${matches.length} code block${matches.length > 1 ? "s" : ""} to clipboard`);
286
286
  }
287
287
 
288
+ #extractEvalCode(args: unknown): string | undefined {
289
+ if (!args || typeof args !== "object") return undefined;
290
+ const cells = (args as { cells?: unknown }).cells;
291
+ if (!Array.isArray(cells)) return undefined;
292
+
293
+ const codeBlocks: string[] = [];
294
+ for (const cell of cells) {
295
+ if (!cell || typeof cell !== "object") continue;
296
+ const code = (cell as { code?: unknown }).code;
297
+ if (typeof code === "string" && code.length > 0) {
298
+ codeBlocks.push(code);
299
+ }
300
+ }
301
+
302
+ return codeBlocks.length > 0 ? codeBlocks.join("\n\n") : undefined;
303
+ }
304
+
288
305
  #copyLastCommand() {
289
306
  const messages = this.ctx.session.messages;
290
- // Walk backwards to find the last bash/python tool call
307
+ // Walk backwards to find the last bash/eval tool call
291
308
  for (let i = messages.length - 1; i >= 0; i--) {
292
309
  const msg = messages[i];
293
310
  if (msg.role !== "assistant") continue;
@@ -298,13 +315,16 @@ export class CommandController {
298
315
  this.#doCopy(tc.arguments.command, "Copied last bash command to clipboard");
299
316
  return;
300
317
  }
301
- if (tc.name === "python" && typeof tc.arguments.code === "string") {
302
- this.#doCopy(tc.arguments.code, "Copied last python code to clipboard");
303
- return;
318
+ if (tc.name === "eval") {
319
+ const code = this.#extractEvalCode(tc.arguments);
320
+ if (code) {
321
+ this.#doCopy(code, "Copied last eval code to clipboard");
322
+ return;
323
+ }
304
324
  }
305
325
  }
306
326
  }
307
- this.ctx.showWarning("No bash or python command found in the conversation.");
327
+ this.ctx.showWarning("No bash or eval command found in the conversation.");
308
328
  }
309
329
 
310
330
  #doCopy(content: string, label: string) {
@@ -779,7 +799,7 @@ export class CommandController {
779
799
 
780
800
  async handlePythonCommand(code: string, excludeFromContext = false): Promise<void> {
781
801
  const isDeferred = this.ctx.session.isStreaming;
782
- this.ctx.pythonComponent = new PythonExecutionComponent(code, this.ctx.ui, excludeFromContext);
802
+ this.ctx.pythonComponent = new EvalExecutionComponent(code, this.ctx.ui, excludeFromContext);
783
803
 
784
804
  if (isDeferred) {
785
805
  this.ctx.pendingMessagesContainer.addChild(this.ctx.pythonComponent);
@@ -37,7 +37,7 @@ export class InputController {
37
37
  this.ctx.session.isCompacting ||
38
38
  this.ctx.session.isGeneratingHandoff ||
39
39
  this.ctx.session.isBashRunning ||
40
- this.ctx.session.isPythonRunning ||
40
+ this.ctx.session.isEvalRunning ||
41
41
  this.ctx.autoCompactionLoader ||
42
42
  this.ctx.retryLoader ||
43
43
  this.ctx.autoCompactionEscapeHandler ||
@@ -67,8 +67,8 @@ export class InputController {
67
67
  this.ctx.editor.setText("");
68
68
  this.ctx.isBashMode = false;
69
69
  this.ctx.updateEditorBorderColor();
70
- } else if (this.ctx.session.isPythonRunning) {
71
- this.ctx.session.abortPython();
70
+ } else if (this.ctx.session.isEvalRunning) {
71
+ this.ctx.session.abortEval();
72
72
  } else if (this.ctx.isPythonMode) {
73
73
  this.ctx.editor.setText("");
74
74
  this.ctx.isPythonMode = false;
@@ -304,7 +304,7 @@ export class InputController {
304
304
  const isExcluded = text.startsWith("$$");
305
305
  const code = isExcluded ? text.slice(2).trim() : text.slice(1).trim();
306
306
  if (code) {
307
- if (this.ctx.session.isPythonRunning) {
307
+ if (this.ctx.session.isEvalRunning) {
308
308
  this.ctx.showWarning("A Python execution is already running. Press Esc to cancel it first.");
309
309
  this.ctx.editor.setText(text);
310
310
  return;
@@ -1,7 +1,8 @@
1
1
  import * as os from "node:os";
2
2
  import * as path from "node:path";
3
3
  import { ThinkingLevel } from "@oh-my-pi/pi-agent-core";
4
- import { getOAuthProviders, type OAuthProvider } from "@oh-my-pi/pi-ai";
4
+ import { getOAuthProviders } from "@oh-my-pi/pi-ai/utils/oauth";
5
+ import type { OAuthProvider } from "@oh-my-pi/pi-ai/utils/oauth/types";
5
6
  import type { Component, OverlayHandle } from "@oh-my-pi/pi-tui";
6
7
  import { Input, Loader, Spacer, Text } from "@oh-my-pi/pi-tui";
7
8
  import { getAgentDbPath, getConfigDirName, getProjectDir } from "@oh-my-pi/pi-utils";
@@ -57,10 +57,10 @@ import type { AssistantMessageComponent } from "./components/assistant-message";
57
57
  import type { BashExecutionComponent } from "./components/bash-execution";
58
58
  import { CustomEditor } from "./components/custom-editor";
59
59
  import { DynamicBorder } from "./components/dynamic-border";
60
+ import type { EvalExecutionComponent } from "./components/eval-execution";
60
61
  import type { HookEditorComponent } from "./components/hook-editor";
61
62
  import type { HookInputComponent } from "./components/hook-input";
62
63
  import type { HookSelectorComponent } from "./components/hook-selector";
63
- import type { PythonExecutionComponent } from "./components/python-execution";
64
64
  import { StatusLineComponent } from "./components/status-line";
65
65
  import type { ToolExecutionHandle } from "./components/tool-execution";
66
66
  import { WelcomeComponent, type LspServerInfo as WelcomeLspServerInfo } from "./components/welcome";
@@ -166,8 +166,8 @@ export class InteractiveMode implements InteractiveModeContext {
166
166
  pendingTools = new Map<string, ToolExecutionHandle>();
167
167
  pendingBashComponents: BashExecutionComponent[] = [];
168
168
  bashComponent: BashExecutionComponent | undefined = undefined;
169
- pendingPythonComponents: PythonExecutionComponent[] = [];
170
- pythonComponent: PythonExecutionComponent | undefined = undefined;
169
+ pendingPythonComponents: EvalExecutionComponent[] = [];
170
+ pythonComponent: EvalExecutionComponent | undefined = undefined;
171
171
  isPythonMode = false;
172
172
  streamingComponent: AssistantMessageComponent | undefined = undefined;
173
173
  streamingMessage: AssistantMessage | undefined = undefined;
@@ -335,8 +335,7 @@ export class InteractiveMode implements InteractiveModeContext {
335
335
  async init(): Promise<void> {
336
336
  if (this.isInitialized) return;
337
337
 
338
- logger.time("InteractiveMode.init:keybindings");
339
- this.keybindings = KeybindingsManager.create();
338
+ this.keybindings = logger.time("InteractiveMode.init:keybindings", () => KeybindingsManager.create());
340
339
 
341
340
  // Register session manager flush for signal handlers (SIGINT, SIGTERM, SIGHUP)
342
341
  this.#cleanupUnsubscribe = postmortem.register("session-manager-flush", () => this.sessionManager.flush());
@@ -18,10 +18,10 @@ import type { ExitPlanModeDetails, LspStartupServerInfo } from "../tools";
18
18
  import type { AssistantMessageComponent } from "./components/assistant-message";
19
19
  import type { BashExecutionComponent } from "./components/bash-execution";
20
20
  import type { CustomEditor } from "./components/custom-editor";
21
+ import type { EvalExecutionComponent } from "./components/eval-execution";
21
22
  import type { HookEditorComponent } from "./components/hook-editor";
22
23
  import type { HookInputComponent } from "./components/hook-input";
23
24
  import type { HookSelectorComponent } from "./components/hook-selector";
24
- import type { PythonExecutionComponent } from "./components/python-execution";
25
25
  import type { StatusLineComponent } from "./components/status-line";
26
26
  import type { ToolExecutionHandle } from "./components/tool-execution";
27
27
  import type { OAuthManualInputManager } from "./oauth-manual-input";
@@ -93,8 +93,8 @@ export interface InteractiveModeContext {
93
93
  pendingTools: Map<string, ToolExecutionHandle>;
94
94
  pendingBashComponents: BashExecutionComponent[];
95
95
  bashComponent: BashExecutionComponent | undefined;
96
- pendingPythonComponents: PythonExecutionComponent[];
97
- pythonComponent: PythonExecutionComponent | undefined;
96
+ pendingPythonComponents: EvalExecutionComponent[];
97
+ pythonComponent: EvalExecutionComponent | undefined;
98
98
  isPythonMode: boolean;
99
99
  streamingComponent: AssistantMessageComponent | undefined;
100
100
  streamingMessage: AssistantMessage | undefined;
@@ -8,7 +8,7 @@ import { BranchSummaryMessageComponent } from "../../modes/components/branch-sum
8
8
  import { CompactionSummaryMessageComponent } from "../../modes/components/compaction-summary-message";
9
9
  import { CustomMessageComponent } from "../../modes/components/custom-message";
10
10
  import { DynamicBorder } from "../../modes/components/dynamic-border";
11
- import { PythonExecutionComponent } from "../../modes/components/python-execution";
11
+ import { EvalExecutionComponent } from "../../modes/components/eval-execution";
12
12
  import { ReadToolGroupComponent } from "../../modes/components/read-tool-group";
13
13
  import { SkillMessageComponent } from "../../modes/components/skill-message";
14
14
  import { ToolExecutionComponent } from "../../modes/components/tool-execution";
@@ -84,7 +84,7 @@ export class UiHelpers {
84
84
  break;
85
85
  }
86
86
  case "pythonExecution": {
87
- const component = new PythonExecutionComponent(message.code, this.ctx.ui, message.excludeFromContext);
87
+ const component = new EvalExecutionComponent(message.code, this.ctx.ui, message.excludeFromContext);
88
88
  if (message.output) {
89
89
  component.appendOutput(message.output);
90
90
  }
@@ -216,12 +216,12 @@ Most tools have a `{{intentField}}` parameter. Fill it with a concise intent in
216
216
  If the task may involve external systems, SaaS APIs, chat, tickets, databases, deployments, or other non-local integrations, you **SHOULD** call `{{toolRefs.search_tool_bm25}}` before concluding no such tool exists.
217
217
  {{/if}}
218
218
 
219
- {{#ifAny (includes tools "python") (includes tools "bash")}}
219
+ {{#ifAny (includes tools "eval") (includes tools "bash")}}
220
220
  ### Tool priority
221
221
  1. Use specialized tools first{{#ifAny (includes tools "read") (includes tools "search") (includes tools "find") (includes tools "edit") (includes tools "lsp")}}: {{#has tools "read"}}`{{toolRefs.read}}`, {{/has}}{{#has tools "search"}}`{{toolRefs.search}}`, {{/has}}{{#has tools "find"}}`{{toolRefs.find}}`, {{/has}}{{#has tools "edit"}}`{{toolRefs.edit}}`, {{/has}}{{#has tools "lsp"}}`{{toolRefs.lsp}}`{{/has}}{{/ifAny}}
222
- 2. Python: logic, loops, processing, display
222
+ 2. Eval: logic, loops, processing, display (default python; pass `language: "js"` for in-process JavaScript)
223
223
  3. Bash: simple one-liners only
224
- You **MUST NOT** use Python or Bash when a specialized tool exists.
224
+ You **MUST NOT** use Eval or Bash when a specialized tool exists.
225
225
  {{/ifAny}}
226
226
 
227
227
  {{#ifAny (includes tools "read") (includes tools "write") (includes tools "search") (includes tools "find") (includes tools "edit")}}
@@ -0,0 +1,92 @@
1
+ Run code in a persistent kernel, using a series of codeblocks acting as cells.
2
+
3
+ <instruction>
4
+ Each cell is a markdown fenced code block. The opening fence's info string carries metadata:
5
+
6
+ ```
7
+ <lang>? <duration>? (title-fragment | key=value)*
8
+ ```
9
+ - **Language**: {{#if py}}`py`/`python` for Python{{/if}}{{#ifAll py js}}, {{/ifAll}}{{#if js}}`js`/`javascript`/`ts`/`typescript` for JavaScript{{/if}}.{{#ifAll py js}} Omitted → inherit the previous cell's language (the first cell defaults to Python, falling back to JavaScript when Python is unavailable).{{else}} Omitted → inherit the previous cell's language.{{/ifAll}}
10
+ - **Positional duration**: `15s`, `500ms`, `2m`, or a bare integer (seconds). Default 30s.
11
+ - **Attributes**:
12
+ - `id="…"` — cell id (shown as the title in the transcript).
13
+ - `t=<duration>` — overrides the positional duration.
14
+ - `rst=true` — wipe **this cell's own language kernel** before running.{{#ifAll py js}} Other languages are untouched.{{/ifAll}}
15
+
16
+ **Work incrementally:** one logical step per cell (imports, define, test, use). Pass multiple small cells in one call. Define small reusable functions you can debug individually. You **MUST** put workflow explanations in the assistant message or cell title — never inside cell code.
17
+
18
+ **On failure:** errors identify the failing cell (e.g., "Cell 3 failed"). Resubmit only the fixed cell (or fixed cell + remaining cells).
19
+ </instruction>
20
+
21
+ <prelude>
22
+ {{#ifAll py js}}The same helpers are available in both runtimes with the same positional argument order. Python takes the trailing options as keyword args; JavaScript takes the same options as a trailing object literal. JavaScript helpers are async and `await`able; Python helpers run synchronously.{{else}}{{#if py}}Helpers run synchronously. Trailing options are passed as keyword arguments.{{/if}}{{#if js}}Helpers are async and `await`able. Trailing options are passed as a final object literal.{{/if}}{{/ifAll}}
23
+ ```
24
+ display(value) → None
25
+ Render a value in the current cell output.
26
+ print(value, ...) → None
27
+ Print to the cell's text output.
28
+ read(path, offset?=1, limit?=None) → str
29
+ Read file contents as text. offset/limit are 1-indexed line bounds.
30
+ write(path, content) → str
31
+ Write content to a file (creates parent directories). Returns the resolved path.
32
+ append(path, content) → str
33
+ Append content to a file. Returns the resolved path.
34
+ stat(path) → {path, size, is_file, is_dir, mtime}
35
+ File or directory metadata. mtime is an ISO-8601 string.
36
+ find(pattern, path?=".", type?="file", limit?=1000, hidden?=False, sort_by_mtime?=False, maxdepth?=None, mindepth?=None) → list[path]
37
+ Recursive glob find. Respects .gitignore.
38
+ glob(pattern, path?=".", hidden?=False) → list[path]
39
+ Non-recursive glob. Use find() for recursive walks. Respects .gitignore.
40
+ grep(pattern, path, ignore_case?=False, literal?=False, context?=0) → list[{line, text}]
41
+ Search a single file.
42
+ rgrep(pattern, path?=".", glob_pattern?="*", ignore_case?=False, literal?=False, limit?=100, hidden?=False) → list[{file, line, text}]
43
+ Search recursively across files. Respects .gitignore.
44
+ sed(path, pattern, repl, flags?=0) → int
45
+ Regex replace in a file (like sed -i). Returns replacement count.
46
+ tree(path?=".", max_depth?=3, show_hidden?=False) → str
47
+ Render a directory tree.
48
+ diff(a, b) → str
49
+ Unified diff between two files.
50
+ run(cmd, cwd?=None, timeout?=None) → {stdout, stderr, exit_code}
51
+ Run a shell command.
52
+ env(key?=None, value?=None) → str | None | dict
53
+ No args → full environment as dict. One arg → value of `key`. Two args → set `key=value` and return value.
54
+ output(*ids, format?="raw", query?=None, offset?=None, limit?=None) → str | dict | list[dict]
55
+ Read task/agent output by ID. Single id returns text/dict; multiple ids return a list.
56
+ ```
57
+
58
+ {{#if js}}**JavaScript only:** `tool.<name>(args)` invokes any session tool directly (e.g. `await tool.read({ path: "src/foo.ts" })`).
59
+ {{/if}}</prelude>
60
+
61
+ <output>
62
+ Cells render like a Jupyter notebook. Pass any value to `display(value)`; non-presentable data is rendered as an interactive JSON tree, and presentable values (figures, images, dataframes, etc.) render with their native representation.
63
+ </output>
64
+
65
+ <caution>
66
+ - In session mode, use `rst=true` on a cell to wipe its language's kernel before running.{{#ifAll py js}} Reset is per-language: a python cell's `rst=true` does not touch the JavaScript kernel and vice versa.{{/ifAll}}
67
+ {{#if js}}- **js**: the VM exposes a selective `process` subset, Web APIs, `Buffer`, `fs/promises`.
68
+ {{/if}}</caution>
69
+
70
+ <example>
71
+ {{#if py}}```py id="imports" t="10s"
72
+ import json
73
+ from pathlib import Path
74
+ ```
75
+
76
+ ```py id="load config"
77
+ data = json.loads(read('package.json'))
78
+ display(data)
79
+ ```
80
+ {{/if}}{{#ifAll py js}}
81
+
82
+ {{/ifAll}}{{#if js}}```js id="js summary" rst=true
83
+ const data = JSON.parse(await read('package.json'));
84
+ display(data);
85
+ return data.name;
86
+ ```
87
+
88
+ ```
89
+ return 'still JavaScript';
90
+ ```
91
+ {{/if}}
92
+ </example>
@@ -9,8 +9,11 @@ Interacts with Language Server Protocol servers for code intelligence.
9
9
  - `hover`: Get type info and documentation → type signature + docs
10
10
  - `symbols`: List symbols in a file, or search workspace with `file: "*"` and a `query`
11
11
  - `rename`: Rename symbol across codebase → preview or apply edits
12
+ - `rename_file`: Rename or move a file/directory; sends `workspace/willRenameFiles` so LSP servers update import paths and other references → preview or apply edits + filesystem rename
12
13
  - `code_actions`: List available quick-fixes/refactors/import actions; apply one when `apply: true` and `query` matches title or index
13
14
  - `status`: Show active language servers
15
+ - `capabilities`: Dump per-server capabilities (standard + experimental + executeCommand list) for discovery — file scopes to one server, omitted/`"*"` lists every active server
16
+ - `request`: Send a raw LSP request to a server — `query` is the method name (e.g., `rust-analyzer/expandMacro`, `typescript/goToSourceDefinition`, `workspace/executeCommand`); use `payload` for arbitrary JSON params or let the tool auto-build them from `file`/`line`/`symbol`
14
17
  - `reload`: Restart a specific server (via `file`) or all servers with `file: "*"`
15
18
  </operations>
16
19
 
@@ -18,9 +21,10 @@ Interacts with Language Server Protocol servers for code intelligence.
18
21
  - `file`: File path, glob pattern (e.g. `src/**/*.ts`), or `"*"` for workspace scope. Globs are expanded locally before dispatch. `"*"` routes `diagnostics`/`symbols`/`reload` to their workspace-wide form.
19
22
  - `line`: 1-indexed line number for position-based actions
20
23
  - `symbol`: Substring on the target line used to resolve column automatically. Append `#N` to pick the Nth occurrence on that line (1-indexed; default 1) — e.g. `foo#2` selects the second `foo`.
21
- - `query`: Symbol search query, code-action kind filter (list mode), or code-action selector (apply mode)
22
- - `new_name`: Required for rename
23
- - `apply`: Apply edits for rename/code_actions (default true for rename, list mode for code_actions unless explicitly true)
24
+ - `query`: Symbol search query, code-action kind filter / selector (list/apply mode), or LSP method name when `action: request`
25
+ - `new_name`: Required for `rename` (new symbol identifier) and `rename_file` (destination path)
26
+ - `apply`: Apply edits for rename/rename_file/code_actions (default true for rename and rename_file; list mode for code_actions unless explicitly true)
27
+ - `payload`: JSON-encoded params for `action: request`. Overrides the auto-built `{ textDocument, position }` shape when present.
24
28
  - `timeout`: Request timeout in seconds (clamped to 5-60, default 20)
25
29
  </parameters>
26
30
 
package/src/sdk.ts CHANGED
@@ -36,6 +36,7 @@ import { CursorExecHandlers } from "./cursor";
36
36
  import "./discovery";
37
37
  import { resolveConfigValue } from "./config/resolve-config-value";
38
38
  import { initializeWithSettings } from "./discovery";
39
+ import { disposeAllKernelSessions, disposeKernelSessionsByOwner } from "./eval/py/executor";
39
40
  import { TtsrManager } from "./export/ttsr";
40
41
  import {
41
42
  type CustomCommandsLoadResult,
@@ -73,7 +74,6 @@ import {
73
74
  RuleProtocolHandler,
74
75
  SkillProtocolHandler,
75
76
  } from "./internal-urls";
76
- import { disposeAllKernelSessions, disposeKernelSessionsByOwner } from "./ipy/executor";
77
77
  import { LSP_STARTUP_EVENT_CHANNEL, type LspStartupEvent } from "./lsp/startup-events";
78
78
  import { discoverAndLoadMCPTools, type MCPManager, type MCPToolsLoadResult } from "./mcp";
79
79
  import {
@@ -99,6 +99,8 @@ import { SessionManager } from "./session/session-manager";
99
99
  import { closeAllConnections } from "./ssh/connection-manager";
100
100
  import { unmountAll } from "./ssh/sshfs-mount";
101
101
  import {
102
+ type AgentsMdSearch,
103
+ buildAgentsMdSearch,
102
104
  buildSystemPrompt as buildSystemPromptInternal,
103
105
  buildSystemPromptToolMetadata,
104
106
  loadProjectContextFiles as loadContextFilesInternal,
@@ -111,13 +113,13 @@ import {
111
113
  createTools,
112
114
  discoverStartupLspServers,
113
115
  EditTool,
116
+ EvalTool,
114
117
  FindTool,
115
118
  getSearchTools,
116
119
  HIDDEN_TOOLS,
117
120
  isSearchProviderPreference,
118
121
  type LspStartupServerInfo,
119
122
  loadSshTool,
120
- PythonTool,
121
123
  ReadTool,
122
124
  ResolveTool,
123
125
  renderSearchToolBm25Description,
@@ -204,9 +206,6 @@ export interface CreateAgentSessionOptions {
204
206
  enableLsp?: boolean;
205
207
  /** Skip Python kernel availability check and prelude warmup */
206
208
  skipPythonPreflight?: boolean;
207
- /** Force Python prelude warmup even when test env would normally skip it */
208
- forcePythonWarmup?: boolean;
209
-
210
209
  /** Tool names explicitly requested (enables disabled-by-default tools) */
211
210
  toolNames?: string[];
212
211
 
@@ -275,10 +274,10 @@ export {
275
274
  BUILTIN_TOOLS,
276
275
  createTools,
277
276
  EditTool,
277
+ EvalTool,
278
278
  FindTool,
279
279
  HIDDEN_TOOLS,
280
280
  loadSshTool,
281
- PythonTool,
282
281
  ReadTool,
283
282
  ResolveTool,
284
283
  SearchTool,
@@ -667,17 +666,40 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
667
666
  const modelRegistry = options.modelRegistry ?? new ModelRegistry(authStorage);
668
667
 
669
668
  const settings = options.settings ?? (await logger.time("settings", Settings.init, { cwd, agentDir }));
670
- logger.time("initializeWithSettings");
671
- initializeWithSettings(settings);
669
+ logger.time("initializeWithSettings", initializeWithSettings, settings);
672
670
  if (!options.modelRegistry) {
673
671
  modelRegistry.refreshInBackground();
674
672
  }
673
+ // Kick off AGENTS.md filesystem search in parallel — it is the slowest piece of buildSystemPrompt
674
+ // (~200ms on large repos) and only needs `cwd`, so it can overlap with everything that follows.
675
+ const agentsMdSearchPromise: Promise<AgentsMdSearch> = logger.time("buildAgentsMdSearch", buildAgentsMdSearch, cwd);
676
+ agentsMdSearchPromise.catch(() => {});
677
+
678
+ // Independent discoveries that depend only on cwd/agentDir — kicked off in parallel and awaited
679
+ // at their respective consumer sites. Their work can overlap with model resolution, secret loading,
680
+ // session-context build, tool creation, MCP discovery, and extension discovery.
681
+ const contextFilesPromise = options.contextFiles
682
+ ? Promise.resolve(options.contextFiles)
683
+ : logger.time("discoverContextFiles", discoverContextFiles, cwd, agentDir);
684
+ contextFilesPromise.catch(() => {});
685
+ const promptTemplatesPromise = options.promptTemplates
686
+ ? Promise.resolve(options.promptTemplates)
687
+ : logger.time("discoverPromptTemplates", discoverPromptTemplates, cwd, agentDir);
688
+ promptTemplatesPromise.catch(() => {});
689
+ const slashCommandsPromise = options.slashCommands
690
+ ? Promise.resolve(options.slashCommands)
691
+ : logger.time("discoverSlashCommands", discoverSlashCommands, cwd);
692
+ slashCommandsPromise.catch(() => {});
675
693
  const skillsSettings = settings.getGroup("skills");
676
694
  const disabledExtensionIds = settings.get("disabledExtensions") ?? [];
677
695
  const discoveredSkillsPromise =
678
696
  options.skills === undefined
679
- ? discoverSkills(cwd, agentDir, { ...skillsSettings, disabledExtensions: disabledExtensionIds })
697
+ ? logger.time("discoverSkills", discoverSkills, cwd, agentDir, {
698
+ ...skillsSettings,
699
+ disabledExtensions: disabledExtensionIds,
700
+ })
680
701
  : undefined;
702
+ discoveredSkillsPromise?.catch(() => {});
681
703
 
682
704
  // Initialize provider preferences from settings
683
705
  const webSearchProvider = settings.get("providers.webSearch");
@@ -814,10 +836,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
814
836
  skills = options.skills;
815
837
  skillWarnings = [];
816
838
  } else {
817
- const discovered = await logger.time(
818
- "discoverSkills",
819
- () => discoveredSkillsPromise ?? Promise.resolve({ skills: [], warnings: [] }),
820
- );
839
+ const discovered = await (discoveredSkillsPromise ?? Promise.resolve({ skills: [], warnings: [] }));
821
840
  skills = discovered.skills;
822
841
  skillWarnings = discovered.warnings;
823
842
  }
@@ -851,10 +870,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
851
870
  return { ttsrManager, rulebookRules, alwaysApplyRules };
852
871
  });
853
872
 
854
- const contextFiles = await logger.time(
855
- "discoverContextFiles",
856
- async () => options.contextFiles ?? (await discoverContextFiles(cwd, agentDir)),
857
- );
873
+ const contextFiles = await contextFilesPromise;
858
874
 
859
875
  let agent: Agent;
860
876
  let session!: AgentSession;
@@ -917,7 +933,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
917
933
  const resolvedAgentId = options.agentId ?? options.parentTaskPrefix ?? MAIN_AGENT_ID;
918
934
  const resolvedAgentDisplayName =
919
935
  options.agentDisplayName ?? ((options.taskDepth ?? 0) > 0 || options.parentTaskPrefix ? "sub" : "main");
920
- const pythonKernelOwnerId = `agent-session:${Snowflake.next()}`;
936
+ const evalKernelOwnerId = `agent-session:${Snowflake.next()}`;
921
937
 
922
938
  try {
923
939
  const getActiveModelString = (): string | undefined => {
@@ -937,7 +953,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
937
953
  return !requestedToolNames || requestedToolNames.includes("edit");
938
954
  },
939
955
  skipPythonPreflight: options.skipPythonPreflight,
940
- forcePythonWarmup: options.forcePythonWarmup,
941
956
  contextFiles,
942
957
  skills,
943
958
  eventBus,
@@ -945,12 +960,13 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
945
960
  requireYieldTool: options.requireYieldTool,
946
961
  taskDepth: options.taskDepth ?? 0,
947
962
  getSessionFile: () => sessionManager.getSessionFile() ?? null,
948
- getPythonKernelOwnerId: () => pythonKernelOwnerId,
949
- assertPythonExecutionAllowed: () => session?.assertPythonExecutionAllowed(),
950
- trackPythonExecution: (execution, abortController) =>
951
- session ? session.trackPythonExecution(execution, abortController) : execution,
963
+ getEvalKernelOwnerId: () => evalKernelOwnerId,
964
+ assertEvalExecutionAllowed: () => session?.assertEvalExecutionAllowed(),
965
+ trackEvalExecution: (execution, abortController) =>
966
+ session ? session.trackEvalExecution(execution, abortController) : execution,
952
967
  getSessionId: () => sessionManager.getSessionId?.() ?? null,
953
968
  getAgentId: () => resolvedAgentId,
969
+ getToolByName: name => session?.getToolByName(name),
954
970
  agentRegistry,
955
971
  getSessionSpawns: () => options.spawns ?? "*",
956
972
  getModelString: () => (hasExplicitModel && model ? formatModelString(model) : undefined),
@@ -1353,6 +1369,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1353
1369
  mcpDiscoveryServerSummaries: discoverableMCPSummary.servers.map(formatDiscoverableMCPToolServerSummary),
1354
1370
  eagerTasks,
1355
1371
  secretsEnabled,
1372
+ agentsMdSearch: agentsMdSearchPromise,
1356
1373
  });
1357
1374
 
1358
1375
  if (options.systemPrompt === undefined) {
@@ -1376,6 +1393,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1376
1393
  mcpDiscoveryServerSummaries: discoverableMCPSummary.servers.map(formatDiscoverableMCPToolServerSummary),
1377
1394
  eagerTasks,
1378
1395
  secretsEnabled,
1396
+ agentsMdSearch: agentsMdSearchPromise,
1379
1397
  });
1380
1398
  }
1381
1399
  return options.systemPrompt(defaultPrompt);
@@ -1446,13 +1464,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1446
1464
 
1447
1465
  const systemPrompt = await logger.time("buildSystemPrompt", rebuildSystemPrompt, initialToolNames, toolRegistry);
1448
1466
 
1449
- const promptTemplates =
1450
- options.promptTemplates ??
1451
- (await logger.time("discoverPromptTemplates", discoverPromptTemplates, cwd, agentDir));
1467
+ const promptTemplates = await promptTemplatesPromise;
1452
1468
  toolSession.promptTemplates = promptTemplates;
1453
1469
 
1454
- const slashCommands =
1455
- options.slashCommands ?? (await logger.time("discoverSlashCommands", discoverSlashCommands, cwd));
1470
+ const slashCommands = await slashCommandsPromise;
1456
1471
 
1457
1472
  // Create convertToLlm wrapper that filters images if blockImages is enabled (defense-in-depth)
1458
1473
  const convertToLlmWithBlockImages = (messages: AgentMessage[]): Message[] => {
@@ -1596,7 +1611,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1596
1611
  thinkingLevel,
1597
1612
  sessionManager,
1598
1613
  settings,
1599
- pythonKernelOwnerId,
1614
+ evalKernelOwnerId,
1600
1615
  scopedModels: options.scopedModels,
1601
1616
  promptTemplates,
1602
1617
  slashCommands,
@@ -1765,7 +1780,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1765
1780
  });
1766
1781
  }
1767
1782
 
1768
- logger.time("createAgentSession:return");
1769
1783
  return {
1770
1784
  session,
1771
1785
  extensionsResult,
@@ -1780,7 +1794,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1780
1794
  if (hasSession) {
1781
1795
  await session.dispose();
1782
1796
  } else {
1783
- await disposeKernelSessionsByOwner(pythonKernelOwnerId);
1797
+ await disposeKernelSessionsByOwner(evalKernelOwnerId);
1784
1798
  }
1785
1799
  } catch (cleanupError) {
1786
1800
  logger.warn("Failed to clean up createAgentSession resources after startup error", {