@oh-my-pi/pi-coding-agent 15.1.2 → 15.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/CHANGELOG.md +60 -0
  2. package/dist/types/async/job-manager.d.ts +3 -2
  3. package/dist/types/cli/auth-broker-cli.d.ts +25 -0
  4. package/dist/types/cli/auth-gateway-cli.d.ts +18 -0
  5. package/dist/types/cli/grievances-cli.d.ts +12 -0
  6. package/dist/types/commands/auth-broker.d.ts +54 -0
  7. package/dist/types/commands/auth-gateway.d.ts +32 -0
  8. package/dist/types/commands/grievances.d.ts +1 -1
  9. package/dist/types/commit/agentic/tools/propose-commit.d.ts +9 -1
  10. package/dist/types/commit/agentic/tools/schemas.d.ts +9 -1
  11. package/dist/types/commit/agentic/tools/split-commit.d.ts +9 -1
  12. package/dist/types/config/model-registry.d.ts +3 -0
  13. package/dist/types/config/models-config-schema.d.ts +1 -0
  14. package/dist/types/config/settings-schema.d.ts +46 -0
  15. package/dist/types/discovery/agents.d.ts +12 -1
  16. package/dist/types/edit/renderer.d.ts +3 -0
  17. package/dist/types/eval/index.d.ts +0 -2
  18. package/dist/types/goals/tools/goal-tool.d.ts +10 -2
  19. package/dist/types/index.d.ts +0 -1
  20. package/dist/types/internal-urls/index.d.ts +1 -1
  21. package/dist/types/internal-urls/{pi-protocol.d.ts → omp-protocol.d.ts} +3 -3
  22. package/dist/types/internal-urls/types.d.ts +1 -1
  23. package/dist/types/main.d.ts +11 -2
  24. package/dist/types/modes/acp/acp-agent.d.ts +2 -1
  25. package/dist/types/modes/acp/acp-event-mapper.d.ts +13 -1
  26. package/dist/types/modes/acp/acp-mode.d.ts +3 -1
  27. package/dist/types/modes/emoji-autocomplete.d.ts +16 -0
  28. package/dist/types/modes/interactive-mode.d.ts +1 -1
  29. package/dist/types/modes/prompt-action-autocomplete.d.ts +4 -0
  30. package/dist/types/plan-mode/approved-plan.d.ts +10 -4
  31. package/dist/types/sdk.d.ts +10 -3
  32. package/dist/types/session/agent-session.d.ts +7 -3
  33. package/dist/types/session/auth-broker-config.d.ts +13 -0
  34. package/dist/types/session/auth-storage.d.ts +1 -1
  35. package/dist/types/session/client-bridge.d.ts +3 -0
  36. package/dist/types/tools/eval.d.ts +41 -7
  37. package/dist/types/tools/irc.d.ts +8 -2
  38. package/dist/types/tools/report-tool-issue.d.ts +118 -1
  39. package/dist/types/tools/resolve.d.ts +8 -2
  40. package/examples/custom-tools/README.md +3 -12
  41. package/examples/extensions/README.md +2 -15
  42. package/examples/extensions/api-demo.ts +1 -7
  43. package/package.json +7 -7
  44. package/src/async/job-manager.ts +111 -13
  45. package/src/autoresearch/tools/init-experiment.ts +11 -33
  46. package/src/autoresearch/tools/log-experiment.ts +10 -24
  47. package/src/autoresearch/tools/run-experiment.ts +1 -1
  48. package/src/autoresearch/tools/update-notes.ts +2 -9
  49. package/src/cli/auth-broker-cli.ts +746 -0
  50. package/src/cli/auth-gateway-cli.ts +342 -0
  51. package/src/cli/grievances-cli.ts +109 -16
  52. package/src/cli/update-cli.ts +1 -5
  53. package/src/cli.ts +4 -2
  54. package/src/commands/auth-broker.ts +96 -0
  55. package/src/commands/auth-gateway.ts +61 -0
  56. package/src/commands/grievances.ts +13 -8
  57. package/src/commands/launch.ts +1 -1
  58. package/src/commit/agentic/agent.ts +2 -0
  59. package/src/commit/agentic/tools/analyze-file.ts +2 -2
  60. package/src/commit/agentic/tools/git-file-diff.ts +2 -2
  61. package/src/commit/agentic/tools/git-hunk.ts +3 -3
  62. package/src/commit/agentic/tools/git-overview.ts +2 -2
  63. package/src/commit/agentic/tools/propose-changelog.ts +1 -3
  64. package/src/commit/agentic/tools/recent-commits.ts +1 -1
  65. package/src/commit/agentic/tools/schemas.ts +1 -9
  66. package/src/config/model-equivalence.ts +279 -174
  67. package/src/config/model-registry.ts +37 -6
  68. package/src/config/model-resolver.ts +13 -8
  69. package/src/config/models-config-schema.ts +8 -0
  70. package/src/config/settings-schema.ts +52 -0
  71. package/src/cursor.ts +1 -1
  72. package/src/debug/log-formatting.ts +1 -1
  73. package/src/debug/log-viewer.ts +1 -1
  74. package/src/debug/profiler.ts +4 -0
  75. package/src/debug/raw-sse-buffer.ts +100 -59
  76. package/src/debug/raw-sse.ts +1 -1
  77. package/src/discovery/agents.ts +15 -4
  78. package/src/edit/modes/apply-patch.ts +1 -5
  79. package/src/edit/modes/patch.ts +5 -5
  80. package/src/edit/modes/replace.ts +5 -5
  81. package/src/edit/renderer.ts +2 -1
  82. package/src/edit/streaming.ts +1 -1
  83. package/src/eval/index.ts +0 -2
  84. package/src/eval/js/shared/runtime.ts +107 -2
  85. package/src/eval/py/kernel.ts +1 -1
  86. package/src/exa/researcher.ts +4 -4
  87. package/src/exa/search.ts +10 -22
  88. package/src/exa/websets.ts +33 -33
  89. package/src/extensibility/typebox.ts +44 -17
  90. package/src/goals/tools/goal-tool.ts +3 -3
  91. package/src/index.ts +0 -3
  92. package/src/internal-urls/docs-index.generated.ts +21 -18
  93. package/src/internal-urls/index.ts +1 -1
  94. package/src/internal-urls/{pi-protocol.ts → omp-protocol.ts} +10 -10
  95. package/src/internal-urls/router.ts +3 -3
  96. package/src/internal-urls/types.ts +1 -1
  97. package/src/lsp/types.ts +8 -11
  98. package/src/main.ts +216 -146
  99. package/src/mcp/tool-bridge.ts +3 -3
  100. package/src/modes/acp/acp-agent.ts +203 -57
  101. package/src/modes/acp/acp-client-bridge.ts +2 -1
  102. package/src/modes/acp/acp-event-mapper.ts +208 -32
  103. package/src/modes/acp/acp-mode.ts +11 -3
  104. package/src/modes/components/bash-execution.ts +1 -1
  105. package/src/modes/components/diff.ts +1 -2
  106. package/src/modes/components/eval-execution.ts +1 -1
  107. package/src/modes/components/oauth-selector.ts +38 -2
  108. package/src/modes/components/tool-execution.ts +1 -2
  109. package/src/modes/components/tree-selector.ts +26 -7
  110. package/src/modes/controllers/command-controller.ts +95 -34
  111. package/src/modes/controllers/input-controller.ts +4 -3
  112. package/src/modes/data/emojis.json +1 -0
  113. package/src/modes/emoji-autocomplete.ts +285 -0
  114. package/src/modes/interactive-mode.ts +92 -19
  115. package/src/modes/print-mode.ts +3 -3
  116. package/src/modes/prompt-action-autocomplete.ts +14 -0
  117. package/src/plan-mode/approved-plan.ts +30 -9
  118. package/src/prompts/system/system-prompt.md +1 -1
  119. package/src/prompts/system/ttsr-tool-reminder.md +5 -0
  120. package/src/prompts/tools/ask.md +4 -3
  121. package/src/prompts/tools/eval.md +25 -26
  122. package/src/prompts/tools/read.md +1 -1
  123. package/src/prompts/tools/resolve.md +1 -1
  124. package/src/prompts/tools/search.md +1 -1
  125. package/src/prompts/tools/web-search.md +1 -1
  126. package/src/sdk.ts +81 -8
  127. package/src/session/agent-session.ts +362 -131
  128. package/src/session/agent-storage.ts +7 -2
  129. package/src/session/auth-broker-config.ts +102 -0
  130. package/src/session/auth-storage.ts +7 -1
  131. package/src/session/client-bridge.ts +3 -0
  132. package/src/session/streaming-output.ts +1 -1
  133. package/src/task/types.ts +10 -35
  134. package/src/tools/bash-interactive.ts +4 -1
  135. package/src/tools/bash-pty-selection.ts +2 -2
  136. package/src/tools/browser.ts +12 -20
  137. package/src/tools/eval.ts +77 -100
  138. package/src/tools/gh.ts +21 -45
  139. package/src/tools/hindsight-recall.ts +1 -1
  140. package/src/tools/hindsight-reflect.ts +2 -2
  141. package/src/tools/hindsight-retain.ts +3 -7
  142. package/src/tools/index.ts +8 -1
  143. package/src/tools/inspect-image.ts +4 -1
  144. package/src/tools/irc.ts +4 -12
  145. package/src/tools/job.ts +3 -11
  146. package/src/tools/report-tool-issue.ts +462 -17
  147. package/src/tools/resolve.ts +2 -7
  148. package/src/tools/todo-write.ts +8 -15
  149. package/src/utils/title-generator.ts +3 -0
  150. package/src/web/search/index.ts +6 -6
  151. package/dist/types/eval/parse.d.ts +0 -28
  152. package/dist/types/eval/sniff.d.ts +0 -11
  153. package/src/eval/eval.lark +0 -36
  154. package/src/eval/parse.ts +0 -407
  155. package/src/eval/sniff.ts +0 -28
@@ -1,7 +1,12 @@
1
1
  import { Database, type Statement } from "bun:sqlite";
2
2
  import * as fs from "node:fs";
3
3
  import * as path from "node:path";
4
- import { type AuthCredential, AuthCredentialStore, type StoredAuthCredential } from "@oh-my-pi/pi-ai";
4
+ import {
5
+ type AuthCredential,
6
+ type AuthCredentialStore,
7
+ SqliteAuthCredentialStore,
8
+ type StoredAuthCredential,
9
+ } from "@oh-my-pi/pi-ai";
5
10
  import { getAgentDbPath, isRecord, logger } from "@oh-my-pi/pi-utils";
6
11
  import type { RawSettings as Settings } from "../config/settings";
7
12
 
@@ -57,7 +62,7 @@ export class AgentStorage {
57
62
  this.#hardenPermissions(dbPath);
58
63
 
59
64
  // Create AuthCredentialStore with our open database
60
- this.#authStore = new AuthCredentialStore(this.#db);
65
+ this.#authStore = new SqliteAuthCredentialStore(this.#db);
61
66
 
62
67
  this.#listSettingsStmt = this.#db.prepare("SELECT key, value FROM settings");
63
68
  this.#upsertModelUsageStmt = this.#db.prepare(
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Resolve auth-broker connection configuration for the local omp client.
3
+ *
4
+ * Precedence (highest first):
5
+ * 1. `OMP_AUTH_BROKER_URL` / `OMP_AUTH_BROKER_TOKEN` env vars.
6
+ * 2. `auth.broker.url` / `auth.broker.token` in `~/.omp/agent/config.yml`
7
+ * (hidden from the settings UI; `!command` resolution supported).
8
+ * 3. Token file `~/.omp/auth-broker.token` (paired with URL from env or config).
9
+ *
10
+ * Returns null when no broker URL is configured — caller falls back to the
11
+ * local SQLite store.
12
+ *
13
+ * Reads config.yml directly (instead of going through `Settings.init`) because
14
+ * `discoverAuthStorage` runs before the settings singleton is initialized in
15
+ * `runRootCommand`, and we want hand-edited config entries to be honoured at
16
+ * boot without forcing a startup reorder.
17
+ */
18
+ import * as path from "node:path";
19
+ import { getAgentDir, getConfigRootDir, isEnoent, logger } from "@oh-my-pi/pi-utils";
20
+ import { YAML } from "bun";
21
+ import { resolveConfigValue } from "../config/resolve-config-value";
22
+
23
+ export interface AuthBrokerClientConfig {
24
+ url: string;
25
+ token: string;
26
+ }
27
+
28
+ /** Path to the local bearer token file. Created on the broker host by `omp auth-broker token`. */
29
+ export function getAuthBrokerTokenFilePath(): string {
30
+ return path.join(getConfigRootDir(), "auth-broker.token");
31
+ }
32
+
33
+ async function readTokenFile(): Promise<string | null> {
34
+ try {
35
+ const raw = await Bun.file(getAuthBrokerTokenFilePath()).text();
36
+ const trimmed = raw.trim();
37
+ return trimmed.length > 0 ? trimmed : null;
38
+ } catch (err) {
39
+ if (isEnoent(err)) return null;
40
+ logger.warn("auth-broker token file unreadable", { error: String(err) });
41
+ return null;
42
+ }
43
+ }
44
+
45
+ interface ConfigSnapshot {
46
+ url?: string;
47
+ token?: string;
48
+ }
49
+
50
+ async function readConfigYaml(): Promise<ConfigSnapshot> {
51
+ const configPath = path.join(getAgentDir(), "config.yml");
52
+ try {
53
+ const raw = await Bun.file(configPath).text();
54
+ const parsed = YAML.parse(raw);
55
+ if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return {};
56
+ const record = parsed as Record<string, unknown>;
57
+ const url = typeof record["auth.broker.url"] === "string" ? (record["auth.broker.url"] as string) : undefined;
58
+ const token =
59
+ typeof record["auth.broker.token"] === "string" ? (record["auth.broker.token"] as string) : undefined;
60
+ return { url, token };
61
+ } catch (err) {
62
+ if (isEnoent(err)) return {};
63
+ logger.warn("auth-broker config.yml unreadable", { error: String(err) });
64
+ return {};
65
+ }
66
+ }
67
+
68
+ /**
69
+ * Read broker configuration. Returns null when the URL is missing
70
+ * (broker disabled — local store is used). Throws when URL is set but no
71
+ * token is available — the caller cannot fall back silently because the
72
+ * user explicitly asked to use the broker.
73
+ */
74
+ export async function resolveAuthBrokerConfig(): Promise<AuthBrokerClientConfig | null> {
75
+ const envUrl = process.env.OMP_AUTH_BROKER_URL;
76
+ const envToken = process.env.OMP_AUTH_BROKER_TOKEN;
77
+
78
+ let url = envUrl && envUrl.length > 0 ? envUrl : undefined;
79
+ let configToken: string | undefined;
80
+ if (!url || !envToken) {
81
+ const fromConfig = await readConfigYaml();
82
+ if (!url && fromConfig.url) {
83
+ const resolved = await resolveConfigValue(fromConfig.url);
84
+ if (resolved && resolved.length > 0) url = resolved;
85
+ }
86
+ if (fromConfig.token) {
87
+ const resolved = await resolveConfigValue(fromConfig.token);
88
+ if (resolved && resolved.length > 0) configToken = resolved;
89
+ }
90
+ }
91
+ if (!url) return null;
92
+
93
+ const token =
94
+ (envToken && envToken.length > 0 ? envToken : undefined) ?? configToken ?? (await readTokenFile()) ?? undefined;
95
+ if (!token) {
96
+ throw new Error(
97
+ `OMP_AUTH_BROKER_URL is set (${url}) but no bearer token is available. ` +
98
+ `Set OMP_AUTH_BROKER_TOKEN, the \`auth.broker.token\` config entry, or place one at ${getAuthBrokerTokenFilePath()}.`,
99
+ );
100
+ }
101
+ return { url, token };
102
+ }
@@ -14,4 +14,10 @@ export type {
14
14
  SerializedAuthStorage,
15
15
  StoredAuthCredential,
16
16
  } from "@oh-my-pi/pi-ai";
17
- export { AuthStorage } from "@oh-my-pi/pi-ai";
17
+ export {
18
+ AuthBrokerClient,
19
+ AuthStorage,
20
+ REMOTE_REFRESH_SENTINEL,
21
+ RemoteAuthCredentialStore,
22
+ SqliteAuthCredentialStore,
23
+ } from "@oh-my-pi/pi-ai";
@@ -25,6 +25,7 @@ export interface ClientBridgePermissionToolCall {
25
25
  toolName: string;
26
26
  title: string;
27
27
  kind?: string;
28
+ status?: "pending" | "in_progress" | "completed" | "failed";
28
29
  rawInput?: unknown;
29
30
  locations?: { path: string; line?: number }[];
30
31
  }
@@ -70,6 +71,8 @@ export interface ClientBridgeCreateTerminalParams {
70
71
 
71
72
  export interface ClientBridge {
72
73
  readonly capabilities: ClientBridgeCapabilities;
74
+ /** ACP v1 clients cannot show server-initiated turns as busy after prompt response. */
75
+ readonly deferAgentInitiatedTurns?: boolean;
73
76
  readTextFile?(params: { path: string; line?: number; limit?: number }): Promise<string>;
74
77
  writeTextFile?(params: { path: string; content: string }): Promise<void>;
75
78
  createTerminal?(params: ClientBridgeCreateTerminalParams): Promise<ClientBridgeTerminalHandle>;
@@ -1,5 +1,5 @@
1
1
  import type { AgentToolUpdateCallback } from "@oh-my-pi/pi-agent-core";
2
- import { sanitizeText } from "@oh-my-pi/pi-natives";
2
+ import { sanitizeText } from "@oh-my-pi/pi-utils";
3
3
  import { formatBytes } from "../tools/render-utils";
4
4
  import { sanitizeWithOptionalSixelPassthrough } from "../utils/sixel";
5
5
 
package/src/task/types.ts CHANGED
@@ -57,18 +57,13 @@ export interface SubagentLifecyclePayload {
57
57
  index: number;
58
58
  }
59
59
 
60
- const assignmentDescriptionForContextEnabled =
61
- "Complete per-task instructions the subagent executes. Must follow the Target/Change/Edge Cases/Acceptance structure. Only include per-task deltas — shared background belongs in `context`.";
62
- const assignmentDescriptionForContextDisabled =
63
- "Complete per-task instructions the subagent executes. Must follow the Target/Change/Edge Cases/Acceptance structure, and include any background that would otherwise live in `context` since shared context is disabled in this mode.";
60
+ const assignmentDescription = "per-task instructions; self-contained";
64
61
 
65
- const createTaskItemSchema = (contextEnabled: boolean) =>
62
+ const createTaskItemSchema = (_contextEnabled: boolean) =>
66
63
  z.object({
67
- id: z.string().max(48).describe("CamelCase identifier, max 48 chars"),
68
- description: z.string().describe("Short one-liner for UI display only — not seen by the subagent"),
69
- assignment: z
70
- .string()
71
- .describe(contextEnabled ? assignmentDescriptionForContextEnabled : assignmentDescriptionForContextDisabled),
64
+ id: z.string().max(48).describe("camelcase identifier"),
65
+ description: z.string().describe("ui label, not seen by subagent"),
66
+ assignment: z.string().describe(assignmentDescription),
72
67
  });
73
68
 
74
69
  /** Single task item for parallel execution (default shape with context enabled). */
@@ -80,44 +75,24 @@ const createTaskSchema = (options: { isolationEnabled: boolean; simpleMode: Task
80
75
  const itemSchema = createTaskItemSchema(contextEnabled);
81
76
 
82
77
  let schema = z.object({
83
- agent: z.string().describe("Agent type for all tasks in this batch"),
84
- tasks: z
85
- .array(itemSchema)
86
- .describe(
87
- contextEnabled
88
- ? "Tasks to execute in parallel. Each must be small-scoped (3-5 files max) and self-contained given context + assignment."
89
- : "Tasks to execute in parallel. Each must be small-scoped (3-5 files max) and fully self-contained inside assignment because shared context is disabled.",
90
- ),
78
+ agent: z.string().describe("agent type"),
79
+ tasks: z.array(itemSchema).describe("tasks to execute in parallel"),
91
80
  });
92
-
93
81
  if (contextEnabled) {
94
82
  schema = schema.extend({
95
- context: z
96
- .string()
97
- .optional()
98
- .describe(
99
- "Shared background prepended to every task's assignment. Put goal, non-goals, constraints, conventions, reference paths, API contracts, and global acceptance commands here once — instead of duplicating across assignments.",
100
- ),
83
+ context: z.string().optional().describe("shared background prepended to each assignment"),
101
84
  });
102
85
  }
103
86
 
104
87
  if (customSchemaEnabled) {
105
88
  schema = schema.extend({
106
- schema: z
107
- .string()
108
- .optional()
109
- .describe(
110
- "JSON-encoded JTD schema defining expected response structure. Output format belongs here — never in context or assignment.",
111
- ),
89
+ schema: z.string().optional().describe("jtd schema for expected response shape"),
112
90
  });
113
91
  }
114
92
 
115
93
  if (options.isolationEnabled) {
116
94
  schema = schema.extend({
117
- isolated: z
118
- .boolean()
119
- .optional()
120
- .describe("Run in isolated environment; returns patches. Use when tasks edit overlapping files."),
95
+ isolated: z.boolean().optional().describe("run in isolated env; returns patches"),
121
96
  });
122
97
  }
123
98
 
@@ -1,5 +1,5 @@
1
1
  import type { AgentToolContext } from "@oh-my-pi/pi-agent-core";
2
- import { type PtyRunResult, PtySession, sanitizeText } from "@oh-my-pi/pi-natives";
2
+ import { type PtyRunResult, PtySession } from "@oh-my-pi/pi-natives";
3
3
  import {
4
4
  type Component,
5
5
  extractPrintableText,
@@ -10,6 +10,7 @@ import {
10
10
  truncateToWidth,
11
11
  visibleWidth,
12
12
  } from "@oh-my-pi/pi-tui";
13
+ import { sanitizeText } from "@oh-my-pi/pi-utils";
13
14
  import type { Terminal as XtermTerminalType } from "@xterm/headless";
14
15
  import xterm from "@xterm/headless";
15
16
  import { Settings } from "../config/settings";
@@ -297,6 +298,7 @@ export async function runInteractiveBashPty(
297
298
  },
298
299
  ): Promise<BashInteractiveResult> {
299
300
  const settings = await Settings.init();
301
+ const { shell: resolvedShell } = settings.getShellConfig();
300
302
  const sink = new OutputSink({
301
303
  artifactPath: options.artifactPath,
302
304
  artifactId: options.artifactId,
@@ -363,6 +365,7 @@ export async function runInteractiveBashPty(
363
365
  signal: options.signal,
364
366
  cols,
365
367
  rows,
368
+ shell: resolvedShell,
366
369
  },
367
370
  (err, chunk) => {
368
371
  if (finished || err || !chunk) return;
@@ -9,6 +9,6 @@ export interface BashPtyContext {
9
9
  /** Return whether a bash tool call should use the local interactive PTY overlay. */
10
10
  export function canUseInteractiveBashPty(pty: boolean, ctx: BashPtyContext | undefined): boolean {
11
11
  if (!pty) return false;
12
- if (process.platform === "win32") return false;
13
- return $env.PI_NO_PTY !== "1" && ctx?.hasUI === true && ctx.ui !== undefined;
12
+ if ($env.PI_NO_PTY === "1") return false;
13
+ return ctx?.hasUI === true && ctx.ui !== undefined;
14
14
  }
@@ -18,19 +18,16 @@ export type { Observation, ObservationEntry } from "./browser/tab-protocol";
18
18
  const DEFAULT_TAB_NAME = "main";
19
19
 
20
20
  const appSchema = z.object({
21
- path: z.string().describe("absolute path to a binary to spawn (single-instance reuse)").optional(),
22
- cdp_url: z.string().describe("existing CDP endpoint to connect to (e.g. http://127.0.0.1:9222)").optional(),
23
- args: z.array(z.string()).describe("extra CLI args when spawning").optional(),
24
- target: z.string().describe("substring matched against url+title to pick a BrowserWindow").optional(),
21
+ path: z.string().describe("binary path to spawn").optional(),
22
+ cdp_url: z.string().describe("existing cdp endpoint").optional(),
23
+ args: z.array(z.string()).describe("extra cli args").optional(),
24
+ target: z.string().describe("substring to pick a window").optional(),
25
25
  });
26
26
 
27
27
  const browserSchema = z.object({
28
- action: z.enum(["open", "close", "run"] as const).describe("tab/browser operation"),
29
- name: z
30
- .string()
31
- .describe("tab id; default 'main'. Multiple tabs can coexist; reusable across run() calls and subagents.")
32
- .optional(),
33
- url: z.string().describe("open: navigate after acquiring tab").optional(),
28
+ action: z.enum(["open", "close", "run"] as const).describe("operation"),
29
+ name: z.string().describe("tab id (default 'main')").optional(),
30
+ url: z.string().describe("url to open").optional(),
34
31
  app: appSchema.optional(),
35
32
  viewport: z
36
33
  .object({
@@ -41,21 +38,16 @@ const browserSchema = z.object({
41
38
  .optional(),
42
39
  wait_until: z
43
40
  .enum(["load", "domcontentloaded", "networkidle0", "networkidle2"] as const)
44
- .describe("navigation wait condition for url")
41
+ .describe("navigation wait condition")
45
42
  .optional(),
46
43
  dialogs: z
47
44
  .enum(["accept", "dismiss"] as const)
48
- .describe("open: auto-handle alert/confirm/beforeunload dialogs (default: leave for caller to handle)")
49
- .optional(),
50
- code: z
51
- .string()
52
- .describe(
53
- "run: JS body executed with `page`, `browser`, `tab`, `display`, `assert`, `wait` in scope. Treated as the body of an async function. Use `display(value)` to attach text/JSON/images; the function's return value is JSON-serialized as a final block.",
54
- )
45
+ .describe("auto-handle dialogs")
55
46
  .optional(),
47
+ code: z.string().describe("js body to run in tab").optional(),
56
48
  timeout: z.number().default(30).describe("timeout in seconds").optional(),
57
- all: z.boolean().describe("close: close every tab").optional(),
58
- kill: z.boolean().describe("close: also kill spawned-app browsers (default: leave running)").optional(),
49
+ all: z.boolean().describe("close every tab").optional(),
50
+ kill: z.boolean().describe("also kill spawned-app browsers").optional(),
59
51
  });
60
52
 
61
53
  /** Input schema for the browser tool. */
package/src/tools/eval.ts CHANGED
@@ -4,10 +4,8 @@ import type { Component } from "@oh-my-pi/pi-tui";
4
4
  import { Markdown, Text } from "@oh-my-pi/pi-tui";
5
5
  import { prompt } from "@oh-my-pi/pi-utils";
6
6
  import * as z from "zod/v4";
7
- import { jsBackend, parseEvalInput, pythonBackend, sniffEvalLanguage } from "../eval";
7
+ import { jsBackend, pythonBackend } from "../eval";
8
8
  import type { ExecutorBackend } from "../eval/backend";
9
- import evalGrammar from "../eval/eval.lark" with { type: "text" };
10
- import { ABORT_WARNING, type ParsedEvalCell } from "../eval/parse";
11
9
  import type { EvalCellResult, EvalDisplayOutput, EvalLanguage, EvalStatusEvent, EvalToolDetails } from "../eval/types";
12
10
  import type { RenderResultOptions } from "../extensibility/custom-tools/types";
13
11
  import { truncateToVisualLines } from "../modes/components/visual-truncate";
@@ -29,8 +27,27 @@ import { clampTimeout } from "./tool-timeouts";
29
27
 
30
28
  export const EVAL_DEFAULT_PREVIEW_LINES = 10;
31
29
 
30
+ /**
31
+ * Per-cell input. Each cell runs in order; state persists within a language
32
+ * across cells and across tool calls.
33
+ */
34
+ const evalCellSchema = z.object({
35
+ language: z.enum(["py", "js"]).describe('runtime: "py" for the IPython kernel, "js" for the persistent JS VM'),
36
+ code: z.string().describe("cell body, verbatim. Use top-level await freely."),
37
+ title: z.string().optional().describe('short label shown in transcript (e.g. "imports", "load config")'),
38
+ timeout: z.number().int().min(1).max(600).optional().describe("per-cell timeout in seconds (1-600, default 30)"),
39
+ reset: z
40
+ .boolean()
41
+ .optional()
42
+ .describe("wipe this cell's language kernel before running. Other languages are untouched."),
43
+ });
44
+ export type EvalCellInput = z.infer<typeof evalCellSchema>;
45
+
32
46
  export const evalSchema = z.object({
33
- input: z.string().describe('eval input as a sequence of `*** Cell <lang>:"title"` cell headers followed by code'),
47
+ cells: z
48
+ .array(evalCellSchema)
49
+ .min(1)
50
+ .describe("cells executed in order. State persists within each language across cells and tool calls."),
34
51
  });
35
52
  export type EvalToolParams = z.infer<typeof evalSchema>;
36
53
 
@@ -134,7 +151,6 @@ export interface EvalToolOptions {
134
151
 
135
152
  interface ResolvedBackend {
136
153
  backend: ExecutorBackend;
137
- fallback: boolean;
138
154
  notice?: string;
139
155
  }
140
156
 
@@ -166,51 +182,21 @@ function timeoutSecondsFromMs(timeoutMs: number): number {
166
182
  return clampTimeout("eval", timeoutMs / 1000);
167
183
  }
168
184
 
169
- async function resolveBackend(
170
- session: ToolSession,
171
- requested: EvalLanguage | undefined,
172
- code: string,
173
- ): Promise<ResolvedBackend> {
185
+ async function resolveBackend(session: ToolSession, language: EvalLanguage): Promise<ResolvedBackend> {
174
186
  const allowPy = (session.settings.get("eval.py") as boolean | undefined) ?? true;
175
187
  const allowJs = (session.settings.get("eval.js") as boolean | undefined) ?? true;
176
188
 
177
- if (requested === "python") {
189
+ if (language === "python") {
178
190
  if (!allowPy) throw new ToolError("Python backend is disabled (eval.py = false).");
179
191
  if (!(await pythonBackend.isAvailable(session))) {
180
192
  throw new ToolError(
181
193
  'Python backend is unavailable in this session. Pass language: "js" or install the python kernel.',
182
194
  );
183
195
  }
184
- return { backend: pythonBackend, fallback: false };
185
- }
186
- if (requested === "js") {
187
- if (!allowJs) throw new ToolError("JavaScript backend is disabled (eval.js = false).");
188
- return { backend: jsBackend, fallback: false };
189
- }
190
- // Auto-detect.
191
- const sniffed = sniffEvalLanguage(code);
192
- if (sniffed === "python" && allowPy && (await pythonBackend.isAvailable(session))) {
193
- return { backend: pythonBackend, fallback: false };
194
- }
195
- if (sniffed === "js" && allowJs) {
196
- return { backend: jsBackend, fallback: false };
196
+ return { backend: pythonBackend };
197
197
  }
198
-
199
- // Sniffer returned undefined or the preferred backend was disabled. Prefer
200
- // python when its kernel is up, else fall back to js.
201
- if (allowPy && (await pythonBackend.isAvailable(session))) {
202
- const notice =
203
- sniffed === "js" ? "JavaScript markers detected but eval.js is disabled; using Python." : undefined;
204
- return { backend: pythonBackend, fallback: false, notice };
205
- }
206
- if (allowJs) {
207
- const notice =
208
- sniffed === "python"
209
- ? "Python markers detected but the python kernel is unavailable; using JavaScript."
210
- : undefined;
211
- return { backend: jsBackend, fallback: true, notice };
212
- }
213
- throw new ToolError("No eval backend is available; enable eval.py or eval.js.");
198
+ if (!allowJs) throw new ToolError("JavaScript backend is disabled (eval.js = false).");
199
+ return { backend: jsBackend };
214
200
  }
215
201
 
216
202
  export class EvalTool implements AgentTool<typeof evalSchema> {
@@ -227,20 +213,15 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
227
213
  readonly concurrency = "exclusive";
228
214
  readonly strict = true;
229
215
  readonly intent = (args: Partial<z.infer<typeof evalSchema>>): string | undefined => {
230
- const input = args.input;
231
- if (input) {
232
- try {
233
- const cells = parseEvalInput(input).cells;
234
- return cells.map(cell => cell.title || `running ${cell.language}`).join("\n");
235
- } catch {}
236
- }
237
- return "evaluating";
216
+ const cells = Array.isArray(args.cells) ? args.cells : [];
217
+ const first = cells.find(c => c && typeof c === "object");
218
+ if (!first) return "evaluating";
219
+ const title = typeof first.title === "string" ? first.title : undefined;
220
+ const language = typeof first.language === "string" ? first.language : "?";
221
+ const label = title || `running ${language}`;
222
+ return cells.length > 1 ? `${label} (+${cells.length - 1})` : label;
238
223
  };
239
224
 
240
- get customFormat(): { syntax: "lark"; definition: string } {
241
- return { syntax: "lark", definition: evalGrammar };
242
- }
243
-
244
225
  readonly #proxyExecutor?: EvalProxyExecutor;
245
226
 
246
227
  constructor(
@@ -266,19 +247,17 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
266
247
  }
267
248
  const session = this.session;
268
249
 
269
- const parsedInput = parseEvalInput(params.input);
270
- let previousRuntimeLanguage: EvalLanguage | undefined;
271
250
  const cells: ResolvedEvalCell[] = [];
272
- for (const cell of parsedInput.cells) {
273
- const requested = cell.languageOrigin === "header" ? cell.language : (previousRuntimeLanguage ?? undefined);
274
- const resolved = await resolveBackend(session, requested, cell.code);
275
- previousRuntimeLanguage = resolved.backend.id;
251
+ for (let i = 0; i < params.cells.length; i++) {
252
+ const cell = params.cells[i];
253
+ const language: EvalLanguage = cell.language === "py" ? "python" : "js";
254
+ const resolved = await resolveBackend(session, language);
276
255
  cells.push({
277
- index: cell.index,
256
+ index: i,
278
257
  title: cell.title,
279
258
  code: cell.code,
280
- timeoutMs: cell.timeoutMs,
281
- reset: cell.reset,
259
+ timeoutMs: (cell.timeout ?? 30) * 1000,
260
+ reset: cell.reset ?? false,
282
261
  resolved,
283
262
  });
284
263
  }
@@ -462,11 +441,10 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
462
441
  pushUpdate();
463
442
  const errorMsg = result.output || "Command aborted";
464
443
  const combinedOutput = cellOutputs.join("\n\n");
465
- const abortSuffix = parsedInput.aborted ? `\n\n${ABORT_WARNING}` : "";
466
444
  const outputText =
467
- (cells.length > 1
445
+ cells.length > 1
468
446
  ? `${combinedOutput}\n\nCell ${i + 1} aborted: ${errorMsg}`
469
- : combinedOutput || errorMsg) + abortSuffix;
447
+ : combinedOutput || errorMsg;
470
448
 
471
449
  const summaryForMeta = await summarizeFinal(combinedOutput, finalizeOutput);
472
450
  const details: EvalToolDetails = {
@@ -489,13 +467,12 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
489
467
  cellResult.status = "error";
490
468
  pushUpdate();
491
469
  const combinedOutput = cellOutputs.join("\n\n");
492
- const abortSuffix = parsedInput.aborted ? `\n\n${ABORT_WARNING}` : "";
493
470
  const outputText =
494
- (cells.length > 1
471
+ cells.length > 1
495
472
  ? `${combinedOutput}\n\nCell ${i + 1} failed (exit code ${result.exitCode}). Earlier cells succeeded—their state persists. Fix only cell ${i + 1}.`
496
473
  : combinedOutput
497
474
  ? `${combinedOutput}\n\nCommand exited with code ${result.exitCode}`
498
- : `Command exited with code ${result.exitCode}`) + abortSuffix;
475
+ : `Command exited with code ${result.exitCode}`;
499
476
 
500
477
  const summaryForMeta = await summarizeFinal(combinedOutput, finalizeOutput);
501
478
  const details: EvalToolDetails = {
@@ -519,13 +496,12 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
519
496
  }
520
497
 
521
498
  const combinedOutput = cellOutputs.join("\n\n");
522
- const abortSuffix = parsedInput.aborted ? `\n\n${ABORT_WARNING}` : "";
523
499
  const hasImages = images.length > 0;
524
500
  const outputText =
525
- (combinedOutput ||
526
- (hasImages
527
- ? `(displayed ${images.length} image${images.length === 1 ? "" : "s"}; no text output)`
528
- : "(no output)")) + abortSuffix;
501
+ combinedOutput ||
502
+ (hasImages
503
+ ? `(displayed ${images.length} image${images.length === 1 ? "" : "s"}; no text output)`
504
+ : "(no output)");
529
505
  const summaryForMeta = await summarizeFinal(combinedOutput, finalizeOutput);
530
506
 
531
507
  const details: EvalToolDetails = {
@@ -581,8 +557,14 @@ async function summarizeFinal(
581
557
  };
582
558
  }
583
559
 
560
+ interface EvalRenderCellArg {
561
+ language?: string;
562
+ code?: string;
563
+ title?: string;
564
+ }
565
+
584
566
  interface EvalRenderArgs {
585
- input?: string;
567
+ cells?: EvalRenderCellArg[];
586
568
  __partialJson?: string;
587
569
  }
588
570
 
@@ -593,27 +575,30 @@ interface EvalRenderContext {
593
575
  timeout?: number;
594
576
  }
595
577
 
596
- function decodePartialJsonStringFragment(fragment: string): string {
597
- let text = fragment.replace(/\\u[0-9a-fA-F]{0,3}$/, "");
598
- const trailingBackslashes = text.match(/\\+$/)?.[0].length ?? 0;
599
- if (trailingBackslashes % 2 === 1) text = text.slice(0, -1);
600
- try {
601
- return JSON.parse(`"${text}"`) as string;
602
- } catch {
603
- return text;
604
- }
578
+ interface EvalRenderCell {
579
+ language: EvalLanguage;
580
+ code: string;
581
+ title?: string;
605
582
  }
606
583
 
607
- function extractPartialJsonString(partialJson: string | undefined, key: string): string | undefined {
608
- if (!partialJson) return undefined;
609
- const pattern = new RegExp(`"${key}"\\s*:\\s*"((?:\\\\.|[^"\\\\])*)`, "u");
610
- const match = pattern.exec(partialJson);
611
- if (!match) return undefined;
612
- return decodePartialJsonStringFragment(match[1]);
584
+ function normalizeRenderLanguage(value: string | undefined): EvalLanguage {
585
+ return value === "js" ? "js" : "python";
613
586
  }
614
587
 
615
- function getRenderInput(args: EvalRenderArgs | undefined): string | undefined {
616
- return args?.input ?? extractPartialJsonString(args?.__partialJson, "input");
588
+ function getRenderCells(args: EvalRenderArgs | undefined): EvalRenderCell[] {
589
+ const raw = args?.cells;
590
+ if (!Array.isArray(raw)) return [];
591
+ const out: EvalRenderCell[] = [];
592
+ for (const cell of raw) {
593
+ if (!cell || typeof cell !== "object") continue;
594
+ const code = typeof cell.code === "string" ? cell.code : "";
595
+ out.push({
596
+ language: normalizeRenderLanguage(typeof cell.language === "string" ? cell.language : undefined),
597
+ code,
598
+ title: typeof cell.title === "string" ? cell.title : undefined,
599
+ });
600
+ }
601
+ return out;
617
602
  }
618
603
 
619
604
  /** Format a status event as a single line for display. */
@@ -861,15 +846,7 @@ function formatCellOutputLines(
861
846
 
862
847
  export const evalToolRenderer = {
863
848
  renderCall(args: EvalRenderArgs, _options: RenderResultOptions, uiTheme: Theme): Component {
864
- const input = getRenderInput(args);
865
- let cells: ParsedEvalCell[] = [];
866
- if (input) {
867
- try {
868
- cells = parseEvalInput(input).cells;
869
- } catch {
870
- cells = [];
871
- }
872
- }
849
+ const cells = getRenderCells(args);
873
850
 
874
851
  if (cells.length === 0) {
875
852
  const promptSym = uiTheme.fg("accent", ">>>");
@@ -881,7 +858,7 @@ export const evalToolRenderer = {
881
858
 
882
859
  return {
883
860
  render: (width: number): string[] => {
884
- const key = `${input?.length ?? 0}`;
861
+ const key = cells.map(c => `${c.language}:${c.title ?? ""}:${c.code.length}`).join("|");
885
862
  if (cached && cached.key === key && cached.width === width) {
886
863
  return cached.result;
887
864
  }