@oh-my-pi/pi-coding-agent 15.1.2 → 15.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/CHANGELOG.md +42 -0
  2. package/dist/types/cli/auth-broker-cli.d.ts +25 -0
  3. package/dist/types/cli/auth-gateway-cli.d.ts +18 -0
  4. package/dist/types/cli/grievances-cli.d.ts +12 -0
  5. package/dist/types/commands/auth-broker.d.ts +54 -0
  6. package/dist/types/commands/auth-gateway.d.ts +32 -0
  7. package/dist/types/commands/grievances.d.ts +1 -1
  8. package/dist/types/commit/agentic/tools/propose-commit.d.ts +9 -1
  9. package/dist/types/commit/agentic/tools/schemas.d.ts +9 -1
  10. package/dist/types/commit/agentic/tools/split-commit.d.ts +9 -1
  11. package/dist/types/config/model-registry.d.ts +3 -0
  12. package/dist/types/config/models-config-schema.d.ts +1 -0
  13. package/dist/types/config/settings-schema.d.ts +46 -0
  14. package/dist/types/discovery/agents.d.ts +12 -1
  15. package/dist/types/edit/renderer.d.ts +3 -0
  16. package/dist/types/eval/index.d.ts +0 -2
  17. package/dist/types/goals/tools/goal-tool.d.ts +10 -2
  18. package/dist/types/index.d.ts +0 -1
  19. package/dist/types/internal-urls/index.d.ts +1 -1
  20. package/dist/types/internal-urls/{pi-protocol.d.ts → omp-protocol.d.ts} +3 -3
  21. package/dist/types/internal-urls/types.d.ts +1 -1
  22. package/dist/types/modes/acp/acp-agent.d.ts +1 -0
  23. package/dist/types/modes/emoji-autocomplete.d.ts +16 -0
  24. package/dist/types/modes/interactive-mode.d.ts +1 -1
  25. package/dist/types/modes/prompt-action-autocomplete.d.ts +4 -0
  26. package/dist/types/plan-mode/approved-plan.d.ts +4 -0
  27. package/dist/types/sdk.d.ts +10 -3
  28. package/dist/types/session/agent-session.d.ts +1 -1
  29. package/dist/types/session/auth-broker-config.d.ts +13 -0
  30. package/dist/types/session/auth-storage.d.ts +1 -1
  31. package/dist/types/tools/eval.d.ts +41 -7
  32. package/dist/types/tools/irc.d.ts +8 -2
  33. package/dist/types/tools/report-tool-issue.d.ts +118 -1
  34. package/dist/types/tools/resolve.d.ts +8 -2
  35. package/examples/custom-tools/README.md +3 -12
  36. package/examples/extensions/README.md +2 -15
  37. package/examples/extensions/api-demo.ts +1 -7
  38. package/package.json +7 -7
  39. package/src/autoresearch/tools/init-experiment.ts +11 -33
  40. package/src/autoresearch/tools/log-experiment.ts +10 -24
  41. package/src/autoresearch/tools/run-experiment.ts +1 -1
  42. package/src/autoresearch/tools/update-notes.ts +2 -9
  43. package/src/cli/auth-broker-cli.ts +746 -0
  44. package/src/cli/auth-gateway-cli.ts +342 -0
  45. package/src/cli/grievances-cli.ts +109 -16
  46. package/src/cli.ts +4 -2
  47. package/src/commands/auth-broker.ts +96 -0
  48. package/src/commands/auth-gateway.ts +61 -0
  49. package/src/commands/grievances.ts +13 -8
  50. package/src/commands/launch.ts +1 -1
  51. package/src/commit/agentic/agent.ts +2 -0
  52. package/src/commit/agentic/tools/analyze-file.ts +2 -2
  53. package/src/commit/agentic/tools/git-file-diff.ts +2 -2
  54. package/src/commit/agentic/tools/git-hunk.ts +3 -3
  55. package/src/commit/agentic/tools/git-overview.ts +2 -2
  56. package/src/commit/agentic/tools/propose-changelog.ts +1 -3
  57. package/src/commit/agentic/tools/recent-commits.ts +1 -1
  58. package/src/commit/agentic/tools/schemas.ts +1 -9
  59. package/src/config/model-equivalence.ts +279 -174
  60. package/src/config/model-registry.ts +37 -6
  61. package/src/config/model-resolver.ts +13 -8
  62. package/src/config/models-config-schema.ts +8 -0
  63. package/src/config/settings-schema.ts +52 -0
  64. package/src/cursor.ts +1 -1
  65. package/src/debug/log-formatting.ts +1 -1
  66. package/src/debug/log-viewer.ts +1 -1
  67. package/src/debug/profiler.ts +4 -0
  68. package/src/debug/raw-sse-buffer.ts +100 -59
  69. package/src/debug/raw-sse.ts +1 -1
  70. package/src/discovery/agents.ts +15 -4
  71. package/src/edit/modes/apply-patch.ts +1 -5
  72. package/src/edit/modes/patch.ts +5 -5
  73. package/src/edit/modes/replace.ts +5 -5
  74. package/src/edit/renderer.ts +2 -1
  75. package/src/edit/streaming.ts +1 -1
  76. package/src/eval/index.ts +0 -2
  77. package/src/eval/js/shared/runtime.ts +25 -0
  78. package/src/eval/py/kernel.ts +1 -1
  79. package/src/exa/researcher.ts +4 -4
  80. package/src/exa/search.ts +10 -22
  81. package/src/exa/websets.ts +33 -33
  82. package/src/goals/tools/goal-tool.ts +3 -3
  83. package/src/index.ts +0 -3
  84. package/src/internal-urls/docs-index.generated.ts +21 -18
  85. package/src/internal-urls/index.ts +1 -1
  86. package/src/internal-urls/{pi-protocol.ts → omp-protocol.ts} +10 -10
  87. package/src/internal-urls/router.ts +3 -3
  88. package/src/internal-urls/types.ts +1 -1
  89. package/src/lsp/types.ts +8 -11
  90. package/src/main.ts +3 -0
  91. package/src/mcp/tool-bridge.ts +3 -3
  92. package/src/modes/acp/acp-agent.ts +88 -25
  93. package/src/modes/components/bash-execution.ts +1 -1
  94. package/src/modes/components/diff.ts +1 -2
  95. package/src/modes/components/eval-execution.ts +1 -1
  96. package/src/modes/components/oauth-selector.ts +38 -2
  97. package/src/modes/components/tool-execution.ts +1 -2
  98. package/src/modes/controllers/command-controller.ts +95 -34
  99. package/src/modes/controllers/input-controller.ts +4 -3
  100. package/src/modes/data/emojis.json +1 -0
  101. package/src/modes/emoji-autocomplete.ts +285 -0
  102. package/src/modes/interactive-mode.ts +92 -19
  103. package/src/modes/print-mode.ts +3 -3
  104. package/src/modes/prompt-action-autocomplete.ts +14 -0
  105. package/src/plan-mode/approved-plan.ts +9 -0
  106. package/src/prompts/system/system-prompt.md +1 -1
  107. package/src/prompts/system/ttsr-tool-reminder.md +5 -0
  108. package/src/prompts/tools/eval.md +25 -26
  109. package/src/prompts/tools/read.md +1 -1
  110. package/src/prompts/tools/resolve.md +1 -1
  111. package/src/prompts/tools/search.md +1 -1
  112. package/src/prompts/tools/web-search.md +1 -1
  113. package/src/sdk.ts +78 -7
  114. package/src/session/agent-session.ts +176 -77
  115. package/src/session/agent-storage.ts +7 -2
  116. package/src/session/auth-broker-config.ts +102 -0
  117. package/src/session/auth-storage.ts +7 -1
  118. package/src/session/streaming-output.ts +1 -1
  119. package/src/task/types.ts +10 -35
  120. package/src/tools/bash-interactive.ts +4 -1
  121. package/src/tools/bash-pty-selection.ts +2 -2
  122. package/src/tools/browser.ts +12 -20
  123. package/src/tools/eval.ts +77 -100
  124. package/src/tools/gh.ts +21 -45
  125. package/src/tools/hindsight-recall.ts +1 -1
  126. package/src/tools/hindsight-reflect.ts +2 -2
  127. package/src/tools/hindsight-retain.ts +3 -7
  128. package/src/tools/index.ts +8 -1
  129. package/src/tools/inspect-image.ts +4 -1
  130. package/src/tools/irc.ts +4 -12
  131. package/src/tools/job.ts +3 -11
  132. package/src/tools/report-tool-issue.ts +462 -17
  133. package/src/tools/resolve.ts +2 -7
  134. package/src/tools/todo-write.ts +8 -15
  135. package/src/utils/title-generator.ts +3 -0
  136. package/src/web/search/index.ts +6 -6
  137. package/dist/types/eval/parse.d.ts +0 -28
  138. package/dist/types/eval/sniff.d.ts +0 -11
  139. package/src/eval/eval.lark +0 -36
  140. package/src/eval/parse.ts +0 -407
  141. package/src/eval/sniff.ts +0 -28
package/src/task/types.ts CHANGED
@@ -57,18 +57,13 @@ export interface SubagentLifecyclePayload {
57
57
  index: number;
58
58
  }
59
59
 
60
- const assignmentDescriptionForContextEnabled =
61
- "Complete per-task instructions the subagent executes. Must follow the Target/Change/Edge Cases/Acceptance structure. Only include per-task deltas — shared background belongs in `context`.";
62
- const assignmentDescriptionForContextDisabled =
63
- "Complete per-task instructions the subagent executes. Must follow the Target/Change/Edge Cases/Acceptance structure, and include any background that would otherwise live in `context` since shared context is disabled in this mode.";
60
+ const assignmentDescription = "per-task instructions; self-contained";
64
61
 
65
- const createTaskItemSchema = (contextEnabled: boolean) =>
62
+ const createTaskItemSchema = (_contextEnabled: boolean) =>
66
63
  z.object({
67
- id: z.string().max(48).describe("CamelCase identifier, max 48 chars"),
68
- description: z.string().describe("Short one-liner for UI display only — not seen by the subagent"),
69
- assignment: z
70
- .string()
71
- .describe(contextEnabled ? assignmentDescriptionForContextEnabled : assignmentDescriptionForContextDisabled),
64
+ id: z.string().max(48).describe("camelcase identifier"),
65
+ description: z.string().describe("ui label, not seen by subagent"),
66
+ assignment: z.string().describe(assignmentDescription),
72
67
  });
73
68
 
74
69
  /** Single task item for parallel execution (default shape with context enabled). */
@@ -80,44 +75,24 @@ const createTaskSchema = (options: { isolationEnabled: boolean; simpleMode: Task
80
75
  const itemSchema = createTaskItemSchema(contextEnabled);
81
76
 
82
77
  let schema = z.object({
83
- agent: z.string().describe("Agent type for all tasks in this batch"),
84
- tasks: z
85
- .array(itemSchema)
86
- .describe(
87
- contextEnabled
88
- ? "Tasks to execute in parallel. Each must be small-scoped (3-5 files max) and self-contained given context + assignment."
89
- : "Tasks to execute in parallel. Each must be small-scoped (3-5 files max) and fully self-contained inside assignment because shared context is disabled.",
90
- ),
78
+ agent: z.string().describe("agent type"),
79
+ tasks: z.array(itemSchema).describe("tasks to execute in parallel"),
91
80
  });
92
-
93
81
  if (contextEnabled) {
94
82
  schema = schema.extend({
95
- context: z
96
- .string()
97
- .optional()
98
- .describe(
99
- "Shared background prepended to every task's assignment. Put goal, non-goals, constraints, conventions, reference paths, API contracts, and global acceptance commands here once — instead of duplicating across assignments.",
100
- ),
83
+ context: z.string().optional().describe("shared background prepended to each assignment"),
101
84
  });
102
85
  }
103
86
 
104
87
  if (customSchemaEnabled) {
105
88
  schema = schema.extend({
106
- schema: z
107
- .string()
108
- .optional()
109
- .describe(
110
- "JSON-encoded JTD schema defining expected response structure. Output format belongs here — never in context or assignment.",
111
- ),
89
+ schema: z.string().optional().describe("jtd schema for expected response shape"),
112
90
  });
113
91
  }
114
92
 
115
93
  if (options.isolationEnabled) {
116
94
  schema = schema.extend({
117
- isolated: z
118
- .boolean()
119
- .optional()
120
- .describe("Run in isolated environment; returns patches. Use when tasks edit overlapping files."),
95
+ isolated: z.boolean().optional().describe("run in isolated env; returns patches"),
121
96
  });
122
97
  }
123
98
 
@@ -1,5 +1,5 @@
1
1
  import type { AgentToolContext } from "@oh-my-pi/pi-agent-core";
2
- import { type PtyRunResult, PtySession, sanitizeText } from "@oh-my-pi/pi-natives";
2
+ import { type PtyRunResult, PtySession } from "@oh-my-pi/pi-natives";
3
3
  import {
4
4
  type Component,
5
5
  extractPrintableText,
@@ -10,6 +10,7 @@ import {
10
10
  truncateToWidth,
11
11
  visibleWidth,
12
12
  } from "@oh-my-pi/pi-tui";
13
+ import { sanitizeText } from "@oh-my-pi/pi-utils";
13
14
  import type { Terminal as XtermTerminalType } from "@xterm/headless";
14
15
  import xterm from "@xterm/headless";
15
16
  import { Settings } from "../config/settings";
@@ -297,6 +298,7 @@ export async function runInteractiveBashPty(
297
298
  },
298
299
  ): Promise<BashInteractiveResult> {
299
300
  const settings = await Settings.init();
301
+ const { shell: resolvedShell } = settings.getShellConfig();
300
302
  const sink = new OutputSink({
301
303
  artifactPath: options.artifactPath,
302
304
  artifactId: options.artifactId,
@@ -363,6 +365,7 @@ export async function runInteractiveBashPty(
363
365
  signal: options.signal,
364
366
  cols,
365
367
  rows,
368
+ shell: resolvedShell,
366
369
  },
367
370
  (err, chunk) => {
368
371
  if (finished || err || !chunk) return;
@@ -9,6 +9,6 @@ export interface BashPtyContext {
9
9
  /** Return whether a bash tool call should use the local interactive PTY overlay. */
10
10
  export function canUseInteractiveBashPty(pty: boolean, ctx: BashPtyContext | undefined): boolean {
11
11
  if (!pty) return false;
12
- if (process.platform === "win32") return false;
13
- return $env.PI_NO_PTY !== "1" && ctx?.hasUI === true && ctx.ui !== undefined;
12
+ if ($env.PI_NO_PTY === "1") return false;
13
+ return ctx?.hasUI === true && ctx.ui !== undefined;
14
14
  }
@@ -18,19 +18,16 @@ export type { Observation, ObservationEntry } from "./browser/tab-protocol";
18
18
  const DEFAULT_TAB_NAME = "main";
19
19
 
20
20
  const appSchema = z.object({
21
- path: z.string().describe("absolute path to a binary to spawn (single-instance reuse)").optional(),
22
- cdp_url: z.string().describe("existing CDP endpoint to connect to (e.g. http://127.0.0.1:9222)").optional(),
23
- args: z.array(z.string()).describe("extra CLI args when spawning").optional(),
24
- target: z.string().describe("substring matched against url+title to pick a BrowserWindow").optional(),
21
+ path: z.string().describe("binary path to spawn").optional(),
22
+ cdp_url: z.string().describe("existing cdp endpoint").optional(),
23
+ args: z.array(z.string()).describe("extra cli args").optional(),
24
+ target: z.string().describe("substring to pick a window").optional(),
25
25
  });
26
26
 
27
27
  const browserSchema = z.object({
28
- action: z.enum(["open", "close", "run"] as const).describe("tab/browser operation"),
29
- name: z
30
- .string()
31
- .describe("tab id; default 'main'. Multiple tabs can coexist; reusable across run() calls and subagents.")
32
- .optional(),
33
- url: z.string().describe("open: navigate after acquiring tab").optional(),
28
+ action: z.enum(["open", "close", "run"] as const).describe("operation"),
29
+ name: z.string().describe("tab id (default 'main')").optional(),
30
+ url: z.string().describe("url to open").optional(),
34
31
  app: appSchema.optional(),
35
32
  viewport: z
36
33
  .object({
@@ -41,21 +38,16 @@ const browserSchema = z.object({
41
38
  .optional(),
42
39
  wait_until: z
43
40
  .enum(["load", "domcontentloaded", "networkidle0", "networkidle2"] as const)
44
- .describe("navigation wait condition for url")
41
+ .describe("navigation wait condition")
45
42
  .optional(),
46
43
  dialogs: z
47
44
  .enum(["accept", "dismiss"] as const)
48
- .describe("open: auto-handle alert/confirm/beforeunload dialogs (default: leave for caller to handle)")
49
- .optional(),
50
- code: z
51
- .string()
52
- .describe(
53
- "run: JS body executed with `page`, `browser`, `tab`, `display`, `assert`, `wait` in scope. Treated as the body of an async function. Use `display(value)` to attach text/JSON/images; the function's return value is JSON-serialized as a final block.",
54
- )
45
+ .describe("auto-handle dialogs")
55
46
  .optional(),
47
+ code: z.string().describe("js body to run in tab").optional(),
56
48
  timeout: z.number().default(30).describe("timeout in seconds").optional(),
57
- all: z.boolean().describe("close: close every tab").optional(),
58
- kill: z.boolean().describe("close: also kill spawned-app browsers (default: leave running)").optional(),
49
+ all: z.boolean().describe("close every tab").optional(),
50
+ kill: z.boolean().describe("also kill spawned-app browsers").optional(),
59
51
  });
60
52
 
61
53
  /** Input schema for the browser tool. */
package/src/tools/eval.ts CHANGED
@@ -4,10 +4,8 @@ import type { Component } from "@oh-my-pi/pi-tui";
4
4
  import { Markdown, Text } from "@oh-my-pi/pi-tui";
5
5
  import { prompt } from "@oh-my-pi/pi-utils";
6
6
  import * as z from "zod/v4";
7
- import { jsBackend, parseEvalInput, pythonBackend, sniffEvalLanguage } from "../eval";
7
+ import { jsBackend, pythonBackend } from "../eval";
8
8
  import type { ExecutorBackend } from "../eval/backend";
9
- import evalGrammar from "../eval/eval.lark" with { type: "text" };
10
- import { ABORT_WARNING, type ParsedEvalCell } from "../eval/parse";
11
9
  import type { EvalCellResult, EvalDisplayOutput, EvalLanguage, EvalStatusEvent, EvalToolDetails } from "../eval/types";
12
10
  import type { RenderResultOptions } from "../extensibility/custom-tools/types";
13
11
  import { truncateToVisualLines } from "../modes/components/visual-truncate";
@@ -29,8 +27,27 @@ import { clampTimeout } from "./tool-timeouts";
29
27
 
30
28
  export const EVAL_DEFAULT_PREVIEW_LINES = 10;
31
29
 
30
+ /**
31
+ * Per-cell input. Each cell runs in order; state persists within a language
32
+ * across cells and across tool calls.
33
+ */
34
+ const evalCellSchema = z.object({
35
+ language: z.enum(["py", "js"]).describe('runtime: "py" for the IPython kernel, "js" for the persistent JS VM'),
36
+ code: z.string().describe("cell body, verbatim. Use top-level await freely."),
37
+ title: z.string().optional().describe('short label shown in transcript (e.g. "imports", "load config")'),
38
+ timeout: z.number().int().min(1).max(600).optional().describe("per-cell timeout in seconds (1-600, default 30)"),
39
+ reset: z
40
+ .boolean()
41
+ .optional()
42
+ .describe("wipe this cell's language kernel before running. Other languages are untouched."),
43
+ });
44
+ export type EvalCellInput = z.infer<typeof evalCellSchema>;
45
+
32
46
  export const evalSchema = z.object({
33
- input: z.string().describe('eval input as a sequence of `*** Cell <lang>:"title"` cell headers followed by code'),
47
+ cells: z
48
+ .array(evalCellSchema)
49
+ .min(1)
50
+ .describe("cells executed in order. State persists within each language across cells and tool calls."),
34
51
  });
35
52
  export type EvalToolParams = z.infer<typeof evalSchema>;
36
53
 
@@ -134,7 +151,6 @@ export interface EvalToolOptions {
134
151
 
135
152
  interface ResolvedBackend {
136
153
  backend: ExecutorBackend;
137
- fallback: boolean;
138
154
  notice?: string;
139
155
  }
140
156
 
@@ -166,51 +182,21 @@ function timeoutSecondsFromMs(timeoutMs: number): number {
166
182
  return clampTimeout("eval", timeoutMs / 1000);
167
183
  }
168
184
 
169
- async function resolveBackend(
170
- session: ToolSession,
171
- requested: EvalLanguage | undefined,
172
- code: string,
173
- ): Promise<ResolvedBackend> {
185
+ async function resolveBackend(session: ToolSession, language: EvalLanguage): Promise<ResolvedBackend> {
174
186
  const allowPy = (session.settings.get("eval.py") as boolean | undefined) ?? true;
175
187
  const allowJs = (session.settings.get("eval.js") as boolean | undefined) ?? true;
176
188
 
177
- if (requested === "python") {
189
+ if (language === "python") {
178
190
  if (!allowPy) throw new ToolError("Python backend is disabled (eval.py = false).");
179
191
  if (!(await pythonBackend.isAvailable(session))) {
180
192
  throw new ToolError(
181
193
  'Python backend is unavailable in this session. Pass language: "js" or install the python kernel.',
182
194
  );
183
195
  }
184
- return { backend: pythonBackend, fallback: false };
185
- }
186
- if (requested === "js") {
187
- if (!allowJs) throw new ToolError("JavaScript backend is disabled (eval.js = false).");
188
- return { backend: jsBackend, fallback: false };
189
- }
190
- // Auto-detect.
191
- const sniffed = sniffEvalLanguage(code);
192
- if (sniffed === "python" && allowPy && (await pythonBackend.isAvailable(session))) {
193
- return { backend: pythonBackend, fallback: false };
194
- }
195
- if (sniffed === "js" && allowJs) {
196
- return { backend: jsBackend, fallback: false };
196
+ return { backend: pythonBackend };
197
197
  }
198
-
199
- // Sniffer returned undefined or the preferred backend was disabled. Prefer
200
- // python when its kernel is up, else fall back to js.
201
- if (allowPy && (await pythonBackend.isAvailable(session))) {
202
- const notice =
203
- sniffed === "js" ? "JavaScript markers detected but eval.js is disabled; using Python." : undefined;
204
- return { backend: pythonBackend, fallback: false, notice };
205
- }
206
- if (allowJs) {
207
- const notice =
208
- sniffed === "python"
209
- ? "Python markers detected but the python kernel is unavailable; using JavaScript."
210
- : undefined;
211
- return { backend: jsBackend, fallback: true, notice };
212
- }
213
- throw new ToolError("No eval backend is available; enable eval.py or eval.js.");
198
+ if (!allowJs) throw new ToolError("JavaScript backend is disabled (eval.js = false).");
199
+ return { backend: jsBackend };
214
200
  }
215
201
 
216
202
  export class EvalTool implements AgentTool<typeof evalSchema> {
@@ -227,20 +213,15 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
227
213
  readonly concurrency = "exclusive";
228
214
  readonly strict = true;
229
215
  readonly intent = (args: Partial<z.infer<typeof evalSchema>>): string | undefined => {
230
- const input = args.input;
231
- if (input) {
232
- try {
233
- const cells = parseEvalInput(input).cells;
234
- return cells.map(cell => cell.title || `running ${cell.language}`).join("\n");
235
- } catch {}
236
- }
237
- return "evaluating";
216
+ const cells = Array.isArray(args.cells) ? args.cells : [];
217
+ const first = cells.find(c => c && typeof c === "object");
218
+ if (!first) return "evaluating";
219
+ const title = typeof first.title === "string" ? first.title : undefined;
220
+ const language = typeof first.language === "string" ? first.language : "?";
221
+ const label = title || `running ${language}`;
222
+ return cells.length > 1 ? `${label} (+${cells.length - 1})` : label;
238
223
  };
239
224
 
240
- get customFormat(): { syntax: "lark"; definition: string } {
241
- return { syntax: "lark", definition: evalGrammar };
242
- }
243
-
244
225
  readonly #proxyExecutor?: EvalProxyExecutor;
245
226
 
246
227
  constructor(
@@ -266,19 +247,17 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
266
247
  }
267
248
  const session = this.session;
268
249
 
269
- const parsedInput = parseEvalInput(params.input);
270
- let previousRuntimeLanguage: EvalLanguage | undefined;
271
250
  const cells: ResolvedEvalCell[] = [];
272
- for (const cell of parsedInput.cells) {
273
- const requested = cell.languageOrigin === "header" ? cell.language : (previousRuntimeLanguage ?? undefined);
274
- const resolved = await resolveBackend(session, requested, cell.code);
275
- previousRuntimeLanguage = resolved.backend.id;
251
+ for (let i = 0; i < params.cells.length; i++) {
252
+ const cell = params.cells[i];
253
+ const language: EvalLanguage = cell.language === "py" ? "python" : "js";
254
+ const resolved = await resolveBackend(session, language);
276
255
  cells.push({
277
- index: cell.index,
256
+ index: i,
278
257
  title: cell.title,
279
258
  code: cell.code,
280
- timeoutMs: cell.timeoutMs,
281
- reset: cell.reset,
259
+ timeoutMs: (cell.timeout ?? 30) * 1000,
260
+ reset: cell.reset ?? false,
282
261
  resolved,
283
262
  });
284
263
  }
@@ -462,11 +441,10 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
462
441
  pushUpdate();
463
442
  const errorMsg = result.output || "Command aborted";
464
443
  const combinedOutput = cellOutputs.join("\n\n");
465
- const abortSuffix = parsedInput.aborted ? `\n\n${ABORT_WARNING}` : "";
466
444
  const outputText =
467
- (cells.length > 1
445
+ cells.length > 1
468
446
  ? `${combinedOutput}\n\nCell ${i + 1} aborted: ${errorMsg}`
469
- : combinedOutput || errorMsg) + abortSuffix;
447
+ : combinedOutput || errorMsg;
470
448
 
471
449
  const summaryForMeta = await summarizeFinal(combinedOutput, finalizeOutput);
472
450
  const details: EvalToolDetails = {
@@ -489,13 +467,12 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
489
467
  cellResult.status = "error";
490
468
  pushUpdate();
491
469
  const combinedOutput = cellOutputs.join("\n\n");
492
- const abortSuffix = parsedInput.aborted ? `\n\n${ABORT_WARNING}` : "";
493
470
  const outputText =
494
- (cells.length > 1
471
+ cells.length > 1
495
472
  ? `${combinedOutput}\n\nCell ${i + 1} failed (exit code ${result.exitCode}). Earlier cells succeeded—their state persists. Fix only cell ${i + 1}.`
496
473
  : combinedOutput
497
474
  ? `${combinedOutput}\n\nCommand exited with code ${result.exitCode}`
498
- : `Command exited with code ${result.exitCode}`) + abortSuffix;
475
+ : `Command exited with code ${result.exitCode}`;
499
476
 
500
477
  const summaryForMeta = await summarizeFinal(combinedOutput, finalizeOutput);
501
478
  const details: EvalToolDetails = {
@@ -519,13 +496,12 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
519
496
  }
520
497
 
521
498
  const combinedOutput = cellOutputs.join("\n\n");
522
- const abortSuffix = parsedInput.aborted ? `\n\n${ABORT_WARNING}` : "";
523
499
  const hasImages = images.length > 0;
524
500
  const outputText =
525
- (combinedOutput ||
526
- (hasImages
527
- ? `(displayed ${images.length} image${images.length === 1 ? "" : "s"}; no text output)`
528
- : "(no output)")) + abortSuffix;
501
+ combinedOutput ||
502
+ (hasImages
503
+ ? `(displayed ${images.length} image${images.length === 1 ? "" : "s"}; no text output)`
504
+ : "(no output)");
529
505
  const summaryForMeta = await summarizeFinal(combinedOutput, finalizeOutput);
530
506
 
531
507
  const details: EvalToolDetails = {
@@ -581,8 +557,14 @@ async function summarizeFinal(
581
557
  };
582
558
  }
583
559
 
560
+ interface EvalRenderCellArg {
561
+ language?: string;
562
+ code?: string;
563
+ title?: string;
564
+ }
565
+
584
566
  interface EvalRenderArgs {
585
- input?: string;
567
+ cells?: EvalRenderCellArg[];
586
568
  __partialJson?: string;
587
569
  }
588
570
 
@@ -593,27 +575,30 @@ interface EvalRenderContext {
593
575
  timeout?: number;
594
576
  }
595
577
 
596
- function decodePartialJsonStringFragment(fragment: string): string {
597
- let text = fragment.replace(/\\u[0-9a-fA-F]{0,3}$/, "");
598
- const trailingBackslashes = text.match(/\\+$/)?.[0].length ?? 0;
599
- if (trailingBackslashes % 2 === 1) text = text.slice(0, -1);
600
- try {
601
- return JSON.parse(`"${text}"`) as string;
602
- } catch {
603
- return text;
604
- }
578
+ interface EvalRenderCell {
579
+ language: EvalLanguage;
580
+ code: string;
581
+ title?: string;
605
582
  }
606
583
 
607
- function extractPartialJsonString(partialJson: string | undefined, key: string): string | undefined {
608
- if (!partialJson) return undefined;
609
- const pattern = new RegExp(`"${key}"\\s*:\\s*"((?:\\\\.|[^"\\\\])*)`, "u");
610
- const match = pattern.exec(partialJson);
611
- if (!match) return undefined;
612
- return decodePartialJsonStringFragment(match[1]);
584
+ function normalizeRenderLanguage(value: string | undefined): EvalLanguage {
585
+ return value === "js" ? "js" : "python";
613
586
  }
614
587
 
615
- function getRenderInput(args: EvalRenderArgs | undefined): string | undefined {
616
- return args?.input ?? extractPartialJsonString(args?.__partialJson, "input");
588
+ function getRenderCells(args: EvalRenderArgs | undefined): EvalRenderCell[] {
589
+ const raw = args?.cells;
590
+ if (!Array.isArray(raw)) return [];
591
+ const out: EvalRenderCell[] = [];
592
+ for (const cell of raw) {
593
+ if (!cell || typeof cell !== "object") continue;
594
+ const code = typeof cell.code === "string" ? cell.code : "";
595
+ out.push({
596
+ language: normalizeRenderLanguage(typeof cell.language === "string" ? cell.language : undefined),
597
+ code,
598
+ title: typeof cell.title === "string" ? cell.title : undefined,
599
+ });
600
+ }
601
+ return out;
617
602
  }
618
603
 
619
604
  /** Format a status event as a single line for display. */
@@ -861,15 +846,7 @@ function formatCellOutputLines(
861
846
 
862
847
  export const evalToolRenderer = {
863
848
  renderCall(args: EvalRenderArgs, _options: RenderResultOptions, uiTheme: Theme): Component {
864
- const input = getRenderInput(args);
865
- let cells: ParsedEvalCell[] = [];
866
- if (input) {
867
- try {
868
- cells = parseEvalInput(input).cells;
869
- } catch {
870
- cells = [];
871
- }
872
- }
849
+ const cells = getRenderCells(args);
873
850
 
874
851
  if (cells.length === 0) {
875
852
  const promptSym = uiTheme.fg("accent", ">>>");
@@ -881,7 +858,7 @@ export const evalToolRenderer = {
881
858
 
882
859
  return {
883
860
  render: (width: number): string[] => {
884
- const key = `${input?.length ?? 0}`;
861
+ const key = cells.map(c => `${c.language}:${c.title ?? ""}:${c.code.length}`).join("|");
885
862
  if (cached && cached.key === key && cached.width === width) {
886
863
  return cached.result;
887
864
  }
package/src/tools/gh.ts CHANGED
@@ -213,58 +213,34 @@ const githubSchema = z
213
213
  "run_watch",
214
214
  ] as const)
215
215
  .describe("github operation"),
216
- repo: z.string().describe("owner/repo (any op)").optional(),
217
- branch: z.string().describe("branch (repo_view, pr_push local branch, run_watch)").optional(),
216
+ repo: z.string().describe("owner/repo").optional(),
217
+ branch: z.string().describe("branch").optional(),
218
218
  pr: z
219
219
  .union([z.string(), z.array(z.string())])
220
- .describe(
221
- "pr number, url, or branch (pr_checkout); pass an array to batch-process multiple pull requests in one call",
222
- )
223
- .optional(),
224
- force: z.boolean().describe("reset existing local branch (pr_checkout)").optional(),
225
- forceWithLease: z.boolean().describe("force-with-lease push (pr_push)").optional(),
226
- title: z.string().describe("PR title (pr_create)").optional(),
227
- body: z.string().describe("PR body markdown (pr_create); mutually exclusive with fill").optional(),
228
- base: z.string().describe("PR base branch (pr_create); defaults to repo default branch").optional(),
229
- head: z.string().describe("PR head branch (pr_create); defaults to current branch").optional(),
230
- draft: z.boolean().describe("open PR as draft (pr_create)").optional(),
231
- fill: z
232
- .boolean()
233
- .describe("auto-fill PR title/body from commits (pr_create); mutually exclusive with title/body")
234
- .optional(),
235
- reviewer: z.array(z.string()).describe("reviewers to request (pr_create); accepts users or org/team").optional(),
236
- assignee: z.array(z.string()).describe("assignees (pr_create); use @me for the authenticated user").optional(),
237
- label: z.array(z.string()).describe("labels to apply (pr_create)").optional(),
238
- query: z
239
- .string()
240
- .describe("search query (search_issues, search_prs, search_code, search_commits, search_repos)")
241
- .optional(),
242
- since: z
243
- .string()
244
- .describe(
245
- "lower-bound date for search_issues/search_prs/search_commits/search_repos. Accepts a relative duration (`<n><unit>` with unit `m`/`h`/`d`/`w`/`mo`/`y`, e.g. `3d`, `12h`, `2w`) or an ISO date (`YYYY-MM-DD`) / datetime. Translated to a `created:>=…` (or `committer-date:`/`pushed:`) qualifier; not supported by search_code.",
246
- )
247
- .optional(),
248
- until: z
249
- .string()
250
- .describe(
251
- "upper-bound date in the same format as `since`. With both, builds a `field:since..until` range qualifier.",
252
- )
220
+ .describe("pr number, url, or branch")
253
221
  .optional(),
222
+ force: z.boolean().describe("reset existing local branch").optional(),
223
+ forceWithLease: z.boolean().describe("force-with-lease push").optional(),
224
+ title: z.string().describe("pr title").optional(),
225
+ body: z.string().describe("pr body markdown").optional(),
226
+ base: z.string().describe("pr base branch").optional(),
227
+ head: z.string().describe("pr head branch").optional(),
228
+ draft: z.boolean().describe("open pr as draft").optional(),
229
+ fill: z.boolean().describe("auto-fill pr title/body from commits").optional(),
230
+ reviewer: z.array(z.string()).describe("reviewers").optional(),
231
+ assignee: z.array(z.string()).describe("assignees").optional(),
232
+ label: z.array(z.string()).describe("labels").optional(),
233
+ query: z.string().describe("search query").optional(),
234
+ since: z.string().describe("lower-bound date filter").optional(),
235
+ until: z.string().describe("upper-bound date filter").optional(),
254
236
  dateField: z
255
237
  .enum(["created", "updated"] as const)
256
- .describe(
257
- "date field used by `since`/`until`. issues/prs: `created` (default) or `updated`. repos: `created` (default) or `updated` (mapped to GitHub's `pushed:`). commits: ignored — always uses `committer-date`.",
258
- )
238
+ .describe("date field")
259
239
  .default("created")
260
240
  .optional(),
261
- limit: z
262
- .number()
263
- .default(10)
264
- .describe("max results (search_issues, search_prs, search_code, search_commits, search_repos)")
265
- .optional(),
266
- run: z.string().describe("actions run id or url (run_watch)").optional(),
267
- tail: z.number().default(15).describe("log lines per failed job (run_watch)").optional(),
241
+ limit: z.number().default(10).describe("max results").optional(),
242
+ run: z.string().describe("actions run id or url").optional(),
243
+ tail: z.number().default(15).describe("log lines per failed job").optional(),
268
244
  })
269
245
  .strict();
270
246
 
@@ -6,7 +6,7 @@ import recallDescription from "../prompts/tools/recall.md" with { type: "text" }
6
6
  import type { ToolSession } from ".";
7
7
 
8
8
  const hindsightRecallSchema = z.object({
9
- query: z.string().describe("Natural language search query. Be specific about what you need to know."),
9
+ query: z.string().describe("natural language search query"),
10
10
  });
11
11
 
12
12
  export type HindsightRecallParams = z.infer<typeof hindsightRecallSchema>;
@@ -6,8 +6,8 @@ import reflectDescription from "../prompts/tools/reflect.md" with { type: "text"
6
6
  import type { ToolSession } from ".";
7
7
 
8
8
  const hindsightReflectSchema = z.object({
9
- query: z.string().describe("The question to answer using long-term memory."),
10
- context: z.string().describe("Optional additional context to guide the reflection.").optional(),
9
+ query: z.string().describe("question to answer"),
10
+ context: z.string().describe("optional context").optional(),
11
11
  });
12
12
 
13
13
  export type HindsightReflectParams = z.infer<typeof hindsightReflectSchema>;
@@ -7,16 +7,12 @@ const hindsightRetainSchema = z.object({
7
7
  items: z
8
8
  .array(
9
9
  z.object({
10
- content: z
11
- .string()
12
- .describe("The information to remember. Be specific and self-contained — include who, what, when, why."),
13
- context: z.string().describe("Optional context describing where this information came from.").optional(),
10
+ content: z.string().describe("information to remember"),
11
+ context: z.string().describe("source context").optional(),
14
12
  }),
15
13
  )
16
14
  .min(1)
17
- .describe(
18
- "One or more memories to retain. Batch related facts in a single call rather than calling retain repeatedly — they are deduplicated and consolidated together.",
19
- ),
15
+ .describe("memories to retain"),
20
16
  });
21
17
 
22
18
  export type HindsightRetainParams = z.infer<typeof hindsightRetainSchema>;
@@ -514,7 +514,14 @@ export async function createTools(session: ToolSession, toolNames?: string[]): P
514
514
  // Injected unconditionally into every agent, regardless of requested tool list.
515
515
  const autoQA = isAutoQaEnabled(session.settings);
516
516
  if (autoQA && !tools.some(t => t.name === "report_tool_issue")) {
517
- const qaTool = await HIDDEN_TOOLS.report_tool_issue(session);
517
+ // Build the enum from tools we just constructed via BUILTIN_TOOLS / HIDDEN_TOOLS.
518
+ // Extension overrides (e.g. a user's custom `bash`) get added later by
519
+ // other code paths, so they're absent here — exactly what we want; MCP /
520
+ // extension tools never end up in the report enum.
521
+ const activeBuiltinNames = tools
522
+ .map(t => t.name)
523
+ .filter(name => (name in BUILTIN_TOOLS || name in HIDDEN_TOOLS) && name !== "report_tool_issue");
524
+ const qaTool = createReportToolIssueTool(session, activeBuiltinNames);
518
525
  if (qaTool) {
519
526
  tools.push(wrapToolWithMetaNotice(qaTool));
520
527
  }
@@ -1,4 +1,5 @@
1
1
  import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@oh-my-pi/pi-agent-core";
2
+ import { instrumentedCompleteSimple, resolveTelemetry } from "@oh-my-pi/pi-agent-core";
2
3
  import { type Api, completeSimple, type Model } from "@oh-my-pi/pi-ai";
3
4
  import { prompt } from "@oh-my-pi/pi-utils";
4
5
  import * as z from "zod/v4";
@@ -118,7 +119,8 @@ export class InspectImageTool implements AgentTool<typeof inspectImageSchema, In
118
119
  throw new ToolError("inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.");
119
120
  }
120
121
 
121
- const response = await this.completeImageRequest(
122
+ const telemetry = resolveTelemetry(this.session.getTelemetry?.(), this.session.getSessionId?.() ?? undefined);
123
+ const response = await instrumentedCompleteSimple(
122
124
  model,
123
125
  {
124
126
  systemPrompt: [prompt.render(inspectImageSystemPromptTemplate)],
@@ -134,6 +136,7 @@ export class InspectImageTool implements AgentTool<typeof inspectImageSchema, In
134
136
  ],
135
137
  },
136
138
  { apiKey, signal },
139
+ { telemetry, oneshotKind: "inspect_image", completeImpl: this.completeImageRequest },
137
140
  );
138
141
 
139
142
  if (response.stopReason === "error") {
package/src/tools/irc.ts CHANGED
@@ -26,18 +26,10 @@ import type { AgentRef, AgentRegistry } from "../registry/agent-registry";
26
26
  import type { ToolSession } from ".";
27
27
 
28
28
  const ircSchema = z.object({
29
- op: z
30
- .union([
31
- z.literal("send").describe("Send a message to one peer or to all peers"),
32
- z.literal("list").describe("List currently visible peers"),
33
- ])
34
- .describe("IRC operation"),
35
- to: z.string().optional().describe('Recipient agent id (e.g. "0-Main", "0-AuthLoader") or "all" to broadcast'),
36
- message: z.string().optional().describe("Message body to deliver"),
37
- awaitReply: z
38
- .boolean()
39
- .optional()
40
- .describe("Wait for the recipient's prose reply (default: true for DM, false for broadcast)"),
29
+ op: z.enum(["send", "list"]).describe("irc operation"),
30
+ to: z.string().optional().describe('recipient agent id or "all"'),
31
+ message: z.string().optional().describe("message body"),
32
+ awaitReply: z.boolean().optional().describe("wait for prose reply"),
41
33
  });
42
34
 
43
35
  type IrcParams = z.infer<typeof ircSchema>;