@oh-my-pi/pi-coding-agent 15.10.3 → 15.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. package/CHANGELOG.md +72 -0
  2. package/dist/types/capability/rule-buckets.d.ts +1 -1
  3. package/dist/types/capability/rule.d.ts +6 -1
  4. package/dist/types/cli/update-cli.d.ts +11 -1
  5. package/dist/types/config/model-registry.d.ts +18 -1
  6. package/dist/types/discovery/at-imports.d.ts +15 -0
  7. package/dist/types/edit/diff.d.ts +3 -2
  8. package/dist/types/eval/__tests__/helpers-local-roots.test.d.ts +1 -0
  9. package/dist/types/eval/__tests__/js-context-manager.test.d.ts +1 -0
  10. package/dist/types/eval/backend.d.ts +7 -0
  11. package/dist/types/eval/bridge-timeout.d.ts +1 -1
  12. package/dist/types/eval/{llm-bridge.d.ts → completion-bridge.d.ts} +8 -8
  13. package/dist/types/eval/idle-timeout.d.ts +1 -1
  14. package/dist/types/eval/js/context-manager.d.ts +1 -0
  15. package/dist/types/eval/js/executor.d.ts +2 -0
  16. package/dist/types/eval/js/index.d.ts +1 -1
  17. package/dist/types/eval/js/shared/helpers.d.ts +6 -0
  18. package/dist/types/eval/js/shared/runtime.d.ts +5 -0
  19. package/dist/types/eval/js/worker-protocol.d.ts +6 -0
  20. package/dist/types/eval/py/executor.d.ts +7 -0
  21. package/dist/types/eval/py/index.d.ts +1 -1
  22. package/dist/types/export/ttsr.d.ts +14 -0
  23. package/dist/types/extensibility/extensions/types.d.ts +8 -1
  24. package/dist/types/extensibility/legacy-pi-ai-shim.d.ts +1 -1
  25. package/dist/types/internal-urls/local-protocol.d.ts +10 -0
  26. package/dist/types/mcp/oauth-flow.d.ts +2 -2
  27. package/dist/types/modes/components/custom-editor.d.ts +3 -0
  28. package/dist/types/modes/components/{status-line.d.ts → status-line/component.d.ts} +2 -32
  29. package/dist/types/modes/components/status-line/index.d.ts +1 -0
  30. package/dist/types/modes/components/status-line/types.d.ts +31 -2
  31. package/dist/types/modes/image-references.d.ts +8 -3
  32. package/dist/types/modes/interactive-mode.d.ts +1 -1
  33. package/dist/types/modes/theme/theme.d.ts +2 -1
  34. package/dist/types/modes/types.d.ts +2 -1
  35. package/dist/types/modes/utils/ui-helpers.d.ts +2 -2
  36. package/dist/types/session/agent-session.d.ts +0 -2
  37. package/dist/types/tools/ask.d.ts +1 -0
  38. package/dist/types/tools/browser/tab-worker.d.ts +15 -0
  39. package/dist/types/tools/index.d.ts +17 -0
  40. package/dist/types/tools/render-utils.d.ts +1 -1
  41. package/dist/types/tools/tool-timeouts.d.ts +1 -1
  42. package/dist/types/utils/block-context.d.ts +35 -0
  43. package/dist/types/utils/image-loading.d.ts +12 -0
  44. package/package.json +29 -9
  45. package/src/capability/rule-buckets.ts +4 -2
  46. package/src/capability/rule.ts +10 -1
  47. package/src/cli/auth-broker-cli.ts +6 -7
  48. package/src/cli/auth-gateway-cli.ts +1 -1
  49. package/src/cli/list-models.ts +5 -0
  50. package/src/cli/update-cli.ts +138 -16
  51. package/src/config/model-registry.ts +81 -2
  52. package/src/debug/index.ts +4 -8
  53. package/src/discovery/at-imports.ts +273 -0
  54. package/src/discovery/builtin-rules/index.ts +4 -0
  55. package/src/discovery/builtin-rules/ts-no-test-timers.md +55 -0
  56. package/src/discovery/builtin-rules/ts-redundant-clear-guard.md +75 -0
  57. package/src/discovery/helpers.ts +2 -1
  58. package/src/edit/diff.ts +114 -4
  59. package/src/edit/hashline/diff.ts +1 -1
  60. package/src/edit/hashline/execute.ts +1 -1
  61. package/src/edit/modes/patch.ts +6 -2
  62. package/src/edit/modes/replace.ts +1 -1
  63. package/src/edit/renderer.ts +12 -2
  64. package/src/eval/__tests__/agent-bridge.test.ts +13 -0
  65. package/src/eval/__tests__/{llm-bridge.test.ts → completion-bridge.test.ts} +60 -54
  66. package/src/eval/__tests__/helpers-local-roots.test.ts +58 -0
  67. package/src/eval/__tests__/js-context-manager.test.ts +241 -0
  68. package/src/eval/agent-bridge.ts +6 -1
  69. package/src/eval/backend.ts +15 -0
  70. package/src/eval/bridge-timeout.ts +1 -1
  71. package/src/eval/{llm-bridge.ts → completion-bridge.ts} +30 -27
  72. package/src/eval/idle-timeout.ts +1 -1
  73. package/src/eval/js/context-manager.ts +70 -8
  74. package/src/eval/js/executor.ts +3 -0
  75. package/src/eval/js/index.ts +7 -1
  76. package/src/eval/js/shared/helpers.ts +53 -6
  77. package/src/eval/js/shared/prelude.txt +4 -4
  78. package/src/eval/js/shared/runtime.ts +8 -0
  79. package/src/eval/js/tool-bridge.ts +3 -3
  80. package/src/eval/js/worker-core.ts +1 -0
  81. package/src/eval/js/worker-entry.ts +6 -0
  82. package/src/eval/js/worker-protocol.ts +6 -0
  83. package/src/eval/py/executor.ts +12 -0
  84. package/src/eval/py/index.ts +7 -1
  85. package/src/eval/py/prelude.py +46 -7
  86. package/src/eval/py/runner.py +1 -0
  87. package/src/exa/render.ts +1 -1
  88. package/src/export/ttsr.ts +122 -1
  89. package/src/extensibility/extensions/types.ts +8 -1
  90. package/src/extensibility/legacy-pi-ai-shim.ts +1 -1
  91. package/src/extensibility/plugins/doctor.ts +1 -1
  92. package/src/extensibility/plugins/legacy-pi-compat.ts +6 -5
  93. package/src/goals/tools/goal-tool.ts +1 -1
  94. package/src/internal-urls/docs-index.generated.ts +8 -6
  95. package/src/internal-urls/local-protocol.ts +13 -0
  96. package/src/lsp/render.ts +8 -6
  97. package/src/mcp/oauth-flow.ts +3 -3
  98. package/src/mcp/render.ts +7 -1
  99. package/src/modes/components/custom-editor.ts +12 -6
  100. package/src/modes/components/login-dialog.ts +1 -1
  101. package/src/modes/components/oauth-selector.ts +4 -4
  102. package/src/modes/components/read-tool-group.ts +10 -3
  103. package/src/modes/components/{status-line.ts → status-line/component.ts} +18 -40
  104. package/src/modes/components/status-line/index.ts +1 -0
  105. package/src/modes/components/status-line/types.ts +23 -8
  106. package/src/modes/components/tips.txt +1 -1
  107. package/src/modes/components/tool-execution.ts +1 -1
  108. package/src/modes/components/transcript-container.ts +17 -10
  109. package/src/modes/components/user-message.ts +6 -3
  110. package/src/modes/components/welcome.ts +1 -1
  111. package/src/modes/controllers/extension-ui-controller.ts +143 -127
  112. package/src/modes/controllers/input-controller.ts +36 -10
  113. package/src/modes/controllers/mcp-command-controller.ts +28 -12
  114. package/src/modes/controllers/selector-controller.ts +4 -11
  115. package/src/modes/controllers/ssh-command-controller.ts +2 -2
  116. package/src/modes/image-references.ts +13 -7
  117. package/src/modes/interactive-mode.ts +2 -2
  118. package/src/modes/rpc/rpc-mode.ts +1 -1
  119. package/src/modes/setup-wizard/scenes/sign-in.ts +3 -11
  120. package/src/modes/theme/theme.ts +95 -1
  121. package/src/modes/types.ts +2 -1
  122. package/src/modes/utils/ui-helpers.ts +14 -5
  123. package/src/prompts/system/tiny-title-system.md +1 -1
  124. package/src/prompts/system/title-system.md +16 -3
  125. package/src/prompts/system/workflow-notice.md +1 -1
  126. package/src/prompts/tools/bash.md +1 -1
  127. package/src/prompts/tools/eval.md +6 -6
  128. package/src/sdk.ts +31 -14
  129. package/src/session/agent-session.ts +213 -155
  130. package/src/session/session-manager.ts +1 -1
  131. package/src/slash-commands/builtin-registry.ts +1 -1
  132. package/src/system-prompt.ts +15 -9
  133. package/src/task/render.ts +20 -8
  134. package/src/tools/ask.ts +14 -5
  135. package/src/tools/bash-interactive.ts +1 -1
  136. package/src/tools/bash.ts +14 -2
  137. package/src/tools/browser/render.ts +5 -2
  138. package/src/tools/browser/tab-worker.ts +211 -91
  139. package/src/tools/debug.ts +5 -2
  140. package/src/tools/eval-render.ts +8 -5
  141. package/src/tools/eval.ts +2 -2
  142. package/src/tools/gh-renderer.ts +29 -15
  143. package/src/tools/index.ts +32 -0
  144. package/src/tools/inspect-image-renderer.ts +12 -5
  145. package/src/tools/job.ts +9 -6
  146. package/src/tools/memory-render.ts +19 -5
  147. package/src/tools/read.ts +165 -18
  148. package/src/tools/render-utils.ts +3 -1
  149. package/src/tools/resolve.ts +1 -1
  150. package/src/tools/review.ts +1 -1
  151. package/src/tools/ssh.ts +4 -1
  152. package/src/tools/todo.ts +8 -1
  153. package/src/tools/tool-timeouts.ts +1 -1
  154. package/src/tools/write.ts +1 -1
  155. package/src/tui/code-cell.ts +1 -1
  156. package/src/utils/block-context.ts +312 -0
  157. package/src/utils/image-loading.ts +31 -1
  158. package/src/utils/title-generator.ts +2 -2
  159. package/src/web/search/providers/codex.ts +1 -1
  160. package/src/web/search/render.ts +14 -6
  161. /package/dist/types/eval/__tests__/{llm-bridge.test.d.ts → completion-bridge.test.d.ts} +0 -0
package/src/edit/diff.ts CHANGED
@@ -6,6 +6,7 @@
6
6
  */
7
7
  import * as Diff from "diff";
8
8
  import { resolveToCwd } from "../tools/path-utils";
9
+ import { type BlockContextSource, findBlockContextLines } from "../utils/block-context";
9
10
  import { DEFAULT_FUZZY_THRESHOLD, EditMatchError, findMatch } from "./modes/replace";
10
11
  import { adjustIndentation, normalizeToLF, stripBom } from "./normalize";
11
12
  import { readEditFileText } from "./read-file";
@@ -54,11 +55,109 @@ function formatNumberedDiffLine(prefix: "+" | "-" | " ", lineNum: number, conten
54
55
  return `${prefix}${lineNum}|${content}`;
55
56
  }
56
57
 
58
+ type DiffSource = "old" | "new";
59
+
60
+ interface ParsedNumberedDiffRow {
61
+ prefix: "+" | "-" | " ";
62
+ lineNumber: number;
63
+ content: string;
64
+ source: DiffSource;
65
+ }
66
+
67
+ function parseNumberedDiffRow(row: string): ParsedNumberedDiffRow | undefined {
68
+ const match = /^([+\- ])(\d+)\|(.*)$/s.exec(row);
69
+ if (!match) return undefined;
70
+ const prefix = match[1] as "+" | "-" | " ";
71
+ const lineNumber = Number.parseInt(match[2], 10);
72
+ if (!Number.isFinite(lineNumber)) return undefined;
73
+ return {
74
+ prefix,
75
+ lineNumber,
76
+ content: match[3] ?? "",
77
+ source: prefix === "+" ? "new" : "old",
78
+ };
79
+ }
80
+
81
+ function isDiffChangeRow(row: string | undefined): boolean {
82
+ return row !== undefined && (row.startsWith("+") || row.startsWith("-"));
83
+ }
84
+
85
+ function adjustedContextInsertIndex(rows: readonly string[], index: number): number {
86
+ let start = index;
87
+ while (start > 0 && isDiffChangeRow(rows[start - 1])) start--;
88
+ let end = index;
89
+ while (end < rows.length && isDiffChangeRow(rows[end])) end++;
90
+ return index > start && index < end ? end : index;
91
+ }
92
+
93
+ function insertBracketContextRows(
94
+ rows: string[],
95
+ source: DiffSource,
96
+ contextLines: ReadonlyMap<number, string>,
97
+ seenRows: Set<string>,
98
+ ): void {
99
+ const context = [...contextLines].sort(([left], [right]) => left - right);
100
+ for (const [lineNumber, text] of context) {
101
+ const row = formatNumberedDiffLine(" ", lineNumber, text);
102
+ if (seenRows.has(row)) continue;
103
+
104
+ let insertIndex = rows.length;
105
+ let previousSourceLine: number | undefined;
106
+ let nextSourceLine: number | undefined;
107
+ for (let i = 0; i < rows.length; i++) {
108
+ const parsed = parseNumberedDiffRow(rows[i]);
109
+ if (!parsed || parsed.source !== source) continue;
110
+ if (parsed.lineNumber < lineNumber) {
111
+ previousSourceLine = parsed.lineNumber;
112
+ continue;
113
+ }
114
+ nextSourceLine = parsed.lineNumber;
115
+ insertIndex = i;
116
+ break;
117
+ }
118
+
119
+ const chunk: string[] = [];
120
+ if (previousSourceLine !== undefined && lineNumber > previousSourceLine + 1) chunk.push("...");
121
+ chunk.push(row);
122
+ if (nextSourceLine !== undefined && nextSourceLine > lineNumber + 1) chunk.push("...");
123
+
124
+ const adjustedIndex = adjustedContextInsertIndex(rows, insertIndex);
125
+ rows.splice(adjustedIndex, 0, ...chunk);
126
+ for (const inserted of chunk) seenRows.add(inserted);
127
+ }
128
+ }
129
+
130
+ function addMatchingBracketContextRows(
131
+ rows: string[],
132
+ oldLines: readonly string[],
133
+ newLines: readonly string[],
134
+ source: BlockContextSource,
135
+ ): void {
136
+ const oldVisible: number[] = [];
137
+ const newVisible: number[] = [];
138
+ const seenRows = new Set(rows);
139
+
140
+ for (const row of rows) {
141
+ const parsed = parseNumberedDiffRow(row);
142
+ if (!parsed) continue;
143
+ if (parsed.source === "old") oldVisible.push(parsed.lineNumber);
144
+ else newVisible.push(parsed.lineNumber);
145
+ }
146
+
147
+ insertBracketContextRows(rows, "old", findBlockContextLines(oldLines, oldVisible, source), seenRows);
148
+ insertBracketContextRows(rows, "new", findBlockContextLines(newLines, newVisible, source), seenRows);
149
+ }
150
+
57
151
  /**
58
152
  * Generate a unified diff string with line numbers and context.
59
153
  * Returns both the diff string and the first changed line number (in the new file).
60
154
  */
61
- export function generateDiffString(oldContent: string, newContent: string, contextLines = 2): DiffResult {
155
+ export function generateDiffString(
156
+ oldContent: string,
157
+ newContent: string,
158
+ contextLines = 2,
159
+ source: BlockContextSource = {},
160
+ ): DiffResult {
62
161
  const parts = Diff.diffLines(oldContent, newContent);
63
162
  const output: string[] = [];
64
163
 
@@ -133,8 +232,10 @@ export function generateDiffString(oldContent: string, newContent: string, conte
133
232
  newLineNum++;
134
233
  }
135
234
 
235
+ // Mid-skip placeholder is omitted too: the jump between the trailing
236
+ // number of the leading context and the leading number of the
237
+ // trailing context conveys the gap, just like leading/trailing skips.
136
238
  if (middleSkip > 0) {
137
- output.push(formatNumberedDiffLine(" ", oldLineNum, "..."));
138
239
  oldLineNum += middleSkip;
139
240
  newLineNum += middleSkip;
140
241
  for (const line of linesToShow.slice(firstChunkLength)) {
@@ -160,6 +261,8 @@ export function generateDiffString(oldContent: string, newContent: string, conte
160
261
  }
161
262
  }
162
263
 
264
+ addMatchingBracketContextRows(output, oldContent.split("\n"), newContent.split("\n"), source);
265
+
163
266
  return { diff: output.join("\n"), firstChangedLine };
164
267
  }
165
268
 
@@ -187,7 +290,12 @@ export interface ReplaceResult {
187
290
  * Generate a unified diff string without file headers.
188
291
  * Returns both the diff string and the first changed line number (in the new file).
189
292
  */
190
- export function generateUnifiedDiffString(oldContent: string, newContent: string, contextLines = 3): DiffResult {
293
+ export function generateUnifiedDiffString(
294
+ oldContent: string,
295
+ newContent: string,
296
+ contextLines = 3,
297
+ source: BlockContextSource = {},
298
+ ): DiffResult {
191
299
  const patch = Diff.structuredPatch("", "", oldContent, newContent, "", "", { context: contextLines });
192
300
  const output: string[] = [];
193
301
  let firstChangedLine: number | undefined;
@@ -218,6 +326,8 @@ export function generateUnifiedDiffString(oldContent: string, newContent: string
218
326
  }
219
327
  }
220
328
 
329
+ addMatchingBracketContextRows(output, oldContent.split("\n"), newContent.split("\n"), source);
330
+
221
331
  return { diff: output.join("\n"), firstChangedLine };
222
332
  }
223
333
 
@@ -805,7 +915,7 @@ export async function computeEditDiff(
805
915
  };
806
916
  }
807
917
 
808
- return generateDiffString(normalizedContent, result.content);
918
+ return generateDiffString(normalizedContent, result.content, undefined, { path });
809
919
  } catch (err) {
810
920
  return { error: err instanceof Error ? err.message : String(err) };
811
921
  }
@@ -230,7 +230,7 @@ export async function computeHashlineSectionDiff(
230
230
  if (options.streaming) return buildStreamingSectionDiff(section, normalized);
231
231
  const result = applyPreviewEdits({ section, absolutePath, normalized, snapshots, options });
232
232
  if (normalized === result.text) return { error: `No changes would be made to ${section.path}.` };
233
- return generateDiffString(normalized, result.text);
233
+ return generateDiffString(normalized, result.text, undefined, { path: section.path });
234
234
  } catch (err) {
235
235
  return { error: err instanceof Error ? err.message : String(err) };
236
236
  }
@@ -97,7 +97,7 @@ function renderSection(result: PatchSectionResult, diagnostics: FileDiagnosticsR
97
97
  };
98
98
  }
99
99
 
100
- const diff = generateDiffString(result.before, result.after);
100
+ const diff = generateDiffString(result.before, result.after, undefined, { path: result.path });
101
101
  const preview = buildCompactDiffPreview(diff.diff);
102
102
  const meta = outputMeta()
103
103
  .diagnostics(diagnostics?.summary ?? "", diagnostics?.messages ?? [])
@@ -1571,7 +1571,9 @@ export async function computePatchDiff(
1571
1571
  if (!normalizedOld && !normalizedNew) {
1572
1572
  return { diff: "", firstChangedLine: undefined };
1573
1573
  }
1574
- return generateUnifiedDiffString(normalizedOld, normalizedNew);
1574
+ return generateUnifiedDiffString(normalizedOld, normalizedNew, undefined, {
1575
+ path: result.change.newPath ?? result.change.path,
1576
+ });
1575
1577
  } catch (err) {
1576
1578
  return { error: err instanceof Error ? err.message : String(err) };
1577
1579
  }
@@ -1785,7 +1787,9 @@ export async function executePatchSingle(
1785
1787
  if (result.change.type === "update" && result.change.oldContent && result.change.newContent) {
1786
1788
  const normalizedOld = normalizeToLF(stripBom(result.change.oldContent).text);
1787
1789
  const normalizedNew = normalizeToLF(stripBom(result.change.newContent).text);
1788
- diffResult = generateUnifiedDiffString(normalizedOld, normalizedNew);
1790
+ diffResult = generateUnifiedDiffString(normalizedOld, normalizedNew, undefined, {
1791
+ path: result.change.newPath ?? result.change.path,
1792
+ });
1789
1793
  }
1790
1794
 
1791
1795
  let resultText: string;
@@ -1078,7 +1078,7 @@ export async function executeReplaceSingle(
1078
1078
  );
1079
1079
  invalidateFsScanAfterWrite(absolutePath);
1080
1080
 
1081
- const diffResult = generateDiffString(normalizedContent, result.content);
1081
+ const diffResult = generateDiffString(normalizedContent, result.content, undefined, { path });
1082
1082
  const resultText =
1083
1083
  result.count > 1
1084
1084
  ? `Successfully replaced ${result.count} occurrences in ${path}.`
@@ -260,6 +260,7 @@ function renderEditHeader(
260
260
  uiTheme: Theme,
261
261
  options: {
262
262
  icon: "pending" | "success" | "error";
263
+ iconOverride?: string;
263
264
  spinnerFrame?: number;
264
265
  op?: Operation;
265
266
  rawPath: string;
@@ -279,8 +280,16 @@ function renderEditHeader(
279
280
  const formatted = formatEditDescription(options.rawPath, uiTheme, descriptionOptions);
280
281
  const suffix = `${options.statsSuffix ?? ""}${options.extraSuffix ?? ""}`;
281
282
  const buildHeader = (description: string): string =>
282
- renderStatusLine({ icon: options.icon, spinnerFrame: options.spinnerFrame, title, description }, uiTheme) +
283
- suffix;
283
+ renderStatusLine(
284
+ {
285
+ icon: options.icon,
286
+ iconOverride: options.iconOverride,
287
+ spinnerFrame: options.spinnerFrame,
288
+ title,
289
+ description,
290
+ },
291
+ uiTheme,
292
+ ) + suffix;
284
293
 
285
294
  const header = buildHeader(formatted.description);
286
295
  const overflow = visibleWidth(header) - editHeaderLabelBudget(width, uiTheme);
@@ -633,6 +642,7 @@ function renderSingleFileResult(
633
642
  const statsSuffix = headerDiff ? formatDiffStatsSuffix(headerDiff, uiTheme) : "";
634
643
  const header = renderEditHeader(width, uiTheme, {
635
644
  icon: isError ? "error" : "success",
645
+ iconOverride: !isError && !options.isPartial ? uiTheme.styledSymbol("tool.edit", "accent") : undefined,
636
646
  op,
637
647
  rawPath,
638
648
  rename,
@@ -205,6 +205,19 @@ describe("runEvalAgent", () => {
205
205
  expect(secondOptions.outputSchema).toBeUndefined();
206
206
  });
207
207
 
208
+ it("forces LSP off for bridge subagents even when task.enableLsp is on", async () => {
209
+ mockAgents();
210
+ const runSpy = vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => singleResult(options));
211
+ // makeSession() defaults to enableLsp: true and task.enableLsp: true.
212
+ const session = makeSession();
213
+
214
+ await runEvalAgent({ prompt: "hello" }, { session });
215
+
216
+ const options = runSpy.mock.calls[0]?.[0];
217
+ if (!options) throw new Error("runSubprocess was not called");
218
+ expect(options.enableLsp).toBe(false);
219
+ });
220
+
208
221
  it("maps successful and failed subagent results", async () => {
209
222
  mockAgents();
210
223
  const runSpy = vi.spyOn(taskExecutor, "runSubprocess");
@@ -10,10 +10,10 @@ import { Settings } from "../../config/settings";
10
10
  import type { ToolSession } from "../../tools";
11
11
  import { ToolError } from "../../tools/tool-errors";
12
12
  import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
13
+ import { runEvalCompletion } from "../completion-bridge";
13
14
  import { IdleTimeout } from "../idle-timeout";
14
15
  import { disposeAllVmContexts } from "../js/context-manager";
15
16
  import { executeJs } from "../js/executor";
16
- import { runEvalLlm } from "../llm-bridge";
17
17
  import { disposeAllKernelSessions, type PythonResult } from "../py/executor";
18
18
 
19
19
  function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
@@ -98,16 +98,19 @@ function assistant(opts: {
98
98
  };
99
99
  }
100
100
 
101
- async function runPythonLlmInSubprocess(options: { structured: boolean; tempDir: TempDir }): Promise<PythonResult> {
101
+ async function runPythonCompletionInSubprocess(options: {
102
+ structured: boolean;
103
+ tempDir: TempDir;
104
+ }): Promise<PythonResult> {
102
105
  const repoRoot = path.resolve(import.meta.dir, "../../../..");
103
- const scriptPath = path.join(options.tempDir.path(), "run-python-llm.ts");
104
- const resultPath = path.join(options.tempDir.path(), "python-llm-result.json");
106
+ const scriptPath = path.join(options.tempDir.path(), "run-python-completion.ts");
107
+ const resultPath = path.join(options.tempDir.path(), "python-completion-result.json");
105
108
  const aiPath = path.resolve(import.meta.dir, "../../../../ai/src/index.ts");
106
109
  const executorPath = path.resolve(import.meta.dir, "../py/executor.ts");
107
110
  const settingsPath = path.resolve(import.meta.dir, "../../config/settings.ts");
108
111
  const code = options.structured
109
- ? 'import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))'
110
- : 'print(llm("hi", model="smol"))';
112
+ ? 'import json\nprint(json.dumps(completion("hi", schema={"type": "object"})))'
113
+ : 'print(completion("hi", model="smol"))';
111
114
  const responseContent = options.structured
112
115
  ? '[{ type: "toolCall", id: "tc-1", name: "respond", arguments: { ok: true } }]'
113
116
  : '[{ type: "text", text: "hello from python" }]';
@@ -153,7 +156,7 @@ vi.spyOn(ai, "completeSimple").mockResolvedValue({
153
156
  });
154
157
  const result = await executePython(${JSON.stringify(code)}, {
155
158
  cwd: ${JSON.stringify(options.tempDir.path())},
156
- sessionId: ${JSON.stringify(`py-llm:${options.structured ? "struct" : "plain"}`)},
159
+ sessionId: ${JSON.stringify(`py-completion:${options.structured ? "struct" : "plain"}`)},
157
160
  sessionFile: ${JSON.stringify(path.join(options.tempDir.path(), "session.jsonl"))},
158
161
  toolSession: session,
159
162
  kernelMode: "per-call",
@@ -165,11 +168,12 @@ process.exit(0);
165
168
  const child = await $`bun ${scriptPath}`.cwd(repoRoot).quiet().nothrow();
166
169
  const stdout = child.stdout.toString();
167
170
  const stderr = child.stderr.toString();
168
- if (child.exitCode !== 0) throw new Error(stderr || stdout || `Python llm subprocess exited with ${child.exitCode}`);
171
+ if (child.exitCode !== 0)
172
+ throw new Error(stderr || stdout || `Python completion subprocess exited with ${child.exitCode}`);
169
173
  return (await Bun.file(resultPath).json()) as PythonResult;
170
174
  }
171
175
 
172
- describe("runEvalLlm", () => {
176
+ describe("runEvalCompletion", () => {
173
177
  afterEach(() => {
174
178
  vi.restoreAllMocks();
175
179
  });
@@ -178,9 +182,9 @@ describe("runEvalLlm", () => {
178
182
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
179
183
  const session = makeSession();
180
184
 
181
- await runEvalLlm({ prompt: "q", model: "smol" }, { session });
182
- await runEvalLlm({ prompt: "q", model: "default" }, { session });
183
- await runEvalLlm({ prompt: "q", model: "slow" }, { session });
185
+ await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
186
+ await runEvalCompletion({ prompt: "q", model: "default" }, { session });
187
+ await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
184
188
 
185
189
  const resolved = spy.mock.calls.map(call => {
186
190
  const model = call[0] as Model<Api>;
@@ -193,7 +197,7 @@ describe("runEvalLlm", () => {
193
197
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
194
198
  const session = makeSession({ available: [SMOL, DEFAULT, SLOW], activeModel: "p/slow" });
195
199
 
196
- await runEvalLlm({ prompt: "q", model: "default" }, { session });
200
+ await runEvalCompletion({ prompt: "q", model: "default" }, { session });
197
201
 
198
202
  const model = spy.mock.calls[0]?.[0] as Model<Api>;
199
203
  expect(`${model.provider}/${model.id}`).toBe("p/slow");
@@ -201,7 +205,7 @@ describe("runEvalLlm", () => {
201
205
 
202
206
  it("returns the completion text in plain mode", async () => {
203
207
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "the answer" }));
204
- const result = await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
208
+ const result = await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
205
209
  expect(result.text).toBe("the answer");
206
210
  expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
207
211
  });
@@ -209,10 +213,10 @@ describe("runEvalLlm", () => {
209
213
  it("supplies a non-empty systemPrompt when system is omitted (codex 'Instructions are required' guard)", async () => {
210
214
  // The openai-codex Responses transformer drops `instructions` when no
211
215
  // system prompt is provided, and the remote endpoint then 400s with
212
- // "Instructions are required". runEvalLlm must always carry a non-empty
213
- // systemPrompt so `llm("…")` without a `system` argument works.
216
+ // "Instructions are required". runEvalCompletion must always carry a non-empty
217
+ // systemPrompt so `completion("…")` without a `system` argument works.
214
218
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
215
- await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
219
+ await runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() });
216
220
  const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
217
221
  expect(ctx.systemPrompt).toBeDefined();
218
222
  expect(ctx.systemPrompt?.length).toBeGreaterThan(0);
@@ -221,7 +225,7 @@ describe("runEvalLlm", () => {
221
225
 
222
226
  it("honors an explicit system prompt instead of overriding it", async () => {
223
227
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
224
- await runEvalLlm({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
228
+ await runEvalCompletion({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
225
229
  const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
226
230
  expect(ctx.systemPrompt).toEqual(["Be terse."]);
227
231
  });
@@ -230,7 +234,7 @@ describe("runEvalLlm", () => {
230
234
  const spy = vi
231
235
  .spyOn(ai, "completeSimple")
232
236
  .mockResolvedValue(assistant({ toolCall: { name: "respond", arguments: { answer: 42 } } }));
233
- const result = await runEvalLlm(
237
+ const result = await runEvalCompletion(
234
238
  { prompt: "q", model: "smol", schema: { type: "object", properties: { answer: { type: "number" } } } },
235
239
  { session: makeSession() },
236
240
  );
@@ -246,7 +250,7 @@ describe("runEvalLlm", () => {
246
250
 
247
251
  it("falls back to JSON embedded in text when the model skips the respond tool", async () => {
248
252
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: 'here: {"answer": 7}' }));
249
- const result = await runEvalLlm(
253
+ const result = await runEvalCompletion(
250
254
  { prompt: "q", model: "smol", schema: { type: "object" } },
251
255
  { session: makeSession() },
252
256
  );
@@ -257,8 +261,8 @@ describe("runEvalLlm", () => {
257
261
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
258
262
  const session = makeSession({ available: [SMOL, DEFAULT, REASONING_SLOW] });
259
263
 
260
- await runEvalLlm({ prompt: "q", model: "smol" }, { session });
261
- await runEvalLlm({ prompt: "q", model: "slow" }, { session });
264
+ await runEvalCompletion({ prompt: "q", model: "smol" }, { session });
265
+ await runEvalCompletion({ prompt: "q", model: "slow" }, { session });
262
266
 
263
267
  const smolOpts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
264
268
  const slowOpts = spy.mock.calls[1]?.[2] as { reasoning?: unknown };
@@ -269,47 +273,49 @@ describe("runEvalLlm", () => {
269
273
  it("does not request reasoning for the slow tier on a non-reasoning model", async () => {
270
274
  const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
271
275
  // SLOW is reasoning:false — must not trip requireSupportedEffort downstream.
272
- const result = await runEvalLlm({ prompt: "q", model: "slow" }, { session: makeSession() });
276
+ const result = await runEvalCompletion({ prompt: "q", model: "slow" }, { session: makeSession() });
273
277
  expect(result.text).toBe("ok");
274
278
  const opts = spy.mock.calls[0]?.[2] as { reasoning?: unknown };
275
279
  expect(opts.reasoning).toBeUndefined();
276
280
  });
277
281
 
278
282
  it("throws ToolError on invalid arguments", async () => {
279
- await expect(runEvalLlm({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
280
- await expect(runEvalLlm({ prompt: "q", model: "huge" }, { session: makeSession() })).rejects.toBeInstanceOf(
281
- ToolError,
282
- );
283
+ await expect(runEvalCompletion({ prompt: "" }, { session: makeSession() })).rejects.toBeInstanceOf(ToolError);
284
+ await expect(
285
+ runEvalCompletion({ prompt: "q", model: "huge" }, { session: makeSession() }),
286
+ ).rejects.toBeInstanceOf(ToolError);
283
287
  });
284
288
 
285
289
  it("throws ToolError when no model resolves for the tier", async () => {
286
290
  const session = makeSession({ available: [DEFAULT], roles: { smol: "missing/model" } });
287
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
291
+ await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
288
292
  });
289
293
 
290
294
  it("throws ToolError when the resolved model has no API key", async () => {
291
295
  const session = makeSession({ apiKey: null });
292
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
296
+ await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session })).rejects.toBeInstanceOf(ToolError);
293
297
  });
294
298
 
295
299
  it("maps error and aborted stop reasons to ToolError", async () => {
296
300
  vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "error", errorMessage: "boom" }));
297
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow("boom");
301
+ await expect(runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toThrow(
302
+ "boom",
303
+ );
298
304
 
299
305
  vi.spyOn(ai, "completeSimple").mockResolvedValueOnce(assistant({ stopReason: "aborted" }));
300
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
301
- ToolError,
302
- );
306
+ await expect(
307
+ runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
308
+ ).rejects.toBeInstanceOf(ToolError);
303
309
  });
304
310
 
305
311
  it("throws ToolError when plain mode produces no text", async () => {
306
312
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "" }));
307
- await expect(runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() })).rejects.toBeInstanceOf(
308
- ToolError,
309
- );
313
+ await expect(
314
+ runEvalCompletion({ prompt: "q", model: "smol" }, { session: makeSession() }),
315
+ ).rejects.toBeInstanceOf(ToolError);
310
316
  });
311
317
 
312
- it("pauses the idle watchdog while a slow llm() request is in flight", async () => {
318
+ it("pauses the idle watchdog while a slow completion() request is in flight", async () => {
313
319
  // A oneshot completion emits no status until it returns; delegated model
314
320
  // time must be invisible to the eval timeout budget.
315
321
  vi.spyOn(ai, "completeSimple").mockImplementation(async () => {
@@ -319,7 +325,7 @@ describe("runEvalLlm", () => {
319
325
 
320
326
  const ops: string[] = [];
321
327
  using idle = new IdleTimeout(60);
322
- const result = await runEvalLlm(
328
+ const result = await runEvalCompletion(
323
329
  { prompt: "q", model: "smol" },
324
330
  {
325
331
  session: makeSession(),
@@ -333,12 +339,12 @@ describe("runEvalLlm", () => {
333
339
  );
334
340
 
335
341
  expect(result.text).toBe("the answer");
336
- expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "llm"]);
342
+ expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "completion"]);
337
343
  expect(idle.signal.aborted).toBe(false);
338
344
  });
339
345
  });
340
346
 
341
- describe("llm() through eval runtimes", () => {
347
+ describe("completion() through eval runtimes", () => {
342
348
  afterEach(() => {
343
349
  vi.restoreAllMocks();
344
350
  });
@@ -348,13 +354,13 @@ describe("llm() through eval runtimes", () => {
348
354
  await disposeAllKernelSessions();
349
355
  });
350
356
 
351
- it("exposes llm() in the JavaScript runtime", async () => {
352
- using tempDir = TempDir.createSync("@omp-eval-llm-js-");
357
+ it("exposes completion() in the JavaScript runtime", async () => {
358
+ using tempDir = TempDir.createSync("@omp-eval-completion-js-");
353
359
  const sessionFile = path.join(tempDir.path(), "session.jsonl");
354
- const sessionId = `js-llm:${crypto.randomUUID()}`;
360
+ const sessionId = `js-completion:${crypto.randomUUID()}`;
355
361
  vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "hello from smol" }));
356
362
 
357
- const result = await executeJs('return await llm("hi", { model: "smol" });', {
363
+ const result = await executeJs('return await completion("hi", { model: "smol" });', {
358
364
  cwd: tempDir.path(),
359
365
  sessionId,
360
366
  session: makeSession(),
@@ -365,16 +371,16 @@ describe("llm() through eval runtimes", () => {
365
371
  expect(result.output.trim()).toBe("hello from smol");
366
372
  });
367
373
 
368
- it("parses structured llm() output in the JavaScript runtime", async () => {
369
- using tempDir = TempDir.createSync("@omp-eval-llm-js-struct-");
374
+ it("parses structured completion() output in the JavaScript runtime", async () => {
375
+ using tempDir = TempDir.createSync("@omp-eval-completion-js-struct-");
370
376
  const sessionFile = path.join(tempDir.path(), "session.jsonl");
371
- const sessionId = `js-llm-struct:${crypto.randomUUID()}`;
377
+ const sessionId = `js-completion-struct:${crypto.randomUUID()}`;
372
378
  vi.spyOn(ai, "completeSimple").mockResolvedValue(
373
379
  assistant({ toolCall: { name: "respond", arguments: { ok: true, n: 3 } } }),
374
380
  );
375
381
 
376
382
  const result = await executeJs(
377
- 'const r = await llm("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
383
+ 'const r = await completion("hi", { schema: { type: "object" } }); return JSON.stringify(r);',
378
384
  { cwd: tempDir.path(), sessionId, session: makeSession(), sessionFile },
379
385
  );
380
386
 
@@ -382,10 +388,10 @@ describe("llm() through eval runtimes", () => {
382
388
  expect(JSON.parse(result.output.trim())).toEqual({ ok: true, n: 3 });
383
389
  });
384
390
 
385
- it("exposes llm() in the Python runtime", async () => {
386
- const tempDir = TempDir.createSync("@omp-eval-llm-py-");
391
+ it("exposes completion() in the Python runtime", async () => {
392
+ const tempDir = TempDir.createSync("@omp-eval-completion-py-");
387
393
  try {
388
- const result = await runPythonLlmInSubprocess({ structured: false, tempDir });
394
+ const result = await runPythonCompletionInSubprocess({ structured: false, tempDir });
389
395
  expect(result.exitCode).toBe(0);
390
396
  expect(result.output.trim()).toBe("hello from python");
391
397
  } finally {
@@ -393,10 +399,10 @@ describe("llm() through eval runtimes", () => {
393
399
  }
394
400
  });
395
401
 
396
- it("parses structured llm() output in the Python runtime", async () => {
397
- const tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
402
+ it("parses structured completion() output in the Python runtime", async () => {
403
+ const tempDir = TempDir.createSync("@omp-eval-completion-py-struct-");
398
404
  try {
399
- const result = await runPythonLlmInSubprocess({ structured: true, tempDir });
405
+ const result = await runPythonCompletionInSubprocess({ structured: true, tempDir });
400
406
  expect(result.exitCode).toBe(0);
401
407
  expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
402
408
  } finally {
@@ -0,0 +1,58 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import * as path from "node:path";
3
+ import { TempDir } from "@oh-my-pi/pi-utils";
4
+ import { createHelpers, type HelperContext } from "../js/shared/helpers";
5
+
6
+ /**
7
+ * The eval helpers (`read`/`write`/`append`) must substitute injected on-disk
8
+ * roots for internal-URL schemes. Without it, `write("local://x.md")` hits a
9
+ * stdlib `path.resolve` that collapses `local://` to `local:/`, creating a junk
10
+ * `local:` directory under the cwd instead of landing where `read local://x.md`
11
+ * resolves. These lock the substitution contract and its guards.
12
+ */
13
+ function makeCtx(cwd: string, roots: Record<string, string>): HelperContext {
14
+ return {
15
+ cwd: () => cwd,
16
+ env: new Map(),
17
+ localRoots: () => roots,
18
+ emitStatus: () => {},
19
+ };
20
+ }
21
+
22
+ describe("eval js helpers internal-url resolution", () => {
23
+ it("writes, reads, and appends local:// under the injected root", async () => {
24
+ using tmp = TempDir.createSync("@eval-helpers-local-");
25
+ const root = path.join(tmp.path(), "local");
26
+ const helpers = createHelpers(makeCtx(tmp.path(), { local: root }));
27
+
28
+ const written = await helpers.writeFile("local://notes/merge-map.md", "hello");
29
+ expect(written).toBe(path.join(root, "notes", "merge-map.md"));
30
+ expect(await Bun.file(written).text()).toBe("hello");
31
+ expect(await helpers.read("local://notes/merge-map.md")).toBe("hello");
32
+
33
+ await helpers.append("local://notes/merge-map.md", " world");
34
+ expect(await helpers.read("local://notes/merge-map.md")).toBe("hello world");
35
+
36
+ // Regression: no literal `local:` directory created under the cwd.
37
+ expect(await Bun.file(path.join(tmp.path(), "local:")).exists()).toBe(false);
38
+ expect(await Bun.file(path.join(tmp.path(), "local:", "notes", "merge-map.md")).exists()).toBe(false);
39
+ });
40
+
41
+ it("rejects traversal and schemes without an injected root", async () => {
42
+ using tmp = TempDir.createSync("@eval-helpers-guard-");
43
+ const helpers = createHelpers(makeCtx(tmp.path(), { local: path.join(tmp.path(), "local") }));
44
+
45
+ await expect(helpers.writeFile("local://../escape.md", "x")).rejects.toThrow(/traversal|escapes/i);
46
+ await expect(helpers.writeFile("memory://x.md", "x")).rejects.toThrow(/not supported/i);
47
+ await expect(helpers.read("https://example.com/page")).rejects.toThrow(/not supported/i);
48
+ });
49
+
50
+ it("leaves plain relative and absolute paths resolving against the cwd", async () => {
51
+ using tmp = TempDir.createSync("@eval-helpers-plain-");
52
+ const helpers = createHelpers(makeCtx(tmp.path(), {}));
53
+
54
+ const rel = await helpers.writeFile("foo/bar.txt", "bar");
55
+ expect(rel).toBe(path.join(tmp.path(), "foo", "bar.txt"));
56
+ expect(await helpers.read("foo/bar.txt")).toBe("bar");
57
+ });
58
+ });