@oh-my-pi/pi-coding-agent 15.10.0 → 15.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +142 -1
- package/dist/types/cli/dry-balance-cli.d.ts +15 -1
- package/dist/types/cli/startup-cwd.d.ts +2 -0
- package/dist/types/commands/launch.d.ts +3 -0
- package/dist/types/commit/analysis/conventional.d.ts +2 -2
- package/dist/types/commit/analysis/summary.d.ts +2 -2
- package/dist/types/commit/changelog/generate.d.ts +2 -2
- package/dist/types/commit/changelog/index.d.ts +2 -2
- package/dist/types/commit/map-reduce/index.d.ts +3 -3
- package/dist/types/commit/map-reduce/map-phase.d.ts +2 -2
- package/dist/types/commit/map-reduce/reduce-phase.d.ts +2 -2
- package/dist/types/commit/model-selection.d.ts +10 -4
- package/dist/types/config/api-key-resolver.d.ts +34 -0
- package/dist/types/config/keybindings.d.ts +2 -2
- package/dist/types/config/model-provider-priority.d.ts +1 -0
- package/dist/types/config/model-registry.d.ts +17 -1
- package/dist/types/config/model-resolver.d.ts +4 -1
- package/dist/types/config/settings-schema.d.ts +9 -0
- package/dist/types/config/settings.d.ts +7 -2
- package/dist/types/dap/config.d.ts +14 -1
- package/dist/types/dap/types.d.ts +10 -0
- package/dist/types/debug/report-bundle.d.ts +3 -0
- package/dist/types/edit/file-snapshot-store.d.ts +18 -10
- package/dist/types/eval/py/__tests__/prelude.test.d.ts +1 -0
- package/dist/types/extensibility/extensions/types.d.ts +4 -1
- package/dist/types/lsp/client.d.ts +10 -0
- package/dist/types/lsp/utils.d.ts +3 -2
- package/dist/types/main.d.ts +3 -9
- package/dist/types/mcp/tool-bridge.d.ts +2 -0
- package/dist/types/modes/components/chat-block.d.ts +64 -0
- package/dist/types/modes/components/custom-editor.d.ts +4 -1
- package/dist/types/modes/components/overlay-box.d.ts +17 -0
- package/dist/types/modes/components/plan-review-overlay.d.ts +59 -0
- package/dist/types/modes/components/plan-toc.d.ts +41 -0
- package/dist/types/modes/components/read-tool-group.d.ts +2 -0
- package/dist/types/modes/components/status-line.d.ts +2 -0
- package/dist/types/modes/components/transcript-container.d.ts +11 -0
- package/dist/types/modes/controllers/command-controller.d.ts +1 -0
- package/dist/types/modes/controllers/event-controller.d.ts +17 -1
- package/dist/types/modes/controllers/extension-ui-controller.d.ts +0 -1
- package/dist/types/modes/controllers/input-controller.d.ts +1 -1
- package/dist/types/modes/controllers/streaming-reveal.d.ts +22 -0
- package/dist/types/modes/controllers/tan-command-controller.d.ts +6 -0
- package/dist/types/modes/interactive-mode.d.ts +16 -5
- package/dist/types/modes/magic-keywords.d.ts +1 -1
- package/dist/types/modes/markdown-prose.d.ts +1 -1
- package/dist/types/modes/theme/theme.d.ts +1 -1
- package/dist/types/modes/types.d.ts +21 -5
- package/dist/types/modes/utils/copy-targets.d.ts +21 -1
- package/dist/types/modes/workflow.d.ts +3 -3
- package/dist/types/plan-mode/approved-plan.d.ts +27 -8
- package/dist/types/plan-mode/plan-protection.d.ts +4 -4
- package/dist/types/sdk.d.ts +2 -0
- package/dist/types/session/agent-session.d.ts +21 -0
- package/dist/types/session/auth-storage.d.ts +1 -1
- package/dist/types/session/messages.d.ts +12 -0
- package/dist/types/session/session-manager.d.ts +8 -3
- package/dist/types/slash-commands/types.d.ts +4 -6
- package/dist/types/task/executor.d.ts +17 -0
- package/dist/types/task/index.d.ts +1 -0
- package/dist/types/task/render.d.ts +3 -2
- package/dist/types/tools/archive-reader.d.ts +5 -0
- package/dist/types/tools/ast-edit.d.ts +3 -0
- package/dist/types/tools/ast-grep.d.ts +3 -0
- package/dist/types/tools/bash.d.ts +1 -0
- package/dist/types/tools/eval.d.ts +8 -0
- package/dist/types/tools/find.d.ts +8 -4
- package/dist/types/tools/gh-cache-invalidation.d.ts +6 -0
- package/dist/types/tools/github-cache.d.ts +12 -0
- package/dist/types/tools/grouped-file-output.d.ts +95 -12
- package/dist/types/tools/memory-render.d.ts +4 -1
- package/dist/types/tools/path-utils.d.ts +8 -0
- package/dist/types/tools/plan-mode-guard.d.ts +8 -9
- package/dist/types/tools/render-utils.d.ts +5 -9
- package/dist/types/tools/search.d.ts +6 -2
- package/dist/types/tools/sqlite-reader.d.ts +1 -0
- package/dist/types/tools/todo.d.ts +3 -2
- package/dist/types/tools/write.d.ts +3 -0
- package/dist/types/tools/yield.d.ts +8 -0
- package/dist/types/tui/output-block.d.ts +16 -4
- package/dist/types/tui/status-line.d.ts +3 -0
- package/dist/types/utils/enhanced-paste.d.ts +20 -0
- package/dist/types/web/search/providers/kimi.d.ts +1 -1
- package/package.json +9 -9
- package/src/auto-thinking/classifier.ts +5 -1
- package/src/cli/args.ts +3 -1
- package/src/cli/dry-balance-cli.ts +54 -21
- package/src/cli/gallery-cli.ts +4 -1
- package/src/cli/gallery-fixtures/misc.ts +29 -0
- package/src/cli/startup-cwd.ts +68 -0
- package/src/commands/launch.ts +3 -0
- package/src/commit/analysis/conventional.ts +2 -2
- package/src/commit/analysis/summary.ts +2 -2
- package/src/commit/changelog/generate.ts +2 -2
- package/src/commit/changelog/index.ts +2 -2
- package/src/commit/map-reduce/index.ts +3 -3
- package/src/commit/map-reduce/map-phase.ts +2 -2
- package/src/commit/map-reduce/reduce-phase.ts +2 -2
- package/src/commit/model-selection.ts +36 -11
- package/src/commit/pipeline.ts +4 -4
- package/src/config/api-key-resolver.ts +58 -0
- package/src/config/model-provider-priority.ts +55 -0
- package/src/config/model-registry.ts +29 -24
- package/src/config/model-resolver.ts +39 -7
- package/src/config/settings-schema.ts +10 -0
- package/src/config/settings.ts +106 -43
- package/src/dap/config.ts +41 -2
- package/src/dap/defaults.json +1 -0
- package/src/dap/session.ts +1 -0
- package/src/dap/types.ts +10 -0
- package/src/debug/index.ts +47 -53
- package/src/debug/raw-sse-buffer.ts +7 -4
- package/src/debug/report-bundle.ts +9 -0
- package/src/edit/file-snapshot-store.ts +33 -1
- package/src/edit/hashline/filesystem.ts +2 -1
- package/src/edit/renderer.ts +82 -78
- package/src/eval/__tests__/llm-bridge.test.ts +110 -31
- package/src/eval/js/context-manager.ts +32 -15
- package/src/eval/llm-bridge.ts +22 -6
- package/src/eval/py/__tests__/prelude.test.ts +19 -0
- package/src/eval/py/executor.ts +23 -11
- package/src/eval/py/prelude.py +1 -1
- package/src/extensibility/extensions/types.ts +10 -1
- package/src/goals/tools/goal-tool.ts +36 -26
- package/src/internal-urls/docs-index.generated.ts +8 -8
- package/src/lsp/client.ts +23 -11
- package/src/lsp/config.ts +11 -1
- package/src/lsp/index.ts +61 -9
- package/src/lsp/utils.ts +3 -2
- package/src/main.ts +100 -72
- package/src/mcp/tool-bridge.ts +2 -0
- package/src/memories/index.ts +14 -7
- package/src/mnemopi/backend.ts +5 -1
- package/src/modes/acp/acp-agent.ts +33 -26
- package/src/modes/components/assistant-message.ts +2 -9
- package/src/modes/components/chat-block.ts +111 -0
- package/src/modes/components/copy-selector.ts +1 -44
- package/src/modes/components/custom-editor.ts +164 -109
- package/src/modes/components/custom-message.ts +1 -3
- package/src/modes/components/execution-shared.ts +1 -2
- package/src/modes/components/hook-message.ts +1 -3
- package/src/modes/components/model-selector.ts +59 -13
- package/src/modes/components/oauth-selector.ts +33 -7
- package/src/modes/components/overlay-box.ts +108 -0
- package/src/modes/components/plan-review-overlay.ts +799 -0
- package/src/modes/components/plan-toc.ts +138 -0
- package/src/modes/components/read-tool-group.ts +20 -4
- package/src/modes/components/skill-message.ts +0 -1
- package/src/modes/components/status-line.ts +19 -4
- package/src/modes/components/tips.txt +2 -1
- package/src/modes/components/todo-reminder.ts +0 -2
- package/src/modes/components/tool-execution.ts +68 -88
- package/src/modes/components/transcript-container.ts +84 -24
- package/src/modes/components/user-message.ts +2 -3
- package/src/modes/controllers/command-controller-shared.ts +7 -6
- package/src/modes/controllers/command-controller.ts +57 -55
- package/src/modes/controllers/event-controller.ts +67 -40
- package/src/modes/controllers/extension-ui-controller.ts +10 -73
- package/src/modes/controllers/input-controller.ts +170 -126
- package/src/modes/controllers/mcp-command-controller.ts +69 -60
- package/src/modes/controllers/selector-controller.ts +23 -25
- package/src/modes/controllers/streaming-reveal.ts +212 -0
- package/src/modes/controllers/tan-command-controller.ts +173 -0
- package/src/modes/interactive-mode.ts +274 -112
- package/src/modes/magic-keywords.ts +1 -1
- package/src/modes/markdown-prose.ts +1 -1
- package/src/modes/setup-wizard/wizard-overlay.ts +1 -1
- package/src/modes/theme/shimmer.ts +20 -9
- package/src/modes/theme/theme-schema.json +1 -1
- package/src/modes/theme/theme.ts +8 -4
- package/src/modes/types.ts +21 -7
- package/src/modes/utils/copy-targets.ts +133 -27
- package/src/modes/utils/ui-helpers.ts +44 -46
- package/src/modes/workflow.ts +10 -10
- package/src/plan-mode/approved-plan.ts +66 -43
- package/src/plan-mode/plan-protection.ts +4 -4
- package/src/prompts/system/background-tan-dispatch.md +8 -0
- package/src/prompts/system/plan-mode-active.md +67 -58
- package/src/prompts/system/plan-mode-approved.md +1 -1
- package/src/prompts/system/workflow-notice.md +1 -1
- package/src/prompts/tools/bash.md +9 -0
- package/src/prompts/tools/browser.md +1 -1
- package/src/prompts/tools/eval.md +2 -1
- package/src/prompts/tools/read.md +2 -2
- package/src/sdk.ts +37 -46
- package/src/session/agent-session.ts +119 -18
- package/src/session/auth-storage.ts +2 -0
- package/src/session/messages.ts +26 -0
- package/src/session/session-manager.ts +109 -28
- package/src/slash-commands/builtin-registry.ts +36 -9
- package/src/slash-commands/types.ts +4 -6
- package/src/task/executor.ts +76 -38
- package/src/task/index.ts +4 -0
- package/src/task/render.ts +211 -147
- package/src/tools/archive-reader.ts +64 -0
- package/src/tools/ask.ts +119 -164
- package/src/tools/ast-edit.ts +98 -71
- package/src/tools/ast-grep.ts +37 -43
- package/src/tools/bash.ts +57 -6
- package/src/tools/browser/tab-supervisor.ts +13 -1
- package/src/tools/browser/tab-worker.ts +33 -4
- package/src/tools/debug.ts +20 -8
- package/src/tools/eval.ts +13 -2
- package/src/tools/fetch.ts +297 -7
- package/src/tools/find.ts +51 -30
- package/src/tools/gh-cache-invalidation.ts +200 -0
- package/src/tools/gh-renderer.ts +81 -42
- package/src/tools/github-cache.ts +25 -0
- package/src/tools/grouped-file-output.ts +272 -48
- package/src/tools/image-gen.ts +150 -103
- package/src/tools/inspect-image-renderer.ts +63 -41
- package/src/tools/inspect-image.ts +10 -3
- package/src/tools/job.ts +3 -4
- package/src/tools/memory-render.ts +4 -1
- package/src/tools/path-utils.ts +28 -2
- package/src/tools/plan-mode-guard.ts +66 -39
- package/src/tools/read.ts +48 -28
- package/src/tools/render-utils.ts +21 -37
- package/src/tools/resolve.ts +14 -0
- package/src/tools/search-tool-bm25.ts +36 -23
- package/src/tools/search.ts +118 -81
- package/src/tools/sqlite-reader.ts +9 -12
- package/src/tools/todo.ts +118 -52
- package/src/tools/write.ts +83 -64
- package/src/tools/yield.ts +10 -1
- package/src/tui/output-block.ts +60 -13
- package/src/tui/status-line.ts +5 -1
- package/src/utils/commit-message-generator.ts +11 -3
- package/src/utils/enhanced-paste.ts +230 -0
- package/src/utils/title-generator.ts +2 -1
- package/src/web/search/providers/anthropic.ts +25 -19
- package/src/web/search/providers/codex.ts +37 -8
- package/src/web/search/providers/exa.ts +11 -3
- package/src/web/search/providers/kimi.ts +28 -17
- package/src/web/search/providers/parallel.ts +35 -24
- package/src/web/search/providers/synthetic.ts +8 -6
- package/src/web/search/providers/tavily.ts +9 -8
- package/src/web/search/providers/zai.ts +8 -6
|
@@ -4,6 +4,7 @@ import type { Api, AssistantMessage, Model } from "@oh-my-pi/pi-ai";
|
|
|
4
4
|
import * as ai from "@oh-my-pi/pi-ai";
|
|
5
5
|
import { Effort } from "@oh-my-pi/pi-ai";
|
|
6
6
|
import { TempDir } from "@oh-my-pi/pi-utils";
|
|
7
|
+
import { $ } from "bun";
|
|
7
8
|
import type { ModelRegistry } from "../../config/model-registry";
|
|
8
9
|
import { Settings } from "../../config/settings";
|
|
9
10
|
import type { ToolSession } from "../../tools";
|
|
@@ -13,7 +14,7 @@ import { IdleTimeout } from "../idle-timeout";
|
|
|
13
14
|
import { disposeAllVmContexts } from "../js/context-manager";
|
|
14
15
|
import { executeJs } from "../js/executor";
|
|
15
16
|
import { runEvalLlm } from "../llm-bridge";
|
|
16
|
-
import { disposeAllKernelSessions,
|
|
17
|
+
import { disposeAllKernelSessions, type PythonResult } from "../py/executor";
|
|
17
18
|
|
|
18
19
|
function makeModel(provider: string, id: string, extra: Partial<Model<Api>> = {}): Model<Api> {
|
|
19
20
|
return {
|
|
@@ -57,6 +58,7 @@ function makeSession(opts: SessionOptions = {}): ToolSession {
|
|
|
57
58
|
const modelRegistry = {
|
|
58
59
|
getAvailable: () => opts.available ?? [SMOL, DEFAULT, SLOW],
|
|
59
60
|
getApiKey: async () => (opts.apiKey === undefined ? "test-key" : opts.apiKey),
|
|
61
|
+
resolver: () => async () => (opts.apiKey === undefined ? "test-key" : opts.apiKey),
|
|
60
62
|
} as unknown as ModelRegistry;
|
|
61
63
|
return {
|
|
62
64
|
settings,
|
|
@@ -96,6 +98,77 @@ function assistant(opts: {
|
|
|
96
98
|
};
|
|
97
99
|
}
|
|
98
100
|
|
|
101
|
+
async function runPythonLlmInSubprocess(options: { structured: boolean; tempDir: TempDir }): Promise<PythonResult> {
|
|
102
|
+
const repoRoot = path.resolve(import.meta.dir, "../../../..");
|
|
103
|
+
const scriptPath = path.join(options.tempDir.path(), "run-python-llm.ts");
|
|
104
|
+
const resultPath = path.join(options.tempDir.path(), "python-llm-result.json");
|
|
105
|
+
const aiPath = path.resolve(import.meta.dir, "../../../../ai/src/index.ts");
|
|
106
|
+
const executorPath = path.resolve(import.meta.dir, "../py/executor.ts");
|
|
107
|
+
const settingsPath = path.resolve(import.meta.dir, "../../config/settings.ts");
|
|
108
|
+
const code = options.structured
|
|
109
|
+
? 'import json\nprint(json.dumps(llm("hi", schema={"type": "object"})))'
|
|
110
|
+
: 'print(llm("hi", model="smol"))';
|
|
111
|
+
const responseContent = options.structured
|
|
112
|
+
? '[{ type: "toolCall", id: "tc-1", name: "respond", arguments: { ok: true } }]'
|
|
113
|
+
: '[{ type: "text", text: "hello from python" }]';
|
|
114
|
+
await Bun.write(
|
|
115
|
+
scriptPath,
|
|
116
|
+
`
|
|
117
|
+
import { vi } from "bun:test";
|
|
118
|
+
import * as ai from ${JSON.stringify(aiPath)};
|
|
119
|
+
import { executePython } from ${JSON.stringify(executorPath)};
|
|
120
|
+
import { Settings } from ${JSON.stringify(settingsPath)};
|
|
121
|
+
|
|
122
|
+
const SMOL = {
|
|
123
|
+
id: "smol",
|
|
124
|
+
name: "smol",
|
|
125
|
+
api: "openai-responses",
|
|
126
|
+
provider: "p",
|
|
127
|
+
baseUrl: "https://example.test/v1",
|
|
128
|
+
reasoning: false,
|
|
129
|
+
input: ["text"],
|
|
130
|
+
cost: { input: 1, output: 1, cacheRead: 0, cacheWrite: 1 },
|
|
131
|
+
contextWindow: 128000,
|
|
132
|
+
maxTokens: 4096,
|
|
133
|
+
};
|
|
134
|
+
const settings = Settings.isolated({ "async.enabled": false, "task.isolation.mode": "none" });
|
|
135
|
+
settings.setModelRole("smol", "p/smol");
|
|
136
|
+
settings.setModelRole("slow", "p/slow");
|
|
137
|
+
const session = {
|
|
138
|
+
settings,
|
|
139
|
+
modelRegistry: {
|
|
140
|
+
getAvailable: () => [SMOL],
|
|
141
|
+
getApiKey: async () => "test-key",
|
|
142
|
+
resolver: () => async () => "test-key",
|
|
143
|
+
},
|
|
144
|
+
getActiveModelString: () => "p/smol",
|
|
145
|
+
};
|
|
146
|
+
vi.spyOn(ai, "completeSimple").mockResolvedValue({
|
|
147
|
+
role: "assistant",
|
|
148
|
+
api: "openai-responses",
|
|
149
|
+
provider: "p",
|
|
150
|
+
model: "smol",
|
|
151
|
+
stopReason: "stop",
|
|
152
|
+
content: ${responseContent},
|
|
153
|
+
});
|
|
154
|
+
const result = await executePython(${JSON.stringify(code)}, {
|
|
155
|
+
cwd: ${JSON.stringify(options.tempDir.path())},
|
|
156
|
+
sessionId: ${JSON.stringify(`py-llm:${options.structured ? "struct" : "plain"}`)},
|
|
157
|
+
sessionFile: ${JSON.stringify(path.join(options.tempDir.path(), "session.jsonl"))},
|
|
158
|
+
toolSession: session,
|
|
159
|
+
kernelMode: "per-call",
|
|
160
|
+
});
|
|
161
|
+
await Bun.write(${JSON.stringify(resultPath)}, JSON.stringify(result));
|
|
162
|
+
process.exit(0);
|
|
163
|
+
`,
|
|
164
|
+
);
|
|
165
|
+
const child = await $`bun ${scriptPath}`.cwd(repoRoot).quiet().nothrow();
|
|
166
|
+
const stdout = child.stdout.toString();
|
|
167
|
+
const stderr = child.stderr.toString();
|
|
168
|
+
if (child.exitCode !== 0) throw new Error(stderr || stdout || `Python llm subprocess exited with ${child.exitCode}`);
|
|
169
|
+
return (await Bun.file(resultPath).json()) as PythonResult;
|
|
170
|
+
}
|
|
171
|
+
|
|
99
172
|
describe("runEvalLlm", () => {
|
|
100
173
|
afterEach(() => {
|
|
101
174
|
vi.restoreAllMocks();
|
|
@@ -133,6 +206,26 @@ describe("runEvalLlm", () => {
|
|
|
133
206
|
expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
|
|
134
207
|
});
|
|
135
208
|
|
|
209
|
+
it("supplies a non-empty systemPrompt when system is omitted (codex 'Instructions are required' guard)", async () => {
|
|
210
|
+
// The openai-codex Responses transformer drops `instructions` when no
|
|
211
|
+
// system prompt is provided, and the remote endpoint then 400s with
|
|
212
|
+
// "Instructions are required". runEvalLlm must always carry a non-empty
|
|
213
|
+
// systemPrompt so `llm("…")` without a `system` argument works.
|
|
214
|
+
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
215
|
+
await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
|
|
216
|
+
const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
|
|
217
|
+
expect(ctx.systemPrompt).toBeDefined();
|
|
218
|
+
expect(ctx.systemPrompt?.length).toBeGreaterThan(0);
|
|
219
|
+
expect(ctx.systemPrompt?.[0]).toMatch(/.+/);
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
it("honors an explicit system prompt instead of overriding it", async () => {
|
|
223
|
+
const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
|
|
224
|
+
await runEvalLlm({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
|
|
225
|
+
const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
|
|
226
|
+
expect(ctx.systemPrompt).toEqual(["Be terse."]);
|
|
227
|
+
});
|
|
228
|
+
|
|
136
229
|
it("forces a respond tool call and returns its arguments in structured mode", async () => {
|
|
137
230
|
const spy = vi
|
|
138
231
|
.spyOn(ai, "completeSimple")
|
|
@@ -290,38 +383,24 @@ describe("llm() through eval runtimes", () => {
|
|
|
290
383
|
});
|
|
291
384
|
|
|
292
385
|
it("exposes llm() in the Python runtime", async () => {
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
sessionFile,
|
|
302
|
-
toolSession: makeSession(),
|
|
303
|
-
});
|
|
304
|
-
|
|
305
|
-
expect(result.exitCode).toBe(0);
|
|
306
|
-
expect(result.output.trim()).toBe("hello from python");
|
|
386
|
+
const tempDir = TempDir.createSync("@omp-eval-llm-py-");
|
|
387
|
+
try {
|
|
388
|
+
const result = await runPythonLlmInSubprocess({ structured: false, tempDir });
|
|
389
|
+
expect(result.exitCode).toBe(0);
|
|
390
|
+
expect(result.output.trim()).toBe("hello from python");
|
|
391
|
+
} finally {
|
|
392
|
+
tempDir.removeSync();
|
|
393
|
+
}
|
|
307
394
|
});
|
|
308
395
|
|
|
309
396
|
it("parses structured llm() output in the Python runtime", async () => {
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
cwd: tempDir.path(),
|
|
319
|
-
sessionId,
|
|
320
|
-
sessionFile,
|
|
321
|
-
toolSession: makeSession(),
|
|
322
|
-
});
|
|
323
|
-
|
|
324
|
-
expect(result.exitCode).toBe(0);
|
|
325
|
-
expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
|
|
397
|
+
const tempDir = TempDir.createSync("@omp-eval-llm-py-struct-");
|
|
398
|
+
try {
|
|
399
|
+
const result = await runPythonLlmInSubprocess({ structured: true, tempDir });
|
|
400
|
+
expect(result.exitCode).toBe(0);
|
|
401
|
+
expect(JSON.parse(result.output.trim())).toEqual({ ok: true });
|
|
402
|
+
} finally {
|
|
403
|
+
tempDir.removeSync();
|
|
404
|
+
}
|
|
326
405
|
});
|
|
327
406
|
});
|
|
@@ -52,8 +52,14 @@ interface JsSession {
|
|
|
52
52
|
|
|
53
53
|
const sessions = new Map<string, JsSession>();
|
|
54
54
|
const startingSessions = new Map<string, Promise<JsSession>>();
|
|
55
|
-
const resettingSessions = new
|
|
56
|
-
|
|
55
|
+
const resettingSessions = new Map<string, Promise<void>>();
|
|
56
|
+
// Worker startup (module-graph import + WorkerCore construction) is infrastructure
|
|
57
|
+
// cost, not user compute. Floor it independently of Bun's 5s default per-test timeout
|
|
58
|
+
// so a slow cold-start under load isn't aborted mid-init — terminating a still-
|
|
59
|
+
// initializing Bun worker triggers the same kind of terminate-race that motivates
|
|
60
|
+
// avoiding `vm.runInContext` (see shared/indirect-eval.ts), here surfacing as a
|
|
61
|
+
// SIGILL/SIGSEGV. Callers that pass a larger per-cell budget still dominate.
|
|
62
|
+
const WORKER_INIT_TIMEOUT_MS = 15_000;
|
|
57
63
|
|
|
58
64
|
export async function executeInVmContext(options: {
|
|
59
65
|
sessionKey: string;
|
|
@@ -67,17 +73,28 @@ export async function executeInVmContext(options: {
|
|
|
67
73
|
runState: VmRunState;
|
|
68
74
|
}): Promise<{ value: unknown }> {
|
|
69
75
|
if (options.reset) {
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
resettingSessions.
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
resettingSessions.
|
|
76
|
+
// Coalesce concurrent resets: an existing in-flight reset already
|
|
77
|
+
// produces a fresh context, so a follow-up `reset: true` cell should
|
|
78
|
+
// just wait for it rather than failing the user-visible call.
|
|
79
|
+
const inFlight = resettingSessions.get(options.sessionKey);
|
|
80
|
+
if (inFlight) await inFlight.catch(() => undefined);
|
|
81
|
+
else {
|
|
82
|
+
const resetPromise = resetVmContext(options.sessionKey);
|
|
83
|
+
resettingSessions.set(
|
|
84
|
+
options.sessionKey,
|
|
85
|
+
resetPromise.then(() => undefined),
|
|
86
|
+
);
|
|
87
|
+
try {
|
|
88
|
+
await resetPromise;
|
|
89
|
+
} finally {
|
|
90
|
+
resettingSessions.delete(options.sessionKey);
|
|
91
|
+
}
|
|
78
92
|
}
|
|
79
|
-
} else
|
|
80
|
-
|
|
93
|
+
} else {
|
|
94
|
+
// Internal coordination: wait for any in-flight reset to settle and
|
|
95
|
+
// then run on the freshly-rebuilt context.
|
|
96
|
+
const inFlight = resettingSessions.get(options.sessionKey);
|
|
97
|
+
if (inFlight) await inFlight.catch(() => undefined);
|
|
81
98
|
}
|
|
82
99
|
const session = await acquireSession(
|
|
83
100
|
options.sessionKey,
|
|
@@ -191,9 +208,9 @@ async function acquireSession(sessionKey: string, snapshot: SessionSnapshot, tim
|
|
|
191
208
|
handleSessionMessage(session, msg);
|
|
192
209
|
});
|
|
193
210
|
try {
|
|
194
|
-
//
|
|
195
|
-
// users can grant more
|
|
196
|
-
const readyTimeoutMs = Math.max(
|
|
211
|
+
// Init headroom is the fixed infrastructure floor; the caller's per-cell timeout
|
|
212
|
+
// dominates when larger so users can grant more by raising `timeout` on a cell.
|
|
213
|
+
const readyTimeoutMs = Math.max(WORKER_INIT_TIMEOUT_MS, timeoutMs ?? 0);
|
|
197
214
|
await raceWithTimeout(readyPromise, readyTimeoutMs, "Timed out initializing JS eval worker");
|
|
198
215
|
worker.send({ type: "init", snapshot });
|
|
199
216
|
sessions.set(sessionKey, session);
|
package/src/eval/llm-bridge.ts
CHANGED
|
@@ -15,7 +15,13 @@ import { instrumentedCompleteSimple, resolveTelemetry } from "@oh-my-pi/pi-agent
|
|
|
15
15
|
import { type Api, Effort, getSupportedEfforts, type Model, type Tool } from "@oh-my-pi/pi-ai";
|
|
16
16
|
import * as z from "zod/v4";
|
|
17
17
|
import { extractTextContent, extractToolCall, parseJsonPayload } from "../commit/utils";
|
|
18
|
-
|
|
18
|
+
|
|
19
|
+
import {
|
|
20
|
+
expandRoleAlias,
|
|
21
|
+
formatModelString,
|
|
22
|
+
getModelMatchPreferences,
|
|
23
|
+
resolveModelFromString,
|
|
24
|
+
} from "../config/model-resolver";
|
|
19
25
|
import type { ToolSession } from "../tools";
|
|
20
26
|
import { ToolError } from "../tools/tool-errors";
|
|
21
27
|
import { withBridgeTimeoutPause } from "./bridge-timeout";
|
|
@@ -64,7 +70,7 @@ function resolveTierModel(tier: LlmTier, session: ToolSession): Model<Api> | und
|
|
|
64
70
|
const available = modelRegistry.getAvailable();
|
|
65
71
|
if (available.length === 0) return undefined;
|
|
66
72
|
|
|
67
|
-
const matchPreferences =
|
|
73
|
+
const matchPreferences = getModelMatchPreferences(session.settings);
|
|
68
74
|
const resolve = (pattern: string | undefined): Model<Api> | undefined => {
|
|
69
75
|
if (!pattern) return undefined;
|
|
70
76
|
const expanded = expandRoleAlias(pattern, session.settings);
|
|
@@ -112,8 +118,9 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
|
|
|
112
118
|
);
|
|
113
119
|
}
|
|
114
120
|
|
|
115
|
-
const
|
|
116
|
-
|
|
121
|
+
const registry = options.session.modelRegistry;
|
|
122
|
+
const apiKey = await registry?.getApiKey(model);
|
|
123
|
+
if (!registry || !apiKey) {
|
|
117
124
|
throw new ToolError(
|
|
118
125
|
`llm() has no API key for ${formatModelString(model)}. Configure credentials for this provider or choose another tier.`,
|
|
119
126
|
);
|
|
@@ -132,18 +139,27 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
|
|
|
132
139
|
|
|
133
140
|
const telemetry = resolveTelemetry(options.session.getTelemetry?.(), options.session.getSessionId?.() ?? undefined);
|
|
134
141
|
|
|
142
|
+
// Some providers (notably openai-codex) require a non-empty `instructions`
|
|
143
|
+
// field on every Responses request and 400 with "Instructions are required"
|
|
144
|
+
// when it is missing. Fall back to a minimal default so `llm(prompt)` works
|
|
145
|
+
// without forcing every caller to pass a `system` prompt.
|
|
146
|
+
const systemPrompt = system ? [system] : ["You are a helpful assistant."];
|
|
147
|
+
|
|
135
148
|
// Suspend eval timeout accounting while the model request owns control. The
|
|
136
149
|
// timeout clock restarts once the bridge returns to the cell runtime.
|
|
137
150
|
const response = await withBridgeTimeoutPause(options.emitStatus, () =>
|
|
138
151
|
instrumentedCompleteSimple(
|
|
139
152
|
model,
|
|
140
153
|
{
|
|
141
|
-
systemPrompt
|
|
154
|
+
systemPrompt,
|
|
142
155
|
messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }],
|
|
143
156
|
tools,
|
|
144
157
|
},
|
|
145
158
|
{
|
|
146
|
-
apiKey,
|
|
159
|
+
apiKey: registry.resolver(model.provider, {
|
|
160
|
+
sessionId: options.session.getSessionId?.() ?? undefined,
|
|
161
|
+
baseUrl: model.baseUrl,
|
|
162
|
+
}),
|
|
147
163
|
signal: options.signal,
|
|
148
164
|
reasoning: reasoningForTier(tier, model),
|
|
149
165
|
toolChoice: schema ? { type: "tool", name: STRUCTURED_TOOL_NAME } : undefined,
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test";
|
|
2
|
+
import { PYTHON_PRELUDE } from "../prelude";
|
|
3
|
+
|
|
4
|
+
describe("python prelude", () => {
|
|
5
|
+
it("exposes read(path, offset?, limit?) with positional optional args", () => {
|
|
6
|
+
// The eval docs advertise `read(path, offset?=1, limit?=None)`. A
|
|
7
|
+
// keyword-only signature (`def read(path, *, offset=1, limit=None)`)
|
|
8
|
+
// makes `read("file", 10)` raise `TypeError: read() takes 1 positional
|
|
9
|
+
// argument but 2 were given`, which agents in the wild repeatedly hit.
|
|
10
|
+
// Lock the contract so the helper accepts both positional and keyword
|
|
11
|
+
// forms.
|
|
12
|
+
const match = PYTHON_PRELUDE.match(/def\s+read\(([^)]+)\)/);
|
|
13
|
+
expect(match).not.toBeNull();
|
|
14
|
+
const signature = match?.[1] ?? "";
|
|
15
|
+
expect(signature).not.toContain("*,");
|
|
16
|
+
expect(signature).toContain("offset");
|
|
17
|
+
expect(signature).toContain("limit");
|
|
18
|
+
});
|
|
19
|
+
});
|
package/src/eval/py/executor.ts
CHANGED
|
@@ -126,7 +126,7 @@ interface PythonSession {
|
|
|
126
126
|
|
|
127
127
|
const sessions = new Map<string, PythonSession>();
|
|
128
128
|
const startingSessions = new Map<string, Promise<PythonSession>>();
|
|
129
|
-
const resettingSessions = new
|
|
129
|
+
const resettingSessions = new Map<string, Promise<void>>();
|
|
130
130
|
|
|
131
131
|
function normalizeSessionCwd(cwd: string): string {
|
|
132
132
|
return path.resolve(cwd);
|
|
@@ -611,17 +611,29 @@ async function executeOnSession(code: string, cwd: string, options: PythonExecut
|
|
|
611
611
|
options.bridgeSessionId = sessionId;
|
|
612
612
|
}
|
|
613
613
|
if (options.reset) {
|
|
614
|
-
if
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
resettingSessions.
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
resettingSessions.
|
|
614
|
+
// Coalesce concurrent resets: if another reset is in flight for this
|
|
615
|
+
// session, await it instead of throwing — the caller's intent ("start
|
|
616
|
+
// from a clean kernel") is satisfied once that reset settles.
|
|
617
|
+
const inFlight = resettingSessions.get(sessionKey);
|
|
618
|
+
if (inFlight) await inFlight.catch(() => undefined);
|
|
619
|
+
else {
|
|
620
|
+
const resetPromise = resetSession(sessionKey);
|
|
621
|
+
resettingSessions.set(
|
|
622
|
+
sessionKey,
|
|
623
|
+
resetPromise.then(() => undefined),
|
|
624
|
+
);
|
|
625
|
+
try {
|
|
626
|
+
await resetPromise;
|
|
627
|
+
} finally {
|
|
628
|
+
resettingSessions.delete(sessionKey);
|
|
629
|
+
}
|
|
622
630
|
}
|
|
623
|
-
} else
|
|
624
|
-
|
|
631
|
+
} else {
|
|
632
|
+
// A reset already in progress is an internal coordination state, not a
|
|
633
|
+
// user-visible failure. Wait for it to clear, then proceed with the
|
|
634
|
+
// requested execution on the freshly-restarted kernel.
|
|
635
|
+
const inFlight = resettingSessions.get(sessionKey);
|
|
636
|
+
if (inFlight) await inFlight.catch(() => undefined);
|
|
625
637
|
}
|
|
626
638
|
const session = await acquireSession(sessionKey, sessionId, cwd, options);
|
|
627
639
|
if (options.signal?.aborted) {
|
package/src/eval/py/prelude.py
CHANGED
|
@@ -53,7 +53,7 @@ if "__omp_prelude_loaded__" not in globals():
|
|
|
53
53
|
_emit_status("env", key=key, value=val, action="get")
|
|
54
54
|
return val
|
|
55
55
|
|
|
56
|
-
def read(path: str | Path,
|
|
56
|
+
def read(path: str | Path, offset: int = 1, limit: int | None = None) -> str:
|
|
57
57
|
"""Read file contents. offset/limit are 1-indexed line numbers."""
|
|
58
58
|
p = Path(path)
|
|
59
59
|
data = p.read_text(encoding="utf-8")
|
|
@@ -7,7 +7,13 @@
|
|
|
7
7
|
* - Register commands, keyboard shortcuts, and CLI flags
|
|
8
8
|
* - Interact with the user via UI primitives
|
|
9
9
|
*/
|
|
10
|
-
import type {
|
|
10
|
+
import type {
|
|
11
|
+
AgentMessage,
|
|
12
|
+
AgentToolResult,
|
|
13
|
+
AgentToolUpdateCallback,
|
|
14
|
+
ThinkingLevel,
|
|
15
|
+
ToolApproval,
|
|
16
|
+
} from "@oh-my-pi/pi-agent-core";
|
|
11
17
|
import type { CompactionResult } from "@oh-my-pi/pi-agent-core/compaction";
|
|
12
18
|
import type {
|
|
13
19
|
Api,
|
|
@@ -392,6 +398,9 @@ export interface ToolDefinition<TParams extends TSchema = TSchema, TDetails = un
|
|
|
392
398
|
defaultInactive?: boolean;
|
|
393
399
|
/** If true, tool may stage deferred changes that require explicit resolve/discard. */
|
|
394
400
|
deferrable?: boolean;
|
|
401
|
+
/** Tool approval tier. Defaults to `"exec"` when omitted.
|
|
402
|
+
* `"read"`: read-only operations. `"write"`: mutations. `"exec"`: code execution. */
|
|
403
|
+
approval?: ToolApproval;
|
|
395
404
|
/** MCP server name for discovery/search metadata when this tool fronts an MCP server. */
|
|
396
405
|
mcpServerName?: string;
|
|
397
406
|
/** Original MCP tool name for discovery/search metadata. */
|
|
@@ -10,7 +10,7 @@ import { formatDuration } from "../../slash-commands/helpers/format";
|
|
|
10
10
|
import type { ToolSession } from "../../tools";
|
|
11
11
|
import { formatErrorDetail, TRUNCATE_LENGTHS } from "../../tools/render-utils";
|
|
12
12
|
import { ToolError } from "../../tools/tool-errors";
|
|
13
|
-
import { renderStatusLine, truncateToWidth } from "../../tui";
|
|
13
|
+
import { framedBlock, renderStatusLine, truncateToWidth } from "../../tui";
|
|
14
14
|
import { completionBudgetReport, remainingTokens } from "../runtime";
|
|
15
15
|
import type { Goal, GoalStatus, GoalToolDetails } from "../state";
|
|
16
16
|
|
|
@@ -173,8 +173,7 @@ export const goalToolRenderer = {
|
|
|
173
173
|
if (args.op === "create" && args.token_budget !== undefined) {
|
|
174
174
|
meta.push(`budget ${formatNumber(args.token_budget)}`);
|
|
175
175
|
}
|
|
176
|
-
|
|
177
|
-
return new Text(text, 0, 0);
|
|
176
|
+
return new Text(renderStatusLine({ icon: "pending", title: "Goal", description, meta }, uiTheme), 0, 0);
|
|
178
177
|
},
|
|
179
178
|
|
|
180
179
|
renderResult(
|
|
@@ -190,51 +189,62 @@ export const goalToolRenderer = {
|
|
|
190
189
|
|
|
191
190
|
if (result.isError) {
|
|
192
191
|
const header = renderStatusLine({ icon: "error", title: "Goal", description }, uiTheme);
|
|
193
|
-
|
|
194
|
-
|
|
192
|
+
return framedBlock(uiTheme, width => ({
|
|
193
|
+
header,
|
|
194
|
+
sections: [{ lines: formatErrorDetail(fallbackText || "Goal tool failed", uiTheme).split("\n") }],
|
|
195
|
+
state: "error",
|
|
196
|
+
borderColor: "error",
|
|
197
|
+
width,
|
|
198
|
+
}));
|
|
195
199
|
}
|
|
196
200
|
|
|
197
201
|
const goal = details?.goal ?? null;
|
|
198
202
|
if (!goal) {
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
203
|
+
return new Text(
|
|
204
|
+
renderStatusLine({ icon: "warning", title: "Goal", description, meta: ["no active goal"] }, uiTheme),
|
|
205
|
+
0,
|
|
206
|
+
0,
|
|
207
|
+
);
|
|
202
208
|
}
|
|
203
209
|
|
|
204
|
-
const
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
},
|
|
213
|
-
uiTheme,
|
|
214
|
-
),
|
|
210
|
+
const header = renderStatusLine(
|
|
211
|
+
{
|
|
212
|
+
icon: "success",
|
|
213
|
+
title: "Goal",
|
|
214
|
+
description,
|
|
215
|
+
badge: { label: goal.status, color: goalBadgeColor(goal.status) },
|
|
216
|
+
},
|
|
217
|
+
uiTheme,
|
|
215
218
|
);
|
|
216
219
|
|
|
220
|
+
const lines: string[] = [];
|
|
217
221
|
const objectiveText = truncateToWidth(goal.objective.trim(), TRUNCATE_LENGTHS.LONG);
|
|
218
|
-
lines.push(
|
|
222
|
+
lines.push(uiTheme.italic(uiTheme.fg("muted", `"${objectiveText}"`)));
|
|
219
223
|
|
|
220
224
|
const used = formatNumber(goal.tokensUsed);
|
|
221
225
|
const tokensLine =
|
|
222
226
|
goal.tokenBudget !== undefined
|
|
223
227
|
? `${used} / ${formatNumber(goal.tokenBudget)} tokens (${formatNumber(Math.max(0, goal.tokenBudget - goal.tokensUsed))} left)`
|
|
224
228
|
: `${used} tokens`;
|
|
225
|
-
|
|
226
|
-
|
|
229
|
+
const metaParts = [tokensLine];
|
|
227
230
|
if (goal.timeUsedSeconds > 0) {
|
|
228
|
-
|
|
231
|
+
metaParts.push(`${formatDuration(goal.timeUsedSeconds * 1000)} elapsed`);
|
|
229
232
|
}
|
|
233
|
+
lines.push(uiTheme.fg("dim", metaParts.join(" · ")));
|
|
230
234
|
|
|
231
235
|
const report = details?.completionBudgetReport;
|
|
236
|
+
const sections: Array<{ label?: string; lines: string[] }> = [{ lines }];
|
|
232
237
|
if (report) {
|
|
233
|
-
|
|
234
|
-
lines.push(uiTheme.italic(uiTheme.fg("muted", report)));
|
|
238
|
+
sections.push({ label: "Report", lines: report.split("\n").map(line => uiTheme.fg("muted", line)) });
|
|
235
239
|
}
|
|
236
240
|
|
|
237
|
-
return
|
|
241
|
+
return framedBlock(uiTheme, width => ({
|
|
242
|
+
header,
|
|
243
|
+
sections,
|
|
244
|
+
state: "success",
|
|
245
|
+
borderColor: "borderMuted",
|
|
246
|
+
width,
|
|
247
|
+
}));
|
|
238
248
|
},
|
|
239
249
|
|
|
240
250
|
mergeCallAndResult: true,
|