@oh-my-pi/pi-coding-agent 15.5.12 → 15.5.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/dist/types/config/model-registry.d.ts +1 -1
- package/dist/types/config/models-config-schema.d.ts +2 -0
- package/dist/types/config/settings-schema.d.ts +1 -10
- package/dist/types/edit/file-snapshot-store.d.ts +19 -0
- package/dist/types/eval/__tests__/llm-bridge.test.d.ts +1 -0
- package/dist/types/eval/llm-bridge.d.ts +25 -0
- package/dist/types/export/html/template.generated.d.ts +1 -1
- package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +15 -0
- package/dist/types/modes/theme/theme.d.ts +2 -1
- package/dist/types/session/agent-session.d.ts +2 -0
- package/dist/types/tools/index.d.ts +0 -1
- package/package.json +8 -8
- package/src/config/model-registry.ts +89 -5
- package/src/config/models-config-schema.ts +1 -1
- package/src/config/settings-schema.ts +1 -10
- package/src/edit/file-snapshot-store.ts +34 -0
- package/src/edit/hashline/diff.ts +3 -8
- package/src/edit/renderer.ts +1 -1
- package/src/eval/__tests__/llm-bridge.test.ts +297 -0
- package/src/eval/js/shared/prelude.txt +8 -0
- package/src/eval/js/tool-bridge.ts +4 -0
- package/src/eval/llm-bridge.ts +181 -0
- package/src/eval/py/prelude.py +52 -31
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +0 -13
- package/src/extensibility/plugins/legacy-pi-compat.ts +60 -23
- package/src/internal-urls/docs-index.generated.ts +4 -5
- package/src/main.ts +4 -0
- package/src/modes/components/model-selector.ts +119 -22
- package/src/modes/components/status-line/presets.ts +1 -0
- package/src/modes/components/status-line/segments.ts +23 -0
- package/src/modes/interactive-mode.ts +22 -87
- package/src/modes/theme/theme.ts +7 -0
- package/src/prompts/tools/eval.md +2 -0
- package/src/session/agent-session.ts +19 -0
- package/src/session/session-manager.ts +47 -0
- package/src/tools/ast-edit.ts +1 -1
- package/src/tools/ast-grep.ts +6 -17
- package/src/tools/eval.ts +24 -48
- package/src/tools/index.ts +0 -4
- package/src/tools/read.ts +23 -33
- package/src/tools/renderers.ts +0 -2
- package/src/tools/search.ts +12 -21
- package/src/tools/write.ts +1 -3
- package/src/utils/file-mentions.ts +1 -3
- package/dist/types/tools/calculator.d.ts +0 -77
- package/src/prompts/tools/calculator.md +0 -10
- package/src/tools/calculator.ts +0 -541
|
@@ -39,6 +39,13 @@ if (!globalThis.__omp_js_prelude_loaded__) {
|
|
|
39
39
|
return values.length === 1 ? values[0] : values;
|
|
40
40
|
};
|
|
41
41
|
|
|
42
|
+
// Oneshot LLM helper exposed to eval cells.
// Normalizes `opts`, forwards `{ prompt, ...options }` to the host through the
// reserved "__llm__" bridge name, then unwraps the reply: object replies yield
// their `text` field, anything else is passed through untouched. When a
// `schema` option was supplied the text is JSON-parsed before being returned.
const llm = async (prompt, opts = {}) => {
	const options = toOptions(opts);
	const request = { prompt, ...options };
	const reply = await globalThis.__omp_call_tool__("__llm__", request);

	let text = reply;
	if (reply && typeof reply === "object") {
		text = reply.text;
	}

	if (!options.schema) {
		return text;
	}
	return JSON.parse(text);
};
|
|
48
|
+
|
|
42
49
|
const display = value => {
|
|
43
50
|
globalThis.__omp_display__(value);
|
|
44
51
|
};
|
|
@@ -61,6 +68,7 @@ if (!globalThis.__omp_js_prelude_loaded__) {
|
|
|
61
68
|
globalThis.print = consoleBridge.log;
|
|
62
69
|
globalThis.display = display;
|
|
63
70
|
globalThis.tool = tool;
|
|
71
|
+
globalThis.llm = llm;
|
|
64
72
|
globalThis.output = output;
|
|
65
73
|
globalThis.read = read;
|
|
66
74
|
globalThis.write = write;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { AgentTool, AgentToolResult } from "@oh-my-pi/pi-agent-core";
|
|
2
2
|
import type { ToolSession } from "../../tools";
|
|
3
3
|
import { ToolError } from "../../tools/tool-errors";
|
|
4
|
+
import { EVAL_LLM_BRIDGE_NAME, runEvalLlm } from "../llm-bridge";
|
|
4
5
|
import type { JsStatusEvent } from "./shared/types";
|
|
5
6
|
|
|
6
7
|
export type { JsStatusEvent } from "./shared/types";
|
|
@@ -101,6 +102,9 @@ function summarizeToolResult(
|
|
|
101
102
|
}
|
|
102
103
|
|
|
103
104
|
export async function callSessionTool(name: string, args: unknown, options: ToolBridgeOptions): Promise<ToolValue> {
|
|
105
|
+
if (name === EVAL_LLM_BRIDGE_NAME) {
|
|
106
|
+
return await runEvalLlm(args, options);
|
|
107
|
+
}
|
|
104
108
|
const tool = getTool(options.session, name);
|
|
105
109
|
const normalizedArgs = normalizeArgs(args);
|
|
106
110
|
const toolCallId = `js-${name}-${crypto.randomUUID()}`;
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Host-side handler for the eval `llm()` helper.
|
|
3
|
+
*
|
|
4
|
+
* Both eval runtimes (JS worker + Python kernel) route helper→host calls
|
|
5
|
+
* through {@link callSessionTool}. Reserving the synthetic tool name
|
|
6
|
+
* {@link EVAL_LLM_BRIDGE_NAME} lets a single host handler serve both
|
|
7
|
+
* transports without registering an agent-visible tool: cell code calls
|
|
8
|
+
* `llm(prompt, opts)`, the prelude forwards `{ prompt, model, system?, schema? }`
|
|
9
|
+
* through the bridge, and this module performs one stateless completion.
|
|
10
|
+
*
|
|
11
|
+
* The call is oneshot and toolless from the model's perspective — pure text
|
|
12
|
+
* in, text (or, with `schema`, a structured object) out.
|
|
13
|
+
*/
|
|
14
|
+
import { instrumentedCompleteSimple, resolveTelemetry } from "@oh-my-pi/pi-agent-core";
|
|
15
|
+
import { type Api, Effort, getSupportedEfforts, type Model, type Tool } from "@oh-my-pi/pi-ai";
|
|
16
|
+
import * as z from "zod/v4";
|
|
17
|
+
import { extractTextContent, extractToolCall, parseJsonPayload } from "../commit/utils";
|
|
18
|
+
import { expandRoleAlias, formatModelString, resolveModelFromString } from "../config/model-resolver";
|
|
19
|
+
import type { ToolSession } from "../tools";
|
|
20
|
+
import { ToolError } from "../tools/tool-errors";
|
|
21
|
+
import type { JsStatusEvent } from "./js/shared/types";
|
|
22
|
+
|
|
23
|
+
/** Reserved synthetic tool name that both eval runtimes use to reach the `llm()` host handler. */
export const EVAL_LLM_BRIDGE_NAME = "__llm__";

/** Name of the synthetic tool offered to the model (and forced via `toolChoice`) when a `schema` is given. */
const STRUCTURED_TOOL_NAME = "respond";

/** Tier names accepted by `llm()`; shared by the {@link LlmTier} type and the argument validator. */
const LLM_TIERS = ["smol", "default", "slow"] as const;

type LlmTier = (typeof LLM_TIERS)[number];

/** Role pattern looked up for each tier. */
const TIER_TO_PATTERN: Record<LlmTier, string> = {
	smol: "pi/smol",
	default: "pi/default",
	slow: "pi/slow",
};

/** Validator for the raw bridge payload: non-empty prompt, tier defaulting to "default", optional system prompt and schema. */
const llmArgsSchema = z.object({
	prompt: z.string().min(1, "prompt must be a non-empty string"),
	model: z.enum(LLM_TIERS).default("default"),
	system: z.string().optional(),
	schema: z.record(z.string(), z.unknown()).optional(),
});

/** Host-side context handed to {@link runEvalLlm}. */
export interface EvalLlmBridgeOptions {
	session: ToolSession;
	signal?: AbortSignal;
	emitStatus?: (event: JsStatusEvent) => void;
}

/** Value returned over the bridge: the completion text plus which model/tier produced it. */
export interface EvalLlmResult {
	text: string;
	details: { model: string; tier: LlmTier; structured: boolean };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Map a tier onto a concrete {@link Model}.
 *
 * For `default`, the session's active model string (falling back to its
 * configured model string) is tried first, then the `pi/default` role
 * pattern. `smol` and `slow` go straight to their role patterns.
 *
 * @returns the resolved model, or `undefined` when the session has no model
 * registry, no models are available, or no pattern resolves.
 */
function resolveTierModel(tier: LlmTier, session: ToolSession): Model<Api> | undefined {
	const registry = session.modelRegistry;
	if (!registry) return undefined;

	const candidates = registry.getAvailable();
	if (candidates.length === 0) return undefined;

	const preferences = { usageOrder: session.settings.getStorage()?.getModelUsageOrder() };

	// Expand role aliases first, then match against the available models.
	const lookup = (pattern: string | undefined): Model<Api> | undefined =>
		pattern
			? resolveModelFromString(expandRoleAlias(pattern, session.settings), candidates, preferences, registry)
			: undefined;

	if (tier !== "default") {
		return lookup(TIER_TO_PATTERN[tier]);
	}

	const activePattern = session.getActiveModelString?.() ?? session.getModelString?.();
	return lookup(activePattern) ?? lookup(TIER_TO_PATTERN.default);
}
|
|
79
|
+
|
|
80
|
+
/**
 * Pick the reasoning effort to request for a tier.
 *
 * Thinking is requested only for the `slow` tier, and only when the model is
 * flagged as reasoning-capable. `Effort.High` is used when the model lists it
 * among its supported efforts; otherwise the last entry of the supported list
 * is used.
 *
 * @returns the effort to request, or `undefined` when no reasoning should be
 * requested (other tiers, non-reasoning models, or no supported efforts).
 */
function reasoningForTier(tier: LlmTier, model: Model<Api>): Effort | undefined {
	if (!(tier === "slow" && model.reasoning)) return undefined;

	const supported = getSupportedEfforts(model);
	if (supported.includes(Effort.High)) return Effort.High;

	return supported.length > 0 ? supported[supported.length - 1] : undefined;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Run a single stateless completion on behalf of an eval cell's `llm()` call.
 *
 * Steps: validate the raw bridge payload, resolve the requested tier to a
 * model, fetch that model's API key, issue one completion (forcing the
 * synthetic `respond` tool when a `schema` is supplied), and package the
 * outcome as `{ text, details }`.
 *
 * With a `schema`, `text` is the JSON serialization of the tool-call
 * arguments, or — if the model answered in plain text instead — of the JSON
 * payload parsed out of that text. Without a `schema`, `text` is the
 * completion text itself.
 *
 * @param args - Untrusted payload from the eval runtime; checked against `llmArgsSchema`.
 * @param options - Session, optional abort signal, and optional status emitter.
 * @returns The completion text plus the model string, tier, and structured flag.
 * @throws ToolError when the arguments are invalid, no model or API key can be
 * resolved, the request errors or is aborted, or no usable output is produced.
 */
export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions): Promise<EvalLlmResult> {
	const parsed = llmArgsSchema.safeParse(args);
	if (!parsed.success) {
		// Report only the first issue, prefixed with its path when it has one.
		const issue = parsed.error.issues[0];
		const where = issue?.path.length ? `${issue.path.join(".")}: ` : "";
		throw new ToolError(`llm() received invalid arguments: ${where}${issue?.message ?? "bad input"}`);
	}
	const { prompt, model: tier, system, schema } = parsed.data;

	const model = resolveTierModel(tier, options.session);
	if (!model) {
		throw new ToolError(
			`llm() could not resolve a model for the "${tier}" tier. Configure modelRoles.${tier} or ensure a provider is available.`,
		);
	}

	const apiKey = await options.session.modelRegistry?.getApiKey(model);
	if (!apiKey) {
		throw new ToolError(
			`llm() has no API key for ${formatModelString(model)}. Configure credentials for this provider or choose another tier.`,
		);
	}

	// A schema turns the call into a forced tool call whose parameters are the schema.
	const tools: Tool[] | undefined = schema
		? [
				{
					name: STRUCTURED_TOOL_NAME,
					description: "Return your answer by calling this tool with the requested structured fields.",
					parameters: schema,
					strict: false,
				},
			]
		: undefined;

	const telemetry = resolveTelemetry(options.session.getTelemetry?.(), options.session.getSessionId?.() ?? undefined);

	const response = await instrumentedCompleteSimple(
		model,
		{
			systemPrompt: system ? [system] : undefined,
			messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }],
			tools,
		},
		{
			apiKey,
			signal: options.signal,
			reasoning: reasoningForTier(tier, model),
			toolChoice: schema ? { type: "tool", name: STRUCTURED_TOOL_NAME } : undefined,
		},
		{ telemetry, oneshotKind: "eval_llm" },
	);

	if (response.stopReason === "error") {
		throw new ToolError(response.errorMessage ?? "llm() request failed.");
	}
	if (response.stopReason === "aborted") {
		throw new ToolError("llm() request aborted.");
	}

	let resultText: string;
	if (schema) {
		const call = extractToolCall(response, STRUCTURED_TOOL_NAME);
		let value: unknown;
		if (call) {
			value = call.arguments;
		} else {
			// The model ignored the forced tool; fall back to JSON found in its text.
			const text = extractTextContent(response);
			if (!text) throw new ToolError("llm() returned no structured response.");
			try {
				value = parseJsonPayload(text);
			} catch {
				throw new ToolError("llm() did not return a structured response matching the schema.");
			}
		}
		// JSON.stringify yields `undefined` for an `undefined` value; surface that as a
		// ToolError instead of crashing on `.length` below and returning a non-string `text`.
		const serialized: string | undefined = JSON.stringify(value);
		if (serialized === undefined) throw new ToolError("llm() returned no structured response.");
		resultText = serialized;
	} else {
		resultText = extractTextContent(response);
		if (!resultText) throw new ToolError("llm() returned no text output.");
	}

	const modelLabel = formatModelString(model);
	options.emitStatus?.({ op: "llm", model: modelLabel, tier, chars: resultText.length });

	return { text: resultText, details: { model: modelLabel, tier, structured: Boolean(schema) } };
}
|
package/src/eval/py/prelude.py
CHANGED
|
@@ -385,6 +385,40 @@ if "__omp_prelude_loaded__" not in globals():
|
|
|
385
385
|
raise RuntimeError("tool bridge is unavailable in this kernel")
|
|
386
386
|
return (base.rstrip("/"), token, session)
|
|
387
387
|
|
|
388
|
+
def _bridge_call(name: str, args: dict):
    """Send a single request to the host tool bridge and hand back its `value`.

    The request is a JSON POST to `{base}/v1/tool` carrying the session, the
    current run id, the bridge `name`, and `args`, authorized with the bearer
    token from `_tool_proxy_from_env()`. The body of an HTTP error response is
    read and decoded the same way as a successful one.

    Raises:
        RuntimeError: if the response body is not JSON, or if the decoded
            response is not a dict with a truthy `ok` field (the bridge's
            `error` message is used when present).
    """
    import urllib.error
    import urllib.request

    base, token, session = _tool_proxy_from_env()

    # Prefer the run-id getter when the kernel defines one; otherwise use the plain global.
    getter = globals().get("__omp_current_run_id__")
    if callable(getter):
        run_id = getter()
    else:
        run_id = globals().get("__omp_run_id__")

    envelope = {"session": session, "run": run_id, "name": name, "args": args}
    payload = json.dumps(envelope).encode("utf-8")
    request = urllib.request.Request(
        f"{base}/v1/tool",
        data=payload,
        method="POST",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
        },
    )

    try:
        with urllib.request.urlopen(request) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as exc:
        raw = exc.read()

    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError:
        raise RuntimeError(
            f"bridge call {name!r}: non-JSON response: {raw[:200]!r}"
        ) from None

    if isinstance(decoded, dict) and decoded.get("ok"):
        return decoded.get("value")

    message = decoded.get("error") if isinstance(decoded, dict) else None
    raise RuntimeError(message or f"bridge call {name!r} failed")
|
|
421
|
+
|
|
388
422
|
class _ToolCallable:
|
|
389
423
|
"""Invokes one host-side tool via the loopback HTTP bridge."""
|
|
390
424
|
|
|
@@ -397,7 +431,6 @@ if "__omp_prelude_loaded__" not in globals():
|
|
|
397
431
|
return f"<tool.{self._name}>"
|
|
398
432
|
|
|
399
433
|
def __call__(self, args=None, /, **kwargs):
|
|
400
|
-
import urllib.request, urllib.error
|
|
401
434
|
if args is None:
|
|
402
435
|
merged: dict = {}
|
|
403
436
|
elif isinstance(args, dict):
|
|
@@ -409,36 +442,7 @@ if "__omp_prelude_loaded__" not in globals():
|
|
|
409
442
|
merged.update(kwargs)
|
|
410
443
|
if "_i" not in merged:
|
|
411
444
|
merged["_i"] = "py prelude"
|
|
412
|
-
|
|
413
|
-
_run_id_getter = globals().get("__omp_current_run_id__")
|
|
414
|
-
_run_id = _run_id_getter() if callable(_run_id_getter) else globals().get("__omp_run_id__")
|
|
415
|
-
payload = json.dumps(
|
|
416
|
-
{"session": session, "run": _run_id, "name": self._name, "args": merged}
|
|
417
|
-
).encode("utf-8")
|
|
418
|
-
req = urllib.request.Request(
|
|
419
|
-
f"{base}/v1/tool",
|
|
420
|
-
data=payload,
|
|
421
|
-
method="POST",
|
|
422
|
-
headers={
|
|
423
|
-
"Content-Type": "application/json",
|
|
424
|
-
"Authorization": f"Bearer {token}",
|
|
425
|
-
},
|
|
426
|
-
)
|
|
427
|
-
try:
|
|
428
|
-
with urllib.request.urlopen(req) as resp:
|
|
429
|
-
body = resp.read()
|
|
430
|
-
except urllib.error.HTTPError as exc:
|
|
431
|
-
body = exc.read()
|
|
432
|
-
try:
|
|
433
|
-
data = json.loads(body)
|
|
434
|
-
except json.JSONDecodeError:
|
|
435
|
-
raise RuntimeError(
|
|
436
|
-
f"tool.{self._name}: bridge returned non-JSON response: {body[:200]!r}"
|
|
437
|
-
) from None
|
|
438
|
-
if not isinstance(data, dict) or not data.get("ok"):
|
|
439
|
-
msg = (data or {}).get("error") if isinstance(data, dict) else None
|
|
440
|
-
raise RuntimeError(msg or f"tool.{self._name} failed")
|
|
441
|
-
return data.get("value")
|
|
445
|
+
return _bridge_call(self._name, merged)
|
|
442
446
|
|
|
443
447
|
class _ToolProxy:
|
|
444
448
|
"""`tool.<name>(args)` proxy mirroring the JS runtime bridge."""
|
|
@@ -458,3 +462,20 @@ if "__omp_prelude_loaded__" not in globals():
|
|
|
458
462
|
return f"<tool proxy session={session}>" if session else "<tool proxy unavailable>"
|
|
459
463
|
|
|
460
464
|
tool = _ToolProxy()
|
|
465
|
+
|
|
466
|
+
def llm(prompt, *, model="default", system=None, schema=None):
    """Make one stateless LLM call through the host bridge.

    `model` names a tier: "smol", "default" (the session's active model),
    or "slow". `system`, when given, is sent as the system prompt. `schema`,
    when given, is a JSON-Schema dict that forces a structured response; in
    that case the reply text is JSON-decoded and the resulting object is
    returned. Otherwise the completion text is returned as-is.
    """
    args = {"prompt": prompt, "model": model}
    # Optional fields are only sent when the caller actually provided them.
    optional = {"system": system, "schema": schema}
    args.update({key: value for key, value in optional.items() if value is not None})

    reply = _bridge_call("__llm__", args)
    text = reply.get("text") if isinstance(reply, dict) else reply

    if schema is None:
        return text
    return json.loads(text)
|