@oh-my-pi/pi-coding-agent 15.5.13 → 15.5.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/CHANGELOG.md +31 -0
  2. package/dist/types/config/model-registry.d.ts +1 -1
  3. package/dist/types/config/models-config-schema.d.ts +2 -0
  4. package/dist/types/config/settings-schema.d.ts +1 -10
  5. package/dist/types/eval/__tests__/llm-bridge.test.d.ts +1 -0
  6. package/dist/types/eval/llm-bridge.d.ts +25 -0
  7. package/dist/types/export/html/template.generated.d.ts +1 -1
  8. package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +15 -0
  9. package/dist/types/modes/theme/theme.d.ts +2 -1
  10. package/dist/types/session/agent-session.d.ts +2 -0
  11. package/dist/types/tools/index.d.ts +0 -1
  12. package/package.json +8 -8
  13. package/src/config/model-registry.ts +89 -5
  14. package/src/config/models-config-schema.ts +1 -1
  15. package/src/config/settings-schema.ts +1 -10
  16. package/src/eval/__tests__/llm-bridge.test.ts +297 -0
  17. package/src/eval/js/shared/prelude.txt +8 -0
  18. package/src/eval/js/tool-bridge.ts +4 -0
  19. package/src/eval/llm-bridge.ts +181 -0
  20. package/src/eval/py/prelude.py +52 -31
  21. package/src/export/html/template.generated.ts +1 -1
  22. package/src/export/html/template.js +0 -13
  23. package/src/extensibility/plugins/legacy-pi-compat.ts +60 -23
  24. package/src/internal-urls/docs-index.generated.ts +3 -4
  25. package/src/main.ts +4 -0
  26. package/src/modes/components/model-selector.ts +119 -22
  27. package/src/modes/components/status-line/presets.ts +1 -0
  28. package/src/modes/components/status-line/segments.ts +23 -0
  29. package/src/modes/interactive-mode.ts +22 -87
  30. package/src/modes/theme/theme.ts +7 -0
  31. package/src/prompts/tools/eval.md +2 -0
  32. package/src/session/agent-session.ts +19 -0
  33. package/src/session/session-manager.ts +47 -0
  34. package/src/tools/eval.ts +24 -48
  35. package/src/tools/index.ts +0 -4
  36. package/src/tools/renderers.ts +0 -2
  37. package/dist/types/tools/calculator.d.ts +0 -77
  38. package/src/prompts/tools/calculator.md +0 -10
  39. package/src/tools/calculator.ts +0 -541
@@ -0,0 +1,181 @@
1
+ /**
2
+ * Host-side handler for the eval `llm()` helper.
3
+ *
4
+ * Both eval runtimes (JS worker + Python kernel) route helper→host calls
5
+ * through {@link callSessionTool}. Reserving the synthetic tool name
6
+ * {@link EVAL_LLM_BRIDGE_NAME} lets a single host handler serve both
7
+ * transports without registering an agent-visible tool: cell code calls
8
+ * `llm(prompt, opts)`, the prelude forwards `{ prompt, model, system?, schema? }`
9
+ * through the bridge, and this module performs one stateless completion.
10
+ *
11
+ * The call is oneshot and toolless from the model's perspective — pure text
12
+ * in, text (or, with `schema`, a structured object) out.
13
+ */
14
+ import { instrumentedCompleteSimple, resolveTelemetry } from "@oh-my-pi/pi-agent-core";
15
+ import { type Api, Effort, getSupportedEfforts, type Model, type Tool } from "@oh-my-pi/pi-ai";
16
+ import * as z from "zod/v4";
17
+ import { extractTextContent, extractToolCall, parseJsonPayload } from "../commit/utils";
18
+ import { expandRoleAlias, formatModelString, resolveModelFromString } from "../config/model-resolver";
19
+ import type { ToolSession } from "../tools";
20
+ import { ToolError } from "../tools/tool-errors";
21
+ import type { JsStatusEvent } from "./js/shared/types";
22
+
23
/** Bridge name reserved for the `llm()` helper; shared by both eval runtimes. */
export const EVAL_LLM_BRIDGE_NAME = "__llm__";

/** Name of the synthetic tool the model is forced to call when a `schema` is supplied. */
const STRUCTURED_TOOL_NAME = "respond";

/** Tiers accepted by `llm()`; the single source for both the type and the validator. */
const LLM_TIERS = ["smol", "default", "slow"] as const;

type LlmTier = (typeof LLM_TIERS)[number];

/** Role pattern that each tier is resolved through. */
const TIER_TO_PATTERN: Record<LlmTier, string> = {
	smol: "pi/smol",
	default: "pi/default",
	slow: "pi/slow",
};

/**
 * Validator for the `{ prompt, model, system?, schema? }` payload received
 * over the bridge. `model` falls back to the `default` tier when omitted.
 */
const llmArgsSchema = z.object({
	prompt: z.string().min(1, "prompt must be a non-empty string"),
	model: z.enum(LLM_TIERS).default("default"),
	system: z.string().optional(),
	schema: z.record(z.string(), z.unknown()).optional(),
});

/** Host-side context handed to {@link runEvalLlm}. */
export interface EvalLlmBridgeOptions {
	/** Session used to resolve models, API keys and telemetry. */
	session: ToolSession;
	/** Abort signal forwarded to the completion request. */
	signal?: AbortSignal;
	/** Optional sink that receives a status event once a completion succeeds. */
	emitStatus?: (event: JsStatusEvent) => void;
}

/** Value returned to the calling runtime by {@link runEvalLlm}. */
export interface EvalLlmResult {
	/** Completion text, or the JSON-serialized structured value when a schema was supplied. */
	text: string;
	/** Which model and tier served the call, and whether a schema was in effect. */
	details: { model: string; tier: LlmTier; structured: boolean };
}
54
+
55
/**
 * Map a tier onto a concrete {@link Model}.
 *
 * `smol` and `slow` are looked up through their role patterns. `default`
 * first tries the session's active model string (falling back to its
 * configured model string) and only then the `pi/default` role.
 *
 * @returns The matching model, or `undefined` when the session has no model
 *   registry, the registry reports no available models, or nothing matches.
 */
function resolveTierModel(tier: LlmTier, session: ToolSession): Model<Api> | undefined {
	const registry = session.modelRegistry;
	if (!registry) return undefined;

	const candidates = registry.getAvailable();
	if (candidates.length === 0) return undefined;

	const preferences = { usageOrder: session.settings.getStorage()?.getModelUsageOrder() };

	// Expand role aliases against the session settings before matching.
	const lookup = (pattern: string | undefined): Model<Api> | undefined =>
		pattern
			? resolveModelFromString(expandRoleAlias(pattern, session.settings), candidates, preferences, registry)
			: undefined;

	if (tier !== "default") {
		return lookup(TIER_TO_PATTERN[tier]);
	}

	const active = session.getActiveModelString?.() ?? session.getModelString?.();
	return lookup(active) ?? lookup(TIER_TO_PATTERN.default);
}
79
+
80
/**
 * Pick the reasoning effort to request for a tier.
 *
 * Thinking is enabled only for the `slow` tier and only when the model is
 * flagged as reasoning-capable, so that `requireSupportedEffort` does not
 * throw downstream for models that cannot reason. `Effort.High` is used when
 * supported; otherwise the last entry reported by `getSupportedEfforts` is
 * used (presumably the highest — confirm the list is ordered ascending).
 *
 * @returns The effort to request, or `undefined` to leave reasoning off.
 */
function reasoningForTier(tier: LlmTier, model: Model<Api>): Effort | undefined {
	const wantsThinking = tier === "slow" && Boolean(model.reasoning);
	if (!wantsThinking) return undefined;

	const supported = getSupportedEfforts(model);
	if (supported.includes(Effort.High)) return Effort.High;

	// An empty list yields `undefined` here, i.e. no reasoning requested.
	return supported[supported.length - 1];
}
92
+
93
/**
 * Run a single stateless completion on behalf of an eval cell's `llm()` call.
 *
 * The result is shaped like a {@link callSessionTool} result (`{ text, details }`)
 * so the existing bridge transport can carry it to either runtime. When a
 * `schema` is supplied, the model is forced to call the synthetic
 * `respond` tool and `text` holds the JSON-serialized structured value;
 * otherwise `text` is the plain completion text.
 *
 * @param args - Untrusted payload from the prelude; validated against `llmArgsSchema`.
 * @param options - Session, optional abort signal and optional status sink.
 * @returns The completion text plus details about the model, tier and mode used.
 * @throws ToolError when the arguments are invalid, no model or API key can be
 *   resolved, the request errors or is aborted, or no usable output is returned.
 */
export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions): Promise<EvalLlmResult> {
	const parsed = llmArgsSchema.safeParse(args);
	if (!parsed.success) {
		// Surface only the first issue, prefixed with its path when there is one.
		const issue = parsed.error.issues[0];
		const where = issue?.path.length ? `${issue.path.join(".")}: ` : "";
		throw new ToolError(`llm() received invalid arguments: ${where}${issue?.message ?? "bad input"}`);
	}
	const { prompt, model: tier, system, schema } = parsed.data;

	const model = resolveTierModel(tier, options.session);
	if (!model) {
		throw new ToolError(
			`llm() could not resolve a model for the "${tier}" tier. Configure modelRoles.${tier} or ensure a provider is available.`,
		);
	}
	const modelLabel = formatModelString(model);

	const apiKey = await options.session.modelRegistry?.getApiKey(model);
	if (!apiKey) {
		throw new ToolError(
			`llm() has no API key for ${modelLabel}. Configure credentials for this provider or choose another tier.`,
		);
	}

	// With a schema, expose exactly one tool and force the model to call it.
	const tools: Tool[] | undefined = schema
		? [
				{
					name: STRUCTURED_TOOL_NAME,
					description: "Return your answer by calling this tool with the requested structured fields.",
					parameters: schema,
					strict: false,
				},
			]
		: undefined;

	const telemetry = resolveTelemetry(options.session.getTelemetry?.(), options.session.getSessionId?.() ?? undefined);

	const response = await instrumentedCompleteSimple(
		model,
		{
			systemPrompt: system ? [system] : undefined,
			messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }],
			tools,
		},
		{
			apiKey,
			signal: options.signal,
			reasoning: reasoningForTier(tier, model),
			toolChoice: schema ? { type: "tool", name: STRUCTURED_TOOL_NAME } : undefined,
		},
		{ telemetry, oneshotKind: "eval_llm" },
	);

	if (response.stopReason === "error") {
		throw new ToolError(response.errorMessage ?? "llm() request failed.");
	}
	if (response.stopReason === "aborted") {
		throw new ToolError("llm() request aborted.");
	}

	let resultText: string;
	if (schema) {
		const call = extractToolCall(response, STRUCTURED_TOOL_NAME);
		let value: unknown;
		if (call) {
			value = call.arguments;
		} else {
			// The model ignored the forced tool call; fall back to parsing JSON out of its text.
			const text = extractTextContent(response);
			if (!text) throw new ToolError("llm() returned no structured response.");
			try {
				value = parseJsonPayload(text);
			} catch {
				throw new ToolError("llm() did not return a structured response matching the schema.");
			}
		}
		// JSON.stringify yields `undefined` (not a string) for values such as `undefined`;
		// report that as a ToolError instead of crashing on `.length` below.
		const serialized: string | undefined = JSON.stringify(value);
		if (serialized === undefined) throw new ToolError("llm() returned no structured response.");
		resultText = serialized;
	} else {
		resultText = extractTextContent(response);
		if (!resultText) throw new ToolError("llm() returned no text output.");
	}

	options.emitStatus?.({ op: "llm", model: modelLabel, tier, chars: resultText.length });

	return { text: resultText, details: { model: modelLabel, tier, structured: Boolean(schema) } };
}
@@ -385,6 +385,40 @@ if "__omp_prelude_loaded__" not in globals():
385
385
  raise RuntimeError("tool bridge is unavailable in this kernel")
386
386
  return (base.rstrip("/"), token, session)
387
387
 
388
def _bridge_call(name: str, args: dict):
    """POST one request to the host tool bridge and return its `value`.

    The request body is the JSON object `{"session", "run", "name", "args"}`,
    sent to `<base>/v1/tool` with a bearer token. The run id comes from
    `__omp_current_run_id__()` when that global is callable, else from the
    `__omp_run_id__` global.

    An HTTP error status is not treated as a transport failure: its body is
    read and interpreted like a normal response, so the host's JSON error
    message is what gets reported.

    Raises:
        RuntimeError: if the response body is not valid JSON, or the decoded
            response is not a dict with a truthy `ok` field (the host's
            `error` message is used when present).
    """
    import urllib.request, urllib.error
    base, token, session = _tool_proxy_from_env()
    _run_id_getter = globals().get("__omp_current_run_id__")
    _run_id = _run_id_getter() if callable(_run_id_getter) else globals().get("__omp_run_id__")
    payload = json.dumps(
        {"session": session, "run": _run_id, "name": name, "args": args}
    ).encode("utf-8")
    req = urllib.request.Request(
        f"{base}/v1/tool",
        data=payload,
        method="POST",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
        },
    )
    try:
        with urllib.request.urlopen(req) as resp:
            body = resp.read()
    except urllib.error.HTTPError as exc:
        body = exc.read()
    try:
        data = json.loads(body)
    except ValueError:
        # ValueError covers both json.JSONDecodeError and the UnicodeDecodeError
        # that json.loads raises for bytes it cannot decode, so any garbage
        # body is reported uniformly as a non-JSON response.
        raise RuntimeError(
            f"bridge call {name!r}: non-JSON response: {body[:200]!r}"
        ) from None
    if not isinstance(data, dict) or not data.get("ok"):
        msg = (data or {}).get("error") if isinstance(data, dict) else None
        raise RuntimeError(msg or f"bridge call {name!r} failed")
    return data.get("value")
421
+
388
422
  class _ToolCallable:
389
423
  """Invokes one host-side tool via the loopback HTTP bridge."""
390
424
 
@@ -397,7 +431,6 @@ if "__omp_prelude_loaded__" not in globals():
397
431
  return f"<tool.{self._name}>"
398
432
 
399
433
  def __call__(self, args=None, /, **kwargs):
400
- import urllib.request, urllib.error
401
434
  if args is None:
402
435
  merged: dict = {}
403
436
  elif isinstance(args, dict):
@@ -409,36 +442,7 @@ if "__omp_prelude_loaded__" not in globals():
409
442
  merged.update(kwargs)
410
443
  if "_i" not in merged:
411
444
  merged["_i"] = "py prelude"
412
- base, token, session = _tool_proxy_from_env()
413
- _run_id_getter = globals().get("__omp_current_run_id__")
414
- _run_id = _run_id_getter() if callable(_run_id_getter) else globals().get("__omp_run_id__")
415
- payload = json.dumps(
416
- {"session": session, "run": _run_id, "name": self._name, "args": merged}
417
- ).encode("utf-8")
418
- req = urllib.request.Request(
419
- f"{base}/v1/tool",
420
- data=payload,
421
- method="POST",
422
- headers={
423
- "Content-Type": "application/json",
424
- "Authorization": f"Bearer {token}",
425
- },
426
- )
427
- try:
428
- with urllib.request.urlopen(req) as resp:
429
- body = resp.read()
430
- except urllib.error.HTTPError as exc:
431
- body = exc.read()
432
- try:
433
- data = json.loads(body)
434
- except json.JSONDecodeError:
435
- raise RuntimeError(
436
- f"tool.{self._name}: bridge returned non-JSON response: {body[:200]!r}"
437
- ) from None
438
- if not isinstance(data, dict) or not data.get("ok"):
439
- msg = (data or {}).get("error") if isinstance(data, dict) else None
440
- raise RuntimeError(msg or f"tool.{self._name} failed")
441
- return data.get("value")
445
+ return _bridge_call(self._name, merged)
442
446
 
443
447
  class _ToolProxy:
444
448
  """`tool.<name>(args)` proxy mirroring the JS runtime bridge."""
@@ -458,3 +462,20 @@ if "__omp_prelude_loaded__" not in globals():
458
462
  return f"<tool proxy session={session}>" if session else "<tool proxy unavailable>"
459
463
 
460
464
  tool = _ToolProxy()
465
+
466
def llm(prompt, *, model="default", system=None, schema=None):
    """Oneshot, stateless LLM call against a model tier.

    `model` selects a tier: "smol", "default" (the session's active model),
    or "slow". Pass `system` for a system prompt. Pass a JSON-Schema dict
    as `schema` to force a structured response; the parsed object is then
    returned instead of the completion text.

    Raises:
        RuntimeError: if the bridge call fails, or if `schema` was given but
            the bridge result carries no textual payload to decode.
    """
    args = {"prompt": prompt, "model": model}
    if system is not None:
        args["system"] = system
    if schema is not None:
        args["schema"] = schema
    res = _bridge_call("__llm__", args)
    text = res.get("text") if isinstance(res, dict) else res
    if schema is None:
        return text
    # Without this guard a missing payload surfaces as an opaque TypeError
    # from json.loads(None); report it in the same style as other bridge errors.
    if not isinstance(text, (str, bytes, bytearray)):
        raise RuntimeError("llm(): bridge returned no structured payload to decode")
    return json.loads(text)