@oh-my-pi/pi-coding-agent 15.5.13 → 15.5.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/dist/types/config/model-registry.d.ts +1 -1
- package/dist/types/config/models-config-schema.d.ts +2 -0
- package/dist/types/config/settings-schema.d.ts +1 -10
- package/dist/types/eval/__tests__/llm-bridge.test.d.ts +1 -0
- package/dist/types/eval/llm-bridge.d.ts +25 -0
- package/dist/types/export/html/template.generated.d.ts +1 -1
- package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +15 -0
- package/dist/types/modes/theme/theme.d.ts +2 -1
- package/dist/types/session/agent-session.d.ts +2 -0
- package/dist/types/tools/index.d.ts +0 -1
- package/package.json +8 -8
- package/src/config/model-registry.ts +89 -5
- package/src/config/models-config-schema.ts +1 -1
- package/src/config/settings-schema.ts +1 -10
- package/src/eval/__tests__/llm-bridge.test.ts +297 -0
- package/src/eval/js/shared/prelude.txt +8 -0
- package/src/eval/js/tool-bridge.ts +4 -0
- package/src/eval/llm-bridge.ts +181 -0
- package/src/eval/py/prelude.py +52 -31
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +0 -13
- package/src/extensibility/plugins/legacy-pi-compat.ts +60 -23
- package/src/internal-urls/docs-index.generated.ts +3 -4
- package/src/main.ts +4 -0
- package/src/modes/components/model-selector.ts +119 -22
- package/src/modes/components/status-line/presets.ts +1 -0
- package/src/modes/components/status-line/segments.ts +23 -0
- package/src/modes/interactive-mode.ts +22 -87
- package/src/modes/theme/theme.ts +7 -0
- package/src/prompts/tools/eval.md +2 -0
- package/src/session/agent-session.ts +19 -0
- package/src/session/session-manager.ts +47 -0
- package/src/tools/eval.ts +24 -48
- package/src/tools/index.ts +0 -4
- package/src/tools/renderers.ts +0 -2
- package/dist/types/tools/calculator.d.ts +0 -77
- package/src/prompts/tools/calculator.md +0 -10
- package/src/tools/calculator.ts +0 -541
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,37 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.5.15] - 2026-05-30
|
|
6
|
+
### Changed
|
|
7
|
+
|
|
8
|
+
- Enabled the agent loop's tool-call batch cap for Anthropic Claude sessions, cutting oversized streamed tool-use bursts into runnable batches before continuing the conversation.
|
|
9
|
+
|
|
10
|
+
### Removed
|
|
11
|
+
|
|
12
|
+
- Removed the `calc` tool (deterministic arithmetic evaluator) and its `calc.enabled` setting. The model can compute via `eval` instead.
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
|
|
16
|
+
- Fixed Anthropic Claude tool-call batching to clear and reapply the Claude-specific batch cap whenever the session model changes.
|
|
17
|
+
|
|
18
|
+
## [15.5.14] - 2026-05-29
|
|
19
|
+
### Added
|
|
20
|
+
|
|
21
|
+
- Added progress status output for `llm()` calls in `eval`, including the resolved model, tier, and returned character count.
|
|
22
|
+
- Added an `llm(prompt, opts)` helper to both `eval` runtimes (JavaScript and Python) for one-shot, stateless LLM calls. `opts.model` selects a tier — `"smol"` (`pi/smol`), `"default"` (the session's active model, falling back to `pi/default`), or `"slow"` (`pi/slow`, with high reasoning effort on reasoning-capable models). Pass `system` for a system prompt and a plain JSON-Schema `schema` to force a structured response (the helper returns the parsed object instead of the completion string). Calls carry no conversation history and expose no agent-visible tools; they route host-side through the existing tool bridge under the reserved name `__llm__` (`packages/coding-agent/src/eval/llm-bridge.ts`).
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
|
|
26
|
+
- Fixed a rewind/restore loop (and a follow-on handoff failure) caused by assistant turns whose tool results are off the resolved conversation path — e.g. selecting such a turn in `/tree`, restoring a session whose head is a mid-batch turn, or branching a new message in right after a turn whose tool calls hadn't resolved on that branch. `buildSessionContext` walks the leaf→root path, so any turn whose `tool_result` children live on a sibling branch (or below the leaf) ends up with **dangling** `tool_use` blocks. `transformMessages` then fabricated one synthetic `"aborted"`/`"No result provided"` result per dangling call plus a `<turn-aborted>` developer note, which both rendered as phantom failed calls on a turn that "hadn't run anything yet" and re-injected the failed batch into the model's context, prompting it to re-issue the batch (the spiral). `buildSessionContext` now rewrites **every** assistant turn on the resolved path that has dangling `tool_use`: it drops the unpaired `tool_use` blocks, drops `redacted_thinking` blocks, and clears `thinking` signatures (the provider encoder then emits them as plain text), dropping a turn entirely if no content remains. Turns whose tool calls *are* paired on the path are left untouched. Stripping the calls alone was insufficient — a *modified* assistant turn that still carried signed `thinking`/`redacted_thinking` was rejected by Anthropic with `messages.N.content.M: 'thinking' or 'redacted_thinking' blocks in the latest assistant message cannot be modified`, which surfaced as `Handoff generation failed: 400` on navigation. Live turns are unaffected — their results persist on the same path before any context rebuild.
|
|
27
|
+
|
|
28
|
+
- Fixed external extension loading on Windows compiled binaries: bare `@oh-my-pi/pi-*` value imports (e.g. `import { AssistantMessageEventStream } from "@oh-my-pi/pi-ai"`) failed with `Cannot find package '\$bunfs\root\packages\…'` because `legacy-pi-compat.ts` built shim override paths from a hardcoded POSIX `/$bunfs/root/packages` literal. Win32 normalised the leading slash to a backslash and the resulting path never resolved against the real bunfs mount (`<drive>:\~BUN\root\…`). The bunfs package root is now derived from `import.meta.dir`, so override paths stay platform-native on Windows, Linux, and macOS ([#1514](https://github.com/can1357/oh-my-pi/issues/1514)).
|
|
29
|
+
- Fixed the interactive prompt showing no cursor in Ghostty. A prior change wired the editor's cursor mode to a new `getUseTerminalCursorMarker()` (which always reported the *requested* preference) instead of the resolved hardware-cursor visibility, so when Ghostty force-hid the hardware cursor the editor stayed in terminal-cursor (marker-only) mode and drew no glyph — leaving no visible caret with either `showHardwareCursor`/`PI_HARDWARE_CURSOR` value. The editor now follows `ui.getShowHardwareCursor()`: a hidden hardware cursor falls back to the steady software-cursor glyph (which still emits `CURSOR_MARKER` for IME positioning).
|
|
30
|
+
|
|
31
|
+
### Changed
|
|
32
|
+
|
|
33
|
+
- Changed the `eval` tool's `display()` JSON tree in the transcript to use the shared `renderJsonTreeLines` renderer (the same one behind tool args, MCP results, and subagent output) instead of its own format. This drops the redundant `Object(N)` / `Array(N)` type labels and the per-output `JSON output N` header in favor of type icons plus bare keys; the `display[N]` header is now shown only when a cell emits more than one `display()` value.
|
|
34
|
+
- Reverted the sticky `Todos` panel task glyphs to the pre-15.5.12 checkbox icons: completed tasks render `theme.checkbox.checked` (not `theme.status.success`) and in-progress tasks render `theme.checkbox.unchecked` (not the running glyph). Removed the animated spinner entirely — in-progress tasks and pending tasks with a matching in-flight subagent still highlight via the `accent` colour, but the panel now paints once per state change instead of on an 80 ms timer. Subagent auto-checkmarking, the advancing window (`selectStickyTodoWindow`), `todoMatchesAnyDescription` highlighting, and the all-done close animation are unchanged.
|
|
35
|
+
|
|
5
36
|
## [15.5.13] - 2026-05-29
|
|
6
37
|
### Breaking Changes
|
|
7
38
|
|
|
@@ -78,7 +78,7 @@ export declare const ModelsConfigFile: ConfigFile<{
|
|
|
78
78
|
authHeader?: boolean | undefined;
|
|
79
79
|
auth?: "apiKey" | "none" | "oauth" | undefined;
|
|
80
80
|
discovery?: {
|
|
81
|
-
type: "llama.cpp" | "lm-studio" | "ollama" | "openai-models-list";
|
|
81
|
+
type: "llama.cpp" | "lm-studio" | "ollama" | "openai-models-list" | "proxy";
|
|
82
82
|
} | undefined;
|
|
83
83
|
models?: {
|
|
84
84
|
id: string;
|
|
@@ -170,6 +170,7 @@ export declare const ProviderDiscoverySchema: z.ZodObject<{
|
|
|
170
170
|
"lm-studio": "lm-studio";
|
|
171
171
|
ollama: "ollama";
|
|
172
172
|
"openai-models-list": "openai-models-list";
|
|
173
|
+
proxy: "proxy";
|
|
173
174
|
}>;
|
|
174
175
|
}, z.core.$strip>;
|
|
175
176
|
export declare const ProviderAuthSchema: z.ZodEnum<{
|
|
@@ -259,6 +260,7 @@ export declare const ModelsConfigSchema: z.ZodObject<{
|
|
|
259
260
|
"lm-studio": "lm-studio";
|
|
260
261
|
ollama: "ollama";
|
|
261
262
|
"openai-models-list": "openai-models-list";
|
|
263
|
+
proxy: "proxy";
|
|
262
264
|
}>;
|
|
263
265
|
}, z.core.$strip>>;
|
|
264
266
|
models: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -23,7 +23,7 @@ export declare const TAB_METADATA: Record<SettingTab, {
|
|
|
23
23
|
icon: `tab.${string}`;
|
|
24
24
|
}>;
|
|
25
25
|
/** Status line segment identifiers */
|
|
26
|
-
export type StatusLineSegmentId = "pi" | "model" | "mode" | "path" | "git" | "pr" | "subagents" | "token_in" | "token_out" | "token_total" | "token_rate" | "cost" | "context_pct" | "context_total" | "time_spent" | "time" | "session" | "hostname" | "cache_read" | "cache_write" | "session_name" | "usage";
|
|
26
|
+
export type StatusLineSegmentId = "pi" | "model" | "mode" | "path" | "git" | "pr" | "subagents" | "token_in" | "token_out" | "token_total" | "token_rate" | "cost" | "context_pct" | "context_total" | "time_spent" | "time" | "session" | "hostname" | "cache_read" | "cache_write" | "cache_hit" | "session_name" | "usage";
|
|
27
27
|
/** Submenu choice metadata. */
|
|
28
28
|
export type SubmenuOption<V extends string = string> = {
|
|
29
29
|
value: V;
|
|
@@ -2375,15 +2375,6 @@ export declare const SETTINGS_SCHEMA: {
|
|
|
2375
2375
|
readonly description: "Enable the debug tool for DAP-based debugging";
|
|
2376
2376
|
};
|
|
2377
2377
|
};
|
|
2378
|
-
readonly "calc.enabled": {
|
|
2379
|
-
readonly type: "boolean";
|
|
2380
|
-
readonly default: false;
|
|
2381
|
-
readonly ui: {
|
|
2382
|
-
readonly tab: "tools";
|
|
2383
|
-
readonly label: "Calculator";
|
|
2384
|
-
readonly description: "Enable the calculator tool for basic calculations";
|
|
2385
|
-
};
|
|
2386
|
-
};
|
|
2387
2378
|
readonly "tts.enabled": {
|
|
2388
2379
|
readonly type: "boolean";
|
|
2389
2380
|
readonly default: false;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { ToolSession } from "../tools";
|
|
2
|
+
import type { JsStatusEvent } from "./js/shared/types";
|
|
3
|
+
/** Synthetic bridge name reserved for the `llm()` helper across both runtimes. */
|
|
4
|
+
export declare const EVAL_LLM_BRIDGE_NAME = "__llm__";
|
|
5
|
+
type LlmTier = "smol" | "default" | "slow";
|
|
6
|
+
export interface EvalLlmBridgeOptions {
|
|
7
|
+
session: ToolSession;
|
|
8
|
+
signal?: AbortSignal;
|
|
9
|
+
emitStatus?: (event: JsStatusEvent) => void;
|
|
10
|
+
}
|
|
11
|
+
export interface EvalLlmResult {
|
|
12
|
+
text: string;
|
|
13
|
+
details: {
|
|
14
|
+
model: string;
|
|
15
|
+
tier: LlmTier;
|
|
16
|
+
structured: boolean;
|
|
17
|
+
};
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Run a single stateless completion on behalf of an eval cell's `llm()` call.
|
|
21
|
+
* Returns a `{ text, details }` value shaped like a {@link callSessionTool}
|
|
22
|
+
* result so the existing bridge transport carries it to either runtime.
|
|
23
|
+
*/
|
|
24
|
+
export declare function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions): Promise<EvalLlmResult>;
|
|
25
|
+
export {};
|