@oh-my-pi/pi-coding-agent 15.5.12 → 15.5.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/CHANGELOG.md +46 -0
  2. package/dist/types/config/model-registry.d.ts +1 -1
  3. package/dist/types/config/models-config-schema.d.ts +2 -0
  4. package/dist/types/config/settings-schema.d.ts +1 -10
  5. package/dist/types/edit/file-snapshot-store.d.ts +19 -0
  6. package/dist/types/eval/__tests__/llm-bridge.test.d.ts +1 -0
  7. package/dist/types/eval/llm-bridge.d.ts +25 -0
  8. package/dist/types/export/html/template.generated.d.ts +1 -1
  9. package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +15 -0
  10. package/dist/types/modes/theme/theme.d.ts +2 -1
  11. package/dist/types/session/agent-session.d.ts +2 -0
  12. package/dist/types/tools/index.d.ts +0 -1
  13. package/package.json +8 -8
  14. package/src/config/model-registry.ts +89 -5
  15. package/src/config/models-config-schema.ts +1 -1
  16. package/src/config/settings-schema.ts +1 -10
  17. package/src/edit/file-snapshot-store.ts +34 -0
  18. package/src/edit/hashline/diff.ts +3 -8
  19. package/src/edit/renderer.ts +1 -1
  20. package/src/eval/__tests__/llm-bridge.test.ts +297 -0
  21. package/src/eval/js/shared/prelude.txt +8 -0
  22. package/src/eval/js/tool-bridge.ts +4 -0
  23. package/src/eval/llm-bridge.ts +181 -0
  24. package/src/eval/py/prelude.py +52 -31
  25. package/src/export/html/template.generated.ts +1 -1
  26. package/src/export/html/template.js +0 -13
  27. package/src/extensibility/plugins/legacy-pi-compat.ts +60 -23
  28. package/src/internal-urls/docs-index.generated.ts +4 -5
  29. package/src/main.ts +4 -0
  30. package/src/modes/components/model-selector.ts +119 -22
  31. package/src/modes/components/status-line/presets.ts +1 -0
  32. package/src/modes/components/status-line/segments.ts +23 -0
  33. package/src/modes/interactive-mode.ts +22 -87
  34. package/src/modes/theme/theme.ts +7 -0
  35. package/src/prompts/tools/eval.md +2 -0
  36. package/src/session/agent-session.ts +19 -0
  37. package/src/session/session-manager.ts +47 -0
  38. package/src/tools/ast-edit.ts +1 -1
  39. package/src/tools/ast-grep.ts +6 -17
  40. package/src/tools/eval.ts +24 -48
  41. package/src/tools/index.ts +0 -4
  42. package/src/tools/read.ts +23 -33
  43. package/src/tools/renderers.ts +0 -2
  44. package/src/tools/search.ts +12 -21
  45. package/src/tools/write.ts +1 -3
  46. package/src/utils/file-mentions.ts +1 -3
  47. package/dist/types/tools/calculator.d.ts +0 -77
  48. package/src/prompts/tools/calculator.md +0 -10
  49. package/src/tools/calculator.ts +0 -541
package/CHANGELOG.md CHANGED
@@ -2,6 +2,52 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.5.15] - 2026-05-30
6
+ ### Changed
7
+
8
+ - Enabled the agent loop's tool-call batch cap for Anthropic Claude sessions, cutting oversized streamed tool-use bursts into runnable batches before continuing the conversation.
9
+
10
+ ### Removed
11
+
12
+ - Removed the `calc` tool (deterministic arithmetic evaluator) and its `calc.enabled` setting. The model can compute via `eval` instead.
13
+
14
+ ### Fixed
15
+
16
+ - Fixed Anthropic Claude tool-call batching to clear and reapply the Claude-specific batch cap whenever the session model changes.
17
+
18
+ ## [15.5.14] - 2026-05-29
19
+ ### Added
20
+
21
+ - Added progress status output for `llm()` calls in `eval`, including the resolved model, tier, and returned character count
22
+ - Added an `llm(prompt, opts)` helper to both `eval` runtimes (JavaScript and Python) for oneshot, stateless LLM calls. `opts.model` selects a tier — `"smol"` (`pi/smol`), `"default"` (the session's active model, falling back to `pi/default`), or `"slow"` (`pi/slow`, with high reasoning effort on reasoning-capable models). Pass `system` for a system prompt and a plain JSON-Schema `schema` to force a structured response (the helper returns the parsed object instead of the completion string). Calls carry no conversation history and expose no agent-visible tools; they route host-side through the existing tool bridge under the reserved name `__llm__` (`packages/coding-agent/src/eval/llm-bridge.ts`).
23
+
24
+ ### Fixed
25
+
26
+ - Fixed a rewind/restore loop (and a follow-on handoff failure) caused by assistant turns whose tool results are off the resolved conversation path — e.g. selecting such a turn in `/tree`, restoring a session whose head is a mid-batch turn, or branching a new message in right after a turn whose tool calls hadn't resolved on that branch. `buildSessionContext` walks the leaf→root path, so any turn whose `tool_result` children live on a sibling branch (or below the leaf) ends up with **dangling** `tool_use` blocks. `transformMessages` then fabricated one synthetic `"aborted"`/`"No result provided"` result per dangling call plus a `<turn-aborted>` developer note, which both rendered as phantom failed calls on a turn that "hadn't run anything yet" and re-injected the failed batch into the model's context, prompting it to re-issue the batch (the spiral). `buildSessionContext` now rewrites **every** assistant turn on the resolved path that has dangling `tool_use`: it drops the unpaired `tool_use` blocks, drops `redacted_thinking` blocks, and clears `thinking` signatures (the provider encoder then emits them as plain text), dropping a turn entirely if no content remains. Turns whose tool calls *are* paired on the path are left untouched. Stripping the calls alone was insufficient — a *modified* assistant turn that still carried signed `thinking`/`redacted_thinking` was rejected by Anthropic with `messages.N.content.M: 'thinking' or 'redacted_thinking' blocks in the latest assistant message cannot be modified`, which surfaced as `Handoff generation failed: 400` on navigation. Live turns are unaffected — their results persist on the same path before any context rebuild.
27
+
28
+ - Fixed external extension loading on Windows compiled binaries: bare `@oh-my-pi/pi-*` value imports (e.g. `import { AssistantMessageEventStream } from "@oh-my-pi/pi-ai"`) failed with `Cannot find package '\$bunfs\root\packages\…'` because `legacy-pi-compat.ts` built shim override paths from a hardcoded POSIX `/$bunfs/root/packages` literal. Win32 normalised the leading slash to a backslash and the resulting path never resolved against the real bunfs mount (`<drive>:\~BUN\root\…`). The bunfs package root is now derived from `import.meta.dir`, so override paths stay platform-native on Windows, Linux, and macOS ([#1514](https://github.com/can1357/oh-my-pi/issues/1514)).
29
+ - Fixed the interactive prompt showing no cursor in Ghostty. A prior change wired the editor's cursor mode to a new `getUseTerminalCursorMarker()` (which always reported the *requested* preference) instead of the resolved hardware-cursor visibility, so when Ghostty force-hid the hardware cursor the editor stayed in terminal-cursor (marker-only) mode and drew no glyph — leaving no visible caret regardless of the `showHardwareCursor`/`PI_HARDWARE_CURSOR` value. The editor now follows `ui.getShowHardwareCursor()`: a hidden hardware cursor falls back to the steady software-cursor glyph (which still emits `CURSOR_MARKER` for IME positioning).
30
+
31
+ ### Changed
32
+
33
+ - Changed the `eval` tool's `display()` JSON tree in the transcript to use the shared `renderJsonTreeLines` renderer (the same one behind tool args, MCP results, and subagent output) instead of its own format. This drops the redundant `Object(N)` / `Array(N)` type labels and the per-output `JSON output N` header in favor of type icons plus bare keys; the `display[N]` header is now shown only when a cell emits more than one `display()` value.
34
+ - Reverted the sticky `Todos` panel task glyphs to the pre-15.5.12 checkbox icons: completed tasks render `theme.checkbox.checked` (not `theme.status.success`) and in-progress tasks render `theme.checkbox.unchecked` (not the running glyph). Removed the animated spinner entirely — in-progress tasks and pending tasks with a matching in-flight subagent still highlight via the `accent` colour, but the panel now paints once per state change instead of on an 80 ms timer. Subagent auto-checkmarking, the advancing window (`selectStickyTodoWindow`), `todoMatchesAnyDescription` highlighting, and the all-done close animation are unchanged.
35
+
36
+ ## [15.5.13] - 2026-05-29
37
+ ### Breaking Changes
38
+
39
+ - Changed hashline edit syntax to verb-based v4: body-bearing ops are `replace N..M:`, `insert before N:`, `insert after N:`, `insert head:`, and `insert tail:`, while bodyless `delete N..M` handles deletion. Removed `>A..B` repeat rows and the old `prepend:` / `append:` virtual insert headers; `-` rows remain rejected with a teaching error.
40
+
41
+ ### Changed
42
+
43
+ - Changed hashline tag generation to use full-file snapshots for read/search/ast-grep and related outputs, so hashline anchors now validate only when the complete file matches the content that was read.
44
+ - Changed hashline tagging to omit file headers for files that exceed 4 MiB or cannot be snapshotted, so those files are returned without editable hashline anchors.
45
+ - Changed hashline context generation for line edits from partial/sparse snippets to complete-file fingerprints, reducing stale anchors for partially read files
46
+
47
+ ### Fixed
48
+
49
+ - Restored automatic repair of `edit` range hunks that break bracket balance — the failure class that previously left a duplicated closing line (a `</>` / `);` / `}` echoed just below the range) or dropped one (the range swallowed a `});` the payload never restated), leaving the file syntactically broken until a follow-up edit. The hashline applier now normalizes each replacement so its payload preserves the deleted region's delimiter balance, dropping a duplicated bordering closer or sparing a deleted one, and surfaces a warning on the tool result. Always on and balance-validated (no `edit.hashlineAutoDropPureInsertDuplicates` setting); see `@oh-my-pi/hashline` for the contract.
50
+
5
51
  ## [15.5.12] - 2026-05-29
6
52
 
7
53
  ### Added
@@ -78,7 +78,7 @@ export declare const ModelsConfigFile: ConfigFile<{
78
78
  authHeader?: boolean | undefined;
79
79
  auth?: "apiKey" | "none" | "oauth" | undefined;
80
80
  discovery?: {
81
- type: "llama.cpp" | "lm-studio" | "ollama" | "openai-models-list";
81
+ type: "llama.cpp" | "lm-studio" | "ollama" | "openai-models-list" | "proxy";
82
82
  } | undefined;
83
83
  models?: {
84
84
  id: string;
@@ -170,6 +170,7 @@ export declare const ProviderDiscoverySchema: z.ZodObject<{
170
170
  "lm-studio": "lm-studio";
171
171
  ollama: "ollama";
172
172
  "openai-models-list": "openai-models-list";
173
+ proxy: "proxy";
173
174
  }>;
174
175
  }, z.core.$strip>;
175
176
  export declare const ProviderAuthSchema: z.ZodEnum<{
@@ -259,6 +260,7 @@ export declare const ModelsConfigSchema: z.ZodObject<{
259
260
  "lm-studio": "lm-studio";
260
261
  ollama: "ollama";
261
262
  "openai-models-list": "openai-models-list";
263
+ proxy: "proxy";
262
264
  }>;
263
265
  }, z.core.$strip>>;
264
266
  models: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -23,7 +23,7 @@ export declare const TAB_METADATA: Record<SettingTab, {
23
23
  icon: `tab.${string}`;
24
24
  }>;
25
25
  /** Status line segment identifiers */
26
- export type StatusLineSegmentId = "pi" | "model" | "mode" | "path" | "git" | "pr" | "subagents" | "token_in" | "token_out" | "token_total" | "token_rate" | "cost" | "context_pct" | "context_total" | "time_spent" | "time" | "session" | "hostname" | "cache_read" | "cache_write" | "session_name" | "usage";
26
+ export type StatusLineSegmentId = "pi" | "model" | "mode" | "path" | "git" | "pr" | "subagents" | "token_in" | "token_out" | "token_total" | "token_rate" | "cost" | "context_pct" | "context_total" | "time_spent" | "time" | "session" | "hostname" | "cache_read" | "cache_write" | "cache_hit" | "session_name" | "usage";
27
27
  /** Submenu choice metadata. */
28
28
  export type SubmenuOption<V extends string = string> = {
29
29
  value: V;
@@ -2375,15 +2375,6 @@ export declare const SETTINGS_SCHEMA: {
2375
2375
  readonly description: "Enable the debug tool for DAP-based debugging";
2376
2376
  };
2377
2377
  };
2378
- readonly "calc.enabled": {
2379
- readonly type: "boolean";
2380
- readonly default: false;
2381
- readonly ui: {
2382
- readonly tab: "tools";
2383
- readonly label: "Calculator";
2384
- readonly description: "Enable the calculator tool for basic calculations";
2385
- };
2386
- };
2387
2378
  readonly "tts.enabled": {
2388
2379
  readonly type: "boolean";
2389
2380
  readonly default: false;
@@ -9,6 +9,13 @@
9
9
  * is wiring it onto the per-session owner object.
10
10
  */
11
11
  import { InMemorySnapshotStore } from "@oh-my-pi/hashline";
12
+ /**
13
+ * Upper bound on the file size we snapshot. A section tag is a content hash of
14
+ * the *whole* file, so minting one means holding the full normalized text in
15
+ * the store. Files above this cap emit no `¶path#tag` header — line-anchored
16
+ * editing of multi-megabyte files is out of scope under the full-content model.
17
+ */
18
+ export declare const SNAPSHOT_MAX_BYTES: number;
12
19
  interface FileSnapshotStoreOwner {
13
20
  fileSnapshotStore?: InMemorySnapshotStore;
14
21
  }
@@ -18,4 +25,16 @@ interface FileSnapshotStoreOwner {
18
25
  * the session itself.
19
26
  */
20
27
  export declare function getFileSnapshotStore(session: FileSnapshotStoreOwner): InMemorySnapshotStore;
28
+ /**
29
+ * Read the full text of `absolutePath` (within {@link SNAPSHOT_MAX_BYTES}),
30
+ * record it as a version snapshot, and return its content-hash tag. Returns
31
+ * `undefined` when the file exceeds the cap or cannot be read — callers then
32
+ * omit the section header so the model never sees a tag it can't anchor against.
33
+ *
34
+ * Producers that only displayed a slice of the file (range reads, search hits)
35
+ * use this to mint a whole-file tag: the displayed lines stay partial, but the
36
+ * tag fingerprints the entire file so a follow-up edit anchored at any line
37
+ * validates whenever the live file is byte-identical to what was read.
38
+ */
39
+ export declare function recordFileSnapshot(session: FileSnapshotStoreOwner, absolutePath: string): Promise<string | undefined>;
21
40
  export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,25 @@
1
+ import type { ToolSession } from "../tools";
2
+ import type { JsStatusEvent } from "./js/shared/types";
3
+ /** Synthetic bridge name reserved for the `llm()` helper across both runtimes. */
4
+ export declare const EVAL_LLM_BRIDGE_NAME = "__llm__";
5
+ type LlmTier = "smol" | "default" | "slow";
6
+ export interface EvalLlmBridgeOptions {
7
+ session: ToolSession;
8
+ signal?: AbortSignal;
9
+ emitStatus?: (event: JsStatusEvent) => void;
10
+ }
11
+ export interface EvalLlmResult {
12
+ text: string;
13
+ details: {
14
+ model: string;
15
+ tier: LlmTier;
16
+ structured: boolean;
17
+ };
18
+ }
19
+ /**
20
+ * Run a single stateless completion on behalf of an eval cell's `llm()` call.
21
+ * Returns a `{ text, details }` value shaped like a {@link callSessionTool}
22
+ * result so the existing bridge transport carries it to either runtime.
23
+ */
24
+ export declare function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions): Promise<EvalLlmResult>;
25
+ export {};