@oh-my-pi/pi-coding-agent 15.5.13 → 15.5.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/CHANGELOG.md +31 -0
  2. package/dist/types/config/model-registry.d.ts +1 -1
  3. package/dist/types/config/models-config-schema.d.ts +2 -0
  4. package/dist/types/config/settings-schema.d.ts +1 -10
  5. package/dist/types/eval/__tests__/llm-bridge.test.d.ts +1 -0
  6. package/dist/types/eval/llm-bridge.d.ts +25 -0
  7. package/dist/types/export/html/template.generated.d.ts +1 -1
  8. package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +15 -0
  9. package/dist/types/modes/theme/theme.d.ts +2 -1
  10. package/dist/types/session/agent-session.d.ts +2 -0
  11. package/dist/types/tools/index.d.ts +0 -1
  12. package/package.json +8 -8
  13. package/src/config/model-registry.ts +89 -5
  14. package/src/config/models-config-schema.ts +1 -1
  15. package/src/config/settings-schema.ts +1 -10
  16. package/src/eval/__tests__/llm-bridge.test.ts +297 -0
  17. package/src/eval/js/shared/prelude.txt +8 -0
  18. package/src/eval/js/tool-bridge.ts +4 -0
  19. package/src/eval/llm-bridge.ts +181 -0
  20. package/src/eval/py/prelude.py +52 -31
  21. package/src/export/html/template.generated.ts +1 -1
  22. package/src/export/html/template.js +0 -13
  23. package/src/extensibility/plugins/legacy-pi-compat.ts +60 -23
  24. package/src/internal-urls/docs-index.generated.ts +3 -4
  25. package/src/main.ts +4 -0
  26. package/src/modes/components/model-selector.ts +119 -22
  27. package/src/modes/components/status-line/presets.ts +1 -0
  28. package/src/modes/components/status-line/segments.ts +23 -0
  29. package/src/modes/interactive-mode.ts +22 -87
  30. package/src/modes/theme/theme.ts +7 -0
  31. package/src/prompts/tools/eval.md +2 -0
  32. package/src/session/agent-session.ts +19 -0
  33. package/src/session/session-manager.ts +47 -0
  34. package/src/tools/eval.ts +24 -48
  35. package/src/tools/index.ts +0 -4
  36. package/src/tools/renderers.ts +0 -2
  37. package/dist/types/tools/calculator.d.ts +0 -77
  38. package/src/prompts/tools/calculator.md +0 -10
  39. package/src/tools/calculator.ts +0 -541
package/src/session/session-manager.ts CHANGED
@@ -702,6 +702,53 @@ export function buildSessionContext(
702
702
  }
703
703
  }
704
704
 
705
+ // Strip dangling tool_use blocks — a tool_use with no matching tool_result on the
706
+ // resolved leaf→root path — from ANY assistant turn, not just the trailing one.
707
+ // This happens whenever the leaf (or a branch point) lands such that an assistant
708
+ // turn's tool results are off the selected path: its result children live on a
709
+ // sibling branch, or it is the leaf itself (results are children below it). Left
710
+ // in place, `transformMessages` fabricates one synthetic "aborted"/"No result
711
+ // provided" result per dangling call plus a `<turn-aborted>` developer note, which
712
+ // render as phantom failed calls and re-inject the failed batch into the model's
713
+ // context — the rewind/restore loop.
714
+ //
715
+ // Stripping is necessary but not sufficient: a *modified* assistant turn that still
716
+ // carries signed `thinking`/`redacted_thinking` is rejected by Anthropic — "thinking
717
+ // blocks in the latest assistant message cannot be modified", and signed thinking
718
+ // replayed out of its original turn shape can also fail signature validation (this
719
+ // bites the handoff/branch-summary request). So when we rewrite a turn we also
720
+ // neutralize its protected reasoning: drop `redactedThinking` (encrypted, no
721
+ // plaintext to keep) and clear `thinking` signatures so the provider encoder
722
+ // downgrades them to plain text (verified accepted by the live API), preserving the
723
+ // visible reasoning while removing the immutability/invalid-signature hazard. Drop a
724
+ // turn left with no content. (Live turns never qualify: their results are persisted
725
+ // on the same path before any context rebuild.)
726
+ const pairedToolResultIds = new Set<string>();
727
+ for (const message of messages) {
728
+ if (message.role === "toolResult") pairedToolResultIds.add(message.toolCallId);
729
+ }
730
+ for (let i = messages.length - 1; i >= 0; i--) {
731
+ const message = messages[i];
732
+ if (message.role !== "assistant") continue;
733
+ const hasDangling = message.content.some(
734
+ block => block.type === "toolCall" && !pairedToolResultIds.has(block.id),
735
+ );
736
+ if (!hasDangling) continue;
737
+ const normalized = message.content
738
+ .filter(
739
+ block =>
740
+ !(block.type === "toolCall" && !pairedToolResultIds.has(block.id)) && block.type !== "redactedThinking",
741
+ )
742
+ .map(block =>
743
+ block.type === "thinking" && block.thinkingSignature ? { ...block, thinkingSignature: undefined } : block,
744
+ );
745
+ if (normalized.length === 0) {
746
+ messages.splice(i, 1);
747
+ } else {
748
+ messages[i] = { ...message, content: normalized };
749
+ }
750
+ }
751
+
705
752
  return {
706
753
  messages,
707
754
  thinkingLevel,
package/src/tools/eval.ts CHANGED
@@ -13,10 +13,19 @@ import { truncateToVisualLines } from "../modes/components/visual-truncate";
13
13
  import { getMarkdownTheme, type Theme } from "../modes/theme/theme";
14
14
  import evalDescription from "../prompts/tools/eval.md" with { type: "text" };
15
15
  import { DEFAULT_MAX_BYTES, OutputSink, type OutputSummary, TailBuffer } from "../session/streaming-output";
16
- import { getTreeBranch, getTreeContinuePrefix, renderCodeCell } from "../tui";
16
+ import { renderCodeCell } from "../tui";
17
17
  import { formatDimensionNote, resizeImage } from "../utils/image-resize";
18
18
  import { resolveEvalBackends, type ToolSession } from ".";
19
19
  import { truncateForPrompt } from "./approval";
20
+ import {
21
+ JSON_TREE_MAX_DEPTH_COLLAPSED,
22
+ JSON_TREE_MAX_DEPTH_EXPANDED,
23
+ JSON_TREE_MAX_LINES_COLLAPSED,
24
+ JSON_TREE_MAX_LINES_EXPANDED,
25
+ JSON_TREE_SCALAR_LEN_COLLAPSED,
26
+ JSON_TREE_SCALAR_LEN_EXPANDED,
27
+ renderJsonTreeLines,
28
+ } from "./json-tree";
20
29
  import {
21
30
  formatStyledTruncationWarning,
22
31
  resolveOutputMaxColumns,
@@ -61,15 +70,6 @@ export type EvalToolResult = {
61
70
 
62
71
  export type EvalProxyExecutor = (params: EvalToolParams, signal?: AbortSignal) => Promise<EvalToolResult>;
63
72
 
64
- function formatJsonScalar(value: unknown): string {
65
- if (value === null) return "null";
66
- if (value === undefined) return "undefined";
67
- if (typeof value === "string") return JSON.stringify(value);
68
- if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") return String(value);
69
- if (typeof value === "function") return "[function]";
70
- return "[object]";
71
- }
72
-
73
73
  /** Cap per `display()` value sent back to the model. */
74
74
  const MAX_DISPLAY_TEXT_BYTES = 8000;
75
75
 
@@ -102,41 +102,6 @@ function formatDisplayOutputsForText(outputs: EvalDisplayOutput[]): string {
102
102
  return chunks.join("\n\n");
103
103
  }
104
104
 
105
- function renderJsonTree(value: unknown, theme: Theme, expanded: boolean, maxDepth = expanded ? 6 : 2): string[] {
106
- const maxItems = expanded ? 20 : 5;
107
-
108
- const renderNode = (node: unknown, prefix: string, depth: number, isLast: boolean, label?: string): string[] => {
109
- const branch = getTreeBranch(isLast, theme);
110
- const displayLabel = label ? `${label}: ` : "";
111
-
112
- if (depth >= maxDepth || node === null || typeof node !== "object") {
113
- return [`${prefix}${branch} ${displayLabel}${formatJsonScalar(node)}`];
114
- }
115
-
116
- const isArray = Array.isArray(node);
117
- const entries = isArray
118
- ? node.map((val, index) => [String(index), val] as const)
119
- : Object.entries(node as object);
120
- const header = `${prefix}${branch} ${displayLabel}${isArray ? `Array(${entries.length})` : `Object(${entries.length})`}`;
121
- const lines = [header];
122
-
123
- const childPrefix = prefix + getTreeContinuePrefix(isLast, theme);
124
- const visible = entries.slice(0, maxItems);
125
- for (let i = 0; i < visible.length; i++) {
126
- const [key, val] = visible[i];
127
- const childLast = i === visible.length - 1 && (expanded || entries.length <= maxItems);
128
- lines.push(...renderNode(val, childPrefix, depth + 1, childLast, isArray ? `[${key}]` : key));
129
- }
130
- if (!expanded && entries.length > maxItems) {
131
- const moreBranch = theme.tree.last;
132
- lines.push(`${childPrefix}${moreBranch} ${entries.length - maxItems} more item(s)`);
133
- }
134
- return lines;
135
- };
136
-
137
- return renderNode(value, "", 0, true);
138
- }
139
-
140
105
  export interface EvalToolDescriptionOptions {
141
106
  py?: boolean;
142
107
  js?: boolean;
@@ -669,6 +634,7 @@ function formatStatusEvent(event: EvalStatusEvent, theme: Theme): string {
669
634
  sh: "icon.package",
670
635
  env: "icon.package",
671
636
  batch: "icon.package",
637
+ llm: "icon.package",
672
638
  };
673
639
 
674
640
  const iconKey = opIcons[op] ?? "icon.file";
@@ -735,6 +701,11 @@ function formatStatusEvent(event: EvalStatusEvent, theme: Theme): string {
735
701
  case "batch":
736
702
  parts.push(`${data.files} file${(data.files as number) !== 1 ? "s" : ""} processed`);
737
703
  break;
704
+ case "llm":
705
+ if (data.model) parts.push(String(data.model));
706
+ if (data.tier && data.tier !== data.model) parts.push(`(${data.tier})`);
707
+ parts.push(`${data.chars ?? 0} chars`);
708
+ break;
738
709
  case "wc":
739
710
  parts.push(`${data.lines}L ${data.words}W ${data.chars}C`);
740
711
  break;
@@ -950,10 +921,15 @@ export const evalToolRenderer = {
950
921
  const output = stripOutputNotice(rawOutput, details?.meta).trimEnd();
951
922
 
952
923
  const jsonOutputs = details?.jsonOutputs ?? [];
924
+ const treeExpanded = options.renderContext?.expanded ?? options.expanded;
925
+ const treeDepth = treeExpanded ? JSON_TREE_MAX_DEPTH_EXPANDED : JSON_TREE_MAX_DEPTH_COLLAPSED;
926
+ const treeLineCap = treeExpanded ? JSON_TREE_MAX_LINES_EXPANDED : JSON_TREE_MAX_LINES_COLLAPSED;
927
+ const treeScalarLen = treeExpanded ? JSON_TREE_SCALAR_LEN_EXPANDED : JSON_TREE_SCALAR_LEN_COLLAPSED;
928
+ const labelOutputs = jsonOutputs.length > 1;
953
929
  const jsonLines = jsonOutputs.flatMap((value, index) => {
954
- const header = `JSON output ${index + 1}`;
955
- const treeLines = renderJsonTree(value, uiTheme, options.renderContext?.expanded ?? options.expanded);
956
- return [header, ...treeLines];
930
+ const tree = renderJsonTreeLines(value, uiTheme, treeDepth, treeLineCap, treeScalarLen);
931
+ const body = tree.truncated ? [...tree.lines, uiTheme.fg("dim", "…")] : tree.lines;
932
+ return labelOutputs ? [uiTheme.fg("dim", `display[${index + 1}]`), ...body] : body;
957
933
  });
958
934
 
959
935
  const timeoutSeconds = options.renderContext?.timeout;
package/src/tools/index.ts CHANGED
@@ -28,7 +28,6 @@ import { AstEditTool } from "./ast-edit";
28
28
  import { AstGrepTool } from "./ast-grep";
29
29
  import { BashTool } from "./bash";
30
30
  import { BrowserTool } from "./browser";
31
- import { CalculatorTool } from "./calculator";
32
31
  import { type CheckpointState, CheckpointTool, RewindTool } from "./checkpoint";
33
32
  import { DebugTool } from "./debug";
34
33
  import { EvalTool } from "./eval";
@@ -69,7 +68,6 @@ export * from "./ast-edit";
69
68
  export * from "./ast-grep";
70
69
  export * from "./bash";
71
70
  export * from "./browser";
72
- export * from "./calculator";
73
71
  export * from "./checkpoint";
74
72
  export * from "./debug";
75
73
  export * from "./eval";
@@ -286,7 +284,6 @@ export const BUILTIN_TOOLS: Record<string, ToolFactory> = {
286
284
  ask: AskTool.createIf,
287
285
  debug: DebugTool.createIf,
288
286
  eval: s => new EvalTool(s),
289
- calc: s => new CalculatorTool(s),
290
287
  ssh: loadSshTool,
291
288
  github: GithubTool.createIf,
292
289
  find: s => new FindTool(s),
@@ -455,7 +452,6 @@ export async function createTools(session: ToolSession, toolNames?: string[]): P
455
452
  if (name === "web_search") return session.settings.get("web_search.enabled");
456
453
  // search_tool_bm25 is allowed when either legacy mcp.discoveryMode or new tools.discoveryMode is active.
457
454
  if (name === "search_tool_bm25") return discoveryActive;
458
- if (name === "calc") return session.settings.get("calc.enabled");
459
455
  if (name === "browser") return session.settings.get("browser.enabled");
460
456
  if (name === "checkpoint" || name === "rewind") return session.settings.get("checkpoint.enabled");
461
457
  if (name === "irc") {
package/src/tools/renderers.ts CHANGED
@@ -16,7 +16,6 @@ import { astEditToolRenderer } from "./ast-edit";
16
16
  import { astGrepToolRenderer } from "./ast-grep";
17
17
  import { bashToolRenderer } from "./bash";
18
18
  import { browserToolRenderer } from "./browser/render";
19
- import { calculatorToolRenderer } from "./calculator";
20
19
  import { debugToolRenderer } from "./debug";
21
20
  import { evalToolRenderer } from "./eval";
22
21
  import { findToolRenderer } from "./find";
@@ -54,7 +53,6 @@ export const toolRenderers: Record<string, ToolRenderer> = {
54
53
  recipe: recipeToolRenderer as ToolRenderer,
55
54
  debug: debugToolRenderer as ToolRenderer,
56
55
  eval: evalToolRenderer as ToolRenderer,
57
- calc: calculatorToolRenderer as ToolRenderer,
58
56
  edit: editToolRenderer as ToolRenderer,
59
57
  apply_patch: editToolRenderer as ToolRenderer,
60
58
  find: findToolRenderer as ToolRenderer,
package/dist/types/tools/calculator.d.ts
@@ -1,77 +0,0 @@
1
- import type { AgentTool, AgentToolResult } from "@oh-my-pi/pi-agent-core";
2
- import type { Component } from "@oh-my-pi/pi-tui";
3
- import * as z from "zod/v4";
4
- import type { RenderResultOptions } from "../extensibility/custom-tools/types";
5
- import type { Theme } from "../modes/theme/theme";
6
- import type { ToolSession } from ".";
7
- declare const calculatorSchema: z.ZodObject<{
8
- calculations: z.ZodArray<z.ZodObject<{
9
- expression: z.ZodString;
10
- prefix: z.ZodString;
11
- suffix: z.ZodString;
12
- }, z.core.$strip>>;
13
- }, z.core.$strip>;
14
- export interface CalculatorToolDetails {
15
- results: Array<{
16
- expression: string;
17
- value: number;
18
- output: string;
19
- }>;
20
- }
21
- type CalculatorParams = z.infer<typeof calculatorSchema>;
22
- /**
23
- * Calculator tool for evaluating mathematical expressions.
24
- *
25
- * Supports decimal, hex (0x), binary (0b), octal (0o) literals,
26
- * standard arithmetic operators, and parentheses.
27
- */
28
- export declare class CalculatorTool implements AgentTool<typeof calculatorSchema, CalculatorToolDetails> {
29
- readonly name = "calc";
30
- readonly approval: "read";
31
- readonly label = "Calc";
32
- readonly summary = "Evaluate a mathematical expression";
33
- readonly loadMode = "discoverable";
34
- readonly description: string;
35
- readonly parameters: z.ZodObject<{
36
- calculations: z.ZodArray<z.ZodObject<{
37
- expression: z.ZodString;
38
- prefix: z.ZodString;
39
- suffix: z.ZodString;
40
- }, z.core.$strip>>;
41
- }, z.core.$strip>;
42
- readonly strict = true;
43
- constructor(_session: ToolSession);
44
- execute(_toolCallId: string, { calculations }: CalculatorParams, signal?: AbortSignal): Promise<AgentToolResult<CalculatorToolDetails>>;
45
- }
46
- interface CalculatorRenderArgs {
47
- calculations?: Array<{
48
- expression: string;
49
- prefix?: string;
50
- suffix?: string;
51
- }>;
52
- }
53
- /**
54
- * TUI renderer for calculator tool calls and results.
55
- * Handles both collapsed (preview) and expanded (full) display modes.
56
- */
57
- export declare const calculatorToolRenderer: {
58
- /**
59
- * Render the tool call header showing the first expression and count.
60
- * Format: "Calc <expression> (N calcs)"
61
- */
62
- renderCall(args: CalculatorRenderArgs, _options: RenderResultOptions, uiTheme: Theme): Component;
63
- /**
64
- * Render calculation results as a tree list.
65
- * Collapsed mode shows first N items with expand hint; expanded shows all.
66
- */
67
- renderResult(result: {
68
- content: Array<{
69
- type: string;
70
- text?: string;
71
- }>;
72
- details?: CalculatorToolDetails;
73
- isError?: boolean;
74
- }, options: RenderResultOptions, uiTheme: Theme, args?: CalculatorRenderArgs): Component;
75
- mergeCallAndResult: boolean;
76
- };
77
- export {};
package/src/prompts/tools/calculator.md
@@ -1,10 +0,0 @@
1
- Performs basic calculations.
2
-
3
- <instruction>
4
- - Supports +, -, *, /, %, ** and parentheses
5
- - Supports decimal, hex (0x), binary (0b), and octal (0o) literals
6
- </instruction>
7
-
8
- <output>
9
- Returns each calculation result with its prefix and suffix applied.
10
- </output>