@oh-my-pi/pi-coding-agent 15.5.13 → 15.5.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/dist/types/config/model-registry.d.ts +1 -1
- package/dist/types/config/models-config-schema.d.ts +2 -0
- package/dist/types/config/settings-schema.d.ts +1 -10
- package/dist/types/eval/__tests__/llm-bridge.test.d.ts +1 -0
- package/dist/types/eval/llm-bridge.d.ts +25 -0
- package/dist/types/export/html/template.generated.d.ts +1 -1
- package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +15 -0
- package/dist/types/modes/theme/theme.d.ts +2 -1
- package/dist/types/session/agent-session.d.ts +2 -0
- package/dist/types/tools/index.d.ts +0 -1
- package/package.json +8 -8
- package/src/config/model-registry.ts +89 -5
- package/src/config/models-config-schema.ts +1 -1
- package/src/config/settings-schema.ts +1 -10
- package/src/eval/__tests__/llm-bridge.test.ts +297 -0
- package/src/eval/js/shared/prelude.txt +8 -0
- package/src/eval/js/tool-bridge.ts +4 -0
- package/src/eval/llm-bridge.ts +181 -0
- package/src/eval/py/prelude.py +52 -31
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +0 -13
- package/src/extensibility/plugins/legacy-pi-compat.ts +60 -23
- package/src/internal-urls/docs-index.generated.ts +3 -4
- package/src/main.ts +4 -0
- package/src/modes/components/model-selector.ts +119 -22
- package/src/modes/components/status-line/presets.ts +1 -0
- package/src/modes/components/status-line/segments.ts +23 -0
- package/src/modes/interactive-mode.ts +22 -87
- package/src/modes/theme/theme.ts +7 -0
- package/src/prompts/tools/eval.md +2 -0
- package/src/session/agent-session.ts +19 -0
- package/src/session/session-manager.ts +47 -0
- package/src/tools/eval.ts +24 -48
- package/src/tools/index.ts +0 -4
- package/src/tools/renderers.ts +0 -2
- package/dist/types/tools/calculator.d.ts +0 -77
- package/src/prompts/tools/calculator.md +0 -10
- package/src/tools/calculator.ts +0 -541
|
@@ -702,6 +702,53 @@ export function buildSessionContext(
|
|
|
702
702
|
}
|
|
703
703
|
}
|
|
704
704
|
|
|
705
|
+
// Strip dangling tool_use blocks — a tool_use with no matching tool_result on the
|
|
706
|
+
// resolved leaf→root path — from ANY assistant turn, not just the trailing one.
|
|
707
|
+
// This happens whenever the leaf (or a branch point) lands such that an assistant
|
|
708
|
+
// turn's tool results are off the selected path: its result children live on a
|
|
709
|
+
// sibling branch, or it is the leaf itself (results are children below it). Left
|
|
710
|
+
// in place, `transformMessages` fabricates one synthetic "aborted"/"No result
|
|
711
|
+
// provided" result per dangling call plus a `<turn-aborted>` developer note, which
|
|
712
|
+
// render as phantom failed calls and re-inject the failed batch into the model's
|
|
713
|
+
// context — the rewind/restore loop.
|
|
714
|
+
//
|
|
715
|
+
// Stripping is necessary but not sufficient: a *modified* assistant turn that still
|
|
716
|
+
// carries signed `thinking`/`redacted_thinking` is rejected by Anthropic — "thinking
|
|
717
|
+
// blocks in the latest assistant message cannot be modified", and signed thinking
|
|
718
|
+
// replayed out of its original turn shape can also fail signature validation (this
|
|
719
|
+
// bites the handoff/branch-summary request). So when we rewrite a turn we also
|
|
720
|
+
// neutralize its protected reasoning: drop `redactedThinking` (encrypted, no
|
|
721
|
+
// plaintext to keep) and clear `thinking` signatures so the provider encoder
|
|
722
|
+
// downgrades them to plain text (verified accepted by the live API), preserving the
|
|
723
|
+
// visible reasoning while removing the immutability/invalid-signature hazard. Drop a
|
|
724
|
+
// turn left with no content. (Live turns never qualify: their results are persisted
|
|
725
|
+
// on the same path before any context rebuild.)
|
|
726
|
+
const pairedToolResultIds = new Set<string>();
|
|
727
|
+
for (const message of messages) {
|
|
728
|
+
if (message.role === "toolResult") pairedToolResultIds.add(message.toolCallId);
|
|
729
|
+
}
|
|
730
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
731
|
+
const message = messages[i];
|
|
732
|
+
if (message.role !== "assistant") continue;
|
|
733
|
+
const hasDangling = message.content.some(
|
|
734
|
+
block => block.type === "toolCall" && !pairedToolResultIds.has(block.id),
|
|
735
|
+
);
|
|
736
|
+
if (!hasDangling) continue;
|
|
737
|
+
const normalized = message.content
|
|
738
|
+
.filter(
|
|
739
|
+
block =>
|
|
740
|
+
!(block.type === "toolCall" && !pairedToolResultIds.has(block.id)) && block.type !== "redactedThinking",
|
|
741
|
+
)
|
|
742
|
+
.map(block =>
|
|
743
|
+
block.type === "thinking" && block.thinkingSignature ? { ...block, thinkingSignature: undefined } : block,
|
|
744
|
+
);
|
|
745
|
+
if (normalized.length === 0) {
|
|
746
|
+
messages.splice(i, 1);
|
|
747
|
+
} else {
|
|
748
|
+
messages[i] = { ...message, content: normalized };
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
|
|
705
752
|
return {
|
|
706
753
|
messages,
|
|
707
754
|
thinkingLevel,
|
package/src/tools/eval.ts
CHANGED
|
@@ -13,10 +13,19 @@ import { truncateToVisualLines } from "../modes/components/visual-truncate";
|
|
|
13
13
|
import { getMarkdownTheme, type Theme } from "../modes/theme/theme";
|
|
14
14
|
import evalDescription from "../prompts/tools/eval.md" with { type: "text" };
|
|
15
15
|
import { DEFAULT_MAX_BYTES, OutputSink, type OutputSummary, TailBuffer } from "../session/streaming-output";
|
|
16
|
-
import {
|
|
16
|
+
import { renderCodeCell } from "../tui";
|
|
17
17
|
import { formatDimensionNote, resizeImage } from "../utils/image-resize";
|
|
18
18
|
import { resolveEvalBackends, type ToolSession } from ".";
|
|
19
19
|
import { truncateForPrompt } from "./approval";
|
|
20
|
+
import {
|
|
21
|
+
JSON_TREE_MAX_DEPTH_COLLAPSED,
|
|
22
|
+
JSON_TREE_MAX_DEPTH_EXPANDED,
|
|
23
|
+
JSON_TREE_MAX_LINES_COLLAPSED,
|
|
24
|
+
JSON_TREE_MAX_LINES_EXPANDED,
|
|
25
|
+
JSON_TREE_SCALAR_LEN_COLLAPSED,
|
|
26
|
+
JSON_TREE_SCALAR_LEN_EXPANDED,
|
|
27
|
+
renderJsonTreeLines,
|
|
28
|
+
} from "./json-tree";
|
|
20
29
|
import {
|
|
21
30
|
formatStyledTruncationWarning,
|
|
22
31
|
resolveOutputMaxColumns,
|
|
@@ -61,15 +70,6 @@ export type EvalToolResult = {
|
|
|
61
70
|
|
|
62
71
|
export type EvalProxyExecutor = (params: EvalToolParams, signal?: AbortSignal) => Promise<EvalToolResult>;
|
|
63
72
|
|
|
64
|
-
function formatJsonScalar(value: unknown): string {
|
|
65
|
-
if (value === null) return "null";
|
|
66
|
-
if (value === undefined) return "undefined";
|
|
67
|
-
if (typeof value === "string") return JSON.stringify(value);
|
|
68
|
-
if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") return String(value);
|
|
69
|
-
if (typeof value === "function") return "[function]";
|
|
70
|
-
return "[object]";
|
|
71
|
-
}
|
|
72
|
-
|
|
73
73
|
/** Cap per `display()` value sent back to the model. */
|
|
74
74
|
const MAX_DISPLAY_TEXT_BYTES = 8000;
|
|
75
75
|
|
|
@@ -102,41 +102,6 @@ function formatDisplayOutputsForText(outputs: EvalDisplayOutput[]): string {
|
|
|
102
102
|
return chunks.join("\n\n");
|
|
103
103
|
}
|
|
104
104
|
|
|
105
|
-
function renderJsonTree(value: unknown, theme: Theme, expanded: boolean, maxDepth = expanded ? 6 : 2): string[] {
|
|
106
|
-
const maxItems = expanded ? 20 : 5;
|
|
107
|
-
|
|
108
|
-
const renderNode = (node: unknown, prefix: string, depth: number, isLast: boolean, label?: string): string[] => {
|
|
109
|
-
const branch = getTreeBranch(isLast, theme);
|
|
110
|
-
const displayLabel = label ? `${label}: ` : "";
|
|
111
|
-
|
|
112
|
-
if (depth >= maxDepth || node === null || typeof node !== "object") {
|
|
113
|
-
return [`${prefix}${branch} ${displayLabel}${formatJsonScalar(node)}`];
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
const isArray = Array.isArray(node);
|
|
117
|
-
const entries = isArray
|
|
118
|
-
? node.map((val, index) => [String(index), val] as const)
|
|
119
|
-
: Object.entries(node as object);
|
|
120
|
-
const header = `${prefix}${branch} ${displayLabel}${isArray ? `Array(${entries.length})` : `Object(${entries.length})`}`;
|
|
121
|
-
const lines = [header];
|
|
122
|
-
|
|
123
|
-
const childPrefix = prefix + getTreeContinuePrefix(isLast, theme);
|
|
124
|
-
const visible = entries.slice(0, maxItems);
|
|
125
|
-
for (let i = 0; i < visible.length; i++) {
|
|
126
|
-
const [key, val] = visible[i];
|
|
127
|
-
const childLast = i === visible.length - 1 && (expanded || entries.length <= maxItems);
|
|
128
|
-
lines.push(...renderNode(val, childPrefix, depth + 1, childLast, isArray ? `[${key}]` : key));
|
|
129
|
-
}
|
|
130
|
-
if (!expanded && entries.length > maxItems) {
|
|
131
|
-
const moreBranch = theme.tree.last;
|
|
132
|
-
lines.push(`${childPrefix}${moreBranch} ${entries.length - maxItems} more item(s)`);
|
|
133
|
-
}
|
|
134
|
-
return lines;
|
|
135
|
-
};
|
|
136
|
-
|
|
137
|
-
return renderNode(value, "", 0, true);
|
|
138
|
-
}
|
|
139
|
-
|
|
140
105
|
export interface EvalToolDescriptionOptions {
|
|
141
106
|
py?: boolean;
|
|
142
107
|
js?: boolean;
|
|
@@ -669,6 +634,7 @@ function formatStatusEvent(event: EvalStatusEvent, theme: Theme): string {
|
|
|
669
634
|
sh: "icon.package",
|
|
670
635
|
env: "icon.package",
|
|
671
636
|
batch: "icon.package",
|
|
637
|
+
llm: "icon.package",
|
|
672
638
|
};
|
|
673
639
|
|
|
674
640
|
const iconKey = opIcons[op] ?? "icon.file";
|
|
@@ -735,6 +701,11 @@ function formatStatusEvent(event: EvalStatusEvent, theme: Theme): string {
|
|
|
735
701
|
case "batch":
|
|
736
702
|
parts.push(`${data.files} file${(data.files as number) !== 1 ? "s" : ""} processed`);
|
|
737
703
|
break;
|
|
704
|
+
case "llm":
|
|
705
|
+
if (data.model) parts.push(String(data.model));
|
|
706
|
+
if (data.tier && data.tier !== data.model) parts.push(`(${data.tier})`);
|
|
707
|
+
parts.push(`${data.chars ?? 0} chars`);
|
|
708
|
+
break;
|
|
738
709
|
case "wc":
|
|
739
710
|
parts.push(`${data.lines}L ${data.words}W ${data.chars}C`);
|
|
740
711
|
break;
|
|
@@ -950,10 +921,15 @@ export const evalToolRenderer = {
|
|
|
950
921
|
const output = stripOutputNotice(rawOutput, details?.meta).trimEnd();
|
|
951
922
|
|
|
952
923
|
const jsonOutputs = details?.jsonOutputs ?? [];
|
|
924
|
+
const treeExpanded = options.renderContext?.expanded ?? options.expanded;
|
|
925
|
+
const treeDepth = treeExpanded ? JSON_TREE_MAX_DEPTH_EXPANDED : JSON_TREE_MAX_DEPTH_COLLAPSED;
|
|
926
|
+
const treeLineCap = treeExpanded ? JSON_TREE_MAX_LINES_EXPANDED : JSON_TREE_MAX_LINES_COLLAPSED;
|
|
927
|
+
const treeScalarLen = treeExpanded ? JSON_TREE_SCALAR_LEN_EXPANDED : JSON_TREE_SCALAR_LEN_COLLAPSED;
|
|
928
|
+
const labelOutputs = jsonOutputs.length > 1;
|
|
953
929
|
const jsonLines = jsonOutputs.flatMap((value, index) => {
|
|
954
|
-
const
|
|
955
|
-
const
|
|
956
|
-
return [
|
|
930
|
+
const tree = renderJsonTreeLines(value, uiTheme, treeDepth, treeLineCap, treeScalarLen);
|
|
931
|
+
const body = tree.truncated ? [...tree.lines, uiTheme.fg("dim", "…")] : tree.lines;
|
|
932
|
+
return labelOutputs ? [uiTheme.fg("dim", `display[${index + 1}]`), ...body] : body;
|
|
957
933
|
});
|
|
958
934
|
|
|
959
935
|
const timeoutSeconds = options.renderContext?.timeout;
|
package/src/tools/index.ts
CHANGED
|
@@ -28,7 +28,6 @@ import { AstEditTool } from "./ast-edit";
|
|
|
28
28
|
import { AstGrepTool } from "./ast-grep";
|
|
29
29
|
import { BashTool } from "./bash";
|
|
30
30
|
import { BrowserTool } from "./browser";
|
|
31
|
-
import { CalculatorTool } from "./calculator";
|
|
32
31
|
import { type CheckpointState, CheckpointTool, RewindTool } from "./checkpoint";
|
|
33
32
|
import { DebugTool } from "./debug";
|
|
34
33
|
import { EvalTool } from "./eval";
|
|
@@ -69,7 +68,6 @@ export * from "./ast-edit";
|
|
|
69
68
|
export * from "./ast-grep";
|
|
70
69
|
export * from "./bash";
|
|
71
70
|
export * from "./browser";
|
|
72
|
-
export * from "./calculator";
|
|
73
71
|
export * from "./checkpoint";
|
|
74
72
|
export * from "./debug";
|
|
75
73
|
export * from "./eval";
|
|
@@ -286,7 +284,6 @@ export const BUILTIN_TOOLS: Record<string, ToolFactory> = {
|
|
|
286
284
|
ask: AskTool.createIf,
|
|
287
285
|
debug: DebugTool.createIf,
|
|
288
286
|
eval: s => new EvalTool(s),
|
|
289
|
-
calc: s => new CalculatorTool(s),
|
|
290
287
|
ssh: loadSshTool,
|
|
291
288
|
github: GithubTool.createIf,
|
|
292
289
|
find: s => new FindTool(s),
|
|
@@ -455,7 +452,6 @@ export async function createTools(session: ToolSession, toolNames?: string[]): P
|
|
|
455
452
|
if (name === "web_search") return session.settings.get("web_search.enabled");
|
|
456
453
|
// search_tool_bm25 is allowed when either legacy mcp.discoveryMode or new tools.discoveryMode is active.
|
|
457
454
|
if (name === "search_tool_bm25") return discoveryActive;
|
|
458
|
-
if (name === "calc") return session.settings.get("calc.enabled");
|
|
459
455
|
if (name === "browser") return session.settings.get("browser.enabled");
|
|
460
456
|
if (name === "checkpoint" || name === "rewind") return session.settings.get("checkpoint.enabled");
|
|
461
457
|
if (name === "irc") {
|
package/src/tools/renderers.ts
CHANGED
|
@@ -16,7 +16,6 @@ import { astEditToolRenderer } from "./ast-edit";
|
|
|
16
16
|
import { astGrepToolRenderer } from "./ast-grep";
|
|
17
17
|
import { bashToolRenderer } from "./bash";
|
|
18
18
|
import { browserToolRenderer } from "./browser/render";
|
|
19
|
-
import { calculatorToolRenderer } from "./calculator";
|
|
20
19
|
import { debugToolRenderer } from "./debug";
|
|
21
20
|
import { evalToolRenderer } from "./eval";
|
|
22
21
|
import { findToolRenderer } from "./find";
|
|
@@ -54,7 +53,6 @@ export const toolRenderers: Record<string, ToolRenderer> = {
|
|
|
54
53
|
recipe: recipeToolRenderer as ToolRenderer,
|
|
55
54
|
debug: debugToolRenderer as ToolRenderer,
|
|
56
55
|
eval: evalToolRenderer as ToolRenderer,
|
|
57
|
-
calc: calculatorToolRenderer as ToolRenderer,
|
|
58
56
|
edit: editToolRenderer as ToolRenderer,
|
|
59
57
|
apply_patch: editToolRenderer as ToolRenderer,
|
|
60
58
|
find: findToolRenderer as ToolRenderer,
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
import type { AgentTool, AgentToolResult } from "@oh-my-pi/pi-agent-core";
|
|
2
|
-
import type { Component } from "@oh-my-pi/pi-tui";
|
|
3
|
-
import * as z from "zod/v4";
|
|
4
|
-
import type { RenderResultOptions } from "../extensibility/custom-tools/types";
|
|
5
|
-
import type { Theme } from "../modes/theme/theme";
|
|
6
|
-
import type { ToolSession } from ".";
|
|
7
|
-
declare const calculatorSchema: z.ZodObject<{
|
|
8
|
-
calculations: z.ZodArray<z.ZodObject<{
|
|
9
|
-
expression: z.ZodString;
|
|
10
|
-
prefix: z.ZodString;
|
|
11
|
-
suffix: z.ZodString;
|
|
12
|
-
}, z.core.$strip>>;
|
|
13
|
-
}, z.core.$strip>;
|
|
14
|
-
export interface CalculatorToolDetails {
|
|
15
|
-
results: Array<{
|
|
16
|
-
expression: string;
|
|
17
|
-
value: number;
|
|
18
|
-
output: string;
|
|
19
|
-
}>;
|
|
20
|
-
}
|
|
21
|
-
type CalculatorParams = z.infer<typeof calculatorSchema>;
|
|
22
|
-
/**
|
|
23
|
-
* Calculator tool for evaluating mathematical expressions.
|
|
24
|
-
*
|
|
25
|
-
* Supports decimal, hex (0x), binary (0b), octal (0o) literals,
|
|
26
|
-
* standard arithmetic operators, and parentheses.
|
|
27
|
-
*/
|
|
28
|
-
export declare class CalculatorTool implements AgentTool<typeof calculatorSchema, CalculatorToolDetails> {
|
|
29
|
-
readonly name = "calc";
|
|
30
|
-
readonly approval: "read";
|
|
31
|
-
readonly label = "Calc";
|
|
32
|
-
readonly summary = "Evaluate a mathematical expression";
|
|
33
|
-
readonly loadMode = "discoverable";
|
|
34
|
-
readonly description: string;
|
|
35
|
-
readonly parameters: z.ZodObject<{
|
|
36
|
-
calculations: z.ZodArray<z.ZodObject<{
|
|
37
|
-
expression: z.ZodString;
|
|
38
|
-
prefix: z.ZodString;
|
|
39
|
-
suffix: z.ZodString;
|
|
40
|
-
}, z.core.$strip>>;
|
|
41
|
-
}, z.core.$strip>;
|
|
42
|
-
readonly strict = true;
|
|
43
|
-
constructor(_session: ToolSession);
|
|
44
|
-
execute(_toolCallId: string, { calculations }: CalculatorParams, signal?: AbortSignal): Promise<AgentToolResult<CalculatorToolDetails>>;
|
|
45
|
-
}
|
|
46
|
-
interface CalculatorRenderArgs {
|
|
47
|
-
calculations?: Array<{
|
|
48
|
-
expression: string;
|
|
49
|
-
prefix?: string;
|
|
50
|
-
suffix?: string;
|
|
51
|
-
}>;
|
|
52
|
-
}
|
|
53
|
-
/**
|
|
54
|
-
* TUI renderer for calculator tool calls and results.
|
|
55
|
-
* Handles both collapsed (preview) and expanded (full) display modes.
|
|
56
|
-
*/
|
|
57
|
-
export declare const calculatorToolRenderer: {
|
|
58
|
-
/**
|
|
59
|
-
* Render the tool call header showing the first expression and count.
|
|
60
|
-
* Format: "Calc <expression> (N calcs)"
|
|
61
|
-
*/
|
|
62
|
-
renderCall(args: CalculatorRenderArgs, _options: RenderResultOptions, uiTheme: Theme): Component;
|
|
63
|
-
/**
|
|
64
|
-
* Render calculation results as a tree list.
|
|
65
|
-
* Collapsed mode shows first N items with expand hint; expanded shows all.
|
|
66
|
-
*/
|
|
67
|
-
renderResult(result: {
|
|
68
|
-
content: Array<{
|
|
69
|
-
type: string;
|
|
70
|
-
text?: string;
|
|
71
|
-
}>;
|
|
72
|
-
details?: CalculatorToolDetails;
|
|
73
|
-
isError?: boolean;
|
|
74
|
-
}, options: RenderResultOptions, uiTheme: Theme, args?: CalculatorRenderArgs): Component;
|
|
75
|
-
mergeCallAndResult: boolean;
|
|
76
|
-
};
|
|
77
|
-
export {};
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
Performs basic calculations.
|
|
2
|
-
|
|
3
|
-
<instruction>
|
|
4
|
-
- Supports +, -, *, /, %, ** and parentheses
|
|
5
|
-
- Supports decimal, hex (0x), binary (0b), and octal (0o) literals
|
|
6
|
-
</instruction>
|
|
7
|
-
|
|
8
|
-
<output>
|
|
9
|
-
Returns each calculation result with its prefix and suffix applied.
|
|
10
|
-
</output>
|