@oh-my-pi/pi-agent-core 15.13.1 → 15.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/dist/types/agent-loop.d.ts +2 -1
- package/dist/types/agent.d.ts +11 -1
- package/dist/types/append-only-context.d.ts +2 -0
- package/dist/types/compaction/utils.d.ts +2 -1
- package/dist/types/index.d.ts +0 -1
- package/dist/types/types.d.ts +32 -1
- package/package.json +6 -6
- package/src/agent-loop.ts +118 -12
- package/src/agent.ts +17 -1
- package/src/append-only-context.ts +4 -1
- package/src/compaction/branch-summarization.ts +2 -1
- package/src/compaction/compaction.ts +4 -3
- package/src/compaction/pruning.ts +12 -1
- package/src/compaction/utils.ts +44 -11
- package/src/index.ts +0 -1
- package/src/types.ts +32 -1
- package/dist/types/harmony-leak.d.ts +0 -118
- package/src/harmony-leak.ts +0 -456
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,41 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.13.3] - 2026-06-15
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added the `interruptible` tool field: when set, the agent loop may abort the tool mid-execution to deliver a queued steering message (honored only in `immediate` interrupt mode).
|
|
10
|
+
- Added support for `gemini` and `gemma` as valid owned tool syntax values in environment configuration
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- Fixed `pruneToolOutputs` blanking tiny tool results during overflow pruning: results below `50` tokens (`MIN_PRUNE_TOKENS`) are no longer replaced with the `[Output truncated - N tokens]` placeholder, which cost more tokens than the result itself and churned the prompt cache for zero savings.
|
|
15
|
+
|
|
16
|
+
## [15.13.2] - 2026-06-15
|
|
17
|
+
|
|
18
|
+
### Breaking Changes
|
|
19
|
+
|
|
20
|
+
- Removed `harmony-leak` exports from the `@oh-my-pi/pi-agent-core` package entrypoint
|
|
21
|
+
- Replaced the experimental `promptToolCalls` agent/loop option with `toolCallSyntax`, selecting an explicit in-band tool-call grammar instead of a boolean GLM-only mode.
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
|
|
25
|
+
- Added support for selecting owned in-band tool-call syntax via `PI_OWNED_TOOLS=<syntax>` (for example `hermes` or `qwen3`) while preserving legacy `PI_OWNED_TOOLS=1/true` as GLM mode
|
|
26
|
+
- Added owned in-band tool calling for multiple syntaxes (`glm`, `hermes`, `kimi`, `xml`, `anthropic`, `deepseek`, `harmony`, `pi-native`, `qwen3`). Owned mode sends no native provider tools, appends a syntax-specific prompt/catalog, re-encodes prior tool calls/results as grammar-owned text, and parses streamed model output back into canonical tool calls.
|
|
27
|
+
- Added tool-example folding to `normalizeTools`: when given a model's affinity syntax (resolved via `preferredToolSyntax`), it renders each tool's `examples` into an `<examples>` block in that native syntax and appends it to the wire description. Wired through both context paths (fresh build and append-only `takeSnapshot`/`build` via a new `exampleSyntax` build option), with the `_i` intent-field placeholder added to examples when intent tracing injects it.
|
|
28
|
+
- Added the `abortOnFabricatedToolResult` option to `AgentOptions`/`AgentLoopConfig` (default `true`): when owned tool calling is active and the model fabricates a tool result mid-turn, `true` aborts the provider request immediately while `false` lets it finish and discards the fabricated continuation.
|
|
29
|
+
|
|
30
|
+
### Changed
|
|
31
|
+
|
|
32
|
+
- Added owned in-band syntax support to `Agent` loop configuration resolution by selecting syntax from `toolCallSyntax` or `PI_OWNED_TOOLS` when present
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
|
|
36
|
+
- Fixed append-only context cache fingerprinting to account for `exampleSyntax`, so switching tool-call syntax rebuilds cached prompts with the correct injected tool examples
|
|
37
|
+
- Fixed owned in-band tool-calling requests to omit `toolChoice` after stripping native tools, preventing invalid tool-choice requests
|
|
38
|
+
- Fixed owned tool calling letting the model fabricate tool results by treating grammar-owned tool-result markers in assistant text as a hard turn boundary: calls before the fabrication are kept, fabricated results and dependent calls are dropped, and the real result is fed back on the next turn.
|
|
39
|
+
|
|
5
40
|
## [15.13.1] - 2026-06-15
|
|
6
41
|
|
|
7
42
|
### Added
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
* Transforms to Message[] only at the LLM call boundary.
|
|
4
4
|
*/
|
|
5
5
|
import { type Context, EventStream } from "@oh-my-pi/pi-ai";
|
|
6
|
+
import { type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
6
7
|
import { type AgentRunCoverage, type AgentRunSummary } from "./run-collector";
|
|
7
8
|
import type { AgentContext, AgentEvent, AgentLoopConfig, AgentMessage, StreamFn } from "./types";
|
|
8
9
|
/**
|
|
@@ -52,7 +53,7 @@ export declare function agentLoopContinueDetailed(context: AgentContext, config:
|
|
|
52
53
|
readonly detailed: () => Promise<AgentLoopDetailedResult>;
|
|
53
54
|
};
|
|
54
55
|
export declare const INTENT_FIELD = "_i";
|
|
55
|
-
export declare function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"];
|
|
56
|
+
export declare function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean, exampleSyntax?: ToolCallSyntax): Context["tools"];
|
|
56
57
|
/** Resolve the human-readable reason an abort carried. A caller that aborts via
|
|
57
58
|
* `AbortController.abort(reason)` with a string or a non-`AbortError` `Error`
|
|
58
59
|
* (e.g. the coding agent's user-interrupt label) gets that text surfaced on the
|
package/dist/types/agent.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { type ApiKeyResolveContext, type AssistantMessage, type AssistantMessageEvent, type Context, type CursorExecHandlers, type CursorToolResultHandler, type Effort, type ImageContent, type Message, type Model, type ProviderSessionState, type ServiceTier, type SimpleStreamOptions, type ThinkingBudgets, type ToolChoice } from "@oh-my-pi/pi-ai";
|
|
2
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
3
|
+
import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
|
|
2
4
|
import type { AppendOnlyContextManager } from "./append-only-context";
|
|
3
|
-
import type { HarmonyAuditEvent } from "./harmony-leak";
|
|
4
5
|
import type { AgentEvent, AgentLoopConfig, AgentMessage, AgentState, AgentTool, AgentToolContext, AsideMessage, StreamFn, ToolCallContext } from "./types";
|
|
5
6
|
export declare class AgentBusyError extends Error {
|
|
6
7
|
constructor(message?: string);
|
|
@@ -126,6 +127,15 @@ export interface AgentOptions {
|
|
|
126
127
|
transformToolCallArguments?: (args: Record<string, unknown>, toolName: string) => Record<string, unknown>;
|
|
127
128
|
/** Enable intent tracing schema injection/stripping in the harness. */
|
|
128
129
|
intentTracing?: boolean;
|
|
130
|
+
/** Owned tool-calling syntax. Undefined keeps provider-native tool calling. */
|
|
131
|
+
toolCallSyntax?: ToolCallSyntax;
|
|
132
|
+
/**
|
|
133
|
+
* When owned tool calling is active and the model fabricates a tool result
|
|
134
|
+
* mid-turn: `true` (default) aborts the provider request immediately; `false`
|
|
135
|
+
* drains the request and discards the fabricated continuation. Forwarded to
|
|
136
|
+
* the loop's {@link AgentLoopConfig.abortOnFabricatedToolResult}.
|
|
137
|
+
*/
|
|
138
|
+
abortOnFabricatedToolResult?: boolean;
|
|
129
139
|
/** Dynamic tool choice override, resolved per LLM call. */
|
|
130
140
|
getToolChoice?: () => ToolChoice | undefined;
|
|
131
141
|
/**
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
* message delta is a cache miss each turn.
|
|
15
15
|
*/
|
|
16
16
|
import type { Context, Message, Tool } from "@oh-my-pi/pi-ai";
|
|
17
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
17
18
|
import type { AgentContext } from "./types";
|
|
18
19
|
/** Frozen system prompt + tool spec snapshot. */
|
|
19
20
|
export interface StablePrefixSnapshot {
|
|
@@ -25,6 +26,7 @@ export interface StablePrefixSnapshot {
|
|
|
25
26
|
export interface BuildOptions {
|
|
26
27
|
/** Inject the `_i` intent field into tool schemas (must match agent-loop's normalizeTools). */
|
|
27
28
|
intentTracing: boolean;
|
|
29
|
+
exampleSyntax?: ToolCallSyntax;
|
|
28
30
|
}
|
|
29
31
|
/**
|
|
30
32
|
* A frozen prefix (system prompt + tools) that produces stable byte
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* Shared utilities for compaction and branch summarization.
|
|
3
3
|
*/
|
|
4
4
|
import type { Message } from "@oh-my-pi/pi-ai";
|
|
5
|
+
import { type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
5
6
|
import type { AgentMessage } from "../types";
|
|
6
7
|
export interface FileOperations {
|
|
7
8
|
read: Set<string>;
|
|
@@ -44,5 +45,5 @@ export declare function upsertFileOperations(summary: string, readFiles: string[
|
|
|
44
45
|
* This prevents the model from treating it as a conversation to continue.
|
|
45
46
|
* Call convertToLlm() first to handle custom message types.
|
|
46
47
|
*/
|
|
47
|
-
export declare function serializeConversation(messages: Message[]): string;
|
|
48
|
+
export declare function serializeConversation(messages: Message[], syntax?: ToolCallSyntax): string;
|
|
48
49
|
export declare const SUMMARIZATION_SYSTEM_PROMPT: string;
|
package/dist/types/index.d.ts
CHANGED
package/dist/types/types.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { ApiKeyResolveContext, AssistantMessage, AssistantMessageEvent, AssistantMessageEventStream, Context, Effort, ImageContent, Message, Model, SimpleStreamOptions, Static, streamSimple, TextContent, Tool, ToolChoice, ToolResultMessage, TSchema } from "@oh-my-pi/pi-ai";
|
|
2
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
3
|
+
import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
|
|
2
4
|
import type { AppendOnlyContextManager } from "./append-only-context";
|
|
3
|
-
import type { HarmonyAuditEvent } from "./harmony-leak";
|
|
4
5
|
import type { AgentRunCoverage, AgentRunSummary } from "./run-collector";
|
|
5
6
|
import type { AgentTelemetryConfig } from "./telemetry";
|
|
6
7
|
/** Stream function - can return sync or Promise for async config lookup */
|
|
@@ -162,6 +163,27 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
|
|
|
162
163
|
* then strips from arguments before executing tools.
|
|
163
164
|
*/
|
|
164
165
|
intentTracing?: boolean;
|
|
166
|
+
/**
|
|
167
|
+
* Owned tool calling syntax.
|
|
168
|
+
*
|
|
169
|
+
* Undefined keeps provider-native tool calling. A syntax value sends no
|
|
170
|
+
* native `tools`, forces `toolChoice` off, appends that syntax's tool catalog
|
|
171
|
+
* instructions, re-encodes prior tool calls/results as text, and parses the
|
|
172
|
+
* model's text output back into canonical `toolCall` blocks.
|
|
173
|
+
*/
|
|
174
|
+
toolCallSyntax?: ToolCallSyntax;
|
|
175
|
+
/**
|
|
176
|
+
* When owned (in-band) tool calling is active and the model starts
|
|
177
|
+
* fabricating a tool result inside its own turn, control how the loop reacts:
|
|
178
|
+
* - `true` (default): abort the provider request immediately so it stops
|
|
179
|
+
* generating the hallucinated continuation (cheaper, lower latency).
|
|
180
|
+
* - `false`: let the request finish and silently discard everything past the
|
|
181
|
+
* fabrication boundary (keeps the connection alive but pays for the tokens
|
|
182
|
+
* the model spends on the discarded tail).
|
|
183
|
+
* Only meaningful when {@link toolCallSyntax} (or `PI_OWNED_TOOLS`) selects an
|
|
184
|
+
* owned syntax; native tool calling never fabricates results in text.
|
|
185
|
+
*/
|
|
186
|
+
abortOnFabricatedToolResult?: boolean;
|
|
165
187
|
/**
|
|
166
188
|
* Append-only context mode — stabilizes system prompt + tool spec bytes
|
|
167
189
|
* across turns so provider prefix caches hit at maximum rate.
|
|
@@ -406,6 +428,15 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
|
|
|
406
428
|
concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
|
|
407
429
|
/** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
|
|
408
430
|
lenientArgValidation?: boolean;
|
|
431
|
+
/**
|
|
432
|
+
* If true, the agent loop may abort this tool mid-execution to deliver a
|
|
433
|
+
* queued steering message (instead of waiting for the tool to finish on its
|
|
434
|
+
* own). Set only on tools that purely *wait* and observe their abort signal
|
|
435
|
+
* cleanly (e.g. the `job` poll), so the abort surfaces the tool's current
|
|
436
|
+
* snapshot rather than corrupting a side effect. Honored only when
|
|
437
|
+
* `interruptMode` is "immediate".
|
|
438
|
+
*/
|
|
439
|
+
interruptible?: boolean;
|
|
409
440
|
/**
|
|
410
441
|
* Controls how the INTENT_FIELD (`_i`) is handled for this tool.
|
|
411
442
|
* - `"require"` (default): `_i` is injected and required in the parameter schema.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-agent-core",
|
|
4
|
-
"version": "15.13.1",
|
|
4
|
+
"version": "15.13.3",
|
|
5
5
|
"description": "General-purpose agent with transport abstraction, state management, and attachment support",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -35,11 +35,11 @@
|
|
|
35
35
|
"fmt": "biome format --write ."
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
|
-
"@oh-my-pi/pi-ai": "15.13.
|
|
39
|
-
"@oh-my-pi/pi-catalog": "15.13.
|
|
40
|
-
"@oh-my-pi/pi-natives": "15.13.
|
|
41
|
-
"@oh-my-pi/pi-utils": "15.13.
|
|
42
|
-
"@oh-my-pi/snapcompact": "15.13.
|
|
38
|
+
"@oh-my-pi/pi-ai": "15.13.3",
|
|
39
|
+
"@oh-my-pi/pi-catalog": "15.13.3",
|
|
40
|
+
"@oh-my-pi/pi-natives": "15.13.3",
|
|
41
|
+
"@oh-my-pi/pi-utils": "15.13.3",
|
|
42
|
+
"@oh-my-pi/snapcompact": "15.13.3",
|
|
43
43
|
"@opentelemetry/api": "^1.9.1"
|
|
44
44
|
},
|
|
45
45
|
"devDependencies": {
|
package/src/agent-loop.ts
CHANGED
|
@@ -15,7 +15,13 @@ import {
|
|
|
15
15
|
validateToolArguments,
|
|
16
16
|
zodToWireSchema,
|
|
17
17
|
} from "@oh-my-pi/pi-ai";
|
|
18
|
-
import {
|
|
18
|
+
import {
|
|
19
|
+
encodeInbandToolHistory,
|
|
20
|
+
renderInbandToolPrompt,
|
|
21
|
+
renderToolExamples,
|
|
22
|
+
type ToolCallSyntax,
|
|
23
|
+
wrapInbandToolStream,
|
|
24
|
+
} from "@oh-my-pi/pi-ai/grammar";
|
|
19
25
|
import {
|
|
20
26
|
createHarmonyAuditEvent,
|
|
21
27
|
detectHarmonyLeakInAssistantMessage,
|
|
@@ -25,7 +31,9 @@ import {
|
|
|
25
31
|
isHarmonyLeakMitigationTarget,
|
|
26
32
|
recoverHarmonyToolCall,
|
|
27
33
|
signalListLabel,
|
|
28
|
-
} from "./harmony-leak";
|
|
34
|
+
} from "@oh-my-pi/pi-ai/utils/harmony-leak";
|
|
35
|
+
import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
|
|
36
|
+
import { logger, sanitizeText } from "@oh-my-pi/pi-utils";
|
|
29
37
|
import { type AgentRunCoverage, type AgentRunSummary, ToolCallBlockedError } from "./run-collector";
|
|
30
38
|
import {
|
|
31
39
|
type AgentTelemetry,
|
|
@@ -66,6 +74,14 @@ const ABORTED: unique symbol = Symbol("agent-loop-aborted");
|
|
|
66
74
|
*/
|
|
67
75
|
const MAX_PAUSED_TURN_CONTINUATIONS = 8;
|
|
68
76
|
|
|
77
|
+
/**
|
|
78
|
+
* Cadence (ms) for polling queued steering while an `interruptible` tool is in
|
|
79
|
+
* flight, so a steer cuts the wait short instead of sitting idle until the
|
|
80
|
+
* tool's own window elapses. A cheap synchronous queue check; latency-bounded
|
|
81
|
+
* at one tick.
|
|
82
|
+
*/
|
|
83
|
+
const STEERING_INTERRUPT_POLL_MS = 250;
|
|
84
|
+
|
|
69
85
|
class HarmonyLeakInterruption extends Error {
|
|
70
86
|
constructor(
|
|
71
87
|
readonly detection: HarmonyDetection,
|
|
@@ -76,6 +92,27 @@ class HarmonyLeakInterruption extends Error {
|
|
|
76
92
|
this.name = "HarmonyLeakInterruption";
|
|
77
93
|
}
|
|
78
94
|
}
|
|
95
|
+
function resolveOwnedToolSyntaxFromEnv(value: string | undefined): ToolCallSyntax | undefined {
|
|
96
|
+
switch (value) {
|
|
97
|
+
case "1":
|
|
98
|
+
case "true":
|
|
99
|
+
return "glm";
|
|
100
|
+
case "glm":
|
|
101
|
+
case "hermes":
|
|
102
|
+
case "kimi":
|
|
103
|
+
case "xml":
|
|
104
|
+
case "anthropic":
|
|
105
|
+
case "deepseek":
|
|
106
|
+
case "harmony":
|
|
107
|
+
case "pi":
|
|
108
|
+
case "qwen3":
|
|
109
|
+
case "gemini":
|
|
110
|
+
case "gemma":
|
|
111
|
+
return value;
|
|
112
|
+
default:
|
|
113
|
+
return undefined;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
79
116
|
|
|
80
117
|
type AssistantContentBlock = AssistantMessage["content"][number];
|
|
81
118
|
type AssistantToolCallBlock = Extract<AssistantContentBlock, { type: "toolCall" }>;
|
|
@@ -491,7 +528,11 @@ function injectIntentIntoSchema(schema: unknown, mode: "require" | "optional" =
|
|
|
491
528
|
};
|
|
492
529
|
}
|
|
493
530
|
|
|
494
|
-
export function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"] {
|
|
531
|
+
export function normalizeTools(
|
|
532
|
+
tools: AgentContext["tools"],
|
|
533
|
+
injectIntent: boolean,
|
|
534
|
+
exampleSyntax?: ToolCallSyntax,
|
|
535
|
+
): Context["tools"] {
|
|
495
536
|
injectIntent = injectIntent && Bun.env.PI_NO_INTENT !== "1";
|
|
496
537
|
return tools?.map(t => {
|
|
497
538
|
const intentMode = resolveIntentMode(t.intent);
|
|
@@ -505,7 +546,12 @@ export function normalizeTools(tools: AgentContext["tools"], injectIntent: boole
|
|
|
505
546
|
}
|
|
506
547
|
}
|
|
507
548
|
const description = t.description ?? "";
|
|
508
|
-
|
|
549
|
+
const injectExampleIntent = injectIntent && intentMode !== "omit";
|
|
550
|
+
const examplesBlock = exampleSyntax
|
|
551
|
+
? renderToolExamples({ ...t, parameters }, exampleSyntax, injectExampleIntent ? INTENT_FIELD : undefined)
|
|
552
|
+
: "";
|
|
553
|
+
const finalDescription = examplesBlock ? `${description}\n\n${examplesBlock}` : description;
|
|
554
|
+
return { ...t, parameters, description: finalDescription };
|
|
509
555
|
});
|
|
510
556
|
}
|
|
511
557
|
|
|
@@ -884,18 +930,37 @@ async function streamAssistantResponse(
|
|
|
884
930
|
let llmContext: Context;
|
|
885
931
|
if (config.appendOnlyContext) {
|
|
886
932
|
config.appendOnlyContext.syncMessages(normalizedMessages);
|
|
887
|
-
llmContext = config.appendOnlyContext.build(context, {
|
|
933
|
+
llmContext = config.appendOnlyContext.build(context, {
|
|
934
|
+
intentTracing: !!config.intentTracing,
|
|
935
|
+
exampleSyntax: preferredToolSyntax(config.model.id),
|
|
936
|
+
});
|
|
888
937
|
} else {
|
|
889
938
|
llmContext = {
|
|
890
939
|
systemPrompt: context.systemPrompt,
|
|
891
940
|
messages: normalizedMessages,
|
|
892
|
-
tools: normalizeTools(context.tools, !!config.intentTracing),
|
|
941
|
+
tools: normalizeTools(context.tools, !!config.intentTracing, preferredToolSyntax(config.model.id)),
|
|
893
942
|
};
|
|
894
943
|
}
|
|
895
944
|
if (config.transformProviderContext) {
|
|
896
945
|
llmContext = config.transformProviderContext(llmContext, config.model);
|
|
897
946
|
}
|
|
898
947
|
|
|
948
|
+
// Owned tool calling: take tool calls away from the provider and run them
|
|
949
|
+
// through the selected in-band prompt syntax. `PI_OWNED_TOOLS=1` still
|
|
950
|
+
// force-enables GLM; `PI_OWNED_TOOLS=<syntax>` force-enables that syntax.
|
|
951
|
+
const ownedSyntax: ToolCallSyntax | undefined =
|
|
952
|
+
config.toolCallSyntax ?? resolveOwnedToolSyntaxFromEnv(Bun.env.PI_OWNED_TOOLS);
|
|
953
|
+
let promptToolWireTools: Context["tools"];
|
|
954
|
+
if (ownedSyntax && llmContext.tools && llmContext.tools.length > 0) {
|
|
955
|
+
promptToolWireTools = llmContext.tools;
|
|
956
|
+
llmContext = {
|
|
957
|
+
...llmContext,
|
|
958
|
+
systemPrompt: [...(llmContext.systemPrompt ?? []), renderInbandToolPrompt(promptToolWireTools, ownedSyntax)],
|
|
959
|
+
messages: encodeInbandToolHistory(llmContext.messages, ownedSyntax, promptToolWireTools),
|
|
960
|
+
tools: undefined,
|
|
961
|
+
};
|
|
962
|
+
}
|
|
963
|
+
|
|
899
964
|
const streamFunction = streamFn || streamSimple;
|
|
900
965
|
|
|
901
966
|
// Resolve API key (important for expiring tokens) — do this before resolving
|
|
@@ -920,12 +985,22 @@ async function streamAssistantResponse(
|
|
|
920
985
|
: harmonyAbortController.signal
|
|
921
986
|
: signal;
|
|
922
987
|
const repetitionAbortController = new AbortController();
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
988
|
+
// Owned tool calling: aborted by the stream wrapper when the model starts
|
|
989
|
+
// fabricating a `<tool_response>`, so the provider stops generating the rest of
|
|
990
|
+
// the hallucinated turn. Merged into the provider signal ONLY (not
|
|
991
|
+
// `requestSignal`), so it cancels the request without tripping the loop's
|
|
992
|
+
// external-abort handling (`abortRacePromise` / `requestSignal.aborted`).
|
|
993
|
+
const promptToolAbortController = ownedSyntax ? new AbortController() : undefined;
|
|
994
|
+
const providerAbortSignals: AbortSignal[] = [];
|
|
995
|
+
if (requestSignal) providerAbortSignals.push(requestSignal);
|
|
996
|
+
providerAbortSignals.push(repetitionAbortController.signal);
|
|
997
|
+
if (promptToolAbortController) providerAbortSignals.push(promptToolAbortController.signal);
|
|
998
|
+
const finalRequestSignal =
|
|
999
|
+
providerAbortSignals.length === 1 ? providerAbortSignals[0]! : AbortSignal.any(providerAbortSignals);
|
|
926
1000
|
const effectiveTemperature =
|
|
927
1001
|
harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
|
|
928
|
-
|
|
1002
|
+
// Owned tool calling sends no native tools, so any tool_choice would error.
|
|
1003
|
+
const effectiveToolChoice = ownedSyntax ? undefined : (dynamicToolChoice ?? config.toolChoice);
|
|
929
1004
|
const effectiveReasoning = dynamicReasoning ?? config.reasoning;
|
|
930
1005
|
const effectiveDisableReasoning = dynamicDisableReasoning ?? config.disableReasoning;
|
|
931
1006
|
|
|
@@ -970,7 +1045,7 @@ async function streamAssistantResponse(
|
|
|
970
1045
|
|
|
971
1046
|
try {
|
|
972
1047
|
return await runInActiveSpan(chatSpan, async () => {
|
|
973
|
-
|
|
1048
|
+
let response = await streamFunction(config.model, llmContext, {
|
|
974
1049
|
...config,
|
|
975
1050
|
// Hand streamSimple a resolver so its central auth-retry policy can
|
|
976
1051
|
// re-resolve on 401 / usage-limit: the initial step reuses the key
|
|
@@ -993,6 +1068,20 @@ async function streamAssistantResponse(
|
|
|
993
1068
|
signal: finalRequestSignal,
|
|
994
1069
|
onResponse: captureOnResponse,
|
|
995
1070
|
});
|
|
1071
|
+
if (promptToolWireTools && ownedSyntax) {
|
|
1072
|
+
// Re-materialize in-band tool-call text as native toolCall content blocks
|
|
1073
|
+
// so the rest of the loop executes them unchanged. When the model starts
|
|
1074
|
+
// fabricating tool results, the abort callback cancels the provider — unless
|
|
1075
|
+
// `abortOnFabricatedToolResult` is false, in which case the stream drains and
|
|
1076
|
+
// the fabricated continuation is discarded without aborting.
|
|
1077
|
+
response = wrapInbandToolStream(
|
|
1078
|
+
response,
|
|
1079
|
+
promptToolWireTools,
|
|
1080
|
+
ownedSyntax,
|
|
1081
|
+
() => promptToolAbortController?.abort(),
|
|
1082
|
+
config.abortOnFabricatedToolResult ?? true,
|
|
1083
|
+
);
|
|
1084
|
+
}
|
|
996
1085
|
|
|
997
1086
|
let partialMessage: AssistantMessage | null = null;
|
|
998
1087
|
let addedPartial = false;
|
|
@@ -1716,7 +1805,24 @@ async function executeToolCalls(
|
|
|
1716
1805
|
}
|
|
1717
1806
|
}
|
|
1718
1807
|
|
|
1719
|
-
|
|
1808
|
+
// While an interruptible tool is in flight (e.g. a `job` poll blocking on
|
|
1809
|
+
// background work), a queued steer would otherwise wait out the tool's own
|
|
1810
|
+
// window. Poll the steering queue and let checkSteering() abort the shared
|
|
1811
|
+
// tool signal so the wait returns early; the boundary dequeue below then
|
|
1812
|
+
// injects it. Gated on immediate-interrupt mode + an interruptible tool;
|
|
1813
|
+
// checkSteering is idempotent (no-op once triggered).
|
|
1814
|
+
const watchSteeringWhileRunning =
|
|
1815
|
+
shouldInterruptImmediately &&
|
|
1816
|
+
(hasSteeringMessages !== undefined || getSteeringMessages !== undefined) &&
|
|
1817
|
+
records.some(r => r.tool?.interruptible === true);
|
|
1818
|
+
const steeringWatchTimer = watchSteeringWhileRunning
|
|
1819
|
+
? setInterval(() => void checkSteering(), STEERING_INTERRUPT_POLL_MS)
|
|
1820
|
+
: undefined;
|
|
1821
|
+
try {
|
|
1822
|
+
await Promise.allSettled(tasks);
|
|
1823
|
+
} finally {
|
|
1824
|
+
if (steeringWatchTimer !== undefined) clearInterval(steeringWatchTimer);
|
|
1825
|
+
}
|
|
1720
1826
|
// Yield after batch tool execution to let GC and I/O catch up,
|
|
1721
1827
|
// especially when tool results are large (e.g. bash output).
|
|
1722
1828
|
await yieldIfDue();
|
package/src/agent.ts
CHANGED
|
@@ -22,11 +22,12 @@ import {
|
|
|
22
22
|
type ToolChoice,
|
|
23
23
|
type ToolResultMessage,
|
|
24
24
|
} from "@oh-my-pi/pi-ai";
|
|
25
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
26
|
+
import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
|
|
25
27
|
import { getBundledModel } from "@oh-my-pi/pi-catalog/models";
|
|
26
28
|
import { logger } from "@oh-my-pi/pi-utils";
|
|
27
29
|
import { abortReasonText, agentLoop, agentLoopContinue } from "./agent-loop";
|
|
28
30
|
import type { AppendOnlyContextManager } from "./append-only-context";
|
|
29
|
-
import type { HarmonyAuditEvent } from "./harmony-leak";
|
|
30
31
|
import type {
|
|
31
32
|
AgentContext,
|
|
32
33
|
AgentEvent,
|
|
@@ -220,6 +221,15 @@ export interface AgentOptions {
|
|
|
220
221
|
|
|
221
222
|
/** Enable intent tracing schema injection/stripping in the harness. */
|
|
222
223
|
intentTracing?: boolean;
|
|
224
|
+
/** Owned tool-calling syntax. Undefined keeps provider-native tool calling. */
|
|
225
|
+
toolCallSyntax?: ToolCallSyntax;
|
|
226
|
+
/**
|
|
227
|
+
* When owned tool calling is active and the model fabricates a tool result
|
|
228
|
+
* mid-turn: `true` (default) aborts the provider request immediately; `false`
|
|
229
|
+
* drains the request and discards the fabricated continuation. Forwarded to
|
|
230
|
+
* the loop's {@link AgentLoopConfig.abortOnFabricatedToolResult}.
|
|
231
|
+
*/
|
|
232
|
+
abortOnFabricatedToolResult?: boolean;
|
|
223
233
|
/** Dynamic tool choice override, resolved per LLM call. */
|
|
224
234
|
getToolChoice?: () => ToolChoice | undefined;
|
|
225
235
|
|
|
@@ -316,6 +326,8 @@ export class Agent {
|
|
|
316
326
|
#preferWebsockets?: boolean;
|
|
317
327
|
#transformToolCallArguments?: (args: Record<string, unknown>, toolName: string) => Record<string, unknown>;
|
|
318
328
|
#intentTracing: boolean;
|
|
329
|
+
#toolCallSyntax?: ToolCallSyntax;
|
|
330
|
+
#abortOnFabricatedToolResult?: boolean;
|
|
319
331
|
#getToolChoice?: () => ToolChoice | undefined;
|
|
320
332
|
#onPayload?: SimpleStreamOptions["onPayload"];
|
|
321
333
|
#onResponse?: SimpleStreamOptions["onResponse"];
|
|
@@ -378,6 +390,8 @@ export class Agent {
|
|
|
378
390
|
this.#preferWebsockets = opts.preferWebsockets;
|
|
379
391
|
this.#transformToolCallArguments = opts.transformToolCallArguments;
|
|
380
392
|
this.#intentTracing = opts.intentTracing === true;
|
|
393
|
+
this.#toolCallSyntax = opts.toolCallSyntax;
|
|
394
|
+
this.#abortOnFabricatedToolResult = opts.abortOnFabricatedToolResult;
|
|
381
395
|
this.#getToolChoice = opts.getToolChoice;
|
|
382
396
|
this.#onAssistantMessageEvent = opts.onAssistantMessageEvent;
|
|
383
397
|
this.#onHarmonyLeak = opts.onHarmonyLeak;
|
|
@@ -1023,6 +1037,8 @@ export class Agent {
|
|
|
1023
1037
|
cursorOnToolResult,
|
|
1024
1038
|
transformToolCallArguments: this.#transformToolCallArguments,
|
|
1025
1039
|
intentTracing: this.#intentTracing,
|
|
1040
|
+
toolCallSyntax: this.#toolCallSyntax,
|
|
1041
|
+
abortOnFabricatedToolResult: this.#abortOnFabricatedToolResult,
|
|
1026
1042
|
appendOnlyContext: this.#appendOnlyContext,
|
|
1027
1043
|
beforeToolCall: this.beforeToolCall ? (ctx, signal) => this.beforeToolCall?.(ctx, signal) : undefined,
|
|
1028
1044
|
afterToolCall: this.afterToolCall ? (ctx, signal) => this.afterToolCall?.(ctx, signal) : undefined,
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
17
|
import type { Context, Message, Tool } from "@oh-my-pi/pi-ai";
|
|
18
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
18
19
|
import { normalizeTools } from "./agent-loop";
|
|
19
20
|
import type { AgentContext } from "./types";
|
|
20
21
|
|
|
@@ -33,6 +34,7 @@ export interface StablePrefixSnapshot {
|
|
|
33
34
|
export interface BuildOptions {
|
|
34
35
|
/** Inject the `_i` intent field into tool schemas (must match agent-loop's normalizeTools). */
|
|
35
36
|
intentTracing: boolean;
|
|
37
|
+
exampleSyntax?: ToolCallSyntax;
|
|
36
38
|
}
|
|
37
39
|
|
|
38
40
|
/**
|
|
@@ -268,7 +270,7 @@ export class AppendOnlyContextManager {
|
|
|
268
270
|
|
|
269
271
|
function takeSnapshot(context: AgentContext, options: BuildOptions): StablePrefixSnapshot {
|
|
270
272
|
const systemPrompt = [...context.systemPrompt];
|
|
271
|
-
const tools = normalizeTools(context.tools, options.intentTracing) ?? [];
|
|
273
|
+
const tools = normalizeTools(context.tools, options.intentTracing, options.exampleSyntax) ?? [];
|
|
272
274
|
return {
|
|
273
275
|
systemPrompt,
|
|
274
276
|
tools,
|
|
@@ -288,6 +290,7 @@ function computeFingerprint(systemPrompt: string[], tools: Tool[], options: Buil
|
|
|
288
290
|
cw: t.customWireName,
|
|
289
291
|
})),
|
|
290
292
|
i: options.intentTracing,
|
|
293
|
+
ex: options.exampleSyntax,
|
|
291
294
|
});
|
|
292
295
|
let hash = 0;
|
|
293
296
|
for (let i = 0; i < payload.length; i++) {
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import type { ApiKey, Model } from "@oh-my-pi/pi-ai";
|
|
9
|
+
import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
|
|
9
10
|
import { prompt } from "@oh-my-pi/pi-utils";
|
|
10
11
|
import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
|
|
11
12
|
import type { AgentMessage } from "../types";
|
|
@@ -290,7 +291,7 @@ export async function generateBranchSummary(
|
|
|
290
291
|
// Transform to LLM-compatible messages, then serialize to text
|
|
291
292
|
// Serialization prevents the model from treating it as a conversation to continue
|
|
292
293
|
const llmMessages = (options.convertToLlm ?? defaultConvertToLlm)(messages);
|
|
293
|
-
const conversationText = serializeConversation(llmMessages);
|
|
294
|
+
const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
|
|
294
295
|
|
|
295
296
|
// Build prompt
|
|
296
297
|
const instructions = customInstructions || BRANCH_SUMMARY_PROMPT;
|
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
type Usage,
|
|
19
19
|
withAuth,
|
|
20
20
|
} from "@oh-my-pi/pi-ai";
|
|
21
|
+
import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
|
|
21
22
|
import { clampThinkingLevelForModel } from "@oh-my-pi/pi-catalog/model-thinking";
|
|
22
23
|
import { countTokens } from "@oh-my-pi/pi-natives";
|
|
23
24
|
import { logger, prompt } from "@oh-my-pi/pi-utils";
|
|
@@ -642,7 +643,7 @@ export async function generateSummary(
|
|
|
642
643
|
// Serialize conversation to text so model doesn't try to continue it
|
|
643
644
|
// Convert to LLM messages first (handles custom app messages when caller provides a transformer).
|
|
644
645
|
const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(currentMessages);
|
|
645
|
-
const conversationText = serializeConversation(llmMessages);
|
|
646
|
+
const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
|
|
646
647
|
|
|
647
648
|
// Build the prompt with conversation wrapped in tags
|
|
648
649
|
let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
|
|
@@ -790,7 +791,7 @@ async function generateShortSummary(
|
|
|
790
791
|
): Promise<string> {
|
|
791
792
|
const maxTokens = Math.min(512, Math.floor(0.2 * reserveTokens));
|
|
792
793
|
const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(recentMessages);
|
|
793
|
-
const conversationText = serializeConversation(llmMessages);
|
|
794
|
+
const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
|
|
794
795
|
|
|
795
796
|
let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
|
|
796
797
|
if (historySummary) {
|
|
@@ -1155,7 +1156,7 @@ async function generateTurnPrefixSummary(
|
|
|
1155
1156
|
const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix
|
|
1156
1157
|
|
|
1157
1158
|
const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(messages);
|
|
1158
|
-
const conversationText = serializeConversation(llmMessages);
|
|
1159
|
+
const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
|
|
1159
1160
|
const promptText = `<conversation>\n${conversationText}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`;
|
|
1160
1161
|
const summarizationMessages = [
|
|
1161
1162
|
{
|
|
@@ -81,6 +81,16 @@ function createPrunedNotice(tokens: number): string {
|
|
|
81
81
|
return `[Output truncated - ${tokens} tokens]`;
|
|
82
82
|
}
|
|
83
83
|
|
|
84
|
+
/**
|
|
85
|
+
* Generic age-based pruning floor. Below this, blanking a result to
|
|
86
|
+
* `[Output truncated - N tokens]` recovers nothing — the placeholder itself
|
|
87
|
+
* costs ~8 tokens, so a sub-floor result grows the context (and churns the
|
|
88
|
+
* prompt cache) instead of shrinking it. Superseded/useless results keep their
|
|
89
|
+
* own rules: useless already drops no-savings candidates, superseded prunes for
|
|
90
|
+
* correctness regardless of size.
|
|
91
|
+
*/
|
|
92
|
+
const MIN_PRUNE_TOKENS = 50;
|
|
93
|
+
|
|
84
94
|
function getToolResultMessage(entry: SessionEntry): ToolResultMessage | undefined {
|
|
85
95
|
if (entry.type !== "message") return undefined;
|
|
86
96
|
const message = entry.message as AgentMessage;
|
|
@@ -271,7 +281,8 @@ export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig =
|
|
|
271
281
|
// any age).
|
|
272
282
|
const superseded = supersededMessages?.has(message) ?? false;
|
|
273
283
|
const useless = uselessMessages?.has(message) ?? false;
|
|
274
|
-
|
|
284
|
+
const tooSmall = tokens < MIN_PRUNE_TOKENS;
|
|
285
|
+
if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected || tooSmall)) {
|
|
275
286
|
accumulatedTokens += tokens;
|
|
276
287
|
continue;
|
|
277
288
|
}
|