@oh-my-pi/pi-agent-core 15.13.1 → 15.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,41 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.13.3] - 2026-06-15
6
+
7
+ ### Added
8
+
9
+ - Added the `interruptible` tool field: when set, the agent loop may abort the tool mid-execution to deliver a queued steering message (honored only in `immediate` interrupt mode).
10
+ - Added support for `gemini` and `gemma` as valid owned tool-call syntax values for the `PI_OWNED_TOOLS` environment variable.
11
+
12
+ ### Fixed
13
+
14
+ - Fixed `pruneToolOutputs` blanking tiny tool results during overflow pruning: results below `50` tokens (`MIN_PRUNE_TOKENS`) are no longer replaced with the `[Output truncated - N tokens]` placeholder, which cost more tokens than the result itself and churned the prompt cache for zero savings.
15
+
16
+ ## [15.13.2] - 2026-06-15
17
+
18
+ ### Breaking Changes
19
+
20
+ - Removed `harmony-leak` exports from the `@oh-my-pi/pi-agent-core` package entrypoint
21
+ - Replaced the experimental `promptToolCalls` agent/loop option with `toolCallSyntax`, selecting an explicit in-band tool-call grammar instead of a boolean GLM-only mode.
22
+
23
+ ### Added
24
+
25
+ - Added support for selecting owned in-band tool-call syntax via `PI_OWNED_TOOLS=<syntax>` (for example `hermes` or `qwen3`) while preserving legacy `PI_OWNED_TOOLS=1/true` as GLM mode
26
+ - Added owned in-band tool calling for multiple syntaxes (`glm`, `hermes`, `kimi`, `xml`, `anthropic`, `deepseek`, `harmony`, `pi`, `qwen3`). Owned mode sends no native provider tools, appends a syntax-specific prompt/catalog, re-encodes prior tool calls/results as grammar-owned text, and parses streamed model output back into canonical tool calls.
27
+ - Added tool-example folding to `normalizeTools`: when given a model's affinity syntax (resolved via `preferredToolSyntax`), it renders each tool's `examples` into an `<examples>` block in that native syntax and appends it to the wire description. Wired through both context paths (fresh build and append-only `takeSnapshot`/`build` via a new `exampleSyntax` build option), with the `_i` intent-field placeholder added to examples when intent tracing injects it.
28
+ - Added the `abortOnFabricatedToolResult` option to `AgentOptions`/`AgentLoopConfig` (default `true`): when owned tool calling is active and the model fabricates a tool result mid-turn, `true` aborts the provider request immediately while `false` lets it finish and discards the fabricated continuation.
29
+
30
+ ### Changed
31
+
32
+ - Added owned in-band syntax support to `Agent` loop configuration resolution by selecting syntax from `toolCallSyntax` or `PI_OWNED_TOOLS` when present
33
+
34
+ ### Fixed
35
+
36
+ - Fixed append-only context cache fingerprinting to account for `exampleSyntax`, so switching tool-call syntax rebuilds cached prompts with the correct injected tool examples
37
+ - Fixed owned in-band tool-calling requests to omit `toolChoice` after stripping native tools, preventing invalid tool-choice requests
38
+ - Fixed owned tool calling letting the model fabricate tool results by treating grammar-owned tool-result markers in assistant text as a hard turn boundary: calls before the fabrication are kept, fabricated results and dependent calls are dropped, and the real result is fed back on the next turn.
39
+
5
40
  ## [15.13.1] - 2026-06-15
6
41
 
7
42
  ### Added
@@ -3,6 +3,7 @@
3
3
  * Transforms to Message[] only at the LLM call boundary.
4
4
  */
5
5
  import { type Context, EventStream } from "@oh-my-pi/pi-ai";
6
+ import { type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
6
7
  import { type AgentRunCoverage, type AgentRunSummary } from "./run-collector";
7
8
  import type { AgentContext, AgentEvent, AgentLoopConfig, AgentMessage, StreamFn } from "./types";
8
9
  /**
@@ -52,7 +53,7 @@ export declare function agentLoopContinueDetailed(context: AgentContext, config:
52
53
  readonly detailed: () => Promise<AgentLoopDetailedResult>;
53
54
  };
54
55
  export declare const INTENT_FIELD = "_i";
55
- export declare function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"];
56
+ export declare function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean, exampleSyntax?: ToolCallSyntax): Context["tools"];
56
57
  /** Resolve the human-readable reason an abort carried. A caller that aborts via
57
58
  * `AbortController.abort(reason)` with a string or a non-`AbortError` `Error`
58
59
  * (e.g. the coding agent's user-interrupt label) gets that text surfaced on the
@@ -1,6 +1,7 @@
1
1
  import { type ApiKeyResolveContext, type AssistantMessage, type AssistantMessageEvent, type Context, type CursorExecHandlers, type CursorToolResultHandler, type Effort, type ImageContent, type Message, type Model, type ProviderSessionState, type ServiceTier, type SimpleStreamOptions, type ThinkingBudgets, type ToolChoice } from "@oh-my-pi/pi-ai";
2
+ import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
3
+ import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
2
4
  import type { AppendOnlyContextManager } from "./append-only-context";
3
- import type { HarmonyAuditEvent } from "./harmony-leak";
4
5
  import type { AgentEvent, AgentLoopConfig, AgentMessage, AgentState, AgentTool, AgentToolContext, AsideMessage, StreamFn, ToolCallContext } from "./types";
5
6
  export declare class AgentBusyError extends Error {
6
7
  constructor(message?: string);
@@ -126,6 +127,15 @@ export interface AgentOptions {
126
127
  transformToolCallArguments?: (args: Record<string, unknown>, toolName: string) => Record<string, unknown>;
127
128
  /** Enable intent tracing schema injection/stripping in the harness. */
128
129
  intentTracing?: boolean;
130
+ /** Owned tool-calling syntax. Undefined keeps provider-native tool calling. */
131
+ toolCallSyntax?: ToolCallSyntax;
132
+ /**
133
+ * When owned tool calling is active and the model fabricates a tool result
134
+ * mid-turn: `true` (default) aborts the provider request immediately; `false`
135
+ * drains the request and discards the fabricated continuation. Forwarded to
136
+ * the loop's {@link AgentLoopConfig.abortOnFabricatedToolResult}.
137
+ */
138
+ abortOnFabricatedToolResult?: boolean;
129
139
  /** Dynamic tool choice override, resolved per LLM call. */
130
140
  getToolChoice?: () => ToolChoice | undefined;
131
141
  /**
@@ -14,6 +14,7 @@
14
14
  * message delta is a cache miss each turn.
15
15
  */
16
16
  import type { Context, Message, Tool } from "@oh-my-pi/pi-ai";
17
+ import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
17
18
  import type { AgentContext } from "./types";
18
19
  /** Frozen system prompt + tool spec snapshot. */
19
20
  export interface StablePrefixSnapshot {
@@ -25,6 +26,7 @@ export interface StablePrefixSnapshot {
25
26
  export interface BuildOptions {
26
27
  /** Inject the `_i` intent field into tool schemas (must match agent-loop's normalizeTools). */
27
28
  intentTracing: boolean;
29
+ exampleSyntax?: ToolCallSyntax;
28
30
  }
29
31
  /**
30
32
  * A frozen prefix (system prompt + tools) that produces stable byte
@@ -2,6 +2,7 @@
2
2
  * Shared utilities for compaction and branch summarization.
3
3
  */
4
4
  import type { Message } from "@oh-my-pi/pi-ai";
5
+ import { type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
5
6
  import type { AgentMessage } from "../types";
6
7
  export interface FileOperations {
7
8
  read: Set<string>;
@@ -44,5 +45,5 @@ export declare function upsertFileOperations(summary: string, readFiles: string[
44
45
  * This prevents the model from treating it as a conversation to continue.
45
46
  * Call convertToLlm() first to handle custom message types.
46
47
  */
47
- export declare function serializeConversation(messages: Message[]): string;
48
+ export declare function serializeConversation(messages: Message[], syntax?: ToolCallSyntax): string;
48
49
  export declare const SUMMARIZATION_SYSTEM_PROMPT: string;
@@ -2,7 +2,6 @@ export * from "./agent";
2
2
  export * from "./agent-loop";
3
3
  export * from "./append-only-context";
4
4
  export * from "./compaction";
5
- export * from "./harmony-leak";
6
5
  export * from "./proxy";
7
6
  export * from "./run-collector";
8
7
  export * from "./telemetry";
@@ -1,6 +1,7 @@
1
1
  import type { ApiKeyResolveContext, AssistantMessage, AssistantMessageEvent, AssistantMessageEventStream, Context, Effort, ImageContent, Message, Model, SimpleStreamOptions, Static, streamSimple, TextContent, Tool, ToolChoice, ToolResultMessage, TSchema } from "@oh-my-pi/pi-ai";
2
+ import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
3
+ import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
2
4
  import type { AppendOnlyContextManager } from "./append-only-context";
3
- import type { HarmonyAuditEvent } from "./harmony-leak";
4
5
  import type { AgentRunCoverage, AgentRunSummary } from "./run-collector";
5
6
  import type { AgentTelemetryConfig } from "./telemetry";
6
7
  /** Stream function - can return sync or Promise for async config lookup */
@@ -162,6 +163,27 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
162
163
  * then strips from arguments before executing tools.
163
164
  */
164
165
  intentTracing?: boolean;
166
+ /**
167
+ * Owned tool calling syntax.
168
+ *
169
+ * Undefined keeps provider-native tool calling. A syntax value sends no
170
+ * native `tools`, forces `toolChoice` off, appends that syntax's tool catalog
171
+ * instructions, re-encodes prior tool calls/results as text, and parses the
172
+ * model's text output back into canonical `toolCall` blocks.
173
+ */
174
+ toolCallSyntax?: ToolCallSyntax;
175
+ /**
176
+ * When owned (in-band) tool calling is active and the model starts
177
+ * fabricating a tool result inside its own turn, control how the loop reacts:
178
+ * - `true` (default): abort the provider request immediately so it stops
179
+ * generating the hallucinated continuation (cheaper, lower latency).
180
+ * - `false`: let the request finish and silently discard everything past the
181
+ * fabrication boundary (keeps the connection alive but pays for the tokens
182
+ * the model spends on the discarded tail).
183
+ * Only meaningful when {@link toolCallSyntax} (or `PI_OWNED_TOOLS`) selects an
184
+ * owned syntax; native tool calling never fabricates results in text.
185
+ */
186
+ abortOnFabricatedToolResult?: boolean;
165
187
  /**
166
188
  * Append-only context mode — stabilizes system prompt + tool spec bytes
167
189
  * across turns so provider prefix caches hit at maximum rate.
@@ -406,6 +428,15 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
406
428
  concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
407
429
  /** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
408
430
  lenientArgValidation?: boolean;
431
+ /**
432
+ * If true, the agent loop may abort this tool mid-execution to deliver a
433
+ * queued steering message (instead of waiting for the tool to finish on its
434
+ * own). Set only on tools that purely *wait* and observe their abort signal
435
+ * cleanly (e.g. the `job` poll), so the abort surfaces the tool's current
436
+ * snapshot rather than corrupting a side effect. Honored only when
437
+ * `interruptMode` is "immediate".
438
+ */
439
+ interruptible?: boolean;
409
440
  /**
410
441
  * Controls how the INTENT_FIELD (`_i`) is handled for this tool.
411
442
  * - `"require"` (default): `_i` is injected and required in the parameter schema.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-agent-core",
4
- "version": "15.13.1",
4
+ "version": "15.13.3",
5
5
  "description": "General-purpose agent with transport abstraction, state management, and attachment support",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -35,11 +35,11 @@
35
35
  "fmt": "biome format --write ."
36
36
  },
37
37
  "dependencies": {
38
- "@oh-my-pi/pi-ai": "15.13.1",
39
- "@oh-my-pi/pi-catalog": "15.13.1",
40
- "@oh-my-pi/pi-natives": "15.13.1",
41
- "@oh-my-pi/pi-utils": "15.13.1",
42
- "@oh-my-pi/snapcompact": "15.13.1",
38
+ "@oh-my-pi/pi-ai": "15.13.3",
39
+ "@oh-my-pi/pi-catalog": "15.13.3",
40
+ "@oh-my-pi/pi-natives": "15.13.3",
41
+ "@oh-my-pi/pi-utils": "15.13.3",
42
+ "@oh-my-pi/snapcompact": "15.13.3",
43
43
  "@opentelemetry/api": "^1.9.1"
44
44
  },
45
45
  "devDependencies": {
package/src/agent-loop.ts CHANGED
@@ -15,7 +15,13 @@ import {
15
15
  validateToolArguments,
16
16
  zodToWireSchema,
17
17
  } from "@oh-my-pi/pi-ai";
18
- import { logger, sanitizeText } from "@oh-my-pi/pi-utils";
18
+ import {
19
+ encodeInbandToolHistory,
20
+ renderInbandToolPrompt,
21
+ renderToolExamples,
22
+ type ToolCallSyntax,
23
+ wrapInbandToolStream,
24
+ } from "@oh-my-pi/pi-ai/grammar";
19
25
  import {
20
26
  createHarmonyAuditEvent,
21
27
  detectHarmonyLeakInAssistantMessage,
@@ -25,7 +31,9 @@ import {
25
31
  isHarmonyLeakMitigationTarget,
26
32
  recoverHarmonyToolCall,
27
33
  signalListLabel,
28
- } from "./harmony-leak";
34
+ } from "@oh-my-pi/pi-ai/utils/harmony-leak";
35
+ import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
36
+ import { logger, sanitizeText } from "@oh-my-pi/pi-utils";
29
37
  import { type AgentRunCoverage, type AgentRunSummary, ToolCallBlockedError } from "./run-collector";
30
38
  import {
31
39
  type AgentTelemetry,
@@ -66,6 +74,14 @@ const ABORTED: unique symbol = Symbol("agent-loop-aborted");
66
74
  */
67
75
  const MAX_PAUSED_TURN_CONTINUATIONS = 8;
68
76
 
77
+ /**
78
+ * Cadence (ms) for polling queued steering while an `interruptible` tool is in
79
+ * flight, so a steer cuts the wait short instead of sitting idle until the
80
+ * tool's own window elapses. A cheap synchronous queue check; latency-bounded
81
+ * at one tick.
82
+ */
83
+ const STEERING_INTERRUPT_POLL_MS = 250;
84
+
69
85
  class HarmonyLeakInterruption extends Error {
70
86
  constructor(
71
87
  readonly detection: HarmonyDetection,
@@ -76,6 +92,27 @@ class HarmonyLeakInterruption extends Error {
76
92
  this.name = "HarmonyLeakInterruption";
77
93
  }
78
94
  }
95
+ function resolveOwnedToolSyntaxFromEnv(value: string | undefined): ToolCallSyntax | undefined {
96
+ switch (value) {
97
+ case "1":
98
+ case "true":
99
+ return "glm";
100
+ case "glm":
101
+ case "hermes":
102
+ case "kimi":
103
+ case "xml":
104
+ case "anthropic":
105
+ case "deepseek":
106
+ case "harmony":
107
+ case "pi":
108
+ case "qwen3":
109
+ case "gemini":
110
+ case "gemma":
111
+ return value;
112
+ default:
113
+ return undefined;
114
+ }
115
+ }
79
116
 
80
117
  type AssistantContentBlock = AssistantMessage["content"][number];
81
118
  type AssistantToolCallBlock = Extract<AssistantContentBlock, { type: "toolCall" }>;
@@ -491,7 +528,11 @@ function injectIntentIntoSchema(schema: unknown, mode: "require" | "optional" =
491
528
  };
492
529
  }
493
530
 
494
- export function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"] {
531
+ export function normalizeTools(
532
+ tools: AgentContext["tools"],
533
+ injectIntent: boolean,
534
+ exampleSyntax?: ToolCallSyntax,
535
+ ): Context["tools"] {
495
536
  injectIntent = injectIntent && Bun.env.PI_NO_INTENT !== "1";
496
537
  return tools?.map(t => {
497
538
  const intentMode = resolveIntentMode(t.intent);
@@ -505,7 +546,12 @@ export function normalizeTools(tools: AgentContext["tools"], injectIntent: boole
505
546
  }
506
547
  }
507
548
  const description = t.description ?? "";
508
- return { ...t, parameters, description };
549
+ const injectExampleIntent = injectIntent && intentMode !== "omit";
550
+ const examplesBlock = exampleSyntax
551
+ ? renderToolExamples({ ...t, parameters }, exampleSyntax, injectExampleIntent ? INTENT_FIELD : undefined)
552
+ : "";
553
+ const finalDescription = examplesBlock ? `${description}\n\n${examplesBlock}` : description;
554
+ return { ...t, parameters, description: finalDescription };
509
555
  });
510
556
  }
511
557
 
@@ -884,18 +930,37 @@ async function streamAssistantResponse(
884
930
  let llmContext: Context;
885
931
  if (config.appendOnlyContext) {
886
932
  config.appendOnlyContext.syncMessages(normalizedMessages);
887
- llmContext = config.appendOnlyContext.build(context, { intentTracing: !!config.intentTracing });
933
+ llmContext = config.appendOnlyContext.build(context, {
934
+ intentTracing: !!config.intentTracing,
935
+ exampleSyntax: preferredToolSyntax(config.model.id),
936
+ });
888
937
  } else {
889
938
  llmContext = {
890
939
  systemPrompt: context.systemPrompt,
891
940
  messages: normalizedMessages,
892
- tools: normalizeTools(context.tools, !!config.intentTracing),
941
+ tools: normalizeTools(context.tools, !!config.intentTracing, preferredToolSyntax(config.model.id)),
893
942
  };
894
943
  }
895
944
  if (config.transformProviderContext) {
896
945
  llmContext = config.transformProviderContext(llmContext, config.model);
897
946
  }
898
947
 
948
+ // Owned tool calling: take tool calls away from the provider and run them
949
+ // through the selected in-band prompt syntax. `PI_OWNED_TOOLS=1` still
950
+ // force-enables GLM; `PI_OWNED_TOOLS=<syntax>` force-enables that syntax.
951
+ const ownedSyntax: ToolCallSyntax | undefined =
952
+ config.toolCallSyntax ?? resolveOwnedToolSyntaxFromEnv(Bun.env.PI_OWNED_TOOLS);
953
+ let promptToolWireTools: Context["tools"];
954
+ if (ownedSyntax && llmContext.tools && llmContext.tools.length > 0) {
955
+ promptToolWireTools = llmContext.tools;
956
+ llmContext = {
957
+ ...llmContext,
958
+ systemPrompt: [...(llmContext.systemPrompt ?? []), renderInbandToolPrompt(promptToolWireTools, ownedSyntax)],
959
+ messages: encodeInbandToolHistory(llmContext.messages, ownedSyntax, promptToolWireTools),
960
+ tools: undefined,
961
+ };
962
+ }
963
+
899
964
  const streamFunction = streamFn || streamSimple;
900
965
 
901
966
  // Resolve API key (important for expiring tokens) — do this before resolving
@@ -920,12 +985,22 @@ async function streamAssistantResponse(
920
985
  : harmonyAbortController.signal
921
986
  : signal;
922
987
  const repetitionAbortController = new AbortController();
923
- const finalRequestSignal = requestSignal
924
- ? AbortSignal.any([requestSignal, repetitionAbortController.signal])
925
- : repetitionAbortController.signal;
988
+ // Owned tool calling: aborted by the stream wrapper when the model starts
989
+ // fabricating a tool result (e.g. a `<tool_response>` block), so the provider stops generating the rest of
990
+ // the hallucinated turn. Merged into the provider signal ONLY (not
991
+ // `requestSignal`), so it cancels the request without tripping the loop's
992
+ // external-abort handling (`abortRacePromise` / `requestSignal.aborted`).
993
+ const promptToolAbortController = ownedSyntax ? new AbortController() : undefined;
994
+ const providerAbortSignals: AbortSignal[] = [];
995
+ if (requestSignal) providerAbortSignals.push(requestSignal);
996
+ providerAbortSignals.push(repetitionAbortController.signal);
997
+ if (promptToolAbortController) providerAbortSignals.push(promptToolAbortController.signal);
998
+ const finalRequestSignal =
999
+ providerAbortSignals.length === 1 ? providerAbortSignals[0]! : AbortSignal.any(providerAbortSignals);
926
1000
  const effectiveTemperature =
927
1001
  harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
928
- const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
1002
+ // Owned tool calling sends no native tools, so any tool_choice would error.
1003
+ const effectiveToolChoice = ownedSyntax ? undefined : (dynamicToolChoice ?? config.toolChoice);
929
1004
  const effectiveReasoning = dynamicReasoning ?? config.reasoning;
930
1005
  const effectiveDisableReasoning = dynamicDisableReasoning ?? config.disableReasoning;
931
1006
 
@@ -970,7 +1045,7 @@ async function streamAssistantResponse(
970
1045
 
971
1046
  try {
972
1047
  return await runInActiveSpan(chatSpan, async () => {
973
- const response = await streamFunction(config.model, llmContext, {
1048
+ let response = await streamFunction(config.model, llmContext, {
974
1049
  ...config,
975
1050
  // Hand streamSimple a resolver so its central auth-retry policy can
976
1051
  // re-resolve on 401 / usage-limit: the initial step reuses the key
@@ -993,6 +1068,20 @@ async function streamAssistantResponse(
993
1068
  signal: finalRequestSignal,
994
1069
  onResponse: captureOnResponse,
995
1070
  });
1071
+ if (promptToolWireTools && ownedSyntax) {
1072
+ // Re-materialize in-band tool-call text as native toolCall content blocks
1073
+ // so the rest of the loop executes them unchanged. When the model starts
1074
+ // fabricating tool results, the abort callback cancels the provider — unless
1075
+ // `abortOnFabricatedToolResult` is false, in which case the stream drains and
1076
+ // the fabricated continuation is discarded without aborting.
1077
+ response = wrapInbandToolStream(
1078
+ response,
1079
+ promptToolWireTools,
1080
+ ownedSyntax,
1081
+ () => promptToolAbortController?.abort(),
1082
+ config.abortOnFabricatedToolResult ?? true,
1083
+ );
1084
+ }
996
1085
 
997
1086
  let partialMessage: AssistantMessage | null = null;
998
1087
  let addedPartial = false;
@@ -1716,7 +1805,24 @@ async function executeToolCalls(
1716
1805
  }
1717
1806
  }
1718
1807
 
1719
- await Promise.allSettled(tasks);
1808
+ // While an interruptible tool is in flight (e.g. a `job` poll blocking on
1809
+ // background work), a queued steer would otherwise wait out the tool's own
1810
+ // window. Poll the steering queue and let checkSteering() abort the shared
1811
+ // tool signal so the wait returns early; the boundary dequeue below then
1812
+ // injects it. Gated on immediate-interrupt mode + an interruptible tool;
1813
+ // checkSteering is idempotent (no-op once triggered).
1814
+ const watchSteeringWhileRunning =
1815
+ shouldInterruptImmediately &&
1816
+ (hasSteeringMessages !== undefined || getSteeringMessages !== undefined) &&
1817
+ records.some(r => r.tool?.interruptible === true);
1818
+ const steeringWatchTimer = watchSteeringWhileRunning
1819
+ ? setInterval(() => void checkSteering(), STEERING_INTERRUPT_POLL_MS)
1820
+ : undefined;
1821
+ try {
1822
+ await Promise.allSettled(tasks);
1823
+ } finally {
1824
+ if (steeringWatchTimer !== undefined) clearInterval(steeringWatchTimer);
1825
+ }
1720
1826
  // Yield after batch tool execution to let GC and I/O catch up,
1721
1827
  // especially when tool results are large (e.g. bash output).
1722
1828
  await yieldIfDue();
package/src/agent.ts CHANGED
@@ -22,11 +22,12 @@ import {
22
22
  type ToolChoice,
23
23
  type ToolResultMessage,
24
24
  } from "@oh-my-pi/pi-ai";
25
+ import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
26
+ import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
25
27
  import { getBundledModel } from "@oh-my-pi/pi-catalog/models";
26
28
  import { logger } from "@oh-my-pi/pi-utils";
27
29
  import { abortReasonText, agentLoop, agentLoopContinue } from "./agent-loop";
28
30
  import type { AppendOnlyContextManager } from "./append-only-context";
29
- import type { HarmonyAuditEvent } from "./harmony-leak";
30
31
  import type {
31
32
  AgentContext,
32
33
  AgentEvent,
@@ -220,6 +221,15 @@ export interface AgentOptions {
220
221
 
221
222
  /** Enable intent tracing schema injection/stripping in the harness. */
222
223
  intentTracing?: boolean;
224
+ /** Owned tool-calling syntax. Undefined keeps provider-native tool calling. */
225
+ toolCallSyntax?: ToolCallSyntax;
226
+ /**
227
+ * When owned tool calling is active and the model fabricates a tool result
228
+ * mid-turn: `true` (default) aborts the provider request immediately; `false`
229
+ * drains the request and discards the fabricated continuation. Forwarded to
230
+ * the loop's {@link AgentLoopConfig.abortOnFabricatedToolResult}.
231
+ */
232
+ abortOnFabricatedToolResult?: boolean;
223
233
  /** Dynamic tool choice override, resolved per LLM call. */
224
234
  getToolChoice?: () => ToolChoice | undefined;
225
235
 
@@ -316,6 +326,8 @@ export class Agent {
316
326
  #preferWebsockets?: boolean;
317
327
  #transformToolCallArguments?: (args: Record<string, unknown>, toolName: string) => Record<string, unknown>;
318
328
  #intentTracing: boolean;
329
+ #toolCallSyntax?: ToolCallSyntax;
330
+ #abortOnFabricatedToolResult?: boolean;
319
331
  #getToolChoice?: () => ToolChoice | undefined;
320
332
  #onPayload?: SimpleStreamOptions["onPayload"];
321
333
  #onResponse?: SimpleStreamOptions["onResponse"];
@@ -378,6 +390,8 @@ export class Agent {
378
390
  this.#preferWebsockets = opts.preferWebsockets;
379
391
  this.#transformToolCallArguments = opts.transformToolCallArguments;
380
392
  this.#intentTracing = opts.intentTracing === true;
393
+ this.#toolCallSyntax = opts.toolCallSyntax;
394
+ this.#abortOnFabricatedToolResult = opts.abortOnFabricatedToolResult;
381
395
  this.#getToolChoice = opts.getToolChoice;
382
396
  this.#onAssistantMessageEvent = opts.onAssistantMessageEvent;
383
397
  this.#onHarmonyLeak = opts.onHarmonyLeak;
@@ -1023,6 +1037,8 @@ export class Agent {
1023
1037
  cursorOnToolResult,
1024
1038
  transformToolCallArguments: this.#transformToolCallArguments,
1025
1039
  intentTracing: this.#intentTracing,
1040
+ toolCallSyntax: this.#toolCallSyntax,
1041
+ abortOnFabricatedToolResult: this.#abortOnFabricatedToolResult,
1026
1042
  appendOnlyContext: this.#appendOnlyContext,
1027
1043
  beforeToolCall: this.beforeToolCall ? (ctx, signal) => this.beforeToolCall?.(ctx, signal) : undefined,
1028
1044
  afterToolCall: this.afterToolCall ? (ctx, signal) => this.afterToolCall?.(ctx, signal) : undefined,
@@ -15,6 +15,7 @@
15
15
  */
16
16
 
17
17
  import type { Context, Message, Tool } from "@oh-my-pi/pi-ai";
18
+ import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
18
19
  import { normalizeTools } from "./agent-loop";
19
20
  import type { AgentContext } from "./types";
20
21
 
@@ -33,6 +34,7 @@ export interface StablePrefixSnapshot {
33
34
  export interface BuildOptions {
34
35
  /** Inject the `_i` intent field into tool schemas (must match agent-loop's normalizeTools). */
35
36
  intentTracing: boolean;
37
+ exampleSyntax?: ToolCallSyntax;
36
38
  }
37
39
 
38
40
  /**
@@ -268,7 +270,7 @@ export class AppendOnlyContextManager {
268
270
 
269
271
  function takeSnapshot(context: AgentContext, options: BuildOptions): StablePrefixSnapshot {
270
272
  const systemPrompt = [...context.systemPrompt];
271
- const tools = normalizeTools(context.tools, options.intentTracing) ?? [];
273
+ const tools = normalizeTools(context.tools, options.intentTracing, options.exampleSyntax) ?? [];
272
274
  return {
273
275
  systemPrompt,
274
276
  tools,
@@ -288,6 +290,7 @@ function computeFingerprint(systemPrompt: string[], tools: Tool[], options: Buil
288
290
  cw: t.customWireName,
289
291
  })),
290
292
  i: options.intentTracing,
293
+ ex: options.exampleSyntax,
291
294
  });
292
295
  let hash = 0;
293
296
  for (let i = 0; i < payload.length; i++) {
@@ -6,6 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { ApiKey, Model } from "@oh-my-pi/pi-ai";
9
+ import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
9
10
  import { prompt } from "@oh-my-pi/pi-utils";
10
11
  import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
11
12
  import type { AgentMessage } from "../types";
@@ -290,7 +291,7 @@ export async function generateBranchSummary(
290
291
  // Transform to LLM-compatible messages, then serialize to text
291
292
  // Serialization prevents the model from treating it as a conversation to continue
292
293
  const llmMessages = (options.convertToLlm ?? defaultConvertToLlm)(messages);
293
- const conversationText = serializeConversation(llmMessages);
294
+ const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
294
295
 
295
296
  // Build prompt
296
297
  const instructions = customInstructions || BRANCH_SUMMARY_PROMPT;
@@ -18,6 +18,7 @@ import {
18
18
  type Usage,
19
19
  withAuth,
20
20
  } from "@oh-my-pi/pi-ai";
21
+ import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
21
22
  import { clampThinkingLevelForModel } from "@oh-my-pi/pi-catalog/model-thinking";
22
23
  import { countTokens } from "@oh-my-pi/pi-natives";
23
24
  import { logger, prompt } from "@oh-my-pi/pi-utils";
@@ -642,7 +643,7 @@ export async function generateSummary(
642
643
  // Serialize conversation to text so model doesn't try to continue it
643
644
  // Convert to LLM messages first (handles custom app messages when caller provides a transformer).
644
645
  const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(currentMessages);
645
- const conversationText = serializeConversation(llmMessages);
646
+ const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
646
647
 
647
648
  // Build the prompt with conversation wrapped in tags
648
649
  let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
@@ -790,7 +791,7 @@ async function generateShortSummary(
790
791
  ): Promise<string> {
791
792
  const maxTokens = Math.min(512, Math.floor(0.2 * reserveTokens));
792
793
  const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(recentMessages);
793
- const conversationText = serializeConversation(llmMessages);
794
+ const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
794
795
 
795
796
  let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
796
797
  if (historySummary) {
@@ -1155,7 +1156,7 @@ async function generateTurnPrefixSummary(
1155
1156
  const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix
1156
1157
 
1157
1158
  const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(messages);
1158
- const conversationText = serializeConversation(llmMessages);
1159
+ const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
1159
1160
  const promptText = `<conversation>\n${conversationText}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`;
1160
1161
  const summarizationMessages = [
1161
1162
  {
@@ -81,6 +81,16 @@ function createPrunedNotice(tokens: number): string {
81
81
  return `[Output truncated - ${tokens} tokens]`;
82
82
  }
83
83
 
84
+ /**
85
+ * Generic age-based pruning floor. Below this, blanking a result to
86
+ * `[Output truncated - N tokens]` recovers nothing — the placeholder itself
87
+ * costs ~8 tokens, so a sub-floor result grows the context (and churns the
88
+ * prompt cache) instead of shrinking it. Superseded/useless results keep their
89
+ * own rules: useless already drops no-savings candidates, superseded prunes for
90
+ * correctness regardless of size.
91
+ */
92
+ const MIN_PRUNE_TOKENS = 50;
93
+
84
94
  function getToolResultMessage(entry: SessionEntry): ToolResultMessage | undefined {
85
95
  if (entry.type !== "message") return undefined;
86
96
  const message = entry.message as AgentMessage;
@@ -271,7 +281,8 @@ export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig =
271
281
  // any age).
272
282
  const superseded = supersededMessages?.has(message) ?? false;
273
283
  const useless = uselessMessages?.has(message) ?? false;
274
- if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected)) {
284
+ const tooSmall = tokens < MIN_PRUNE_TOKENS;
285
+ if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected || tooSmall)) {
275
286
  accumulatedTokens += tokens;
276
287
  continue;
277
288
  }