npm - @oh-my-pi/pi-agent-core - Versions diffs - 15.13.1 → 15.13.3 - Mend

@oh-my-pi/pi-agent-core 15.13.1 → 15.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/CHANGELOG.md +35 -0
package/dist/types/agent-loop.d.ts +2 -1
package/dist/types/agent.d.ts +11 -1
package/dist/types/append-only-context.d.ts +2 -0
package/dist/types/compaction/utils.d.ts +2 -1
package/dist/types/index.d.ts +0 -1
package/dist/types/types.d.ts +32 -1
package/package.json +6 -6
package/src/agent-loop.ts +118 -12
package/src/agent.ts +17 -1
package/src/append-only-context.ts +4 -1
package/src/compaction/branch-summarization.ts +2 -1
package/src/compaction/compaction.ts +4 -3
package/src/compaction/pruning.ts +12 -1
package/src/compaction/utils.ts +44 -11
package/src/index.ts +0 -1
package/src/types.ts +32 -1
package/dist/types/harmony-leak.d.ts +0 -118
package/src/harmony-leak.ts +0 -456

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,41 @@
 ## [Unreleased]
+## [15.13.3] - 2026-06-15
+### Added
+- Added the `interruptible` tool field: when set, the agent loop may abort the tool mid-execution to deliver a queued steering message (honored only in `immediate` interrupt mode).
+- Added support for `gemini` and `gemma` as valid owned tool syntax values in environment configuration
+### Fixed
+- Fixed `pruneToolOutputs` blanking tiny tool results during overflow pruning: results below `50` tokens (`MIN_PRUNE_TOKENS`) are no longer replaced with the `[Output truncated - N tokens]` placeholder, which cost more tokens than the result itself and churned the prompt cache for zero savings.
+## [15.13.2] - 2026-06-15
+### Breaking Changes
+- Removed `harmony-leak` exports from the `@oh-my-pi/pi-agent-core` package entrypoint
+- Replaced the experimental `promptToolCalls` agent/loop option with `toolCallSyntax`, selecting an explicit in-band tool-call grammar instead of a boolean GLM-only mode.
+### Added
+- Added support for selecting owned in-band tool-call syntax via `PI_OWNED_TOOLS=<syntax>` (for example `hermes` or `qwen3`) while preserving legacy `PI_OWNED_TOOLS=1/true` as GLM mode
+- Added owned in-band tool calling for multiple syntaxes (`glm`, `hermes`, `kimi`, `xml`, `anthropic`, `deepseek`, `harmony`, `pi` (pi-native), `qwen3`). Owned mode sends no native provider tools, appends a syntax-specific prompt/catalog, re-encodes prior tool calls/results as grammar-owned text, and parses streamed model output back into canonical tool calls.
+- Added tool-example folding to `normalizeTools`: when given a model's affinity syntax (resolved via `preferredToolSyntax`), it renders each tool's `examples` into an `<examples>` block in that native syntax and appends it to the wire description. Wired through both context paths (fresh build and append-only `takeSnapshot`/`build` via a new `exampleSyntax` build option), with the `_i` intent-field placeholder added to examples when intent tracing injects it.
+- Added the `abortOnFabricatedToolResult` option to `AgentOptions`/`AgentLoopConfig` (default `true`): when owned tool calling is active and the model fabricates a tool result mid-turn, `true` aborts the provider request immediately while `false` lets it finish and discards the fabricated continuation.
+### Changed
+- Added owned in-band syntax support to `Agent` loop configuration resolution by selecting syntax from `toolCallSyntax` or `PI_OWNED_TOOLS` when present
+### Fixed
+- Fixed append-only context cache fingerprinting to account for `exampleSyntax`, so switching tool-call syntax rebuilds cached prompts with the correct injected tool examples
+- Fixed owned in-band tool-calling requests to omit `toolChoice` after stripping native tools, preventing invalid tool-choice requests
+- Fixed owned tool calling letting the model fabricate tool results by treating grammar-owned tool-result markers in assistant text as a hard turn boundary: calls before the fabrication are kept, fabricated results and dependent calls are dropped, and the real result is fed back on the next turn.
 ## [15.13.1] - 2026-06-15
 ### Added

package/dist/types/agent-loop.d.ts CHANGED Viewed

@@ -3,6 +3,7 @@
  * Transforms to Message[] only at the LLM call boundary.
  */
 import { type Context, EventStream } from "@oh-my-pi/pi-ai";
+import { type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
 import { type AgentRunCoverage, type AgentRunSummary } from "./run-collector";
 import type { AgentContext, AgentEvent, AgentLoopConfig, AgentMessage, StreamFn } from "./types";
 /**
@@ -52,7 +53,7 @@ export declare function agentLoopContinueDetailed(context: AgentContext, config:
     readonly detailed: () => Promise<AgentLoopDetailedResult>;
 };
 export declare const INTENT_FIELD = "_i";
-export declare function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"];
+export declare function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean, exampleSyntax?: ToolCallSyntax): Context["tools"];
 /** Resolve the human-readable reason an abort carried. A caller that aborts via
  *  `AbortController.abort(reason)` with a string or a non-`AbortError` `Error`
  *  (e.g. the coding agent's user-interrupt label) gets that text surfaced on the

package/dist/types/agent.d.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { type ApiKeyResolveContext, type AssistantMessage, type AssistantMessageEvent, type Context, type CursorExecHandlers, type CursorToolResultHandler, type Effort, type ImageContent, type Message, type Model, type ProviderSessionState, type ServiceTier, type SimpleStreamOptions, type ThinkingBudgets, type ToolChoice } from "@oh-my-pi/pi-ai";
+import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
+import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
 import type { AppendOnlyContextManager } from "./append-only-context";
-import type { HarmonyAuditEvent } from "./harmony-leak";
 import type { AgentEvent, AgentLoopConfig, AgentMessage, AgentState, AgentTool, AgentToolContext, AsideMessage, StreamFn, ToolCallContext } from "./types";
 export declare class AgentBusyError extends Error {
     constructor(message?: string);
@@ -126,6 +127,15 @@ export interface AgentOptions {
     transformToolCallArguments?: (args: Record<string, unknown>, toolName: string) => Record<string, unknown>;
     /** Enable intent tracing schema injection/stripping in the harness. */
     intentTracing?: boolean;
+    /** Owned tool-calling syntax. Undefined keeps provider-native tool calling. */
+    toolCallSyntax?: ToolCallSyntax;
+    /**
+     * When owned tool calling is active and the model fabricates a tool result
+     * mid-turn: `true` (default) aborts the provider request immediately; `false`
+     * drains the request and discards the fabricated continuation. Forwarded to
+     * the loop's {@link AgentLoopConfig.abortOnFabricatedToolResult}.
+     */
+    abortOnFabricatedToolResult?: boolean;
     /** Dynamic tool choice override, resolved per LLM call. */
     getToolChoice?: () => ToolChoice | undefined;
     /**

package/dist/types/append-only-context.d.ts CHANGED Viewed

@@ -14,6 +14,7 @@
  *    message delta is a cache miss each turn.
  */
 import type { Context, Message, Tool } from "@oh-my-pi/pi-ai";
+import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
 import type { AgentContext } from "./types";
 /** Frozen system prompt + tool spec snapshot. */
 export interface StablePrefixSnapshot {
@@ -25,6 +26,7 @@ export interface StablePrefixSnapshot {
 export interface BuildOptions {
     /** Inject the `_i` intent field into tool schemas (must match agent-loop's normalizeTools). */
     intentTracing: boolean;
+    exampleSyntax?: ToolCallSyntax;
 }
 /**
  * A frozen prefix (system prompt + tools) that produces stable byte

package/dist/types/compaction/utils.d.ts CHANGED Viewed

@@ -2,6 +2,7 @@
  * Shared utilities for compaction and branch summarization.
  */
 import type { Message } from "@oh-my-pi/pi-ai";
+import { type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
 import type { AgentMessage } from "../types";
 export interface FileOperations {
     read: Set<string>;
@@ -44,5 +45,5 @@ export declare function upsertFileOperations(summary: string, readFiles: string[
  * This prevents the model from treating it as a conversation to continue.
  * Call convertToLlm() first to handle custom message types.
  */
-export declare function serializeConversation(messages: Message[]): string;
+export declare function serializeConversation(messages: Message[], syntax?: ToolCallSyntax): string;
 export declare const SUMMARIZATION_SYSTEM_PROMPT: string;

package/dist/types/index.d.ts CHANGED Viewed

@@ -2,7 +2,6 @@ export * from "./agent";
 export * from "./agent-loop";
 export * from "./append-only-context";
 export * from "./compaction";
-export * from "./harmony-leak";
 export * from "./proxy";
 export * from "./run-collector";
 export * from "./telemetry";

package/dist/types/types.d.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import type { ApiKeyResolveContext, AssistantMessage, AssistantMessageEvent, AssistantMessageEventStream, Context, Effort, ImageContent, Message, Model, SimpleStreamOptions, Static, streamSimple, TextContent, Tool, ToolChoice, ToolResultMessage, TSchema } from "@oh-my-pi/pi-ai";
+import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
+import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
 import type { AppendOnlyContextManager } from "./append-only-context";
-import type { HarmonyAuditEvent } from "./harmony-leak";
 import type { AgentRunCoverage, AgentRunSummary } from "./run-collector";
 import type { AgentTelemetryConfig } from "./telemetry";
 /** Stream function - can return sync or Promise for async config lookup */
@@ -162,6 +163,27 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
      * then strips from arguments before executing tools.
      */
     intentTracing?: boolean;
+    /**
+     * Owned tool calling syntax.
+     *
+     * Undefined keeps provider-native tool calling. A syntax value sends no
+     * native `tools`, forces `toolChoice` off, appends that syntax's tool catalog
+     * instructions, re-encodes prior tool calls/results as text, and parses the
+     * model's text output back into canonical `toolCall` blocks.
+     */
+    toolCallSyntax?: ToolCallSyntax;
+    /**
+     * When owned (in-band) tool calling is active and the model starts
+     * fabricating a tool result inside its own turn, control how the loop reacts:
+     * - `true` (default): abort the provider request immediately so it stops
+     *   generating the hallucinated continuation (cheaper, lower latency).
+     * - `false`: let the request finish and silently discard everything past the
+     *   fabrication boundary (keeps the connection alive but pays for the tokens
+     *   the model spends on the discarded tail).
+     * Only meaningful when {@link toolCallSyntax} (or `PI_OWNED_TOOLS`) selects an
+     * owned syntax; native tool calling never fabricates results in text.
+     */
+    abortOnFabricatedToolResult?: boolean;
     /**
      * Append-only context mode — stabilizes system prompt + tool spec bytes
      * across turns so provider prefix caches hit at maximum rate.
@@ -406,6 +428,15 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
     concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
     /** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
     lenientArgValidation?: boolean;
+    /**
+     * If true, the agent loop may abort this tool mid-execution to deliver a
+     * queued steering message (instead of waiting for the tool to finish on its
+     * own). Set only on tools that purely *wait* and observe their abort signal
+     * cleanly (e.g. the `job` poll), so the abort surfaces the tool's current
+     * snapshot rather than corrupting a side effect. Honored only when
+     * `interruptMode` is "immediate".
+     */
+    interruptible?: boolean;
     /**
      * Controls how the INTENT_FIELD (`_i`) is handled for this tool.
      * - `"require"` (default): `_i` is injected and required in the parameter schema.

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-agent-core",
-	"version": "15.13.1",
+	"version": "15.13.3",
 	"description": "General-purpose agent with transport abstraction, state management, and attachment support",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -35,11 +35,11 @@
 		"fmt": "biome format --write ."
 	},
 	"dependencies": {
-		"@oh-my-pi/pi-ai": "15.13.1",
-		"@oh-my-pi/pi-catalog": "15.13.1",
-		"@oh-my-pi/pi-natives": "15.13.1",
-		"@oh-my-pi/pi-utils": "15.13.1",
-		"@oh-my-pi/snapcompact": "15.13.1",
+		"@oh-my-pi/pi-ai": "15.13.3",
+		"@oh-my-pi/pi-catalog": "15.13.3",
+		"@oh-my-pi/pi-natives": "15.13.3",
+		"@oh-my-pi/pi-utils": "15.13.3",
+		"@oh-my-pi/snapcompact": "15.13.3",
 		"@opentelemetry/api": "^1.9.1"
 	},
 	"devDependencies": {

package/src/agent-loop.ts CHANGED Viewed

@@ -15,7 +15,13 @@ import {
 	validateToolArguments,
 	zodToWireSchema,
 } from "@oh-my-pi/pi-ai";
-import { logger, sanitizeText } from "@oh-my-pi/pi-utils";
+import {
+	encodeInbandToolHistory,
+	renderInbandToolPrompt,
+	renderToolExamples,
+	type ToolCallSyntax,
+	wrapInbandToolStream,
+} from "@oh-my-pi/pi-ai/grammar";
 import {
 	createHarmonyAuditEvent,
 	detectHarmonyLeakInAssistantMessage,
@@ -25,7 +31,9 @@ import {
 	isHarmonyLeakMitigationTarget,
 	recoverHarmonyToolCall,
 	signalListLabel,
-} from "./harmony-leak";
+} from "@oh-my-pi/pi-ai/utils/harmony-leak";
+import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
+import { logger, sanitizeText } from "@oh-my-pi/pi-utils";
 import { type AgentRunCoverage, type AgentRunSummary, ToolCallBlockedError } from "./run-collector";
 import {
 	type AgentTelemetry,
@@ -66,6 +74,14 @@ const ABORTED: unique symbol = Symbol("agent-loop-aborted");
  */
 const MAX_PAUSED_TURN_CONTINUATIONS = 8;
+/**
+ * Cadence (ms) for polling queued steering while an `interruptible` tool is in
+ * flight, so a steer cuts the wait short instead of sitting idle until the
+ * tool's own window elapses. A cheap synchronous queue check; latency-bounded
+ * at one tick.
+ */
+const STEERING_INTERRUPT_POLL_MS = 250;
 class HarmonyLeakInterruption extends Error {
 	constructor(
 		readonly detection: HarmonyDetection,
@@ -76,6 +92,27 @@ class HarmonyLeakInterruption extends Error {
 		this.name = "HarmonyLeakInterruption";
 	}
 }
+function resolveOwnedToolSyntaxFromEnv(value: string | undefined): ToolCallSyntax | undefined {
+	switch (value) {
+		case "1":
+		case "true":
+			return "glm";
+		case "glm":
+		case "hermes":
+		case "kimi":
+		case "xml":
+		case "anthropic":
+		case "deepseek":
+		case "harmony":
+		case "pi":
+		case "qwen3":
+		case "gemini":
+		case "gemma":
+			return value;
+		default:
+			return undefined;
+	}
+}
 type AssistantContentBlock = AssistantMessage["content"][number];
 type AssistantToolCallBlock = Extract<AssistantContentBlock, { type: "toolCall" }>;
@@ -491,7 +528,11 @@ function injectIntentIntoSchema(schema: unknown, mode: "require" | "optional" =
 	};
 }
-export function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"] {
+export function normalizeTools(
+	tools: AgentContext["tools"],
+	injectIntent: boolean,
+	exampleSyntax?: ToolCallSyntax,
+): Context["tools"] {
 	injectIntent = injectIntent && Bun.env.PI_NO_INTENT !== "1";
 	return tools?.map(t => {
 		const intentMode = resolveIntentMode(t.intent);
@@ -505,7 +546,12 @@ export function normalizeTools(tools: AgentContext["tools"], injectIntent: boole
 			}
 		}
 		const description = t.description ?? "";
-		return { ...t, parameters, description };
+		const injectExampleIntent = injectIntent && intentMode !== "omit";
+		const examplesBlock = exampleSyntax
+			? renderToolExamples({ ...t, parameters }, exampleSyntax, injectExampleIntent ? INTENT_FIELD : undefined)
+			: "";
+		const finalDescription = examplesBlock ? `${description}\n\n${examplesBlock}` : description;
+		return { ...t, parameters, description: finalDescription };
 	});
 }
@@ -884,18 +930,37 @@ async function streamAssistantResponse(
 	let llmContext: Context;
 	if (config.appendOnlyContext) {
 		config.appendOnlyContext.syncMessages(normalizedMessages);
-		llmContext = config.appendOnlyContext.build(context, { intentTracing: !!config.intentTracing });
+		llmContext = config.appendOnlyContext.build(context, {
+			intentTracing: !!config.intentTracing,
+			exampleSyntax: preferredToolSyntax(config.model.id),
+		});
 	} else {
 		llmContext = {
 			systemPrompt: context.systemPrompt,
 			messages: normalizedMessages,
-			tools: normalizeTools(context.tools, !!config.intentTracing),
+			tools: normalizeTools(context.tools, !!config.intentTracing, preferredToolSyntax(config.model.id)),
 		};
 	}
 	if (config.transformProviderContext) {
 		llmContext = config.transformProviderContext(llmContext, config.model);
 	}
+	// Owned tool calling: take tool calls away from the provider and run them
+	// through the selected in-band prompt syntax. `PI_OWNED_TOOLS=1` still
+	// force-enables GLM; `PI_OWNED_TOOLS=<syntax>` force-enables that syntax.
+	const ownedSyntax: ToolCallSyntax | undefined =
+		config.toolCallSyntax ?? resolveOwnedToolSyntaxFromEnv(Bun.env.PI_OWNED_TOOLS);
+	let promptToolWireTools: Context["tools"];
+	if (ownedSyntax && llmContext.tools && llmContext.tools.length > 0) {
+		promptToolWireTools = llmContext.tools;
+		llmContext = {
+			...llmContext,
+			systemPrompt: [...(llmContext.systemPrompt ?? []), renderInbandToolPrompt(promptToolWireTools, ownedSyntax)],
+			messages: encodeInbandToolHistory(llmContext.messages, ownedSyntax, promptToolWireTools),
+			tools: undefined,
+		};
+	}
 	const streamFunction = streamFn || streamSimple;
 	// Resolve API key (important for expiring tokens) — do this before resolving
@@ -920,12 +985,22 @@ async function streamAssistantResponse(
 			: harmonyAbortController.signal
 		: signal;
 	const repetitionAbortController = new AbortController();
-	const finalRequestSignal = requestSignal
-		? AbortSignal.any([requestSignal, repetitionAbortController.signal])
-		: repetitionAbortController.signal;
+	// Owned tool calling: aborted by the stream wrapper when the model starts
+	// fabricating a `<tool_response>`, so the provider stops generating the rest of
+	// the hallucinated turn. Merged into the provider signal ONLY (not
+	// `requestSignal`), so it cancels the request without tripping the loop's
+	// external-abort handling (`abortRacePromise` / `requestSignal.aborted`).
+	const promptToolAbortController = ownedSyntax ? new AbortController() : undefined;
+	const providerAbortSignals: AbortSignal[] = [];
+	if (requestSignal) providerAbortSignals.push(requestSignal);
+	providerAbortSignals.push(repetitionAbortController.signal);
+	if (promptToolAbortController) providerAbortSignals.push(promptToolAbortController.signal);
+	const finalRequestSignal =
+		providerAbortSignals.length === 1 ? providerAbortSignals[0]! : AbortSignal.any(providerAbortSignals);
 	const effectiveTemperature =
 		harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
-	const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
+	// Owned tool calling sends no native tools, so any tool_choice would error.
+	const effectiveToolChoice = ownedSyntax ? undefined : (dynamicToolChoice ?? config.toolChoice);
 	const effectiveReasoning = dynamicReasoning ?? config.reasoning;
 	const effectiveDisableReasoning = dynamicDisableReasoning ?? config.disableReasoning;
@@ -970,7 +1045,7 @@ async function streamAssistantResponse(
 	try {
 		return await runInActiveSpan(chatSpan, async () => {
-			const response = await streamFunction(config.model, llmContext, {
+			let response = await streamFunction(config.model, llmContext, {
 				...config,
 				// Hand streamSimple a resolver so its central auth-retry policy can
 				// re-resolve on 401 / usage-limit: the initial step reuses the key
@@ -993,6 +1068,20 @@ async function streamAssistantResponse(
 				signal: finalRequestSignal,
 				onResponse: captureOnResponse,
 			});
+			if (promptToolWireTools && ownedSyntax) {
+				// Re-materialize in-band tool-call text as native toolCall content blocks
+				// so the rest of the loop executes them unchanged. When the model starts
+				// fabricating tool results, the abort callback cancels the provider — unless
+				// `abortOnFabricatedToolResult` is false, in which case the stream drains and
+				// the fabricated continuation is discarded without aborting.
+				response = wrapInbandToolStream(
+					response,
+					promptToolWireTools,
+					ownedSyntax,
+					() => promptToolAbortController?.abort(),
+					config.abortOnFabricatedToolResult ?? true,
+				);
+			}
 			let partialMessage: AssistantMessage | null = null;
 			let addedPartial = false;
@@ -1716,7 +1805,24 @@ async function executeToolCalls(
 		}
 	}
-	await Promise.allSettled(tasks);
+	// While an interruptible tool is in flight (e.g. a `job` poll blocking on
+	// background work), a queued steer would otherwise wait out the tool's own
+	// window. Poll the steering queue and let checkSteering() abort the shared
+	// tool signal so the wait returns early; the boundary dequeue below then
+	// injects it. Gated on immediate-interrupt mode + an interruptible tool;
+	// checkSteering is idempotent (no-op once triggered).
+	const watchSteeringWhileRunning =
+		shouldInterruptImmediately &&
+		(hasSteeringMessages !== undefined || getSteeringMessages !== undefined) &&
+		records.some(r => r.tool?.interruptible === true);
+	const steeringWatchTimer = watchSteeringWhileRunning
+		? setInterval(() => void checkSteering(), STEERING_INTERRUPT_POLL_MS)
+		: undefined;
+	try {
+		await Promise.allSettled(tasks);
+	} finally {
+		if (steeringWatchTimer !== undefined) clearInterval(steeringWatchTimer);
+	}
 	// Yield after batch tool execution to let GC and I/O catch up,
 	// especially when tool results are large (e.g. bash output).
 	await yieldIfDue();

package/src/agent.ts CHANGED Viewed

@@ -22,11 +22,12 @@ import {
 	type ToolChoice,
 	type ToolResultMessage,
 } from "@oh-my-pi/pi-ai";
+import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
+import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
 import { getBundledModel } from "@oh-my-pi/pi-catalog/models";
 import { logger } from "@oh-my-pi/pi-utils";
 import { abortReasonText, agentLoop, agentLoopContinue } from "./agent-loop";
 import type { AppendOnlyContextManager } from "./append-only-context";
-import type { HarmonyAuditEvent } from "./harmony-leak";
 import type {
 	AgentContext,
 	AgentEvent,
@@ -220,6 +221,15 @@ export interface AgentOptions {
 	/** Enable intent tracing schema injection/stripping in the harness. */
 	intentTracing?: boolean;
+	/** Owned tool-calling syntax. Undefined keeps provider-native tool calling. */
+	toolCallSyntax?: ToolCallSyntax;
+	/**
+	 * When owned tool calling is active and the model fabricates a tool result
+	 * mid-turn: `true` (default) aborts the provider request immediately; `false`
+	 * drains the request and discards the fabricated continuation. Forwarded to
+	 * the loop's {@link AgentLoopConfig.abortOnFabricatedToolResult}.
+	 */
+	abortOnFabricatedToolResult?: boolean;
 	/** Dynamic tool choice override, resolved per LLM call. */
 	getToolChoice?: () => ToolChoice | undefined;
@@ -316,6 +326,8 @@ export class Agent {
 	#preferWebsockets?: boolean;
 	#transformToolCallArguments?: (args: Record<string, unknown>, toolName: string) => Record<string, unknown>;
 	#intentTracing: boolean;
+	#toolCallSyntax?: ToolCallSyntax;
+	#abortOnFabricatedToolResult?: boolean;
 	#getToolChoice?: () => ToolChoice | undefined;
 	#onPayload?: SimpleStreamOptions["onPayload"];
 	#onResponse?: SimpleStreamOptions["onResponse"];
@@ -378,6 +390,8 @@ export class Agent {
 		this.#preferWebsockets = opts.preferWebsockets;
 		this.#transformToolCallArguments = opts.transformToolCallArguments;
 		this.#intentTracing = opts.intentTracing === true;
+		this.#toolCallSyntax = opts.toolCallSyntax;
+		this.#abortOnFabricatedToolResult = opts.abortOnFabricatedToolResult;
 		this.#getToolChoice = opts.getToolChoice;
 		this.#onAssistantMessageEvent = opts.onAssistantMessageEvent;
 		this.#onHarmonyLeak = opts.onHarmonyLeak;
@@ -1023,6 +1037,8 @@ export class Agent {
 			cursorOnToolResult,
 			transformToolCallArguments: this.#transformToolCallArguments,
 			intentTracing: this.#intentTracing,
+			toolCallSyntax: this.#toolCallSyntax,
+			abortOnFabricatedToolResult: this.#abortOnFabricatedToolResult,
 			appendOnlyContext: this.#appendOnlyContext,
 			beforeToolCall: this.beforeToolCall ? (ctx, signal) => this.beforeToolCall?.(ctx, signal) : undefined,
 			afterToolCall: this.afterToolCall ? (ctx, signal) => this.afterToolCall?.(ctx, signal) : undefined,

package/src/append-only-context.ts CHANGED Viewed

@@ -15,6 +15,7 @@
  */
 import type { Context, Message, Tool } from "@oh-my-pi/pi-ai";
+import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
 import { normalizeTools } from "./agent-loop";
 import type { AgentContext } from "./types";
@@ -33,6 +34,7 @@ export interface StablePrefixSnapshot {
 export interface BuildOptions {
 	/** Inject the `_i` intent field into tool schemas (must match agent-loop's normalizeTools). */
 	intentTracing: boolean;
+	exampleSyntax?: ToolCallSyntax;
 }
 /**
@@ -268,7 +270,7 @@ export class AppendOnlyContextManager {
 function takeSnapshot(context: AgentContext, options: BuildOptions): StablePrefixSnapshot {
 	const systemPrompt = [...context.systemPrompt];
-	const tools = normalizeTools(context.tools, options.intentTracing) ?? [];
+	const tools = normalizeTools(context.tools, options.intentTracing, options.exampleSyntax) ?? [];
 	return {
 		systemPrompt,
 		tools,
@@ -288,6 +290,7 @@ function computeFingerprint(systemPrompt: string[], tools: Tool[], options: Buil
 			cw: t.customWireName,
 		})),
 		i: options.intentTracing,
+		ex: options.exampleSyntax,
 	});
 	let hash = 0;
 	for (let i = 0; i < payload.length; i++) {

package/src/compaction/branch-summarization.ts CHANGED Viewed

@@ -6,6 +6,7 @@
  */
 import type { ApiKey, Model } from "@oh-my-pi/pi-ai";
+import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
 import { prompt } from "@oh-my-pi/pi-utils";
 import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
 import type { AgentMessage } from "../types";
@@ -290,7 +291,7 @@ export async function generateBranchSummary(
 	// Transform to LLM-compatible messages, then serialize to text
 	// Serialization prevents the model from treating it as a conversation to continue
 	const llmMessages = (options.convertToLlm ?? defaultConvertToLlm)(messages);
-	const conversationText = serializeConversation(llmMessages);
+	const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
 	// Build prompt
 	const instructions = customInstructions || BRANCH_SUMMARY_PROMPT;

package/src/compaction/compaction.ts CHANGED Viewed

@@ -18,6 +18,7 @@ import {
 	type Usage,
 	withAuth,
 } from "@oh-my-pi/pi-ai";
+import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
 import { clampThinkingLevelForModel } from "@oh-my-pi/pi-catalog/model-thinking";
 import { countTokens } from "@oh-my-pi/pi-natives";
 import { logger, prompt } from "@oh-my-pi/pi-utils";
@@ -642,7 +643,7 @@ export async function generateSummary(
 	// Serialize conversation to text so model doesn't try to continue it
 	// Convert to LLM messages first (handles custom app messages when caller provides a transformer).
 	const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(currentMessages);
-	const conversationText = serializeConversation(llmMessages);
+	const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
 	// Build the prompt with conversation wrapped in tags
 	let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
@@ -790,7 +791,7 @@ async function generateShortSummary(
 ): Promise<string> {
 	const maxTokens = Math.min(512, Math.floor(0.2 * reserveTokens));
 	const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(recentMessages);
-	const conversationText = serializeConversation(llmMessages);
+	const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
 	let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
 	if (historySummary) {
@@ -1155,7 +1156,7 @@ async function generateTurnPrefixSummary(
 	const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix
 	const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(messages);
-	const conversationText = serializeConversation(llmMessages);
+	const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
 	const promptText = `<conversation>\n${conversationText}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`;
 	const summarizationMessages = [
 		{

package/src/compaction/pruning.ts CHANGED Viewed

@@ -81,6 +81,16 @@ function createPrunedNotice(tokens: number): string {
 	return `[Output truncated - ${tokens} tokens]`;
 }
+/**
+ * Generic age-based pruning floor. Below this, blanking a result to
+ * `[Output truncated - N tokens]` recovers nothing — the placeholder itself
+ * costs ~8 tokens, so a sub-floor result grows the context (and churns the
+ * prompt cache) instead of shrinking it. Superseded/useless results keep their
+ * own rules: useless already drops no-savings candidates, superseded prunes for
+ * correctness regardless of size.
+ */
+const MIN_PRUNE_TOKENS = 50;
 function getToolResultMessage(entry: SessionEntry): ToolResultMessage | undefined {
 	if (entry.type !== "message") return undefined;
 	const message = entry.message as AgentMessage;
@@ -271,7 +281,8 @@ export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig =
 		// any age).
 		const superseded = supersededMessages?.has(message) ?? false;
 		const useless = uselessMessages?.has(message) ?? false;
-		if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected)) {
+		const tooSmall = tokens < MIN_PRUNE_TOKENS;
+		if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected || tooSmall)) {
 			accumulatedTokens += tokens;
 			continue;
 		}