npm - @oh-my-pi/pi-agent-core - Versions diffs - 15.13.2 → 15.13.3 - Mend

@oh-my-pi/pi-agent-core 15.13.2 → 15.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/CHANGELOG.md +11 -0
package/dist/types/compaction/utils.d.ts +2 -1
package/dist/types/types.d.ts +9 -0
package/package.json +6 -6
package/src/agent-loop.ts +28 -1
package/src/compaction/branch-summarization.ts +2 -1
package/src/compaction/compaction.ts +4 -3
package/src/compaction/pruning.ts +12 -1
package/src/compaction/utils.ts +44 -11
package/src/types.ts +9 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,17 @@
 ## [Unreleased]
+## [15.13.3] - 2026-06-15
+### Added
+- Added the `interruptible` tool field: when set, the agent loop may abort the tool mid-execution to deliver a queued steering message (honored only in `immediate` interrupt mode).
+- Added support for `gemini` and `gemma` as valid owned tool syntax values in environment configuration
+### Fixed
+- Fixed `pruneToolOutputs` blanking tiny tool results during overflow pruning: results below `50` tokens (`MIN_PRUNE_TOKENS`) are no longer replaced with the `[Output truncated - N tokens]` placeholder, which cost more tokens than the result itself and churned the prompt cache for zero savings.
 ## [15.13.2] - 2026-06-15
 ### Breaking Changes

package/dist/types/compaction/utils.d.ts CHANGED Viewed

@@ -2,6 +2,7 @@
  * Shared utilities for compaction and branch summarization.
  */
 import type { Message } from "@oh-my-pi/pi-ai";
+import { type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
 import type { AgentMessage } from "../types";
 export interface FileOperations {
     read: Set<string>;
@@ -44,5 +45,5 @@ export declare function upsertFileOperations(summary: string, readFiles: string[
  * This prevents the model from treating it as a conversation to continue.
  * Call convertToLlm() first to handle custom message types.
  */
-export declare function serializeConversation(messages: Message[]): string;
+export declare function serializeConversation(messages: Message[], syntax?: ToolCallSyntax): string;
 export declare const SUMMARIZATION_SYSTEM_PROMPT: string;

package/dist/types/types.d.ts CHANGED Viewed

@@ -428,6 +428,15 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
     concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
     /** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
     lenientArgValidation?: boolean;
+    /**
+     * If true, the agent loop may abort this tool mid-execution to deliver a
+     * queued steering message (instead of waiting for the tool to finish on its
+     * own). Set only on tools that purely *wait* and observe their abort signal
+     * cleanly (e.g. the `job` poll), so the abort surfaces the tool's current
+     * snapshot rather than corrupting a side effect. Honored only when
+     * `interruptMode` is "immediate".
+     */
+    interruptible?: boolean;
     /**
      * Controls how the INTENT_FIELD (`_i`) is handled for this tool.
      * - `"require"` (default): `_i` is injected and required in the parameter schema.

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-agent-core",
-	"version": "15.13.2",
+	"version": "15.13.3",
 	"description": "General-purpose agent with transport abstraction, state management, and attachment support",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -35,11 +35,11 @@
 		"fmt": "biome format --write ."
 	},
 	"dependencies": {
-		"@oh-my-pi/pi-ai": "15.13.2",
-		"@oh-my-pi/pi-catalog": "15.13.2",
-		"@oh-my-pi/pi-natives": "15.13.2",
-		"@oh-my-pi/pi-utils": "15.13.2",
-		"@oh-my-pi/snapcompact": "15.13.2",
+		"@oh-my-pi/pi-ai": "15.13.3",
+		"@oh-my-pi/pi-catalog": "15.13.3",
+		"@oh-my-pi/pi-natives": "15.13.3",
+		"@oh-my-pi/pi-utils": "15.13.3",
+		"@oh-my-pi/snapcompact": "15.13.3",
 		"@opentelemetry/api": "^1.9.1"
 	},
 	"devDependencies": {

package/src/agent-loop.ts CHANGED Viewed

@@ -74,6 +74,14 @@ const ABORTED: unique symbol = Symbol("agent-loop-aborted");
  */
 const MAX_PAUSED_TURN_CONTINUATIONS = 8;
+/**
+ * Cadence (ms) for polling queued steering while an `interruptible` tool is in
+ * flight, so a steer cuts the wait short instead of sitting idle until the
+ * tool's own window elapses. A cheap synchronous queue check; latency-bounded
+ * at one tick.
+ */
+const STEERING_INTERRUPT_POLL_MS = 250;
 class HarmonyLeakInterruption extends Error {
 	constructor(
 		readonly detection: HarmonyDetection,
@@ -98,6 +106,8 @@ function resolveOwnedToolSyntaxFromEnv(value: string | undefined): ToolCallSynta
 		case "harmony":
 		case "pi":
 		case "qwen3":
+		case "gemini":
+		case "gemma":
 			return value;
 		default:
 			return undefined;
@@ -1795,7 +1805,24 @@ async function executeToolCalls(
 		}
 	}
-	await Promise.allSettled(tasks);
+	// While an interruptible tool is in flight (e.g. a `job` poll blocking on
+	// background work), a queued steer would otherwise wait out the tool's own
+	// window. Poll the steering queue and let checkSteering() abort the shared
+	// tool signal so the wait returns early; the boundary dequeue below then
+	// injects it. Gated on immediate-interrupt mode + an interruptible tool;
+	// checkSteering is idempotent (no-op once triggered).
+	const watchSteeringWhileRunning =
+		shouldInterruptImmediately &&
+		(hasSteeringMessages !== undefined || getSteeringMessages !== undefined) &&
+		records.some(r => r.tool?.interruptible === true);
+	const steeringWatchTimer = watchSteeringWhileRunning
+		? setInterval(() => void checkSteering(), STEERING_INTERRUPT_POLL_MS)
+		: undefined;
+	try {
+		await Promise.allSettled(tasks);
+	} finally {
+		if (steeringWatchTimer !== undefined) clearInterval(steeringWatchTimer);
+	}
 	// Yield after batch tool execution to let GC and I/O catch up,
 	// especially when tool results are large (e.g. bash output).
 	await yieldIfDue();

package/src/compaction/branch-summarization.ts CHANGED Viewed

@@ -6,6 +6,7 @@
  */
 import type { ApiKey, Model } from "@oh-my-pi/pi-ai";
+import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
 import { prompt } from "@oh-my-pi/pi-utils";
 import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
 import type { AgentMessage } from "../types";
@@ -290,7 +291,7 @@ export async function generateBranchSummary(
 	// Transform to LLM-compatible messages, then serialize to text
 	// Serialization prevents the model from treating it as a conversation to continue
 	const llmMessages = (options.convertToLlm ?? defaultConvertToLlm)(messages);
-	const conversationText = serializeConversation(llmMessages);
+	const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
 	// Build prompt
 	const instructions = customInstructions || BRANCH_SUMMARY_PROMPT;

package/src/compaction/compaction.ts CHANGED Viewed

@@ -18,6 +18,7 @@ import {
 	type Usage,
 	withAuth,
 } from "@oh-my-pi/pi-ai";
+import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
 import { clampThinkingLevelForModel } from "@oh-my-pi/pi-catalog/model-thinking";
 import { countTokens } from "@oh-my-pi/pi-natives";
 import { logger, prompt } from "@oh-my-pi/pi-utils";
@@ -642,7 +643,7 @@ export async function generateSummary(
 	// Serialize conversation to text so model doesn't try to continue it
 	// Convert to LLM messages first (handles custom app messages when caller provides a transformer).
 	const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(currentMessages);
-	const conversationText = serializeConversation(llmMessages);
+	const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
 	// Build the prompt with conversation wrapped in tags
 	let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
@@ -790,7 +791,7 @@ async function generateShortSummary(
 ): Promise<string> {
 	const maxTokens = Math.min(512, Math.floor(0.2 * reserveTokens));
 	const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(recentMessages);
-	const conversationText = serializeConversation(llmMessages);
+	const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
 	let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
 	if (historySummary) {
@@ -1155,7 +1156,7 @@ async function generateTurnPrefixSummary(
 	const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix
 	const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(messages);
-	const conversationText = serializeConversation(llmMessages);
+	const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
 	const promptText = `<conversation>\n${conversationText}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`;
 	const summarizationMessages = [
 		{

package/src/compaction/pruning.ts CHANGED Viewed

@@ -81,6 +81,16 @@ function createPrunedNotice(tokens: number): string {
 	return `[Output truncated - ${tokens} tokens]`;
 }
+/**
+ * Generic age-based pruning floor. Below this, blanking a result to
+ * `[Output truncated - N tokens]` recovers nothing — the placeholder itself
+ * costs ~8 tokens, so a sub-floor result grows the context (and churns the
+ * prompt cache) instead of shrinking it. Superseded/useless results keep their
+ * own rules: useless already drops no-savings candidates, superseded prunes for
+ * correctness regardless of size.
+ */
+const MIN_PRUNE_TOKENS = 50;
 function getToolResultMessage(entry: SessionEntry): ToolResultMessage | undefined {
 	if (entry.type !== "message") return undefined;
 	const message = entry.message as AgentMessage;
@@ -271,7 +281,8 @@ export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig =
 		// any age).
 		const superseded = supersededMessages?.has(message) ?? false;
 		const useless = uselessMessages?.has(message) ?? false;
-		if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected)) {
+		const tooSmall = tokens < MIN_PRUNE_TOKENS;
+		if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected || tooSmall)) {
 			accumulatedTokens += tokens;
 			continue;
 		}

package/src/compaction/utils.ts CHANGED Viewed

@@ -2,7 +2,8 @@
  * Shared utilities for compaction and branch summarization.
  */
-import type { Message } from "@oh-my-pi/pi-ai";
+import type { Message, ToolCall } from "@oh-my-pi/pi-ai";
+import { type Grammar, type GrammarToolResult, getInbandGrammar, type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
 import { formatGroupedPaths, prompt } from "@oh-my-pi/pi-utils";
 import type { AgentMessage } from "../types";
 import fileOperationsTemplate from "./prompts/file-operations.md" with { type: "text" };
@@ -188,7 +189,8 @@ function truncateForSummary(text: string, maxChars: number): string {
  * This prevents the model from treating it as a conversation to continue.
  * Call convertToLlm() first to handle custom message types.
  */
-export function serializeConversation(messages: Message[]): string {
+export function serializeConversation(messages: Message[], syntax?: ToolCallSyntax): string {
+	const grammar = syntax ? getInbandGrammar(syntax) : undefined;
 	const parts: string[] = [];
 	// Tool results flagged contextually useless (and their paired calls) are
@@ -215,7 +217,7 @@ export function serializeConversation(messages: Message[]): string {
 		} else if (msg.role === "assistant") {
 			const textParts: string[] = [];
 			const thinkingParts: string[] = [];
-			const toolCalls: string[] = [];
+			const toolCalls: ToolCall[] = [];
 			for (const block of msg.content) {
 				if (block.type === "text") {
@@ -224,22 +226,18 @@ export function serializeConversation(messages: Message[]): string {
 					thinkingParts.push(block.thinking);
 				} else if (block.type === "toolCall") {
 					if (uselessCallIds.has(block.id)) continue;
-					const args = block.arguments as Record<string, unknown>;
-					const argsStr = Object.entries(args)
-						.map(([k, v]) => `${k}=${JSON.stringify(v)}`)
-						.join(", ");
-					toolCalls.push(`${block.name}(${argsStr})`);
+					toolCalls.push(block);
 				}
 			}
 			if (thinkingParts.length > 0) {
-				parts.push(`[Assistant thinking]: ${thinkingParts.join("\n")}`);
+				parts.push(`[Think]: ${thinkingParts.join("\n")}`);
 			}
 			if (textParts.length > 0) {
 				parts.push(`[Assistant]: ${textParts.join("\n")}`);
 			}
 			if (toolCalls.length > 0) {
-				parts.push(`[Assistant tool calls]: ${toolCalls.join("; ")}`);
+				parts.push(`[Tool Call]: ${renderToolCalls(toolCalls, grammar)}`);
 			}
 		} else if (msg.role === "toolResult") {
 			if (uselessCallIds.has(msg.toolCallId)) continue;
@@ -248,7 +246,10 @@ export function serializeConversation(messages: Message[]): string {
 				.map(c => c.text)
 				.join("");
 			if (content) {
-				parts.push(`[Tool result]: ${truncateForSummary(content, TOOL_RESULT_MAX_CHARS)}`);
+				const text = truncateForSummary(content, TOOL_RESULT_MAX_CHARS);
+				parts.push(
+					`[Tool Result]: ${renderToolResult(msg.toolCallId, msg.toolName, msg.isError === true, text, grammar)}`,
+				);
 			}
 		}
 	}
@@ -256,6 +257,38 @@ export function serializeConversation(messages: Message[]): string {
 	return parts.join("\n\n");
 }
+/**
+ * Render an assistant turn's tool calls. With a grammar, emit the model's
+ * native invocation block; otherwise fall back to a compact `name(args)` list.
+ */
+function renderToolCalls(calls: ToolCall[], grammar: Grammar | undefined): string {
+	if (grammar) return grammar.renderAssistantToolCalls(calls);
+	return calls
+		.map(call => {
+			const argsStr = Object.entries(call.arguments as Record<string, unknown>)
+				.map(([k, v]) => `${k}=${JSON.stringify(v)}`)
+				.join(", ");
+			return `${call.name}(${argsStr})`;
+		})
+		.join("; ");
+}
+/**
+ * Render a single tool result. With a grammar, emit the model's native
+ * tool-result envelope; otherwise return the (already truncated) text verbatim.
+ */
+function renderToolResult(
+	id: string,
+	name: string,
+	isError: boolean,
+	text: string,
+	grammar: Grammar | undefined,
+): string {
+	if (!grammar) return text;
+	const result: GrammarToolResult = { id, name, index: 0, text, isError };
+	return grammar.renderToolResults([result]);
+}
 // ============================================================================
 // Summarization System Prompt
 // ============================================================================

package/src/types.ts CHANGED Viewed

@@ -503,6 +503,15 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
 	concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
 	/** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
 	lenientArgValidation?: boolean;
+	/**
+	 * If true, the agent loop may abort this tool mid-execution to deliver a
+	 * queued steering message (instead of waiting for the tool to finish on its
+	 * own). Set only on tools that purely *wait* and observe their abort signal
+	 * cleanly (e.g. the `job` poll), so the abort surfaces the tool's current
+	 * snapshot rather than corrupting a side effect. Honored only when
+	 * `interruptMode` is "immediate".
+	 */
+	interruptible?: boolean;
 	/**
 	 * Controls how the INTENT_FIELD (`_i`) is handled for this tool.
 	 * - `"require"` (default): `_i` is injected and required in the parameter schema.