npm - @oh-my-pi/pi-agent-core - Versions diffs - 15.5.13 → 15.6.0 - Mend

@oh-my-pi/pi-agent-core 15.5.13 → 15.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,21 @@
 ## [Unreleased]
+## [15.5.15] - 2026-05-30
+### Added
+- Added `maxToolCallsPerTurn` to `AgentLoopConfig`/`AgentOptions`, allowing callers to cut a streamed assistant turn after a completed tool-call batch and execute the runnable partial turn instead of waiting for the provider to yield.
+### Fixed
+- Normalized `maxToolCallsPerTurn` to a positive integer limit: fractional values are truncated, and non-finite or non-positive values are treated as disabled.
+## [15.5.14] - 2026-05-29
+### Fixed
+- Fixed the agent loop abandoning tool calls that Anthropic adaptive/interleaved-thinking models (e.g. Opus) emit under `stop_reason: "end_turn"`. The previous gate only ran tools when `stopReason === "toolUse"`, so an `end_turn`+tool_use turn produced "Tool call was not executed because the assistant ended its turn" placeholders, made no progress, and could trap the model in a re-emit/abandon loop. `stop_reason` is never replayed on the wire and (verified against the live Anthropic Messages API) does not gate continuation validity, so `stop`/`end_turn` turns carrying tool_use blocks are now executed and the loop continues — exactly like `toolUse`. Only `length` (max_tokens truncation) still abandons, since the trailing tool call may have incomplete arguments. The continuation stays valid because `transformMessages` strips the now-untrustworthy thinking signature and the encoder downgrades the block to text.
 ## [15.5.10] - 2026-05-28
 ### Fixed

package/dist/types/agent.d.ts CHANGED Viewed

@@ -31,6 +31,11 @@ export interface AgentOptions {
      * - "wait": defer steering until the current turn completes
      */
     interruptMode?: "immediate" | "wait";
+    /**
+     * Maximum completed tool calls to accept from one streamed assistant turn before
+     * executing the batch. The value is truncated to an integer; undefined,
+     * non-finite, or non-positive values disable the cap.
+     */
+    maxToolCallsPerTurn?: number;
     /**
      * API format for Kimi Code provider: "openai" or "anthropic" (default: "anthropic")
      */
@@ -263,6 +268,8 @@ export declare class Agent {
      * Set to 0 to disable the cap.
      */
     set maxRetryDelayMs(value: number | undefined);
+    get maxToolCallsPerTurn(): number | undefined;
+    set maxToolCallsPerTurn(value: number | undefined);
     get state(): AgentState;
     get appendOnlyContext(): AppendOnlyContextManager | undefined;
     setAppendOnlyContext(manager?: AppendOnlyContextManager): void;

package/dist/types/types.d.ts CHANGED Viewed

@@ -16,6 +16,13 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
      * - "wait" = defer steering until the current turn completes
      */
     interruptMode?: "immediate" | "wait";
+    /**
+     * Maximum completed tool calls to accept from one streamed assistant turn before
+     * cutting the provider stream and executing that batch. The cap is enforced on
+     * `toolcall_end` so every executed call has complete arguments. The value is
+     * truncated to an integer; undefined, non-finite, or non-positive values disable
+     * the cap.
+     */
+    maxToolCallsPerTurn?: number;
     /**
      * Optional session identifier forwarded to LLM providers.
      * Used by providers that support session-based caching (e.g., OpenAI Codex).

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-agent-core",
-	"version": "15.5.13",
+	"version": "15.6.0",
 	"description": "General-purpose agent with transport abstraction, state management, and attachment support",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -35,14 +35,14 @@
 		"fmt": "biome format --write ."
 	},
 	"dependencies": {
-		"@oh-my-pi/pi-ai": "15.5.13",
-		"@oh-my-pi/pi-natives": "15.5.13",
-		"@oh-my-pi/pi-utils": "15.5.13",
-		"@opentelemetry/api": "^1.9.0"
+		"@oh-my-pi/pi-ai": "15.6.0",
+		"@oh-my-pi/pi-natives": "15.6.0",
+		"@oh-my-pi/pi-utils": "15.6.0",
+		"@opentelemetry/api": "^1.9.1"
 	},
 	"devDependencies": {
-		"@opentelemetry/context-async-hooks": "^2.0.0",
-		"@opentelemetry/sdk-trace-base": "^2.0.0",
+		"@opentelemetry/context-async-hooks": "^2.7.1",
+		"@opentelemetry/sdk-trace-base": "^2.7.1",
 		"@types/bun": "^1.3.14"
 	},
 	"engines": {

package/src/agent-loop.ts CHANGED Viewed

@@ -441,6 +441,27 @@ interface StepCounter {
 	count: number;
 }
+function normalizeMaxToolCallsPerTurn(value: number | undefined): number | undefined {
+	if (value === undefined || !Number.isFinite(value)) return undefined;
+	const normalized = Math.trunc(value);
+	return normalized > 0 ? normalized : undefined;
+}
+function cloneAssistantMessageForToolCallCap(message: AssistantMessage): AssistantMessage {
+	return {
+		...message,
+		content: message.content.map(block => {
+			if (block.type === "toolCall") {
+				return { ...block, arguments: structuredClone(block.arguments) };
+			}
+			return { ...block };
+		}),
+		stopReason: "toolUse",
+		errorMessage: undefined,
+		errorStatus: undefined,
+	};
+}
 async function runLoopBody(
 	currentContext: AgentContext,
 	newMessages: AgentMessage[],
@@ -562,19 +583,23 @@ async function runLoopBody(
 				return;
 			}
-			// Tool execution is gated on the model's *stop reason* (`toolUse`), not the
-			// mere presence of toolCall blocks. Anthropic's documented agentic loop runs
-			// tools "while stop_reason == tool_use" and exits on any other reason. With
-			// adaptive/interleaved thinking a turn can emit tool calls and then end
-			// naturally (`end_turn` → `stop`) when the model decides to wrap up — those
-			// calls are abandoned. Executing them and appending tool_results yields an
-			// invalid continuation (Anthropic rejects continuing an ended turn), which is
-			// what broke interleaved tool use. Providers set `toolUse` whenever they
-			// genuinely want tools run (Anthropic on `tool_use`; OpenAI-style providers
-			// promote `stop`→`toolUse` whenever tool-call blocks are emitted).
+			// Run tools whenever the turn carries tool_use blocks AND was not truncated.
+			// `stop_reason` is provider metadata that never goes back on the wire, so it
+			// does not gate continuation validity: replaying a tool_use turn with the
+			// tool_results appended is accepted whether the turn ended on `tool_use` or
+			// `end_turn` (adaptive/interleaved-thinking Opus routinely emits tool calls
+			// under `end_turn`; verified against the live Anthropic API). The only
+			// continuation hazard is a thinking block carrying a stale/invalid signature,
+			// which `transformMessages` already neutralizes — it strips the signature on
+			// non-`toolUse` turns and the encoder downgrades the unsigned block to text,
+			// which the API accepts. So treat `stop` (end_turn/pause_turn) the same as
+			// `toolUse`. `length` (max_tokens) is the one reason we must NOT run: the
+			// trailing tool_use may be truncated with incomplete arguments — those calls
+			// are abandoned below. (`error`/`aborted` already returned above.)
 			type ToolCallContent = Extract<AssistantMessage["content"][number], { type: "toolCall" }>;
 			const toolCalls = message.content.filter((c): c is ToolCallContent => c.type === "toolCall");
-			hasMoreToolCalls = message.stopReason === "toolUse" && toolCalls.length > 0;
+			const runnableStop = message.stopReason === "toolUse" || message.stopReason === "stop";
+			hasMoreToolCalls = runnableStop && toolCalls.length > 0;
 			const toolResults: ToolResultMessage[] = [];
 			if (hasMoreToolCalls) {
@@ -596,10 +621,11 @@ async function runLoopBody(
 					newMessages.push(result);
 				}
 			} else if (toolCalls.length > 0) {
-				// Model ended the turn (stopReason !== "toolUse") but left toolCall blocks
-				// behind. They were abandoned, so don't execute or continue — but pair each
-				// with a placeholder result to keep the tool_use/tool_result contract valid
-				// for any later request that replays this turn.
+				// Turn ended on a non-runnable reason (`length` truncation) but left
+				// toolCall blocks behind. The trailing call's arguments may be incomplete,
+				// so don't execute or continue — pair each with a placeholder result to keep
+				// the tool_use/tool_result contract valid for any later request that
+				// replays this turn.
 				for (const toolCall of toolCalls) {
 					const result = createAbortedToolResult(toolCall, stream, "skipped");
 					currentContext.messages.push(result);
@@ -707,11 +733,18 @@ async function streamAssistantResponse(
 	const dynamicReasoning = config.getReasoning?.();
 	const harmonyMitigationEnabled = isHarmonyLeakMitigationTarget(config.model);
 	const harmonyAbortController = harmonyMitigationEnabled ? new AbortController() : undefined;
-	const requestSignal = harmonyAbortController
-		? signal
-			? AbortSignal.any([signal, harmonyAbortController.signal])
-			: harmonyAbortController.signal
-		: signal;
+	const maxToolCallsPerTurn = normalizeMaxToolCallsPerTurn(config.maxToolCallsPerTurn);
+	const toolCallCapAbortController = maxToolCallsPerTurn === undefined ? undefined : new AbortController();
+	const requestSignals: AbortSignal[] = [];
+	if (signal) requestSignals.push(signal);
+	if (harmonyAbortController) requestSignals.push(harmonyAbortController.signal);
+	if (toolCallCapAbortController) requestSignals.push(toolCallCapAbortController.signal);
+	const requestSignal =
+		requestSignals.length === 0
+			? undefined
+			: requestSignals.length === 1
+				? requestSignals[0]
+				: AbortSignal.any(requestSignals);
 	const effectiveTemperature =
 		harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
 	const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
@@ -773,6 +806,26 @@ async function streamAssistantResponse(
 			let addedPartial = false;
 			const responseIterator = response[Symbol.asyncIterator]();
+			let completedToolCalls = 0;
+			let cappedMessage: AssistantMessage | undefined;
+			let capFinalized = false;
+			const finishCappedAssistantMessage = async (): Promise<AssistantMessage | undefined> => {
+				if (!cappedMessage) return undefined;
+				responseIterator.return?.()?.catch(() => {});
+				if (!capFinalized) {
+					if (addedPartial) {
+						context.messages[context.messages.length - 1] = cappedMessage;
+					} else {
+						context.messages.push(cappedMessage);
+						stream.push({ type: "message_start", message: { ...cappedMessage } });
+					}
+					stream.push({ type: "message_end", message: cappedMessage });
+					await finishChat(cappedMessage);
+					capFinalized = true;
+				}
+				return cappedMessage;
+			};
 			// Set up a single abort race: register the abort listener once for the whole
 			// stream and reuse the same race promise for every iterator.next() instead of
@@ -798,6 +851,10 @@ async function streamAssistantResponse(
 					if (abortRacePromise) {
 						const result = await Promise.race([responseIterator.next(), abortRacePromise]);
 						if (result === ABORTED) {
+							if (toolCallCapAbortController?.signal.aborted) {
+								const capped = await finishCappedAssistantMessage();
+								if (capped) return capped;
+							}
 							responseIterator.return?.()?.catch(() => {});
 							const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
 							await finishChat(aborted);
@@ -808,6 +865,10 @@ async function streamAssistantResponse(
 						next = await responseIterator.next();
 					}
 					if (requestSignal?.aborted) {
+						if (toolCallCapAbortController?.signal.aborted) {
+							const capped = await finishCappedAssistantMessage();
+							if (capped) return capped;
+						}
 						const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
 						await finishChat(aborted);
 						return aborted;
@@ -848,6 +909,15 @@ async function streamAssistantResponse(
 									assistantMessageEvent: event,
 									message: { ...partialMessage },
 								});
+								if (event.type === "toolcall_end" && maxToolCallsPerTurn !== undefined) {
+									completedToolCalls++;
+									if (completedToolCalls >= maxToolCallsPerTurn) {
+										cappedMessage = cloneAssistantMessageForToolCallCap(partialMessage);
+										toolCallCapAbortController?.abort();
+										const capped = await finishCappedAssistantMessage();
+										if (capped) return capped;
+									}
+								}
 							}
 							break;

package/src/agent.ts CHANGED Viewed

@@ -102,6 +102,12 @@ export interface AgentOptions {
 	 */
 	interruptMode?: "immediate" | "wait";
+	/**
+	 * Maximum completed tool calls to accept from one streamed assistant turn before
+	 * executing the batch. The value is truncated to an integer; undefined,
+	 * non-finite, or non-positive values disable the cap.
+	 */
+	maxToolCallsPerTurn?: number;
 	/**
 	 * API format for Kimi Code provider: "openai" or "anthropic" (default: "anthropic")
 	 */
@@ -269,6 +275,7 @@ export class Agent {
 	#steeringMode: "all" | "one-at-a-time";
 	#followUpMode: "all" | "one-at-a-time";
 	#interruptMode: "immediate" | "wait";
+	#maxToolCallsPerTurn?: number;
 	#sessionId?: string;
 	#metadata?: Record<string, unknown>;
 	#metadataResolver?: (provider: string) => Record<string, unknown> | undefined;
@@ -325,6 +332,7 @@ export class Agent {
 		this.#steeringMode = opts.steeringMode || "one-at-a-time";
 		this.#followUpMode = opts.followUpMode || "one-at-a-time";
 		this.#interruptMode = opts.interruptMode || "immediate";
+		this.#maxToolCallsPerTurn = opts.maxToolCallsPerTurn;
 		this.streamFn = opts.streamFn || streamSimple;
 		this.#sessionId = opts.sessionId;
 		this.#providerSessionState = opts.providerSessionState;
@@ -547,6 +555,14 @@ export class Agent {
 		this.#maxRetryDelayMs = value;
 	}
+	get maxToolCallsPerTurn(): number | undefined {
+		return this.#maxToolCallsPerTurn;
+	}
+	set maxToolCallsPerTurn(value: number | undefined) {
+		this.#maxToolCallsPerTurn = value;
+	}
 	get state(): AgentState {
 		return this.#state;
 	}
@@ -917,6 +933,7 @@ export class Agent {
 			serviceTier: this.#serviceTier,
 			hideThinkingSummary: this.#hideThinkingSummary,
 			interruptMode: this.#interruptMode,
+			maxToolCallsPerTurn: this.#maxToolCallsPerTurn,
 			sessionId: this.#sessionId,
 			metadata: this.#metadataResolver ? undefined : this.#metadata,
 			metadataResolver: this.#metadataResolver,
@@ -1091,7 +1108,7 @@ export class Agent {
 	/** Calculate total text length from an assistant message's content blocks */
 	#getAssistantTextLength(message: AgentMessage | null): number {
-		if (!message || message.role !== "assistant" || !Array.isArray(message.content)) {
+		if (message?.role !== "assistant" || !Array.isArray(message.content)) {
 			return 0;
 		}
 		let length = 0;

package/src/harmony-leak.ts CHANGED Viewed

@@ -230,7 +230,7 @@ export function recoverHarmonyToolCall(
 ): HarmonyRecoveredToolCall | undefined {
 	if (detection.surface !== "tool_arg" || detection.contentIndex === undefined) return undefined;
 	const block = message.content[detection.contentIndex];
-	if (!block || block.type !== "toolCall") return undefined;
+	if (block?.type !== "toolCall") return undefined;
 	const config = RECOVERY_REGISTRY[block.name];
 	if (!config) return undefined;

package/src/types.ts CHANGED Viewed

@@ -38,6 +38,14 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
 	 */
 	interruptMode?: "immediate" | "wait";
+	/**
+	 * Maximum completed tool calls to accept from one streamed assistant turn before
+	 * cutting the provider stream and executing that batch. The cap is enforced on
+	 * `toolcall_end` so every executed call has complete arguments. The value is
+	 * truncated to an integer; undefined, non-finite, or non-positive values disable
+	 * the cap.
+	 */
+	maxToolCallsPerTurn?: number;
 	/**
 	 * Optional session identifier forwarded to LLM providers.
 	 * Used by providers that support session-based caching (e.g., OpenAI Codex).