@oh-my-pi/pi-agent-core 15.5.13 → 15.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,21 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.5.15] - 2026-05-30
6
+ ### Added
7
+
8
+ - Added `maxToolCallsPerTurn` to `AgentLoopConfig`/`AgentOptions`, allowing callers to cut a streamed assistant turn after a completed tool-call batch and execute the runnable partial turn instead of waiting for the provider to yield.
9
+
10
+ ### Fixed
11
+
12
+ - Normalized `maxToolCallsPerTurn` to accept only positive integer limits, with non-finite or non-positive values treated as disabled.
13
+
14
+ ## [15.5.14] - 2026-05-29
15
+
16
+ ### Fixed
17
+
18
+ - Fixed the agent loop abandoning tool calls that Anthropic adaptive/interleaved-thinking models (e.g. Opus) emit under `stop_reason: "end_turn"`. The previous gate only ran tools when `stopReason === "toolUse"`, so an `end_turn`+tool_use turn produced "Tool call was not executed because the assistant ended its turn" placeholders, made no progress, and could trap the model in a re-emit/abandon loop. `stop_reason` is never replayed on the wire and (verified against the live Anthropic Messages API) does not gate continuation validity, so `stop`/`end_turn` turns carrying tool_use blocks are now executed and the loop continues — exactly like `toolUse`. Only `length` (max_tokens truncation) still abandons, since the trailing tool call may have incomplete arguments. The continuation stays valid because `transformMessages` strips the now-untrustworthy thinking signature and the encoder downgrades the block to text.
19
+
5
20
  ## [15.5.10] - 2026-05-28
6
21
 
7
22
  ### Fixed
@@ -31,6 +31,11 @@ export interface AgentOptions {
31
31
  * - "wait": defer steering until the current turn completes
32
32
  */
33
33
  interruptMode?: "immediate" | "wait";
34
+ /**
35
+ * Maximum completed tool calls to accept from one streamed assistant turn before
36
+ * executing the batch. Undefined disables batching.
37
+ */
38
+ maxToolCallsPerTurn?: number;
34
39
  /**
35
40
  * API format for Kimi Code provider: "openai" or "anthropic" (default: "anthropic")
36
41
  */
@@ -263,6 +268,8 @@ export declare class Agent {
263
268
  * Set to 0 to disable the cap.
264
269
  */
265
270
  set maxRetryDelayMs(value: number | undefined);
271
+ get maxToolCallsPerTurn(): number | undefined;
272
+ set maxToolCallsPerTurn(value: number | undefined);
266
273
  get state(): AgentState;
267
274
  get appendOnlyContext(): AppendOnlyContextManager | undefined;
268
275
  setAppendOnlyContext(manager?: AppendOnlyContextManager): void;
@@ -16,6 +16,13 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
16
16
  * - "wait" = defer steering until the current turn completes
17
17
  */
18
18
  interruptMode?: "immediate" | "wait";
19
+ /**
20
+ * Maximum completed tool calls to accept from one streamed assistant turn before
21
+ * cutting the provider stream and executing that batch. The cap is enforced on
22
+ * `toolcall_end` so every executed call has complete arguments. Undefined disables
23
+ * batching.
24
+ */
25
+ maxToolCallsPerTurn?: number;
19
26
  /**
20
27
  * Optional session identifier forwarded to LLM providers.
21
28
  * Used by providers that support session-based caching (e.g., OpenAI Codex).
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-agent-core",
4
- "version": "15.5.13",
4
+ "version": "15.6.0",
5
5
  "description": "General-purpose agent with transport abstraction, state management, and attachment support",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -35,14 +35,14 @@
35
35
  "fmt": "biome format --write ."
36
36
  },
37
37
  "dependencies": {
38
- "@oh-my-pi/pi-ai": "15.5.13",
39
- "@oh-my-pi/pi-natives": "15.5.13",
40
- "@oh-my-pi/pi-utils": "15.5.13",
41
- "@opentelemetry/api": "^1.9.0"
38
+ "@oh-my-pi/pi-ai": "15.6.0",
39
+ "@oh-my-pi/pi-natives": "15.6.0",
40
+ "@oh-my-pi/pi-utils": "15.6.0",
41
+ "@opentelemetry/api": "^1.9.1"
42
42
  },
43
43
  "devDependencies": {
44
- "@opentelemetry/context-async-hooks": "^2.0.0",
45
- "@opentelemetry/sdk-trace-base": "^2.0.0",
44
+ "@opentelemetry/context-async-hooks": "^2.7.1",
45
+ "@opentelemetry/sdk-trace-base": "^2.7.1",
46
46
  "@types/bun": "^1.3.14"
47
47
  },
48
48
  "engines": {
package/src/agent-loop.ts CHANGED
@@ -441,6 +441,27 @@ interface StepCounter {
441
441
  count: number;
442
442
  }
443
443
 
444
/**
 * Normalizes a caller-supplied per-turn tool-call cap.
 *
 * Returns `undefined` when `value` is `undefined` or not finite (`NaN`,
 * `±Infinity`). Otherwise the value is truncated toward zero with
 * `Math.trunc` and returned only if the truncated result is greater than
 * zero, so zero, negative numbers, and fractions below 1 also yield
 * `undefined`.
 *
 * @param value Raw cap as provided by the caller.
 * @returns A positive integer cap, or `undefined` when no cap applies.
 */
+ function normalizeMaxToolCallsPerTurn(value: number | undefined): number | undefined {
445
+ if (value === undefined || !Number.isFinite(value)) return undefined;
446
+ const normalized = Math.trunc(value);
447
+ return normalized > 0 ? normalized : undefined;
448
+ }
449
+
450
/**
 * Builds a detached copy of an assistant message for use as the final
 * message of a turn that was cut by the tool-call cap.
 *
 * The top-level fields are shallow-copied. Each content block is also
 * shallow-copied; for `toolCall` blocks the `arguments` value is
 * additionally deep-copied with `structuredClone`. The copy always has
 * `stopReason` set to `"toolUse"` and `errorMessage` / `errorStatus`
 * reset to `undefined`, regardless of what the input carried.
 *
 * @param message Assistant message to copy; it is not modified.
 * @returns A new message object with the overrides described above.
 */
+ function cloneAssistantMessageForToolCallCap(message: AssistantMessage): AssistantMessage {
451
+ return {
452
+ ...message,
453
+ content: message.content.map(block => {
454
+ if (block.type === "toolCall") {
455
+ return { ...block, arguments: structuredClone(block.arguments) };
456
+ }
457
+ return { ...block };
458
+ }),
459
+ stopReason: "toolUse",
460
+ errorMessage: undefined,
461
+ errorStatus: undefined,
462
+ };
463
+ }
464
+
444
465
  async function runLoopBody(
445
466
  currentContext: AgentContext,
446
467
  newMessages: AgentMessage[],
@@ -562,19 +583,23 @@ async function runLoopBody(
562
583
  return;
563
584
  }
564
585
 
565
- // Tool execution is gated on the model's *stop reason* (`toolUse`), not the
566
- // mere presence of toolCall blocks. Anthropic's documented agentic loop runs
567
- // tools "while stop_reason == tool_use" and exits on any other reason. With
568
- // adaptive/interleaved thinking a turn can emit tool calls and then end
569
- // naturally (`end_turn` `stop`) when the model decides to wrap up — those
570
- // calls are abandoned. Executing them and appending tool_results yields an
571
- // invalid continuation (Anthropic rejects continuing an ended turn), which is
572
- // what broke interleaved tool use. Providers set `toolUse` whenever they
573
- // genuinely want tools run (Anthropic on `tool_use`; OpenAI-style providers
574
- // promote `stop`→`toolUse` whenever tool-call blocks are emitted).
586
+ // Run tools whenever the turn carries tool_use blocks AND was not truncated.
587
+ // `stop_reason` is provider metadata that never goes back on the wire, so it
588
+ // does not gate continuation validity: replaying a tool_use turn with the
589
+ // tool_results appended is accepted whether the turn ended on `tool_use` or
590
+ // `end_turn` (adaptive/interleaved-thinking Opus routinely emits tool calls
591
+ // under `end_turn`; verified against the live Anthropic API). The only
592
+ // continuation hazard is a thinking block carrying a stale/invalid signature,
593
+ // which `transformMessages` already neutralizes: it strips the signature on
594
+ // non-`toolUse` turns and the encoder downgrades the unsigned block to text,
595
+ // which the API accepts. So treat `stop` (end_turn/pause_turn) the same as
596
+ // `toolUse`. `length` (max_tokens) is the one reason we must NOT run: the
597
+ // trailing tool_use may be truncated with incomplete arguments — those calls
598
+ // are abandoned below. (`error`/`aborted` already returned above.)
575
599
  type ToolCallContent = Extract<AssistantMessage["content"][number], { type: "toolCall" }>;
576
600
  const toolCalls = message.content.filter((c): c is ToolCallContent => c.type === "toolCall");
577
- hasMoreToolCalls = message.stopReason === "toolUse" && toolCalls.length > 0;
601
+ const runnableStop = message.stopReason === "toolUse" || message.stopReason === "stop";
602
+ hasMoreToolCalls = runnableStop && toolCalls.length > 0;
578
603
 
579
604
  const toolResults: ToolResultMessage[] = [];
580
605
  if (hasMoreToolCalls) {
@@ -596,10 +621,11 @@ async function runLoopBody(
596
621
  newMessages.push(result);
597
622
  }
598
623
  } else if (toolCalls.length > 0) {
599
- // Model ended the turn (stopReason !== "toolUse") but left toolCall blocks
600
- // behind. They were abandoned, so don't execute or continue — but pair each
601
- // with a placeholder result to keep the tool_use/tool_result contract valid
602
- // for any later request that replays this turn.
624
+ // Turn ended on a non-runnable reason (`length` truncation) but left
625
+ // toolCall blocks behind. The trailing call's arguments may be incomplete,
626
+ // so don't execute or continue — pair each with a placeholder result to keep
627
+ // the tool_use/tool_result contract valid for any later request that
628
+ // replays this turn.
603
629
  for (const toolCall of toolCalls) {
604
630
  const result = createAbortedToolResult(toolCall, stream, "skipped");
605
631
  currentContext.messages.push(result);
@@ -707,11 +733,18 @@ async function streamAssistantResponse(
707
733
  const dynamicReasoning = config.getReasoning?.();
708
734
  const harmonyMitigationEnabled = isHarmonyLeakMitigationTarget(config.model);
709
735
  const harmonyAbortController = harmonyMitigationEnabled ? new AbortController() : undefined;
710
- const requestSignal = harmonyAbortController
711
- ? signal
712
- ? AbortSignal.any([signal, harmonyAbortController.signal])
713
- : harmonyAbortController.signal
714
- : signal;
736
+ const maxToolCallsPerTurn = normalizeMaxToolCallsPerTurn(config.maxToolCallsPerTurn);
737
+ const toolCallCapAbortController = maxToolCallsPerTurn === undefined ? undefined : new AbortController();
738
+ const requestSignals: AbortSignal[] = [];
739
+ if (signal) requestSignals.push(signal);
740
+ if (harmonyAbortController) requestSignals.push(harmonyAbortController.signal);
741
+ if (toolCallCapAbortController) requestSignals.push(toolCallCapAbortController.signal);
742
+ const requestSignal =
743
+ requestSignals.length === 0
744
+ ? undefined
745
+ : requestSignals.length === 1
746
+ ? requestSignals[0]
747
+ : AbortSignal.any(requestSignals);
715
748
  const effectiveTemperature =
716
749
  harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
717
750
  const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
@@ -773,6 +806,26 @@ async function streamAssistantResponse(
773
806
  let addedPartial = false;
774
807
 
775
808
  const responseIterator = response[Symbol.asyncIterator]();
809
+ let completedToolCalls = 0;
810
+ let cappedMessage: AssistantMessage | undefined;
811
+ let capFinalized = false;
812
+
813
// Finalizes the turn with `cappedMessage` once the tool-call cap has been hit.
// Returns `undefined` when no capped message has been recorded, so callers can
// fall through to their regular handling. Otherwise it asks the response
// iterator to stop (ignoring any rejection from `return()`) and, on the first
// call only (guarded by `capFinalized`), stores the capped message in
// `context.messages`: it replaces the last entry when a partial was already
// added, or else appends it and emits `message_start`. It then emits
// `message_end` and awaits `finishChat`. Every call returns `cappedMessage`.
+ const finishCappedAssistantMessage = async (): Promise<AssistantMessage | undefined> => {
814
+ if (!cappedMessage) return undefined;
815
+ responseIterator.return?.()?.catch(() => {});
816
+ if (!capFinalized) {
817
+ if (addedPartial) {
818
+ context.messages[context.messages.length - 1] = cappedMessage;
819
+ } else {
820
+ context.messages.push(cappedMessage);
821
+ stream.push({ type: "message_start", message: { ...cappedMessage } });
822
+ }
823
+ stream.push({ type: "message_end", message: cappedMessage });
824
+ await finishChat(cappedMessage);
825
+ capFinalized = true;
826
+ }
827
+ return cappedMessage;
828
+ };
776
829
 
777
830
  // Set up a single abort race: register the abort listener once for the whole
778
831
  // stream and reuse the same race promise for every iterator.next() instead of
@@ -798,6 +851,10 @@ async function streamAssistantResponse(
798
851
  if (abortRacePromise) {
799
852
  const result = await Promise.race([responseIterator.next(), abortRacePromise]);
800
853
  if (result === ABORTED) {
854
+ if (toolCallCapAbortController?.signal.aborted) {
855
+ const capped = await finishCappedAssistantMessage();
856
+ if (capped) return capped;
857
+ }
801
858
  responseIterator.return?.()?.catch(() => {});
802
859
  const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
803
860
  await finishChat(aborted);
@@ -808,6 +865,10 @@ async function streamAssistantResponse(
808
865
  next = await responseIterator.next();
809
866
  }
810
867
  if (requestSignal?.aborted) {
868
+ if (toolCallCapAbortController?.signal.aborted) {
869
+ const capped = await finishCappedAssistantMessage();
870
+ if (capped) return capped;
871
+ }
811
872
  const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
812
873
  await finishChat(aborted);
813
874
  return aborted;
@@ -848,6 +909,15 @@ async function streamAssistantResponse(
848
909
  assistantMessageEvent: event,
849
910
  message: { ...partialMessage },
850
911
  });
912
+ if (event.type === "toolcall_end" && maxToolCallsPerTurn !== undefined) {
913
+ completedToolCalls++;
914
+ if (completedToolCalls >= maxToolCallsPerTurn) {
915
+ cappedMessage = cloneAssistantMessageForToolCallCap(partialMessage);
916
+ toolCallCapAbortController?.abort();
917
+ const capped = await finishCappedAssistantMessage();
918
+ if (capped) return capped;
919
+ }
920
+ }
851
921
  }
852
922
  break;
853
923
 
package/src/agent.ts CHANGED
@@ -102,6 +102,12 @@ export interface AgentOptions {
102
102
  */
103
103
  interruptMode?: "immediate" | "wait";
104
104
 
105
+ /**
106
+ * Maximum completed tool calls to accept from one streamed assistant turn before
107
+ * executing the batch. Undefined disables batching.
108
+ */
109
+ maxToolCallsPerTurn?: number;
110
+
105
111
  /**
106
112
  * API format for Kimi Code provider: "openai" or "anthropic" (default: "anthropic")
107
113
  */
@@ -269,6 +275,7 @@ export class Agent {
269
275
  #steeringMode: "all" | "one-at-a-time";
270
276
  #followUpMode: "all" | "one-at-a-time";
271
277
  #interruptMode: "immediate" | "wait";
278
+ #maxToolCallsPerTurn?: number;
272
279
  #sessionId?: string;
273
280
  #metadata?: Record<string, unknown>;
274
281
  #metadataResolver?: (provider: string) => Record<string, unknown> | undefined;
@@ -325,6 +332,7 @@ export class Agent {
325
332
  this.#steeringMode = opts.steeringMode || "one-at-a-time";
326
333
  this.#followUpMode = opts.followUpMode || "one-at-a-time";
327
334
  this.#interruptMode = opts.interruptMode || "immediate";
335
+ this.#maxToolCallsPerTurn = opts.maxToolCallsPerTurn;
328
336
  this.streamFn = opts.streamFn || streamSimple;
329
337
  this.#sessionId = opts.sessionId;
330
338
  this.#providerSessionState = opts.providerSessionState;
@@ -547,6 +555,14 @@ export class Agent {
547
555
  this.#maxRetryDelayMs = value;
548
556
  }
549
557
 
558
+ get maxToolCallsPerTurn(): number | undefined {
559
+ return this.#maxToolCallsPerTurn;
560
+ }
561
+
562
+ set maxToolCallsPerTurn(value: number | undefined) {
563
+ this.#maxToolCallsPerTurn = value;
564
+ }
565
+
550
566
  get state(): AgentState {
551
567
  return this.#state;
552
568
  }
@@ -917,6 +933,7 @@ export class Agent {
917
933
  serviceTier: this.#serviceTier,
918
934
  hideThinkingSummary: this.#hideThinkingSummary,
919
935
  interruptMode: this.#interruptMode,
936
+ maxToolCallsPerTurn: this.#maxToolCallsPerTurn,
920
937
  sessionId: this.#sessionId,
921
938
  metadata: this.#metadataResolver ? undefined : this.#metadata,
922
939
  metadataResolver: this.#metadataResolver,
@@ -1091,7 +1108,7 @@ export class Agent {
1091
1108
 
1092
1109
  /** Calculate total text length from an assistant message's content blocks */
1093
1110
  #getAssistantTextLength(message: AgentMessage | null): number {
1094
- if (!message || message.role !== "assistant" || !Array.isArray(message.content)) {
1111
+ if (message?.role !== "assistant" || !Array.isArray(message.content)) {
1095
1112
  return 0;
1096
1113
  }
1097
1114
  let length = 0;
@@ -230,7 +230,7 @@ export function recoverHarmonyToolCall(
230
230
  ): HarmonyRecoveredToolCall | undefined {
231
231
  if (detection.surface !== "tool_arg" || detection.contentIndex === undefined) return undefined;
232
232
  const block = message.content[detection.contentIndex];
233
- if (!block || block.type !== "toolCall") return undefined;
233
+ if (block?.type !== "toolCall") return undefined;
234
234
 
235
235
  const config = RECOVERY_REGISTRY[block.name];
236
236
  if (!config) return undefined;
package/src/types.ts CHANGED
@@ -38,6 +38,14 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
38
38
  */
39
39
  interruptMode?: "immediate" | "wait";
40
40
 
41
+ /**
42
+ * Maximum completed tool calls to accept from one streamed assistant turn before
43
+ * cutting the provider stream and executing that batch. The cap is enforced on
44
+ * `toolcall_end` so every executed call has complete arguments. Undefined disables
45
+ * batching.
46
+ */
47
+ maxToolCallsPerTurn?: number;
48
+
41
49
  /**
42
50
  * Optional session identifier forwarded to LLM providers.
43
51
  * Used by providers that support session-based caching (e.g., OpenAI Codex).