@oh-my-pi/pi-agent-core 15.5.13 → 15.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/types/agent.d.ts +7 -0
- package/dist/types/types.d.ts +7 -0
- package/package.json +7 -7
- package/src/agent-loop.ts +90 -20
- package/src/agent.ts +18 -1
- package/src/harmony-leak.ts +1 -1
- package/src/types.ts +8 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.5.15] - 2026-05-30
|
|
6
|
+
### Added
|
|
7
|
+
|
|
8
|
+
- Added `maxToolCallsPerTurn` to `AgentLoopConfig`/`AgentOptions`, allowing callers to cut a streamed assistant turn after a completed tool-call batch and execute the runnable partial turn instead of waiting for the provider to yield.
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- Normalized `maxToolCallsPerTurn` to accept only positive integer limits, with non-finite or non-positive values treated as disabled.
|
|
13
|
+
|
|
14
|
+
## [15.5.14] - 2026-05-29
|
|
15
|
+
|
|
16
|
+
### Fixed
|
|
17
|
+
|
|
18
|
+
- Fixed the agent loop abandoning tool calls that Anthropic adaptive/interleaved-thinking models (e.g. Opus) emit under `stop_reason: "end_turn"`. The previous gate only ran tools when `stopReason === "toolUse"`, so an `end_turn`+tool_use turn produced "Tool call was not executed because the assistant ended its turn" placeholders, made no progress, and could trap the model in a re-emit/abandon loop. `stop_reason` is never replayed on the wire and (verified against the live Anthropic Messages API) does not gate continuation validity, so `stop`/`end_turn` turns carrying tool_use blocks are now executed and the loop continues — exactly like `toolUse`. Only `length` (max_tokens truncation) still abandons, since the trailing tool call may have incomplete arguments. The continuation stays valid because `transformMessages` strips the now-untrustworthy thinking signature and the encoder downgrades the block to text.
|
|
19
|
+
|
|
5
20
|
## [15.5.10] - 2026-05-28
|
|
6
21
|
|
|
7
22
|
### Fixed
|
package/dist/types/agent.d.ts
CHANGED
|
@@ -31,6 +31,11 @@ export interface AgentOptions {
|
|
|
31
31
|
* - "wait": defer steering until the current turn completes
|
|
32
32
|
*/
|
|
33
33
|
interruptMode?: "immediate" | "wait";
|
|
34
|
+
/**
|
|
35
|
+
* Maximum completed tool calls to accept from one streamed assistant turn before
|
|
36
|
+
* executing the batch. Undefined, non-finite, or less-than-1 values disable batching.
|
|
37
|
+
*/
|
|
38
|
+
maxToolCallsPerTurn?: number;
|
|
34
39
|
/**
|
|
35
40
|
* API format for Kimi Code provider: "openai" or "anthropic" (default: "anthropic")
|
|
36
41
|
*/
|
|
@@ -263,6 +268,8 @@ export declare class Agent {
|
|
|
263
268
|
* Set to 0 to disable the cap.
|
|
264
269
|
*/
|
|
265
270
|
set maxRetryDelayMs(value: number | undefined);
|
|
271
|
+
get maxToolCallsPerTurn(): number | undefined;
|
|
272
|
+
set maxToolCallsPerTurn(value: number | undefined);
|
|
266
273
|
get state(): AgentState;
|
|
267
274
|
get appendOnlyContext(): AppendOnlyContextManager | undefined;
|
|
268
275
|
setAppendOnlyContext(manager?: AppendOnlyContextManager): void;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -16,6 +16,13 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
|
|
|
16
16
|
* - "wait" = defer steering until the current turn completes
|
|
17
17
|
*/
|
|
18
18
|
interruptMode?: "immediate" | "wait";
|
|
19
|
+
/**
|
|
20
|
+
* Maximum completed tool calls to accept from one streamed assistant turn before
|
|
21
|
+
* cutting the provider stream and executing that batch. The cap is enforced on
|
|
22
|
+
* `toolcall_end` so every executed call has complete arguments. Undefined, non-finite, or less-than-1 values disable
|
|
23
|
+
* batching.
|
|
24
|
+
*/
|
|
25
|
+
maxToolCallsPerTurn?: number;
|
|
19
26
|
/**
|
|
20
27
|
* Optional session identifier forwarded to LLM providers.
|
|
21
28
|
* Used by providers that support session-based caching (e.g., OpenAI Codex).
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-agent-core",
|
|
4
|
-
"version": "15.5.13",
|
|
4
|
+
"version": "15.6.0",
|
|
5
5
|
"description": "General-purpose agent with transport abstraction, state management, and attachment support",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -35,14 +35,14 @@
|
|
|
35
35
|
"fmt": "biome format --write ."
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
|
-
"@oh-my-pi/pi-ai": "15.
|
|
39
|
-
"@oh-my-pi/pi-natives": "15.
|
|
40
|
-
"@oh-my-pi/pi-utils": "15.
|
|
41
|
-
"@opentelemetry/api": "^1.9.
|
|
38
|
+
"@oh-my-pi/pi-ai": "15.6.0",
|
|
39
|
+
"@oh-my-pi/pi-natives": "15.6.0",
|
|
40
|
+
"@oh-my-pi/pi-utils": "15.6.0",
|
|
41
|
+
"@opentelemetry/api": "^1.9.1"
|
|
42
42
|
},
|
|
43
43
|
"devDependencies": {
|
|
44
|
-
"@opentelemetry/context-async-hooks": "^2.
|
|
45
|
-
"@opentelemetry/sdk-trace-base": "^2.
|
|
44
|
+
"@opentelemetry/context-async-hooks": "^2.7.1",
|
|
45
|
+
"@opentelemetry/sdk-trace-base": "^2.7.1",
|
|
46
46
|
"@types/bun": "^1.3.14"
|
|
47
47
|
},
|
|
48
48
|
"engines": {
|
package/src/agent-loop.ts
CHANGED
|
@@ -441,6 +441,27 @@ interface StepCounter {
|
|
|
441
441
|
count: number;
|
|
442
442
|
}
|
|
443
443
|
|
|
444
|
+
/**
 * Normalizes a caller-supplied `maxToolCallsPerTurn` into a usable cap.
 *
 * @param value - Raw limit as provided by the caller; may be `undefined`.
 * @returns The limit truncated toward zero when it is finite and still
 * positive after truncation; otherwise `undefined`.
 */
function normalizeMaxToolCallsPerTurn(value: number | undefined): number | undefined {
|
|
445
|
+
// `undefined`, NaN and +/-Infinity all yield `undefined`.
if (value === undefined || !Number.isFinite(value)) return undefined;
|
|
446
|
+
// Drop the fractional part; inputs strictly between 0 and 1 become 0 and are rejected below.
const normalized = Math.trunc(value);
|
|
447
|
+
return normalized > 0 ? normalized : undefined;
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
/**
 * Builds a copy of an assistant message with its stop reason rewritten to
 * `"toolUse"` and its error fields cleared.
 *
 * Content blocks are shallow-copied; `toolCall` blocks additionally get their
 * `arguments` deep-copied via `structuredClone`.
 *
 * @param message - Assistant message to copy; it is not mutated.
 * @returns A new message object with `stopReason: "toolUse"` and
 * `errorMessage` / `errorStatus` set to `undefined`.
 */
function cloneAssistantMessageForToolCallCap(message: AssistantMessage): AssistantMessage {
|
|
451
|
+
return {
|
|
452
|
+
...message,
|
|
453
|
+
content: message.content.map(block => {
|
|
454
|
+
if (block.type === "toolCall") {
|
|
455
|
+
// Deep-copy the arguments so the clone does not share that object with the source block.
return { ...block, arguments: structuredClone(block.arguments) };
|
|
456
|
+
}
|
|
457
|
+
return { ...block };
|
|
458
|
+
}),
|
|
459
|
+
// These three fields override whatever the spread above copied from `message`.
stopReason: "toolUse",
|
|
460
|
+
errorMessage: undefined,
|
|
461
|
+
errorStatus: undefined,
|
|
462
|
+
};
|
|
463
|
+
}
|
|
464
|
+
|
|
444
465
|
async function runLoopBody(
|
|
445
466
|
currentContext: AgentContext,
|
|
446
467
|
newMessages: AgentMessage[],
|
|
@@ -562,19 +583,23 @@ async function runLoopBody(
|
|
|
562
583
|
return;
|
|
563
584
|
}
|
|
564
585
|
|
|
565
|
-
//
|
|
566
|
-
//
|
|
567
|
-
//
|
|
568
|
-
//
|
|
569
|
-
//
|
|
570
|
-
//
|
|
571
|
-
//
|
|
572
|
-
//
|
|
573
|
-
//
|
|
574
|
-
//
|
|
586
|
+
// Run tools whenever the turn carries tool_use blocks AND was not truncated.
|
|
587
|
+
// `stop_reason` is provider metadata that never goes back on the wire, so it
|
|
588
|
+
// does not gate continuation validity: replaying a tool_use turn with the
|
|
589
|
+
// tool_results appended is accepted whether the turn ended on `tool_use` or
|
|
590
|
+
// `end_turn` (adaptive/interleaved-thinking Opus routinely emits tool calls
|
|
591
|
+
// under `end_turn`; verified against the live Anthropic API). The only
|
|
592
|
+
// continuation hazard is a thinking block carrying a stale/invalid signature,
|
|
593
|
+
// which `transformMessages` already neutralizes — it strips the signature on
|
|
594
|
+
// non-`toolUse` turns and the encoder downgrades the unsigned block to text,
|
|
595
|
+
// which the API accepts. So treat `stop` (end_turn/pause_turn) the same as
|
|
596
|
+
// `toolUse`. `length` (max_tokens) is the one reason we must NOT run: the
|
|
597
|
+
// trailing tool_use may be truncated with incomplete arguments — those calls
|
|
598
|
+
// are abandoned below. (`error`/`aborted` already returned above.)
|
|
575
599
|
type ToolCallContent = Extract<AssistantMessage["content"][number], { type: "toolCall" }>;
|
|
576
600
|
const toolCalls = message.content.filter((c): c is ToolCallContent => c.type === "toolCall");
|
|
577
|
-
|
|
601
|
+
const runnableStop = message.stopReason === "toolUse" || message.stopReason === "stop";
|
|
602
|
+
hasMoreToolCalls = runnableStop && toolCalls.length > 0;
|
|
578
603
|
|
|
579
604
|
const toolResults: ToolResultMessage[] = [];
|
|
580
605
|
if (hasMoreToolCalls) {
|
|
@@ -596,10 +621,11 @@ async function runLoopBody(
|
|
|
596
621
|
newMessages.push(result);
|
|
597
622
|
}
|
|
598
623
|
} else if (toolCalls.length > 0) {
|
|
599
|
-
//
|
|
600
|
-
// behind.
|
|
601
|
-
// with a placeholder result to keep
|
|
602
|
-
// for any later request that
|
|
624
|
+
// Turn ended on a non-runnable reason (`length` truncation) but left
|
|
625
|
+
// toolCall blocks behind. The trailing call's arguments may be incomplete,
|
|
626
|
+
// so don't execute or continue — pair each with a placeholder result to keep
|
|
627
|
+
// the tool_use/tool_result contract valid for any later request that
|
|
628
|
+
// replays this turn.
|
|
603
629
|
for (const toolCall of toolCalls) {
|
|
604
630
|
const result = createAbortedToolResult(toolCall, stream, "skipped");
|
|
605
631
|
currentContext.messages.push(result);
|
|
@@ -707,11 +733,18 @@ async function streamAssistantResponse(
|
|
|
707
733
|
const dynamicReasoning = config.getReasoning?.();
|
|
708
734
|
const harmonyMitigationEnabled = isHarmonyLeakMitigationTarget(config.model);
|
|
709
735
|
const harmonyAbortController = harmonyMitigationEnabled ? new AbortController() : undefined;
|
|
710
|
-
const
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
736
|
+
const maxToolCallsPerTurn = normalizeMaxToolCallsPerTurn(config.maxToolCallsPerTurn);
|
|
737
|
+
const toolCallCapAbortController = maxToolCallsPerTurn === undefined ? undefined : new AbortController();
|
|
738
|
+
const requestSignals: AbortSignal[] = [];
|
|
739
|
+
if (signal) requestSignals.push(signal);
|
|
740
|
+
if (harmonyAbortController) requestSignals.push(harmonyAbortController.signal);
|
|
741
|
+
if (toolCallCapAbortController) requestSignals.push(toolCallCapAbortController.signal);
|
|
742
|
+
const requestSignal =
|
|
743
|
+
requestSignals.length === 0
|
|
744
|
+
? undefined
|
|
745
|
+
: requestSignals.length === 1
|
|
746
|
+
? requestSignals[0]
|
|
747
|
+
: AbortSignal.any(requestSignals);
|
|
715
748
|
const effectiveTemperature =
|
|
716
749
|
harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
|
|
717
750
|
const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
|
|
@@ -773,6 +806,26 @@ async function streamAssistantResponse(
|
|
|
773
806
|
let addedPartial = false;
|
|
774
807
|
|
|
775
808
|
const responseIterator = response[Symbol.asyncIterator]();
|
|
809
|
+
let completedToolCalls = 0;
|
|
810
|
+
let cappedMessage: AssistantMessage | undefined;
|
|
811
|
+
let capFinalized = false;
|
|
812
|
+
|
|
813
|
+
/**
 * Finalizes the turn with `cappedMessage`, if one has been recorded.
 *
 * On the first successful call it places the capped message in
 * `context.messages`, emits the closing stream event(s), and awaits
 * `finishChat`. Later calls only re-request iterator shutdown and return the
 * same message.
 *
 * @returns The capped message, or `undefined` when no cap has been recorded.
 */
const finishCappedAssistantMessage = async (): Promise<AssistantMessage | undefined> => {
|
|
814
|
+
if (!cappedMessage) return undefined;
|
|
815
|
+
// Best-effort shutdown of the provider iterator; a rejected `return()` is deliberately ignored.
responseIterator.return?.()?.catch(() => {});
|
|
816
|
+
// Guard so the context and stream are updated only once. The flag is set after `finishChat` resolves.
if (!capFinalized) {
|
|
817
|
+
if (addedPartial) {
|
|
818
|
+
// NOTE(review): presumably the last context message is the streamed partial — confirm where `addedPartial` is set.
context.messages[context.messages.length - 1] = cappedMessage;
|
|
819
|
+
} else {
|
|
820
|
+
context.messages.push(cappedMessage);
|
|
821
|
+
// `message_start` is pushed only on this branch, before the `message_end` below.
stream.push({ type: "message_start", message: { ...cappedMessage } });
|
|
822
|
+
}
|
|
823
|
+
stream.push({ type: "message_end", message: cappedMessage });
|
|
824
|
+
await finishChat(cappedMessage);
|
|
825
|
+
capFinalized = true;
|
|
826
|
+
}
|
|
827
|
+
return cappedMessage;
|
|
828
|
+
};
|
|
776
829
|
|
|
777
830
|
// Set up a single abort race: register the abort listener once for the whole
|
|
778
831
|
// stream and reuse the same race promise for every iterator.next() instead of
|
|
@@ -798,6 +851,10 @@ async function streamAssistantResponse(
|
|
|
798
851
|
if (abortRacePromise) {
|
|
799
852
|
const result = await Promise.race([responseIterator.next(), abortRacePromise]);
|
|
800
853
|
if (result === ABORTED) {
|
|
854
|
+
if (toolCallCapAbortController?.signal.aborted) {
|
|
855
|
+
const capped = await finishCappedAssistantMessage();
|
|
856
|
+
if (capped) return capped;
|
|
857
|
+
}
|
|
801
858
|
responseIterator.return?.()?.catch(() => {});
|
|
802
859
|
const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
|
|
803
860
|
await finishChat(aborted);
|
|
@@ -808,6 +865,10 @@ async function streamAssistantResponse(
|
|
|
808
865
|
next = await responseIterator.next();
|
|
809
866
|
}
|
|
810
867
|
if (requestSignal?.aborted) {
|
|
868
|
+
if (toolCallCapAbortController?.signal.aborted) {
|
|
869
|
+
const capped = await finishCappedAssistantMessage();
|
|
870
|
+
if (capped) return capped;
|
|
871
|
+
}
|
|
811
872
|
const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
|
|
812
873
|
await finishChat(aborted);
|
|
813
874
|
return aborted;
|
|
@@ -848,6 +909,15 @@ async function streamAssistantResponse(
|
|
|
848
909
|
assistantMessageEvent: event,
|
|
849
910
|
message: { ...partialMessage },
|
|
850
911
|
});
|
|
912
|
+
if (event.type === "toolcall_end" && maxToolCallsPerTurn !== undefined) {
|
|
913
|
+
completedToolCalls++;
|
|
914
|
+
if (completedToolCalls >= maxToolCallsPerTurn) {
|
|
915
|
+
cappedMessage = cloneAssistantMessageForToolCallCap(partialMessage);
|
|
916
|
+
toolCallCapAbortController?.abort();
|
|
917
|
+
const capped = await finishCappedAssistantMessage();
|
|
918
|
+
if (capped) return capped;
|
|
919
|
+
}
|
|
920
|
+
}
|
|
851
921
|
}
|
|
852
922
|
break;
|
|
853
923
|
|
package/src/agent.ts
CHANGED
|
@@ -102,6 +102,12 @@ export interface AgentOptions {
|
|
|
102
102
|
*/
|
|
103
103
|
interruptMode?: "immediate" | "wait";
|
|
104
104
|
|
|
105
|
+
/**
|
|
106
|
+
* Maximum completed tool calls to accept from one streamed assistant turn before
|
|
107
|
+
* executing the batch. Undefined, non-finite, or less-than-1 values disable batching.
|
|
108
|
+
*/
|
|
109
|
+
maxToolCallsPerTurn?: number;
|
|
110
|
+
|
|
105
111
|
/**
|
|
106
112
|
* API format for Kimi Code provider: "openai" or "anthropic" (default: "anthropic")
|
|
107
113
|
*/
|
|
@@ -269,6 +275,7 @@ export class Agent {
|
|
|
269
275
|
#steeringMode: "all" | "one-at-a-time";
|
|
270
276
|
#followUpMode: "all" | "one-at-a-time";
|
|
271
277
|
#interruptMode: "immediate" | "wait";
|
|
278
|
+
#maxToolCallsPerTurn?: number;
|
|
272
279
|
#sessionId?: string;
|
|
273
280
|
#metadata?: Record<string, unknown>;
|
|
274
281
|
#metadataResolver?: (provider: string) => Record<string, unknown> | undefined;
|
|
@@ -325,6 +332,7 @@ export class Agent {
|
|
|
325
332
|
this.#steeringMode = opts.steeringMode || "one-at-a-time";
|
|
326
333
|
this.#followUpMode = opts.followUpMode || "one-at-a-time";
|
|
327
334
|
this.#interruptMode = opts.interruptMode || "immediate";
|
|
335
|
+
this.#maxToolCallsPerTurn = opts.maxToolCallsPerTurn;
|
|
328
336
|
this.streamFn = opts.streamFn || streamSimple;
|
|
329
337
|
this.#sessionId = opts.sessionId;
|
|
330
338
|
this.#providerSessionState = opts.providerSessionState;
|
|
@@ -547,6 +555,14 @@ export class Agent {
|
|
|
547
555
|
this.#maxRetryDelayMs = value;
|
|
548
556
|
}
|
|
549
557
|
|
|
558
|
+
/** Returns the stored `maxToolCallsPerTurn` value exactly as it was last set (not normalized). */
get maxToolCallsPerTurn(): number | undefined {
|
|
559
|
+
return this.#maxToolCallsPerTurn;
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
/**
 * Stores the per-turn tool-call cap as given; no validation happens here.
 * The agent loop runs the configured value through
 * `normalizeMaxToolCallsPerTurn` when it streams an assistant response.
 */
set maxToolCallsPerTurn(value: number | undefined) {
|
|
563
|
+
this.#maxToolCallsPerTurn = value;
|
|
564
|
+
}
|
|
565
|
+
|
|
550
566
|
get state(): AgentState {
|
|
551
567
|
return this.#state;
|
|
552
568
|
}
|
|
@@ -917,6 +933,7 @@ export class Agent {
|
|
|
917
933
|
serviceTier: this.#serviceTier,
|
|
918
934
|
hideThinkingSummary: this.#hideThinkingSummary,
|
|
919
935
|
interruptMode: this.#interruptMode,
|
|
936
|
+
maxToolCallsPerTurn: this.#maxToolCallsPerTurn,
|
|
920
937
|
sessionId: this.#sessionId,
|
|
921
938
|
metadata: this.#metadataResolver ? undefined : this.#metadata,
|
|
922
939
|
metadataResolver: this.#metadataResolver,
|
|
@@ -1091,7 +1108,7 @@ export class Agent {
|
|
|
1091
1108
|
|
|
1092
1109
|
/** Calculate total text length from an assistant message's content blocks */
|
|
1093
1110
|
#getAssistantTextLength(message: AgentMessage | null): number {
|
|
1094
|
-
if (
|
|
1111
|
+
if (message?.role !== "assistant" || !Array.isArray(message.content)) {
|
|
1095
1112
|
return 0;
|
|
1096
1113
|
}
|
|
1097
1114
|
let length = 0;
|
package/src/harmony-leak.ts
CHANGED
|
@@ -230,7 +230,7 @@ export function recoverHarmonyToolCall(
|
|
|
230
230
|
): HarmonyRecoveredToolCall | undefined {
|
|
231
231
|
if (detection.surface !== "tool_arg" || detection.contentIndex === undefined) return undefined;
|
|
232
232
|
const block = message.content[detection.contentIndex];
|
|
233
|
-
if (
|
|
233
|
+
if (block?.type !== "toolCall") return undefined;
|
|
234
234
|
|
|
235
235
|
const config = RECOVERY_REGISTRY[block.name];
|
|
236
236
|
if (!config) return undefined;
|
package/src/types.ts
CHANGED
|
@@ -38,6 +38,14 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
|
|
|
38
38
|
*/
|
|
39
39
|
interruptMode?: "immediate" | "wait";
|
|
40
40
|
|
|
41
|
+
/**
|
|
42
|
+
* Maximum completed tool calls to accept from one streamed assistant turn before
|
|
43
|
+
* cutting the provider stream and executing that batch. The cap is enforced on
|
|
44
|
+
* `toolcall_end` so every executed call has complete arguments. Undefined, non-finite, or less-than-1 values disable
|
|
45
|
+
* batching.
|
|
46
|
+
*/
|
|
47
|
+
maxToolCallsPerTurn?: number;
|
|
48
|
+
|
|
41
49
|
/**
|
|
42
50
|
* Optional session identifier forwarded to LLM providers.
|
|
43
51
|
* Used by providers that support session-based caching (e.g., OpenAI Codex).
|