@oh-my-pi/pi-agent-core 15.13.2 → 15.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/dist/types/compaction/utils.d.ts +2 -1
- package/dist/types/types.d.ts +9 -0
- package/package.json +6 -6
- package/src/agent-loop.ts +28 -1
- package/src/compaction/branch-summarization.ts +2 -1
- package/src/compaction/compaction.ts +4 -3
- package/src/compaction/pruning.ts +12 -1
- package/src/compaction/utils.ts +44 -11
- package/src/types.ts +9 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.13.3] - 2026-06-15
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added the `interruptible` tool field: when set, the agent loop may abort the tool mid-execution to deliver a queued steering message (honored only in `immediate` interrupt mode).
|
|
10
|
+
- Added support for `gemini` and `gemma` as valid owned tool syntax values in environment configuration
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- Fixed `pruneToolOutputs` blanking tiny tool results during overflow pruning: results below `50` tokens (`MIN_PRUNE_TOKENS`) are no longer replaced with the `[Output truncated - N tokens]` placeholder, which cost more tokens than the result itself and churned the prompt cache for zero savings.
|
|
15
|
+
|
|
5
16
|
## [15.13.2] - 2026-06-15
|
|
6
17
|
|
|
7
18
|
### Breaking Changes
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* Shared utilities for compaction and branch summarization.
|
|
3
3
|
*/
|
|
4
4
|
import type { Message } from "@oh-my-pi/pi-ai";
|
|
5
|
+
import { type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
5
6
|
import type { AgentMessage } from "../types";
|
|
6
7
|
export interface FileOperations {
|
|
7
8
|
read: Set<string>;
|
|
@@ -44,5 +45,5 @@ export declare function upsertFileOperations(summary: string, readFiles: string[
|
|
|
44
45
|
* This prevents the model from treating it as a conversation to continue.
|
|
45
46
|
* Call convertToLlm() first to handle custom message types.
|
|
46
47
|
*/
|
|
47
|
-
export declare function serializeConversation(messages: Message[]): string;
|
|
48
|
+
export declare function serializeConversation(messages: Message[], syntax?: ToolCallSyntax): string;
|
|
48
49
|
export declare const SUMMARIZATION_SYSTEM_PROMPT: string;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -428,6 +428,15 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
|
|
|
428
428
|
concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
|
|
429
429
|
/** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
|
|
430
430
|
lenientArgValidation?: boolean;
|
|
431
|
+
/**
|
|
432
|
+
* If true, the agent loop may abort this tool mid-execution to deliver a
|
|
433
|
+
* queued steering message (instead of waiting for the tool to finish on its
|
|
434
|
+
* own). Set only on tools that purely *wait* and observe their abort signal
|
|
435
|
+
* cleanly (e.g. the `job` poll), so the abort surfaces the tool's current
|
|
436
|
+
* snapshot rather than corrupting a side effect. Honored only when
|
|
437
|
+
* `interruptMode` is "immediate".
|
|
438
|
+
*/
|
|
439
|
+
interruptible?: boolean;
|
|
431
440
|
/**
|
|
432
441
|
* Controls how the INTENT_FIELD (`_i`) is handled for this tool.
|
|
433
442
|
* - `"require"` (default): `_i` is injected and required in the parameter schema.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-agent-core",
|
|
4
|
-
"version": "15.13.2",
|
|
4
|
+
"version": "15.13.3",
|
|
5
5
|
"description": "General-purpose agent with transport abstraction, state management, and attachment support",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -35,11 +35,11 @@
|
|
|
35
35
|
"fmt": "biome format --write ."
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
|
-
"@oh-my-pi/pi-ai": "15.13.2",
|
|
39
|
-
"@oh-my-pi/pi-catalog": "15.13.2",
|
|
40
|
-
"@oh-my-pi/pi-natives": "15.13.2",
|
|
41
|
-
"@oh-my-pi/pi-utils": "15.13.2",
|
|
42
|
-
"@oh-my-pi/snapcompact": "15.13.2",
|
|
38
|
+
"@oh-my-pi/pi-ai": "15.13.3",
|
|
39
|
+
"@oh-my-pi/pi-catalog": "15.13.3",
|
|
40
|
+
"@oh-my-pi/pi-natives": "15.13.3",
|
|
41
|
+
"@oh-my-pi/pi-utils": "15.13.3",
|
|
42
|
+
"@oh-my-pi/snapcompact": "15.13.3",
|
|
43
43
|
"@opentelemetry/api": "^1.9.1"
|
|
44
44
|
},
|
|
45
45
|
"devDependencies": {
|
package/src/agent-loop.ts
CHANGED
|
@@ -74,6 +74,14 @@ const ABORTED: unique symbol = Symbol("agent-loop-aborted");
|
|
|
74
74
|
*/
|
|
75
75
|
const MAX_PAUSED_TURN_CONTINUATIONS = 8;
|
|
76
76
|
|
|
77
|
+
/**
|
|
78
|
+
* Cadence (ms) for polling queued steering while an `interruptible` tool is in
|
|
79
|
+
* flight, so a steer cuts the wait short instead of sitting idle until the
|
|
80
|
+
* tool's own window elapses. A cheap synchronous queue check; latency-bounded
|
|
81
|
+
* at one tick.
|
|
82
|
+
*/
|
|
83
|
+
const STEERING_INTERRUPT_POLL_MS = 250;
|
|
84
|
+
|
|
77
85
|
class HarmonyLeakInterruption extends Error {
|
|
78
86
|
constructor(
|
|
79
87
|
readonly detection: HarmonyDetection,
|
|
@@ -98,6 +106,8 @@ function resolveOwnedToolSyntaxFromEnv(value: string | undefined): ToolCallSynta
|
|
|
98
106
|
case "harmony":
|
|
99
107
|
case "pi":
|
|
100
108
|
case "qwen3":
|
|
109
|
+
case "gemini":
|
|
110
|
+
case "gemma":
|
|
101
111
|
return value;
|
|
102
112
|
default:
|
|
103
113
|
return undefined;
|
|
@@ -1795,7 +1805,24 @@ async function executeToolCalls(
|
|
|
1795
1805
|
}
|
|
1796
1806
|
}
|
|
1797
1807
|
|
|
1798
|
-
await Promise.allSettled(tasks);
|
|
1808
|
+
// While an interruptible tool is in flight (e.g. a `job` poll blocking on
|
|
1809
|
+
// background work), a queued steer would otherwise wait out the tool's own
|
|
1810
|
+
// window. Poll the steering queue and let checkSteering() abort the shared
|
|
1811
|
+
// tool signal so the wait returns early; the boundary dequeue below then
|
|
1812
|
+
// injects it. Gated on immediate-interrupt mode + an interruptible tool;
|
|
1813
|
+
// checkSteering is idempotent (no-op once triggered).
|
|
1814
|
+
const watchSteeringWhileRunning =
|
|
1815
|
+
shouldInterruptImmediately &&
|
|
1816
|
+
(hasSteeringMessages !== undefined || getSteeringMessages !== undefined) &&
|
|
1817
|
+
records.some(r => r.tool?.interruptible === true);
|
|
1818
|
+
const steeringWatchTimer = watchSteeringWhileRunning
|
|
1819
|
+
? setInterval(() => void checkSteering(), STEERING_INTERRUPT_POLL_MS)
|
|
1820
|
+
: undefined;
|
|
1821
|
+
try {
|
|
1822
|
+
await Promise.allSettled(tasks);
|
|
1823
|
+
} finally {
|
|
1824
|
+
if (steeringWatchTimer !== undefined) clearInterval(steeringWatchTimer);
|
|
1825
|
+
}
|
|
1799
1826
|
// Yield after batch tool execution to let GC and I/O catch up,
|
|
1800
1827
|
// especially when tool results are large (e.g. bash output).
|
|
1801
1828
|
await yieldIfDue();
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import type { ApiKey, Model } from "@oh-my-pi/pi-ai";
|
|
9
|
+
import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
|
|
9
10
|
import { prompt } from "@oh-my-pi/pi-utils";
|
|
10
11
|
import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
|
|
11
12
|
import type { AgentMessage } from "../types";
|
|
@@ -290,7 +291,7 @@ export async function generateBranchSummary(
|
|
|
290
291
|
// Transform to LLM-compatible messages, then serialize to text
|
|
291
292
|
// Serialization prevents the model from treating it as a conversation to continue
|
|
292
293
|
const llmMessages = (options.convertToLlm ?? defaultConvertToLlm)(messages);
|
|
293
|
-
const conversationText = serializeConversation(llmMessages);
|
|
294
|
+
const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
|
|
294
295
|
|
|
295
296
|
// Build prompt
|
|
296
297
|
const instructions = customInstructions || BRANCH_SUMMARY_PROMPT;
|
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
type Usage,
|
|
19
19
|
withAuth,
|
|
20
20
|
} from "@oh-my-pi/pi-ai";
|
|
21
|
+
import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
|
|
21
22
|
import { clampThinkingLevelForModel } from "@oh-my-pi/pi-catalog/model-thinking";
|
|
22
23
|
import { countTokens } from "@oh-my-pi/pi-natives";
|
|
23
24
|
import { logger, prompt } from "@oh-my-pi/pi-utils";
|
|
@@ -642,7 +643,7 @@ export async function generateSummary(
|
|
|
642
643
|
// Serialize conversation to text so model doesn't try to continue it
|
|
643
644
|
// Convert to LLM messages first (handles custom app messages when caller provides a transformer).
|
|
644
645
|
const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(currentMessages);
|
|
645
|
-
const conversationText = serializeConversation(llmMessages);
|
|
646
|
+
const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
|
|
646
647
|
|
|
647
648
|
// Build the prompt with conversation wrapped in tags
|
|
648
649
|
let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
|
|
@@ -790,7 +791,7 @@ async function generateShortSummary(
|
|
|
790
791
|
): Promise<string> {
|
|
791
792
|
const maxTokens = Math.min(512, Math.floor(0.2 * reserveTokens));
|
|
792
793
|
const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(recentMessages);
|
|
793
|
-
const conversationText = serializeConversation(llmMessages);
|
|
794
|
+
const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
|
|
794
795
|
|
|
795
796
|
let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
|
|
796
797
|
if (historySummary) {
|
|
@@ -1155,7 +1156,7 @@ async function generateTurnPrefixSummary(
|
|
|
1155
1156
|
const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix
|
|
1156
1157
|
|
|
1157
1158
|
const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(messages);
|
|
1158
|
-
const conversationText = serializeConversation(llmMessages);
|
|
1159
|
+
const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
|
|
1159
1160
|
const promptText = `<conversation>\n${conversationText}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`;
|
|
1160
1161
|
const summarizationMessages = [
|
|
1161
1162
|
{
|
|
@@ -81,6 +81,16 @@ function createPrunedNotice(tokens: number): string {
|
|
|
81
81
|
return `[Output truncated - ${tokens} tokens]`;
|
|
82
82
|
}
|
|
83
83
|
|
|
84
|
+
/**
|
|
85
|
+
* Generic age-based pruning floor. Below this, blanking a result to
|
|
86
|
+
* `[Output truncated - N tokens]` recovers nothing — the placeholder itself
|
|
87
|
+
* costs ~8 tokens, so a sub-floor result grows the context (and churns the
|
|
88
|
+
* prompt cache) instead of shrinking it. Superseded/useless results keep their
|
|
89
|
+
* own rules: useless already drops no-savings candidates, superseded prunes for
|
|
90
|
+
* correctness regardless of size.
|
|
91
|
+
*/
|
|
92
|
+
const MIN_PRUNE_TOKENS = 50;
|
|
93
|
+
|
|
84
94
|
function getToolResultMessage(entry: SessionEntry): ToolResultMessage | undefined {
|
|
85
95
|
if (entry.type !== "message") return undefined;
|
|
86
96
|
const message = entry.message as AgentMessage;
|
|
@@ -271,7 +281,8 @@ export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig =
|
|
|
271
281
|
// any age).
|
|
272
282
|
const superseded = supersededMessages?.has(message) ?? false;
|
|
273
283
|
const useless = uselessMessages?.has(message) ?? false;
|
|
274
|
-
if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected)) {
|
|
284
|
+
const tooSmall = tokens < MIN_PRUNE_TOKENS;
|
|
285
|
+
if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected || tooSmall)) {
|
|
275
286
|
accumulatedTokens += tokens;
|
|
276
287
|
continue;
|
|
277
288
|
}
|
package/src/compaction/utils.ts
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
* Shared utilities for compaction and branch summarization.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import type { Message } from "@oh-my-pi/pi-ai";
|
|
5
|
+
import type { Message, ToolCall } from "@oh-my-pi/pi-ai";
|
|
6
|
+
import { type Grammar, type GrammarToolResult, getInbandGrammar, type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
6
7
|
import { formatGroupedPaths, prompt } from "@oh-my-pi/pi-utils";
|
|
7
8
|
import type { AgentMessage } from "../types";
|
|
8
9
|
import fileOperationsTemplate from "./prompts/file-operations.md" with { type: "text" };
|
|
@@ -188,7 +189,8 @@ function truncateForSummary(text: string, maxChars: number): string {
|
|
|
188
189
|
* This prevents the model from treating it as a conversation to continue.
|
|
189
190
|
* Call convertToLlm() first to handle custom message types.
|
|
190
191
|
*/
|
|
191
|
-
export function serializeConversation(messages: Message[]): string {
|
|
192
|
+
export function serializeConversation(messages: Message[], syntax?: ToolCallSyntax): string {
|
|
193
|
+
const grammar = syntax ? getInbandGrammar(syntax) : undefined;
|
|
192
194
|
const parts: string[] = [];
|
|
193
195
|
|
|
194
196
|
// Tool results flagged contextually useless (and their paired calls) are
|
|
@@ -215,7 +217,7 @@ export function serializeConversation(messages: Message[]): string {
|
|
|
215
217
|
} else if (msg.role === "assistant") {
|
|
216
218
|
const textParts: string[] = [];
|
|
217
219
|
const thinkingParts: string[] = [];
|
|
218
|
-
const toolCalls: string[] = [];
|
|
220
|
+
const toolCalls: ToolCall[] = [];
|
|
219
221
|
|
|
220
222
|
for (const block of msg.content) {
|
|
221
223
|
if (block.type === "text") {
|
|
@@ -224,22 +226,18 @@ export function serializeConversation(messages: Message[]): string {
|
|
|
224
226
|
thinkingParts.push(block.thinking);
|
|
225
227
|
} else if (block.type === "toolCall") {
|
|
226
228
|
if (uselessCallIds.has(block.id)) continue;
|
|
227
|
-
const args = block.arguments as Record<string, unknown>;
|
|
228
|
-
const argsStr = Object.entries(args)
|
|
229
|
-
.map(([k, v]) => `${k}=${JSON.stringify(v)}`)
|
|
230
|
-
.join(", ");
|
|
231
|
-
toolCalls.push(`${block.name}(${argsStr})`);
|
|
229
|
+
toolCalls.push(block);
|
|
232
230
|
}
|
|
233
231
|
}
|
|
234
232
|
|
|
235
233
|
if (thinkingParts.length > 0) {
|
|
236
|
-
parts.push(`[
|
|
234
|
+
parts.push(`[Think]: ${thinkingParts.join("\n")}`);
|
|
237
235
|
}
|
|
238
236
|
if (textParts.length > 0) {
|
|
239
237
|
parts.push(`[Assistant]: ${textParts.join("\n")}`);
|
|
240
238
|
}
|
|
241
239
|
if (toolCalls.length > 0) {
|
|
242
|
-
parts.push(`[
|
|
240
|
+
parts.push(`[Tool Call]: ${renderToolCalls(toolCalls, grammar)}`);
|
|
243
241
|
}
|
|
244
242
|
} else if (msg.role === "toolResult") {
|
|
245
243
|
if (uselessCallIds.has(msg.toolCallId)) continue;
|
|
@@ -248,7 +246,10 @@ export function serializeConversation(messages: Message[]): string {
|
|
|
248
246
|
.map(c => c.text)
|
|
249
247
|
.join("");
|
|
250
248
|
if (content) {
|
|
251
|
-
|
|
249
|
+
const text = truncateForSummary(content, TOOL_RESULT_MAX_CHARS);
|
|
250
|
+
parts.push(
|
|
251
|
+
`[Tool Result]: ${renderToolResult(msg.toolCallId, msg.toolName, msg.isError === true, text, grammar)}`,
|
|
252
|
+
);
|
|
252
253
|
}
|
|
253
254
|
}
|
|
254
255
|
}
|
|
@@ -256,6 +257,38 @@ export function serializeConversation(messages: Message[]): string {
|
|
|
256
257
|
return parts.join("\n\n");
|
|
257
258
|
}
|
|
258
259
|
|
|
260
|
+
/**
|
|
261
|
+
* Render an assistant turn's tool calls. With a grammar, emit the model's
|
|
262
|
+
* native invocation block; otherwise fall back to a compact `name(args)` list.
|
|
263
|
+
*/
|
|
264
|
+
function renderToolCalls(calls: ToolCall[], grammar: Grammar | undefined): string {
|
|
265
|
+
if (grammar) return grammar.renderAssistantToolCalls(calls);
|
|
266
|
+
return calls
|
|
267
|
+
.map(call => {
|
|
268
|
+
const argsStr = Object.entries(call.arguments as Record<string, unknown>)
|
|
269
|
+
.map(([k, v]) => `${k}=${JSON.stringify(v)}`)
|
|
270
|
+
.join(", ");
|
|
271
|
+
return `${call.name}(${argsStr})`;
|
|
272
|
+
})
|
|
273
|
+
.join("; ");
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
/**
|
|
277
|
+
* Render a single tool result. With a grammar, emit the model's native
|
|
278
|
+
* tool-result envelope; otherwise return the (already truncated) text verbatim.
|
|
279
|
+
*/
|
|
280
|
+
function renderToolResult(
|
|
281
|
+
id: string,
|
|
282
|
+
name: string,
|
|
283
|
+
isError: boolean,
|
|
284
|
+
text: string,
|
|
285
|
+
grammar: Grammar | undefined,
|
|
286
|
+
): string {
|
|
287
|
+
if (!grammar) return text;
|
|
288
|
+
const result: GrammarToolResult = { id, name, index: 0, text, isError };
|
|
289
|
+
return grammar.renderToolResults([result]);
|
|
290
|
+
}
|
|
291
|
+
|
|
259
292
|
// ============================================================================
|
|
260
293
|
// Summarization System Prompt
|
|
261
294
|
// ============================================================================
|
package/src/types.ts
CHANGED
|
@@ -503,6 +503,15 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
|
|
|
503
503
|
concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
|
|
504
504
|
/** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
|
|
505
505
|
lenientArgValidation?: boolean;
|
|
506
|
+
/**
|
|
507
|
+
* If true, the agent loop may abort this tool mid-execution to deliver a
|
|
508
|
+
* queued steering message (instead of waiting for the tool to finish on its
|
|
509
|
+
* own). Set only on tools that purely *wait* and observe their abort signal
|
|
510
|
+
* cleanly (e.g. the `job` poll), so the abort surfaces the tool's current
|
|
511
|
+
* snapshot rather than corrupting a side effect. Honored only when
|
|
512
|
+
* `interruptMode` is "immediate".
|
|
513
|
+
*/
|
|
514
|
+
interruptible?: boolean;
|
|
506
515
|
/**
|
|
507
516
|
* Controls how the INTENT_FIELD (`_i`) is handled for this tool.
|
|
508
517
|
* - `"require"` (default): `_i` is injected and required in the parameter schema.
|