@oh-my-pi/pi-agent-core 15.13.2 → 15.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,17 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.13.3] - 2026-06-15
6
+
7
+ ### Added
8
+
9
+ - Added the `interruptible` tool field: when set, the agent loop may abort the tool mid-execution to deliver a queued steering message (honored only in `immediate` interrupt mode).
10
+ - Added support for `gemini` and `gemma` as valid owned tool syntax values in environment configuration.
11
+
12
+ ### Fixed
13
+
14
+ - Fixed `pruneToolOutputs` blanking tiny tool results during age-based overflow pruning: results below `50` tokens (`MIN_PRUNE_TOKENS`) are no longer replaced with the `[Output truncated - N tokens]` placeholder, which cost more tokens than the result itself and churned the prompt cache for zero savings. Superseded and useless results keep their own pruning rules.
15
+
5
16
  ## [15.13.2] - 2026-06-15
6
17
 
7
18
  ### Breaking Changes
@@ -2,6 +2,7 @@
2
2
  * Shared utilities for compaction and branch summarization.
3
3
  */
4
4
  import type { Message } from "@oh-my-pi/pi-ai";
5
+ import { type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
5
6
  import type { AgentMessage } from "../types";
6
7
  export interface FileOperations {
7
8
  read: Set<string>;
@@ -44,5 +45,5 @@ export declare function upsertFileOperations(summary: string, readFiles: string[
44
45
  * This prevents the model from treating it as a conversation to continue.
45
46
  * Call convertToLlm() first to handle custom message types.
46
47
  */
47
- export declare function serializeConversation(messages: Message[]): string;
48
+ export declare function serializeConversation(messages: Message[], syntax?: ToolCallSyntax): string;
48
49
  export declare const SUMMARIZATION_SYSTEM_PROMPT: string;
@@ -428,6 +428,15 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
428
428
  concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
429
429
  /** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
430
430
  lenientArgValidation?: boolean;
431
+ /**
432
+ * If true, the agent loop may abort this tool mid-execution to deliver a
433
+ * queued steering message (instead of waiting for the tool to finish on its
434
+ * own). Set only on tools that purely *wait* and observe their abort signal
435
+ * cleanly (e.g. the `job` poll), so the abort surfaces the tool's current
436
+ * snapshot rather than corrupting a side effect. Honored only when
437
+ * `interruptMode` is "immediate".
438
+ */
439
+ interruptible?: boolean;
431
440
  /**
432
441
  * Controls how the INTENT_FIELD (`_i`) is handled for this tool.
433
442
  * - `"require"` (default): `_i` is injected and required in the parameter schema.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-agent-core",
4
- "version": "15.13.2",
4
+ "version": "15.13.3",
5
5
  "description": "General-purpose agent with transport abstraction, state management, and attachment support",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -35,11 +35,11 @@
35
35
  "fmt": "biome format --write ."
36
36
  },
37
37
  "dependencies": {
38
- "@oh-my-pi/pi-ai": "15.13.2",
39
- "@oh-my-pi/pi-catalog": "15.13.2",
40
- "@oh-my-pi/pi-natives": "15.13.2",
41
- "@oh-my-pi/pi-utils": "15.13.2",
42
- "@oh-my-pi/snapcompact": "15.13.2",
38
+ "@oh-my-pi/pi-ai": "15.13.3",
39
+ "@oh-my-pi/pi-catalog": "15.13.3",
40
+ "@oh-my-pi/pi-natives": "15.13.3",
41
+ "@oh-my-pi/pi-utils": "15.13.3",
42
+ "@oh-my-pi/snapcompact": "15.13.3",
43
43
  "@opentelemetry/api": "^1.9.1"
44
44
  },
45
45
  "devDependencies": {
package/src/agent-loop.ts CHANGED
@@ -74,6 +74,14 @@ const ABORTED: unique symbol = Symbol("agent-loop-aborted");
74
74
  */
75
75
  const MAX_PAUSED_TURN_CONTINUATIONS = 8;
76
76
 
77
+ /**
78
+ * Cadence (ms) for polling queued steering while an `interruptible` tool is in
79
+ * flight, so a steer cuts the wait short instead of sitting idle until the
80
+ * tool's own window elapses. A cheap synchronous queue check; latency-bounded
81
+ * at one tick.
82
+ */
83
+ const STEERING_INTERRUPT_POLL_MS = 250;
84
+
77
85
  class HarmonyLeakInterruption extends Error {
78
86
  constructor(
79
87
  readonly detection: HarmonyDetection,
@@ -98,6 +106,8 @@ function resolveOwnedToolSyntaxFromEnv(value: string | undefined): ToolCallSynta
98
106
  case "harmony":
99
107
  case "pi":
100
108
  case "qwen3":
109
+ case "gemini":
110
+ case "gemma":
101
111
  return value;
102
112
  default:
103
113
  return undefined;
@@ -1795,7 +1805,24 @@ async function executeToolCalls(
1795
1805
  }
1796
1806
  }
1797
1807
 
1798
- await Promise.allSettled(tasks);
1808
+ // While an interruptible tool is in flight (e.g. a `job` poll blocking on
1809
+ // background work), a queued steer would otherwise wait out the tool's own
1810
+ // window. Poll the steering queue and let checkSteering() abort the shared
1811
+ // tool signal so the wait returns early; the boundary dequeue below then
1812
+ // injects it. Gated on immediate-interrupt mode + an interruptible tool;
1813
+ // checkSteering is idempotent (no-op once triggered).
1814
+ const watchSteeringWhileRunning =
1815
+ shouldInterruptImmediately &&
1816
+ (hasSteeringMessages !== undefined || getSteeringMessages !== undefined) &&
1817
+ records.some(r => r.tool?.interruptible === true);
1818
+ const steeringWatchTimer = watchSteeringWhileRunning
1819
+ ? setInterval(() => void checkSteering(), STEERING_INTERRUPT_POLL_MS)
1820
+ : undefined;
1821
+ try {
1822
+ await Promise.allSettled(tasks);
1823
+ } finally {
1824
+ if (steeringWatchTimer !== undefined) clearInterval(steeringWatchTimer);
1825
+ }
1799
1826
  // Yield after batch tool execution to let GC and I/O catch up,
1800
1827
  // especially when tool results are large (e.g. bash output).
1801
1828
  await yieldIfDue();
@@ -6,6 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { ApiKey, Model } from "@oh-my-pi/pi-ai";
9
+ import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
9
10
  import { prompt } from "@oh-my-pi/pi-utils";
10
11
  import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
11
12
  import type { AgentMessage } from "../types";
@@ -290,7 +291,7 @@ export async function generateBranchSummary(
290
291
  // Transform to LLM-compatible messages, then serialize to text
291
292
  // Serialization prevents the model from treating it as a conversation to continue
292
293
  const llmMessages = (options.convertToLlm ?? defaultConvertToLlm)(messages);
293
- const conversationText = serializeConversation(llmMessages);
294
+ const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
294
295
 
295
296
  // Build prompt
296
297
  const instructions = customInstructions || BRANCH_SUMMARY_PROMPT;
@@ -18,6 +18,7 @@ import {
18
18
  type Usage,
19
19
  withAuth,
20
20
  } from "@oh-my-pi/pi-ai";
21
+ import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
21
22
  import { clampThinkingLevelForModel } from "@oh-my-pi/pi-catalog/model-thinking";
22
23
  import { countTokens } from "@oh-my-pi/pi-natives";
23
24
  import { logger, prompt } from "@oh-my-pi/pi-utils";
@@ -642,7 +643,7 @@ export async function generateSummary(
642
643
  // Serialize conversation to text so model doesn't try to continue it
643
644
  // Convert to LLM messages first (handles custom app messages when caller provides a transformer).
644
645
  const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(currentMessages);
645
- const conversationText = serializeConversation(llmMessages);
646
+ const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
646
647
 
647
648
  // Build the prompt with conversation wrapped in tags
648
649
  let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
@@ -790,7 +791,7 @@ async function generateShortSummary(
790
791
  ): Promise<string> {
791
792
  const maxTokens = Math.min(512, Math.floor(0.2 * reserveTokens));
792
793
  const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(recentMessages);
793
- const conversationText = serializeConversation(llmMessages);
794
+ const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
794
795
 
795
796
  let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
796
797
  if (historySummary) {
@@ -1155,7 +1156,7 @@ async function generateTurnPrefixSummary(
1155
1156
  const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix
1156
1157
 
1157
1158
  const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(messages);
1158
- const conversationText = serializeConversation(llmMessages);
1159
+ const conversationText = serializeConversation(llmMessages, preferredToolSyntax(model.id));
1159
1160
  const promptText = `<conversation>\n${conversationText}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`;
1160
1161
  const summarizationMessages = [
1161
1162
  {
@@ -81,6 +81,16 @@ function createPrunedNotice(tokens: number): string {
81
81
  return `[Output truncated - ${tokens} tokens]`;
82
82
  }
83
83
 
84
+ /**
85
+ * Generic age-based pruning floor. Below this, blanking a result to
86
+ * `[Output truncated - N tokens]` recovers nothing — the placeholder itself
87
+ * costs ~8 tokens, so a sub-floor result grows the context (and churns the
88
+ * prompt cache) instead of shrinking it. Superseded/useless results keep their
89
+ * own rules: useless already drops no-savings candidates, superseded prunes for
90
+ * correctness regardless of size.
91
+ */
92
+ const MIN_PRUNE_TOKENS = 50;
93
+
84
94
  function getToolResultMessage(entry: SessionEntry): ToolResultMessage | undefined {
85
95
  if (entry.type !== "message") return undefined;
86
96
  const message = entry.message as AgentMessage;
@@ -271,7 +281,8 @@ export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig =
271
281
  // any age).
272
282
  const superseded = supersededMessages?.has(message) ?? false;
273
283
  const useless = uselessMessages?.has(message) ?? false;
274
- if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected)) {
284
+ const tooSmall = tokens < MIN_PRUNE_TOKENS;
285
+ if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected || tooSmall)) {
275
286
  accumulatedTokens += tokens;
276
287
  continue;
277
288
  }
@@ -2,7 +2,8 @@
2
2
  * Shared utilities for compaction and branch summarization.
3
3
  */
4
4
 
5
- import type { Message } from "@oh-my-pi/pi-ai";
5
+ import type { Message, ToolCall } from "@oh-my-pi/pi-ai";
6
+ import { type Grammar, type GrammarToolResult, getInbandGrammar, type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
6
7
  import { formatGroupedPaths, prompt } from "@oh-my-pi/pi-utils";
7
8
  import type { AgentMessage } from "../types";
8
9
  import fileOperationsTemplate from "./prompts/file-operations.md" with { type: "text" };
@@ -188,7 +189,8 @@ function truncateForSummary(text: string, maxChars: number): string {
188
189
  * This prevents the model from treating it as a conversation to continue.
189
190
  * Call convertToLlm() first to handle custom message types.
190
191
  */
191
- export function serializeConversation(messages: Message[]): string {
192
+ export function serializeConversation(messages: Message[], syntax?: ToolCallSyntax): string {
193
+ const grammar = syntax ? getInbandGrammar(syntax) : undefined;
192
194
  const parts: string[] = [];
193
195
 
194
196
  // Tool results flagged contextually useless (and their paired calls) are
@@ -215,7 +217,7 @@ export function serializeConversation(messages: Message[]): string {
215
217
  } else if (msg.role === "assistant") {
216
218
  const textParts: string[] = [];
217
219
  const thinkingParts: string[] = [];
218
- const toolCalls: string[] = [];
220
+ const toolCalls: ToolCall[] = [];
219
221
 
220
222
  for (const block of msg.content) {
221
223
  if (block.type === "text") {
@@ -224,22 +226,18 @@ export function serializeConversation(messages: Message[]): string {
224
226
  thinkingParts.push(block.thinking);
225
227
  } else if (block.type === "toolCall") {
226
228
  if (uselessCallIds.has(block.id)) continue;
227
- const args = block.arguments as Record<string, unknown>;
228
- const argsStr = Object.entries(args)
229
- .map(([k, v]) => `${k}=${JSON.stringify(v)}`)
230
- .join(", ");
231
- toolCalls.push(`${block.name}(${argsStr})`);
229
+ toolCalls.push(block);
232
230
  }
233
231
  }
234
232
 
235
233
  if (thinkingParts.length > 0) {
236
- parts.push(`[Assistant thinking]: ${thinkingParts.join("\n")}`);
234
+ parts.push(`[Think]: ${thinkingParts.join("\n")}`);
237
235
  }
238
236
  if (textParts.length > 0) {
239
237
  parts.push(`[Assistant]: ${textParts.join("\n")}`);
240
238
  }
241
239
  if (toolCalls.length > 0) {
242
- parts.push(`[Assistant tool calls]: ${toolCalls.join("; ")}`);
240
+ parts.push(`[Tool Call]: ${renderToolCalls(toolCalls, grammar)}`);
243
241
  }
244
242
  } else if (msg.role === "toolResult") {
245
243
  if (uselessCallIds.has(msg.toolCallId)) continue;
@@ -248,7 +246,10 @@ export function serializeConversation(messages: Message[]): string {
248
246
  .map(c => c.text)
249
247
  .join("");
250
248
  if (content) {
251
- parts.push(`[Tool result]: ${truncateForSummary(content, TOOL_RESULT_MAX_CHARS)}`);
249
+ const text = truncateForSummary(content, TOOL_RESULT_MAX_CHARS);
250
+ parts.push(
251
+ `[Tool Result]: ${renderToolResult(msg.toolCallId, msg.toolName, msg.isError === true, text, grammar)}`,
252
+ );
252
253
  }
253
254
  }
254
255
  }
@@ -256,6 +257,38 @@ export function serializeConversation(messages: Message[]): string {
256
257
  return parts.join("\n\n");
257
258
  }
258
259
 
260
+ /**
261
+ * Render an assistant turn's tool calls. With a grammar, emit the model's
262
+ * native invocation block; otherwise fall back to a compact `name(args)` list.
263
+ */
264
+ function renderToolCalls(calls: ToolCall[], grammar: Grammar | undefined): string {
265
+ if (grammar) return grammar.renderAssistantToolCalls(calls);
266
+ return calls
267
+ .map(call => {
268
+ const argsStr = Object.entries(call.arguments as Record<string, unknown>)
269
+ .map(([k, v]) => `${k}=${JSON.stringify(v)}`)
270
+ .join(", ");
271
+ return `${call.name}(${argsStr})`;
272
+ })
273
+ .join("; ");
274
+ }
275
+
276
+ /**
277
+ * Render a single tool result. With a grammar, emit the model's native
278
+ * tool-result envelope; otherwise return the (already truncated) text verbatim.
279
+ */
280
+ function renderToolResult(
281
+ id: string,
282
+ name: string,
283
+ isError: boolean,
284
+ text: string,
285
+ grammar: Grammar | undefined,
286
+ ): string {
287
+ if (!grammar) return text;
288
+ const result: GrammarToolResult = { id, name, index: 0, text, isError };
289
+ return grammar.renderToolResults([result]);
290
+ }
291
+
259
292
  // ============================================================================
260
293
  // Summarization System Prompt
261
294
  // ============================================================================
package/src/types.ts CHANGED
@@ -503,6 +503,15 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
503
503
  concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
504
504
  /** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
505
505
  lenientArgValidation?: boolean;
506
+ /**
507
+ * If true, the agent loop may abort this tool mid-execution to deliver a
508
+ * queued steering message (instead of waiting for the tool to finish on its
509
+ * own). Set only on tools that purely *wait* and observe their abort signal
510
+ * cleanly (e.g. the `job` poll), so the abort surfaces the tool's current
511
+ * snapshot rather than corrupting a side effect. Honored only when
512
+ * `interruptMode` is "immediate".
513
+ */
514
+ interruptible?: boolean;
506
515
  /**
507
516
  * Controls how the INTENT_FIELD (`_i`) is handled for this tool.
508
517
  * - `"require"` (default): `_i` is injected and required in the parameter schema.