@oh-my-pi/pi-agent-core 15.11.2 → 15.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.11.4] - 2026-06-12
6
+ ### Added
7
+
8
+ - Added `hasSteeringMessages` to `AgentLoopConfig` (wired by `Agent` to its steering queue): a peek used by the immediate-interrupt poll during tool execution, so the loop can detect queued steering without dequeuing and the queue keeps owning its messages until the injection boundary
9
+ - The agent loop now re-samples after a non-terminal stop (`stopReason: "stop"` with `stopDetails: { type: "pause_turn" }`, emitted by the Codex providers for `end_turn: false` commentary-only responses): the assistant message is committed to history and the model is called again without ending the turn. Consecutive pause continuations without an intervening tool call are capped at 8 to bound a backend that never stops pausing.
10
+
11
+ ### Changed
12
+
13
+ - Changed steering handling so queued steering messages are now dequeued only at injection boundaries, with immediate mid-batch interrupt polling using `hasSteeringMessages`. Consumers constructing `AgentLoopConfig` directly with only `getSteeringMessages` no longer get mid-batch interrupts — steering degrades to boundary-only delivery until they also supply `hasSteeringMessages`
14
+ - Compaction, handoff, short-summary, and branch-summarization helpers now accept an `ApiKey` (static string or resolver) instead of a pre-resolved string, so a 401 mid-compaction force-refreshes and rotates the credential through the central auth-retry policy before any model-level fallback. The remote OpenAI compaction request is wrapped in `withAuth` and its HTTP failures now carry `.status`, so the retry classifier actually fires on remote-compaction 401s.
15
+ - `transformProviderContext` now receives the dispatch model as a second argument (`(context, model) => Context`), so per-request transforms can gate on model capabilities (vision input, provider, API family). Existing single-argument implementations keep working unchanged.
16
+ - Remote-compaction and summarization failures now throw pi-ai's typed `ProviderHttpError` instead of mutating plain `Error`s with a `.status` property; the generic `requestRemoteCompaction` error now carries `.status` (and response headers) too.
17
+
18
+ ### Fixed
19
+
20
+ - Fixed a regression where steering messages could be injected into history during an aborted in-flight tool batch, leaving them hidden from queue consumers during the post-abort continue.
21
+
5
22
  ## [15.11.2] - 2026-06-11
6
23
 
7
24
  ### Added
@@ -21,7 +21,7 @@ export interface AgentOptions {
21
21
  * Optional transform applied after provider context assembly and before
22
22
  * telemetry capture/provider send.
23
23
  */
24
- transformProviderContext?: (context: Context) => Context;
24
+ transformProviderContext?: (context: Context, model: Model) => Context;
25
25
  /**
26
26
  * Steering mode: "all" = send all steering messages at once, "one-at-a-time" = one per turn
27
27
  */
@@ -4,7 +4,7 @@
4
4
  * When navigating to a different point in the session tree, this generates
5
5
  * a summary of the branch being left so context isn't lost.
6
6
  */
7
- import type { Model } from "@oh-my-pi/pi-ai";
7
+ import type { ApiKey, Model } from "@oh-my-pi/pi-ai";
8
8
  import { type AgentTelemetry } from "../telemetry";
9
9
  import type { AgentMessage } from "../types";
10
10
  import type { ReadonlySessionManager, SessionEntry } from "./entries";
@@ -41,7 +41,7 @@ export interface GenerateBranchSummaryOptions {
41
41
  /** Model to use for summarization */
42
42
  model: Model;
43
43
  /** API key for the model */
44
- apiKey: string;
44
+ apiKey: ApiKey;
45
45
  /** Abort signal for cancellation */
46
46
  signal: AbortSignal;
47
47
  /** Optional custom instructions for summarization */
@@ -4,7 +4,7 @@
4
4
  * Pure functions for compaction logic. The session manager handles I/O,
5
5
  * and after compaction the session is reloaded.
6
6
  */
7
- import { type FetchImpl, type MessageAttribution, type Model, type Tool, type Usage } from "@oh-my-pi/pi-ai";
7
+ import { type ApiKey, type FetchImpl, type MessageAttribution, type Model, type Tool, type Usage } from "@oh-my-pi/pi-ai";
8
8
  import { type AgentTelemetry } from "../telemetry";
9
9
  import { ThinkingLevel } from "../thinking";
10
10
  import type { AgentMessage } from "../types";
@@ -128,7 +128,7 @@ export interface SummaryOptions {
128
128
  /** Optional fetch implementation threaded into remote compaction calls. */
129
129
  fetch?: FetchImpl;
130
130
  }
131
- export declare function generateSummary(currentMessages: AgentMessage[], model: Model, reserveTokens: number, apiKey: string, signal?: AbortSignal, customInstructions?: string, previousSummary?: string, options?: SummaryOptions): Promise<string>;
131
+ export declare function generateSummary(currentMessages: AgentMessage[], model: Model, reserveTokens: number, apiKey: ApiKey, signal?: AbortSignal, customInstructions?: string, previousSummary?: string, options?: SummaryOptions): Promise<string>;
132
132
  export interface HandoffOptions {
133
133
  /** Live agent system prompt — passed verbatim so providers hit the cached prefix. */
134
134
  systemPrompt: string[];
@@ -152,7 +152,7 @@ export interface HandoffOptions {
152
152
  thinkingLevel?: ThinkingLevel;
153
153
  }
154
154
  export declare function renderHandoffPrompt(customInstructions?: string): string;
155
- export declare function generateHandoff(messages: AgentMessage[], model: Model, apiKey: string, options: HandoffOptions, signal?: AbortSignal): Promise<string>;
155
+ export declare function generateHandoff(messages: AgentMessage[], model: Model, apiKey: ApiKey, options: HandoffOptions, signal?: AbortSignal): Promise<string>;
156
156
  export interface CompactionPreparation {
157
157
  /** UUID of first entry to keep */
158
158
  firstKeptEntryId: string;
@@ -182,4 +182,4 @@ export declare function prepareCompaction(pathEntries: SessionEntry[], settings:
182
182
  * @param preparation - Pre-calculated preparation from prepareCompaction()
183
183
  * @param customInstructions - Optional custom focus for the summary
184
184
  */
185
- export declare function compact(preparation: CompactionPreparation, model: Model, apiKey: string, customInstructions?: string, signal?: AbortSignal, options?: SummaryOptions): Promise<CompactionResult>;
185
+ export declare function compact(preparation: CompactionPreparation, model: Model, apiKey: ApiKey, customInstructions?: string, signal?: AbortSignal, options?: SummaryOptions): Promise<CompactionResult>;
@@ -84,7 +84,7 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
84
84
  * normalization, and append-only context handling, but before telemetry capture
85
85
  * and provider send.
86
86
  */
87
- transformProviderContext?: (context: Context) => Context;
87
+ transformProviderContext?: (context: Context, model: Model) => Context;
88
88
  /**
89
89
  * Resolves an API key dynamically for each LLM call.
90
90
  *
@@ -95,11 +95,26 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
95
95
  /**
96
96
  * Returns steering messages to inject into the conversation mid-run.
97
97
  *
98
- * Called after each tool execution to check for user interruptions unless interruptMode is "wait".
99
- * If messages are returned, remaining tool calls are skipped and
100
- * these messages are added to the context before the next LLM call.
98
+ * Called at injection boundaries only (loop start and after a tool batch
99
+ * fully settles), so dequeued messages are immediately injected. The
100
+ * mid-batch interrupt poll uses {@link hasSteeringMessages} instead and
101
+ * never consumes the queue.
101
102
  */
102
103
  getSteeringMessages?: () => Promise<AgentMessage[]>;
104
+ /**
105
+ * Peeks whether steering messages are queued, without consuming them.
106
+ *
107
+ * Called after each tool execution (unless interruptMode is "wait") to decide
108
+ * whether to skip the remaining tool calls in the batch. The queue keeps
109
+ * owning its messages until the loop reaches the next injection boundary and
110
+ * dequeues via {@link getSteeringMessages} — so callers can still cancel or
111
+ * restore queued messages while in-flight tools settle, and an external
112
+ * abort in that window leaves the queue intact for a post-abort continue.
113
+ *
114
+ * When omitted, steering never interrupts a running tool batch; queued
115
+ * messages are still delivered at the next injection boundary.
116
+ */
117
+ hasSteeringMessages?: () => boolean | Promise<boolean>;
103
118
  /**
104
119
  * Returns follow-up messages to process after the agent would otherwise stop.
105
120
  *
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-agent-core",
4
- "version": "15.11.2",
4
+ "version": "15.11.4",
5
5
  "description": "General-purpose agent with transport abstraction, state management, and attachment support",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -35,11 +35,11 @@
35
35
  "fmt": "biome format --write ."
36
36
  },
37
37
  "dependencies": {
38
- "@oh-my-pi/pi-ai": "15.11.2",
39
- "@oh-my-pi/pi-catalog": "15.11.2",
40
- "@oh-my-pi/pi-natives": "15.11.2",
41
- "@oh-my-pi/pi-utils": "15.11.2",
42
- "@oh-my-pi/snapcompact": "15.11.2",
38
+ "@oh-my-pi/pi-ai": "15.11.4",
39
+ "@oh-my-pi/pi-catalog": "15.11.4",
40
+ "@oh-my-pi/pi-natives": "15.11.4",
41
+ "@oh-my-pi/pi-utils": "15.11.4",
42
+ "@oh-my-pi/snapcompact": "15.11.4",
43
43
  "@opentelemetry/api": "^1.9.1"
44
44
  },
45
45
  "devDependencies": {
package/src/agent-loop.ts CHANGED
@@ -58,6 +58,14 @@ import { yieldIfDue } from "./utils/yield";
58
58
  /** Sentinel returned by the abort race in `streamAssistantResponse`. */
59
59
  const ABORTED: unique symbol = Symbol("agent-loop-aborted");
60
60
 
61
+ /**
62
+ * Cap on consecutive re-samples triggered by a non-terminal stop
63
+ * (`stopDetails.type === "pause_turn"`) without an intervening tool call. Each
64
+ * continuation is a full model request, so a backend that never stops pausing
65
+ * must not spin the loop forever. Resets whenever a turn carries tool calls.
66
+ */
67
+ const MAX_PAUSED_TURN_CONTINUATIONS = 8;
68
+
61
69
  class HarmonyLeakInterruption extends Error {
62
70
  constructor(
63
71
  readonly detection: HarmonyDetection,
@@ -586,6 +594,7 @@ async function runLoopBody(
586
594
  let pendingMessages: AgentMessage[] = signal?.aborted ? [] : (await config.getSteeringMessages?.()) || [];
587
595
  let harmonyRetryAttempt = 0;
588
596
  let harmonyTruncateResumeCount = 0;
597
+ let pausedTurnContinuations = 0;
589
598
 
590
599
  // Outer loop: continues when queued follow-up messages arrive after agent would stop
591
600
  while (true) {
@@ -667,7 +676,6 @@ async function runLoopBody(
667
676
  stream.push({ type: "message_end", message: snapshotAssistantMessage(message) });
668
677
  }
669
678
  newMessages.push(message);
670
- let steeringMessagesFromExecution: AgentMessage[] | undefined;
671
679
 
672
680
  if (message.stopReason === "error" || message.stopReason === "aborted") {
673
681
  // Create placeholder tool results for any tool calls in the aborted message
@@ -728,7 +736,6 @@ async function runLoopBody(
728
736
  );
729
737
 
730
738
  toolResults.push(...executionResult.toolResults);
731
- steeringMessagesFromExecution = executionResult.steeringMessages;
732
739
 
733
740
  for (const result of toolResults) {
734
741
  currentContext.messages.push(result);
@@ -759,14 +766,32 @@ async function runLoopBody(
759
766
  }
760
767
  }
761
768
 
769
+ if (toolCalls.length > 0) {
770
+ pausedTurnContinuations = 0;
771
+ } else if (
772
+ !hasMoreToolCalls &&
773
+ message.stopReason === "stop" &&
774
+ message.stopDetails?.type === "pause_turn" &&
775
+ pausedTurnContinuations < MAX_PAUSED_TURN_CONTINUATIONS
776
+ ) {
777
+ // Non-terminal stop: the provider ended the response but not the turn
778
+ // (e.g. Codex `end_turn: false` on a commentary-only progress update).
779
+ // Re-sample with the assistant message replayed so the model keeps
780
+ // working; the next round folds steering/asides in like any other
781
+ // mid-work turn.
782
+ pausedTurnContinuations++;
783
+ hasMoreToolCalls = true;
784
+ }
785
+
762
786
  stream.push({ type: "turn_end", message, toolResults });
763
787
 
764
788
  // On external abort (user interrupt), leave the steering queue intact: the
765
789
  // session aborts then continues, delivering the queue into a fresh run.
766
790
  // Draining it here would inject the messages right before a model call that
767
- // instantly aborts — message lands in history, agent never responds.
768
- const steering =
769
- steeringMessagesFromExecution ?? (signal?.aborted ? [] : (await config.getSteeringMessages?.()) || []);
791
+ // instantly aborts — message lands in history, agent never responds. The
792
+ // mid-batch interrupt poll only peeks (hasSteeringMessages), so the queue
793
+ // still owns every message until this dequeue.
794
+ const steering = signal?.aborted ? [] : (await config.getSteeringMessages?.()) || [];
770
795
  if (hasMoreToolCalls) {
771
796
  // Mid-work: fold any non-interrupting asides into the next turn alongside steering.
772
797
  const asides = resolveAsides(await config.getAsideMessages?.());
@@ -854,7 +879,7 @@ async function streamAssistantResponse(
854
879
  };
855
880
  }
856
881
  if (config.transformProviderContext) {
857
- llmContext = config.transformProviderContext(llmContext);
882
+ llmContext = config.transformProviderContext(llmContext, config.model);
858
883
  }
859
884
 
860
885
  const streamFunction = streamFn || streamSimple;
@@ -1233,9 +1258,10 @@ async function executeToolCalls(
1233
1258
  config: AgentLoopConfig,
1234
1259
  telemetry: AgentTelemetry | undefined,
1235
1260
  invokeAgentSpan: Span | undefined,
1236
- ): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> {
1261
+ ): Promise<{ toolResults: ToolResultMessage[] }> {
1237
1262
  const tools = currentContext.tools;
1238
1263
  const {
1264
+ hasSteeringMessages,
1239
1265
  getSteeringMessages,
1240
1266
  interruptMode = "immediate",
1241
1267
  getToolContext,
@@ -1255,8 +1281,6 @@ async function executeToolCalls(
1255
1281
  ? AbortSignal.any([signal, steeringAbortController.signal])
1256
1282
  : steeringAbortController.signal;
1257
1283
  const interruptState = { triggered: false };
1258
- let steeringMessages: AgentMessage[] | undefined;
1259
- let steeringCheckTail: Promise<void> = Promise.resolve();
1260
1284
 
1261
1285
  const records = toolCalls.map(toolCall => ({
1262
1286
  toolCall,
@@ -1279,23 +1303,29 @@ async function executeToolCalls(
1279
1303
  const checkSteering = async (): Promise<void> => {
1280
1304
  // `signal` (external/user abort) is checked separately from the internal
1281
1305
  // steeringAbortController: once the run is externally aborted it is
1282
- // unwinding, and draining the steering queue here would strand the
1283
- // messages in the dying run instead of leaving them for the post-abort
1284
- // continue (interruptAndFlushQueuedMessages → Agent.continue()).
1285
- if (!shouldInterruptImmediately || !getSteeringMessages || interruptState.triggered || signal?.aborted) {
1306
+ // unwinding and the interrupt would be redundant.
1307
+ if (!shouldInterruptImmediately || interruptState.triggered || signal?.aborted) {
1286
1308
  return;
1287
1309
  }
1288
- const check = steeringCheckTail.then(async () => {
1310
+ // Prefer the non-consuming peek (`hasSteeringMessages`) when available.
1311
+ // Fall back to calling `getSteeringMessages` directly when only it is
1312
+ // provided (e.g. in tests or minimal integrations without a separate
1313
+ // peek function). In that case the messages are dequeued here and only
1314
+ // counted, not retained: the interrupt fires, but they are never injected.
1315
+ let hasMessages: boolean;
1316
+ if (hasSteeringMessages) {
1317
+ hasMessages = await hasSteeringMessages();
1318
+ } else if (getSteeringMessages) {
1319
+ const msgs = await getSteeringMessages();
1320
+ hasMessages = (msgs?.length ?? 0) > 0;
1321
+ } else {
1322
+ return;
1323
+ }
1324
+ if (hasMessages) {
1289
1325
  if (interruptState.triggered || signal?.aborted) return;
1290
- const steering = await getSteeringMessages();
1291
- if (steering.length > 0) {
1292
- steeringMessages = steering;
1293
- interruptState.triggered = true;
1294
- steeringAbortController.abort();
1295
- }
1296
- });
1297
- steeringCheckTail = check.catch(() => {});
1298
- await check;
1326
+ interruptState.triggered = true;
1327
+ steeringAbortController.abort();
1328
+ }
1299
1329
  };
1300
1330
 
1301
1331
  const emitToolResult = (record: (typeof records)[number], result: AgentToolResult<any>, isError: boolean): void => {
@@ -1604,7 +1634,7 @@ async function executeToolCalls(
1604
1634
  }
1605
1635
  }
1606
1636
 
1607
- return { toolResults: emittedToolResults, steeringMessages };
1637
+ return { toolResults: emittedToolResults };
1608
1638
  }
1609
1639
 
1610
1640
  /**
package/src/agent.ts CHANGED
@@ -98,7 +98,7 @@ export interface AgentOptions {
98
98
  * Optional transform applied after provider context assembly and before
99
99
  * telemetry capture/provider send.
100
100
  */
101
- transformProviderContext?: (context: Context) => Context;
101
+ transformProviderContext?: (context: Context, model: Model) => Context;
102
102
 
103
103
  /**
104
104
  * Steering mode: "all" = send all steering messages at once, "one-at-a-time" = one per turn
@@ -285,7 +285,7 @@ export class Agent {
285
285
  #abortController?: AbortController;
286
286
  #convertToLlm: (messages: AgentMessage[]) => Message[] | Promise<Message[]>;
287
287
  #transformContext?: (messages: AgentMessage[], signal?: AbortSignal) => Promise<AgentMessage[]>;
288
- #transformProviderContext?: (context: Context) => Context;
288
+ #transformProviderContext?: (context: Context, model: Model) => Context;
289
289
  #steeringQueue: AgentMessage[] = [];
290
290
  #followUpQueue: AgentMessage[] = [];
291
291
  #steeringMode: "all" | "one-at-a-time";
@@ -1009,6 +1009,7 @@ export class Agent {
1009
1009
  }
1010
1010
  return this.#dequeueSteeringMessages();
1011
1011
  },
1012
+ hasSteeringMessages: () => this.#steeringQueue.length > 0,
1012
1013
  getFollowUpMessages: async () => this.#dequeueFollowUpMessages(),
1013
1014
  getAsideMessages: async () => (await this.#asideMessageProvider?.()) ?? [],
1014
1015
  onBeforeYield: () => this.#onBeforeYield?.(),
@@ -5,7 +5,7 @@
5
5
  * a summary of the branch being left so context isn't lost.
6
6
  */
7
7
 
8
- import type { Model } from "@oh-my-pi/pi-ai";
8
+ import type { ApiKey, Model } from "@oh-my-pi/pi-ai";
9
9
  import { prompt } from "@oh-my-pi/pi-utils";
10
10
  import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
11
11
  import type { AgentMessage } from "../types";
@@ -71,7 +71,7 @@ export interface GenerateBranchSummaryOptions {
71
71
  /** Model to use for summarization */
72
72
  model: Model;
73
73
  /** API key for the model */
74
- apiKey: string;
74
+ apiKey: ApiKey;
75
75
  /** Abort signal for cancellation */
76
76
  signal: AbortSignal;
77
77
  /** Optional custom instructions for summarization */
@@ -6,19 +6,22 @@
6
6
  */
7
7
 
8
8
  import {
9
+ type ApiKey,
9
10
  type AssistantMessage,
10
11
  Effort,
11
12
  type FetchImpl,
12
13
  type Message,
13
14
  type MessageAttribution,
14
15
  type Model,
16
+ ProviderHttpError,
15
17
  type Tool,
16
18
  type Usage,
19
+ withAuth,
17
20
  } from "@oh-my-pi/pi-ai";
18
21
  import { clampThinkingLevelForModel } from "@oh-my-pi/pi-catalog/model-thinking";
19
22
  import { countTokens } from "@oh-my-pi/pi-natives";
20
23
  import { logger, prompt } from "@oh-my-pi/pi-utils";
21
- import { SNAPCOMPACT_FRAME_TOKEN_ESTIMATE } from "@oh-my-pi/snapcompact";
24
+ import * as snapcompact from "@oh-my-pi/snapcompact";
22
25
  import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
23
26
  import { ThinkingLevel } from "../thinking";
24
27
  import type { AgentMessage } from "../types";
@@ -324,7 +327,7 @@ export function estimateTokens(message: AgentMessage): number {
324
327
  fragments.push(message.summary);
325
328
  if (message.role === "compactionSummary" && message.images) {
326
329
  // Snapcompact frames render at ≥1568px; providers bill the downscaled cap.
327
- extra += message.images.length * SNAPCOMPACT_FRAME_TOKEN_ESTIMATE;
330
+ extra += message.images.length * snapcompact.FRAME_TOKEN_ESTIMATE;
328
331
  }
329
332
  break;
330
333
  }
@@ -579,11 +582,8 @@ function resolveCompactionEffort(model: Model, level: ThinkingLevel | undefined)
579
582
  * message-based check is still required upstream — see issue #986.
580
583
  */
581
584
  function createSummarizationError(prefix: string, response: AssistantMessage): Error {
582
- const error: Error & { status?: number } = new Error(`${prefix}: ${response.errorMessage || "Unknown error"}`);
583
- if (response.errorStatus !== undefined) {
584
- error.status = response.errorStatus;
585
- }
586
- return error;
585
+ const text = `${prefix}: ${response.errorMessage || "Unknown error"}`;
586
+ return response.errorStatus === undefined ? new Error(text) : new ProviderHttpError(text, response.errorStatus);
587
587
  }
588
588
 
589
589
  /**
@@ -622,7 +622,7 @@ export async function generateSummary(
622
622
  currentMessages: AgentMessage[],
623
623
  model: Model,
624
624
  reserveTokens: number,
625
- apiKey: string,
625
+ apiKey: ApiKey,
626
626
  signal?: AbortSignal,
627
627
  customInstructions?: string,
628
628
  previousSummary?: string,
@@ -736,7 +736,7 @@ export function renderHandoffPrompt(customInstructions?: string): string {
736
736
  export async function generateHandoff(
737
737
  messages: AgentMessage[],
738
738
  model: Model,
739
- apiKey: string,
739
+ apiKey: ApiKey,
740
740
  options: HandoffOptions,
741
741
  signal?: AbortSignal,
742
742
  ): Promise<string> {
@@ -784,7 +784,7 @@ async function generateShortSummary(
784
784
  historySummary: string | undefined,
785
785
  model: Model,
786
786
  reserveTokens: number,
787
- apiKey: string,
787
+ apiKey: ApiKey,
788
788
  signal?: AbortSignal,
789
789
  options?: SummaryOptions,
790
790
  ): Promise<string> {
@@ -981,7 +981,7 @@ const TURN_PREFIX_SUMMARIZATION_PROMPT = prompt.render(compactionTurnPrefixPromp
981
981
  export async function compact(
982
982
  preparation: CompactionPreparation,
983
983
  model: Model,
984
- apiKey: string,
984
+ apiKey: ApiKey,
985
985
  customInstructions?: string,
986
986
  signal?: AbortSignal,
987
987
  options?: SummaryOptions,
@@ -1032,13 +1032,18 @@ export async function compact(
1032
1032
  );
1033
1033
  if (remoteHistory.length > 0) {
1034
1034
  try {
1035
- const remote = await requestOpenAiRemoteCompaction(
1036
- model,
1035
+ const remote = await withAuth(
1037
1036
  apiKey,
1038
- remoteHistory,
1039
- summaryOptions.remoteInstructions ?? SUMMARIZATION_SYSTEM_PROMPT,
1040
- signal,
1041
- { fetch: summaryOptions.fetch },
1037
+ key =>
1038
+ requestOpenAiRemoteCompaction(
1039
+ model,
1040
+ key,
1041
+ remoteHistory,
1042
+ summaryOptions.remoteInstructions ?? SUMMARIZATION_SYSTEM_PROMPT,
1043
+ signal,
1044
+ { fetch: summaryOptions.fetch },
1045
+ ),
1046
+ { signal },
1042
1047
  );
1043
1048
  preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, remote);
1044
1049
  } catch (err) {
@@ -1137,7 +1142,7 @@ async function generateTurnPrefixSummary(
1137
1142
  messages: AgentMessage[],
1138
1143
  model: Model,
1139
1144
  reserveTokens: number,
1140
- apiKey: string,
1145
+ apiKey: ApiKey,
1141
1146
  signal?: AbortSignal,
1142
1147
  options?: SummaryOptions,
1143
1148
  ): Promise<string> {
@@ -12,6 +12,7 @@
12
12
  * with `{ summary, shortSummary? }`.
13
13
  */
14
14
 
15
+ import { ProviderHttpError } from "@oh-my-pi/pi-ai/errors";
15
16
  import { parseTextSignature } from "@oh-my-pi/pi-ai/providers/openai-responses-shared";
16
17
  import { transformMessages } from "@oh-my-pi/pi-ai/providers/transform-messages";
17
18
  import type { AssistantMessage, FetchImpl, Message, Model } from "@oh-my-pi/pi-ai/types";
@@ -467,7 +468,13 @@ export async function requestOpenAiRemoteCompaction(
467
468
  statusText: response.statusText,
468
469
  errorText,
469
470
  });
470
- throw new Error(`Remote compaction failed (${response.status} ${response.statusText})`);
471
+ throw new ProviderHttpError(
472
+ `Remote compaction failed (${response.status} ${response.statusText})`,
473
+ response.status,
474
+ {
475
+ headers: response.headers,
476
+ },
477
+ );
471
478
  }
472
479
 
473
480
  const data = (await response.json()) as { output?: unknown[] } | undefined;
@@ -519,7 +526,13 @@ export async function requestRemoteCompaction(
519
526
  statusText: response.statusText,
520
527
  errorText,
521
528
  });
522
- throw new Error(`Remote compaction failed (${response.status} ${response.statusText})`);
529
+ throw new ProviderHttpError(
530
+ `Remote compaction failed (${response.status} ${response.statusText})`,
531
+ response.status,
532
+ {
533
+ headers: response.headers,
534
+ },
535
+ );
523
536
  }
524
537
 
525
538
  const data = (await response.json()) as RemoteCompactionResponse | undefined;
package/src/types.ts CHANGED
@@ -113,7 +113,7 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
113
113
  * normalization, and append-only context handling, but before telemetry capture
114
114
  * and provider send.
115
115
  */
116
- transformProviderContext?: (context: Context) => Context;
116
+ transformProviderContext?: (context: Context, model: Model) => Context;
117
117
 
118
118
  /**
119
119
  * Resolves an API key dynamically for each LLM call.
@@ -126,12 +126,28 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
126
126
  /**
127
127
  * Returns steering messages to inject into the conversation mid-run.
128
128
  *
129
- * Called after each tool execution to check for user interruptions unless interruptMode is "wait".
130
- * If messages are returned, remaining tool calls are skipped and
131
- * these messages are added to the context before the next LLM call.
129
+ * Called at injection boundaries only (loop start and after a tool batch
130
+ * fully settles), so dequeued messages are immediately injected. The
131
+ * mid-batch interrupt poll uses {@link hasSteeringMessages} instead and
132
+ * never consumes the queue.
132
133
  */
133
134
  getSteeringMessages?: () => Promise<AgentMessage[]>;
134
135
 
136
+ /**
137
+ * Peeks whether steering messages are queued, without consuming them.
138
+ *
139
+ * Called after each tool execution (unless interruptMode is "wait") to decide
140
+ * whether to skip the remaining tool calls in the batch. The queue keeps
141
+ * owning its messages until the loop reaches the next injection boundary and
142
+ * dequeues via {@link getSteeringMessages} — so callers can still cancel or
143
+ * restore queued messages while in-flight tools settle, and an external
144
+ * abort in that window leaves the queue intact for a post-abort continue.
145
+ *
146
+ * When omitted, steering never interrupts a running tool batch; queued
147
+ * messages are still delivered at the next injection boundary.
148
+ */
149
+ hasSteeringMessages?: () => boolean | Promise<boolean>;
150
+
135
151
  /**
136
152
  * Returns follow-up messages to process after the agent would otherwise stop.
137
153
  *