@oh-my-pi/pi-agent-core 15.12.3 → 15.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,106 +2,68 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
- ## [15.12.1] - 2026-06-12
6
5
  ### Breaking Changes
7
6
 
8
7
  - Changed `pruneSupersededToolResults` to allow `supersedeKey` to be omitted so useless-result pruning can run without read-style supersede grouping
8
+ - Removed `compaction/index.ts` re-export of snapcompact helpers, so snapcompact utilities are no longer available from the agent compaction barrel and should be imported from `@oh-my-pi/snapcompact`
9
+ - Removed the `convertToLlm` alias export from `compaction/messages` — it duplicated `defaultConvertToLlm` under a second name. Import `defaultConvertToLlm` (array form) or the new `convertMessageToLlm` (single-message form) instead
9
10
 
10
11
  ### Added
11
12
 
13
+ - Added repetition-loop detection to the streaming agent loop for Gemini-family providers. A runaway run of a repeated text or thinking unit is detected mid-stream from a bounded rolling tail (O(1) per delta), the provider request is aborted, the repeated tail is collapsed to a single representative copy, and the turn ends gracefully with an `error` stop reason. Legitimate all-numeric/whitespace/punctuation runs (hexdumps, zero-fills, numeric tables) are not misclassified as loops ([#2549](https://github.com/can1357/oh-my-pi/pull/2549) by [@usr-bin-roygbiv](https://github.com/usr-bin-roygbiv)).
12
14
  - Added `pruneUseless` controls to `PruneConfig` and `SupersedePruneConfig` so callers can toggle compaction of `toolResult` entries marked `useless`
13
15
  - Added the ability to disable useless-result pruning by setting `pruneUseless` to `false`
14
16
  - Tools can flag a result contextually useless (`AgentToolResult.useless`; overridable via `AfterToolCallResult.useless`): the agent loop copies the flag onto the persisted `ToolResultMessage` (errors always win), and compaction consumes it — the cache-aware supersede pass and the threshold prune blank flagged results to the exact `USELESS_NOTICE` placeholder (bypassing the protect window, skipping results smaller than the notice), shake collects them inside the protect-recent window, and `serializeConversation` drops the whole tool call/result pair from summarizer input
15
-
16
- ### Changed
17
-
18
- - Changed `pruneSupersededToolResults` to allow omitted `supersedeKey` when `pruneUseless` is enabled, so useless-result pruning can run without read-style supersede grouping
19
-
20
- ## [15.11.4] - 2026-06-12
21
- ### Added
22
-
23
17
  - Added `hasSteeringMessages` to `AgentLoopConfig` (wired by `Agent` to its steering queue): a peek used by the immediate-interrupt poll during tool execution, so the loop can detect queued steering without dequeuing and the queue keeps owning its messages until the injection boundary
24
18
  - The agent loop now re-samples after a non-terminal stop (`stopReason: "stop"` with `stopDetails: { type: "pause_turn" }`, emitted by the Codex providers for `end_turn: false` commentary-only responses): the assistant message is committed to history and the model is called again without ending the turn. Consecutive pause continuations without an intervening tool call are capped at 8 to bound a backend that never stops pausing.
25
-
26
- ### Changed
27
-
28
- - Changed steering handling so queued steering messages are now dequeued only at injection boundaries, with immediate mid-batch interrupt polling using `hasSteeringMessages`. Consumers constructing `AgentLoopConfig` directly with only `getSteeringMessages` no longer get mid-batch interrupts — steering degrades to boundary-only delivery until they also supply `hasSteeringMessages`
29
- - Compaction, handoff, short-summary, and branch-summarization helpers now accept an `ApiKey` (static string or resolver) instead of a pre-resolved string, so a 401 mid-compaction force-refreshes and rotates the credential through the central auth-retry policy before any model-level fallback. The remote OpenAI compaction request is wrapped in `withAuth` and its HTTP failures now carry `.status`, so the retry classifier actually fires on remote-compaction 401s.
30
- - `transformProviderContext` now receives the dispatch model as a second argument (`(context, model) => Context`), so per-request transforms can gate on model capabilities (vision input, provider, API family). Existing single-argument implementations keep working unchanged.
31
- - Remote-compaction and summarization failures now throw pi-ai's typed `ProviderHttpError` instead of mutating plain `Error`s with a `.status` property; the generic `requestRemoteCompaction` error now carries `.status` (and response headers) too.
32
-
33
- ### Fixed
34
-
35
- - Fixed a regression where steering messages could be injected into history during an aborted in-flight tool batch, leaving them hidden from queue consumers for post-abort continue
36
-
37
- ## [15.11.2] - 2026-06-11
38
-
39
- ### Added
40
-
41
19
  - `AgentTool.concurrency` now also accepts a per-call resolver function `(args) => "shared" | "exclusive"`, letting tools pick the scheduling mode from the call's arguments (a throwing resolver falls back to `"exclusive"`)
42
-
43
- ### Fixed
44
-
45
- - Fixed whitespace-only error tool results so Anthropic requests no longer 400 with `tool_result: content cannot be empty if is_error is true` and wedge the session on every subsequent turn
46
- ## [15.11.0] - 2026-06-10
47
- ### Breaking Changes
48
-
49
- - Removed `compaction/index.ts` re-export of snapcompact helpers, so snapcompact utilities are no longer available from the agent compaction barrel and should be imported from `@oh-my-pi/snapcompact`
50
- - Removed the `convertToLlm` alias export from `compaction/messages` — it duplicated `defaultConvertToLlm` under a second name. Import `defaultConvertToLlm` (array form) or the new `convertMessageToLlm` (single-message form) instead
51
-
52
- ### Added
53
-
54
20
  - Added `convertMessageToLlm()`: the single-message core transformer behind `defaultConvertToLlm()`. Embedders with app-specific message roles should handle their own roles and delegate every core role (`user`/`developer`/`assistant`/`toolResult`/`custom`/`hookMessage`/`branchSummary`/`compactionSummary`) to it instead of duplicating the conversion — a duplicated `compactionSummary` case is how snapcompact frames once silently dropped off provider requests
55
21
  - Added `pruneSupersededToolResults()` and the opt-in `PruneConfig.supersedeKey` hook so harnesses can prune stale tool results superseded by a newer read of the same file; superseded results are pruned ahead of age-based victims during overflow pruning and replaced with a `[Superseded by a newer read of this file]` placeholder. Without the new config, `pruneToolOutputs()` behavior is unchanged.
56
22
  - Added `readToolSupersedeKey()` implementing the read-tool path/selector grammar (selector-free reads supersede range reads of the same file; URL-scheme paths exempt). Pruning honors prompt-cache economics: per-turn prunes only fire when the post-candidate suffix is small or the cache is cold (idle gap).
57
23
  - Added the `snapcompact` compaction strategy via `@oh-my-pi/snapcompact`: instead of an LLM summary, discarded history is printed onto dense bitmap frames and re-attached to the compaction summary message as image blocks. `CompactionSummaryMessage` gains an optional `images` field, `estimateTokens()` charges per attached frame, and frames persist under `preserveData.snapcompact` with an 8-frame middle-out eviction budget.
58
24
  - Snapcompact frames are now rendered in a provider-aware shape (`SNAPCOMPACT_SHAPES` + `resolveSnapcompactShape(api)`), following the snapcompact 200k-token monolithic evals: Anthropic-family and unknown APIs get `8x8r-bw` (unscii-8 square cells, black ink, every line printed twice with the copy on a pale highlight band — read at F1 parity with raw text at ~2x lower cost and the most refusal-robust), Google gets `8x8r-sent` (sentence-hue ink, ~2.9x cheaper), and OpenAI gets `6x6u-sent` (unscii Lanczos-stretched to 6x6 cells — OpenAI bills a flat ~2.9k tokens per image, so frame count is the only cost lever) with `detail: "original"` on the frame images. `snapcompactCompact()` accepts `model`/`shape` options, frames persist their shape metadata, mixed-shape archives (provider switches, legacy 5x8 frames) are flagged in the reading instructions, and `snapcompactGeometry()`/`renderSnapcompactFrame()` now take a shape
59
-
60
- ### Changed
61
-
62
- - Compaction and branch-summary file lists are now a single `<files>` tag instead of `<read-files>`/`<modified-files>`: paths render as the grouped, prefix-folded directory tree the find/search tools emit (`# dir/` headers, bare basenames), each annotated `(Read)`, `(Write)`, or `(RW)` — modified files that were also read get `(RW)`. Legacy tags in summaries written by earlier versions are still stripped and self-heal on the next compaction
63
-
64
- ### Fixed
65
-
66
- - Fixed queued steering messages being drained into an externally aborted run: interrupting mid-tool execution (e.g. Enter with a pending steer) dequeued the steer into the dying run — it landed in history without a response and the post-abort resume saw an empty queue, so the agent stopped instead of continuing. Steering/follow-up/aside queue polls are now skipped once the run's abort signal fires, leaving the queue intact for `Agent.continue()`.
67
- - Fixed `<read-files>` compaction lists recording the same file once per line-range/raw selector (`src/foo.ts:50-200`, `:raw`, `:1-50:raw`, …): read-tool selectors are now stripped before tracking, so reads dedupe to the base path and match their write/edit path when splitting read-only vs modified lists. Selector-polluted lists stored by earlier compactions self-heal on the next compaction. `readToolSupersedeKey()` now shares the same splitter (`splitReadSelector()`), gaining the `..` range alias and `L`-prefix forms it previously missed.
68
- - Fixed `estimateTokens()` undercounting thinking-heavy assistant messages on replay: `thinkingSignature` payloads (OpenAI Responses encrypted reasoning items, Anthropic signed thinking blocks, etc.) and `redactedThinking.data` are now charged alongside the visible thinking text, so the local estimate tracks provider-reported usage instead of straddling the threshold on every turn ([#2275](https://github.com/can1357/oh-my-pi/issues/2275)).
69
-
70
- ## [15.10.12] - 2026-06-10
71
-
72
- ### Added
73
-
74
25
  - Added `AgentLoopConfig.getDisableReasoning` so callers can override `disableReasoning` per LLM call, mirroring `getReasoning`.
75
26
  - Added `transformProviderContext` to `AgentOptions`/`AgentLoopConfig`: an optional hook applied to the assembled provider context after conversion, normalization, and append-only handling, but before telemetry capture and provider send.
76
-
77
- ### Fixed
78
-
79
- - Fixed `Agent` runs so explicit reasoning disablement is forwarded to provider stream options and re-resolved per continuation, keeping mid-run thinking-off changes in sync with the next provider request.
80
-
81
- ## [15.10.11] - 2026-06-10
82
-
83
- ### Changed
84
-
85
- - Editorial pass over the compaction prompts: fixed garbled grammar and missing articles, RFC-keyed prohibitions, deduped restated instructions; parsed markers (`<read-files>`/`<modified-files>`/`<previous-summary>`) and all output-format headings left byte-identical
86
- - Catalog imports moved to the new `@oh-my-pi/pi-catalog` package: subpath imports (`calculateCost`, Codex wire constants) plus catalog values previously taken from the `@oh-my-pi/pi-ai` root (`getBundledModel`, `clampThinkingLevelForModel`), which pi-ai no longer re-exports; type-only `Model`/`Api`/`Effort` imports from pi-ai are unchanged
87
-
88
- ## [15.10.8] - 2026-06-09
89
-
90
- ### Added
91
-
92
27
  - Added optional `fetch` overrides to `SummaryOptions` and `compact`/`generateSummary` so remote compaction can use custom HTTP clients
93
28
  - Added optional `fetch` option to `ProxyStreamOptions` to control the HTTP request used by `streamProxy`
94
29
  - Added optional `fetch` overrides to `requestOpenAiRemoteCompaction` and `requestRemoteCompaction` for injectable HTTP transport
95
30
  - Added the upstream provider that served a request (`AssistantMessage.upstreamProvider`, e.g. OpenRouter's routed provider) as a `pi.gen_ai.response.upstream_provider` chat-span telemetry attribute, alongside the existing response id and time-to-first-chunk.
31
+ - Added a non-interrupting "aside" message channel to the agent loop (`AgentLoopConfig.getAsideMessages` / `Agent.setAsideMessageProvider`). Asides are drained at each step boundary (after a tool batch, before the next model call) and at the yield check, so passive notifications (e.g. background-job completions, late LSP diagnostics) reach the model *between requests* without waiting for the agent to stop and without aborting in-flight tools the way steering does.
32
+ - Added optional `promptCacheKey` support to `AgentOptions` and `Agent` via a new `promptCacheKey` property so providers can receive a caller-provided prompt cache key
33
+ - Added optional `ApiKeyResolveContext` parameter to `getApiKey` in `AgentOptions` and `AgentLoopConfig` so key resolvers can receive retry context
34
+ - Added `getReadToolPath(context)` to `@oh-my-pi/pi-agent-core/compaction/tool-protection` to extract a paired `read` tool call's `path` for embedders building read-targeted protection matchers
35
+ - Added `getReadToolPath(context)` to `@oh-my-pi/pi-agent-core/compaction/tool-protection`: the shared primitive that extracts a paired `read` tool call's `path` argument, so embedders can build their own read-targeted compaction protection matchers (e.g. plan-file reads) the same way `isSkillReadToolResult` does.
36
+ - Added optional `AgentTool.matcherDigest(args)` hook: tools whose streamed arguments encode content in a wire grammar (patch formats, escaped strings) can expose the real content they introduce, so stream-content matchers (e.g. TTSR rules) run against plain source text instead of the wire format.
37
+ - Added `shake` compaction primitives (`collectShakeRegions`, `applyShakeRegion`, `applyShakeRegions`, `summarizeShakeRegions`, `DEFAULT_SHAKE_CONFIG`, `AGGRESSIVE_SHAKE_CONFIG`, plus the `ShakeRegion`/`ShakeConfig`/`ShakeSummaryItem`/`ShakeSummaryComplete`/`ProtectedToolMatcher` types) under `@oh-my-pi/pi-agent-core/compaction`. These detect heavy context regions — whole tool-call results plus large fenced/XML blocks — and either elide them with placeholders or extractively compress them through an injected completion backend (no LLM summary cut-point). The compressor is provider-agnostic: callers wire it to a local on-device model. Pure detection/mutation; no I/O.
96
38
 
97
- ## [15.10.5] - 2026-06-08
98
-
99
- ### Removed
39
+ ### Changed
100
40
 
101
- - Removed the `maxToolCallsPerTurn` option from `AgentOptions` and `AgentLoopConfig`, so assistant turns are no longer capped after a configured number of completed tool calls
41
+ - Changed `pruneSupersededToolResults` to allow omitted `supersedeKey` when `pruneUseless` is enabled, so useless-result pruning can run without read-style supersede grouping
42
+ - Changed steering handling so queued steering messages are now dequeued only at injection boundaries, with immediate mid-batch interrupt polling using `hasSteeringMessages`. Consumers constructing `AgentLoopConfig` directly with only `getSteeringMessages` no longer get mid-batch interrupts — steering degrades to boundary-only delivery until they also supply `hasSteeringMessages`
43
+ - Compaction, handoff, short-summary, and branch-summarization helpers now accept an `ApiKey` (static string or resolver) instead of a pre-resolved string, so a 401 mid-compaction force-refreshes and rotates the credential through the central auth-retry policy before any model-level fallback. The remote OpenAI compaction request is wrapped in `withAuth` and its HTTP failures now carry `.status`, so the retry classifier actually fires on remote-compaction 401s.
44
+ - `transformProviderContext` now receives the dispatch model as a second argument (`(context, model) => Context`), so per-request transforms can gate on model capabilities (vision input, provider, API family). Existing single-argument implementations keep working unchanged.
45
+ - Remote-compaction and summarization failures now throw pi-ai's typed `ProviderHttpError` instead of mutating plain `Error`s with a `.status` property; the generic `requestRemoteCompaction` error now carries `.status` (and response headers) too.
46
+ - Compaction and branch-summary file lists are now a single `<files>` tag instead of `<read-files>`/`<modified-files>`: paths render as the grouped, prefix-folded directory tree the find/search tools emit (`# dir/` headers, bare basenames), each annotated `(Read)`, `(Write)`, or `(RW)` — modified files that were also read get `(RW)`. Legacy tags in summaries written by earlier versions are still stripped and self-heal on the next compaction
47
+ - Editorial pass over the compaction prompts: fixed garbled grammar and missing articles, RFC-keyed prohibitions, deduped restated instructions; parsed markers (`<read-files>`/`<modified-files>`/`<previous-summary>`) and all output-format headings left byte-identical
48
+ - Catalog imports moved to the new `@oh-my-pi/pi-catalog` package: subpath imports (`calculateCost`, Codex wire constants) plus catalog values previously taken from the `@oh-my-pi/pi-ai` root (`getBundledModel`, `clampThinkingLevelForModel`), which pi-ai no longer re-exports; type-only `Model`/`Api`/`Effort` imports from pi-ai are unchanged
49
+ - Changed core custom and hook messages to convert to `developer` messages for provider context.
50
+ - Enabled streaming API calls to re-resolve credentials through the `getApiKey` callback when retries occur after authentication-related errors
51
+ - `Agent.abort(reason?)` now forwards `reason` to the underlying `AbortController`, and the synthesized aborted assistant message carries that reason on `errorMessage` (string or non-`AbortError` `Error` message) instead of always defaulting to `"Request was aborted"`. Bare `abort()` is unchanged.
52
+ - Changed `Agent.appendMessage`, `popMessage`, `clearMessages`, and `reset` to mutate `state.messages` and `state.pendingToolCalls` in place instead of allocating a fresh array/Set on every transition. Subscribers that capture `state.messages` by reference now observe updates without needing to re-read `state` after each event. The public type signature is unchanged (always `AgentMessage[]` / `Set<string>`).
102
53
 
103
54
  ### Fixed
104
55
 
56
+ - Fixed repetition loop handling to collapse repeated `thinking` blocks to a single representative copy when a loop is detected
57
+ - Fixed repetition-loop detection to ignore repeats that contain only digits, whitespace, or punctuation so legitimate numeric outputs no longer stop with a repetition-loop error
58
+ - Fixed false-positive repetition-loop checks across `text` and `thinking` stream boundaries by tracking loop detection per block type
59
+ - Fixed dynamic forced tool choices from queue hooks being filtered against the active per-turn tool set before provider dispatch. ([#1701](https://github.com/can1357/oh-my-pi/issues/1701))
60
+ - Fixed remote compaction input trimming to use unlimited context when `model.contextWindow` is unset
61
+ - Fixed a regression where steering messages could be injected into history during an aborted in-flight tool batch, leaving them hidden from queue consumers for post-abort continue
62
+ - Fixed whitespace-only error tool results so Anthropic requests no longer 400 with `tool_result: content cannot be empty if is_error is true` and wedge the session on every subsequent turn
63
+ - Fixed queued steering messages being drained into an externally aborted run: interrupting mid-tool execution (e.g. Enter with a pending steer) dequeued the steer into the dying run — it landed in history without a response and the post-abort resume saw an empty queue, so the agent stopped instead of continuing. Steering/follow-up/aside queue polls are now skipped once the run's abort signal fires, leaving the queue intact for `Agent.continue()`.
64
+ - Fixed `<read-files>` compaction lists recording the same file once per line-range/raw selector (`src/foo.ts:50-200`, `:raw`, `:1-50:raw`, …): read-tool selectors are now stripped before tracking, so reads dedupe to the base path and match their write/edit path when splitting read-only vs modified lists. Selector-polluted lists stored by earlier compactions self-heal on the next compaction. `readToolSupersedeKey()` now shares the same splitter (`splitReadSelector()`), gaining the `..` range alias and `L`-prefix forms it previously missed.
65
+ - Fixed `estimateTokens()` undercounting thinking-heavy assistant messages on replay: `thinkingSignature` payloads (OpenAI Responses encrypted reasoning items, Anthropic signed thinking blocks, etc.) and `redactedThinking.data` are now charged alongside the visible thinking text, so the local estimate tracks provider-reported usage instead of straddling the threshold on every turn ([#2275](https://github.com/can1357/oh-my-pi/issues/2275)).
66
+ - Fixed `Agent` runs so explicit reasoning disablement is forwarded to provider stream options and re-resolved per continuation, keeping mid-run thinking-off changes in sync with the next provider request.
105
67
  - Fixed stalled aborted assistant responses so the run now stops without waiting for provider iterator cleanup and returns the aborted message promptly
106
68
  - Fixed `afterToolCall` handling so it now runs for completed tool executions even after a run is aborted, ensuring tool post-processing still applies
107
69
  - Fixed `agentLoopDetailed().detailed()` so run telemetry and coverage are captured before `stream.result()` resolves.
@@ -112,96 +74,64 @@
112
74
  - Fixed tool-call completion so assistant messages on abort keep only completed tool-call blocks and continue processing tool calls when a length stop still included results
113
75
  - Fixed deliberate aborts (TTSR rule matches, user-interrupt labels) so a mid-stream tool-call block that never reached `toolcall_end` is retained on the aborted assistant message and paired with a placeholder result labeled by the abort reason, instead of being dropped; anonymous aborts (bare `abort()`) still drop incomplete tool calls whose partial arguments are unsafe to replay
114
76
  - Fixed runs that stopped with reason `length` after returning tool results so execution continues to handle additional tool calls
115
-
116
- ## [15.10.3] - 2026-06-08
117
-
118
- ### Added
119
-
120
- - Added a non-interrupting "aside" message channel to the agent loop (`AgentLoopConfig.getAsideMessages` / `Agent.setAsideMessageProvider`). Asides are drained at each step boundary (after a tool batch, before the next model call) and at the yield check, so passive notifications (e.g. background-job completions, late LSP diagnostics) reach the model *between requests* without waiting for the agent to stop and without aborting in-flight tools the way steering does.
121
-
122
- ### Changed
123
-
124
- - Changed core custom and hook messages to convert to `developer` messages for provider context.
125
-
126
- ### Fixed
127
-
128
77
  - Fixed the compaction spinner freezing (only repainting on a terminal resize) when compacting very large codex/OpenAI contexts. `buildOpenAiNativeHistory` re-collected the full known/custom tool-call id sets on every history-bearing message, rescanning the entire growing native history each time — O(N²) in history items — which blocked the event loop for seconds and starved the loader's animation timer and render scheduler. The sets are now maintained incrementally (linear), so building the compaction request no longer monopolizes the main thread.
78
+ - Fixed proxy stream silently returning a zero-token success response when the server disconnects without sending a `done` or `error` terminal SSE event. The stream now throws an error, surfacing the disconnect as an `error` event with `stopReason: "error"` and resolving `finalResultPromise`, instead of defaulting to `stopReason: "stop"` with empty content and leaving `stream.result()` callers hanging indefinitely.
79
+ - Fixed handling of short-lived API keys so that expired tokens are retried with a refreshed value during 401/usage-limit failures
80
+ - Ensured fallback API key resolution uses the initially configured static `apiKey` when `getApiKey` is present
81
+ - Wrapped oneshot LLM completions (`instrumentedCompleteSimple`: handoff, compaction/branch summaries) in an `EventLoopKeepalive`. These run outside the agent `#runLoop`, so without the keepalive Bun's event loop stopped servicing timers while parked on the completion promise — freezing host spinners (e.g. the `/handoff` loader) until an unrelated terminal resize poked the loop into rendering again.
82
+ - Surfaced Anthropic stream failures whose message starts with `Output blocked by conten` as normal assistant error lifecycle events, so interactive clients render content-filter blocks instead of silently dropping the streaming bubble at `agent_end`.
83
+ - Fixed the agent loop wedging the model when a `write`/`edit` tool call is truncated by `stop_reason: length` (e.g. an OpenCode Zen / Claude-3.5-Haiku turn that emits >~1000 lines of code, blowing past the 8K `max_tokens` output cap). The skipped tool result now surfaces an actionable hint — naming `stop_reason: length` and telling the model to split the payload into multiple smaller calls — instead of the generic "Tool call was not executed because the assistant ended its turn" placeholder, which left the auto-continue loop re-emitting the same oversized payload until the user gave up. Tools are still NOT executed when the arguments are truncated. ([#1785](https://github.com/can1357/oh-my-pi/issues/1785))
84
+ - Engaged GPT-5 Harmony leak detection on the committed assistant message (openai-codex only). `detectHarmonyLeakInAssistantMessage` now runs on the streamed `done`/`error` result and the trailing fallback, so a leaked final response is aborted-and-retried by the existing mitigation instead of being committed as-is. Tool-argument (`tool_arg`) scanning is gated on the trailing-garbage `T` co-signal and only fires when a caller supplies a parse boundary via `detectHarmonyLeakInAssistantMessage`'s new optional `toolArgParseEnd` resolver. The agent loop passes none — it cannot bound a streamed tool DSL — so that surface stays inert and a legitimate codex tool call whose content legitimately carries `to=functions.*` next to a channel word or non-Latin script (e.g. editing the harmony fixtures) is never hard-aborted.
85
+ - Fixed tool-output pruning and shake protection for `read`: ordinary file/URL reads are now eligible for compaction, while `read` calls whose `path` starts with `skill://` remain protected like native `skill` results.
129
86
 
130
87
  ### Removed
131
88
 
89
+ - Removed the `maxToolCallsPerTurn` option from `AgentOptions` and `AgentLoopConfig`, so assistant turns are no longer capped after a configured number of completed tool calls
132
90
  - Removed the now-dead `<turn-aborted>` marker from the OpenAI compaction output user-message filter, since `transformMessages` no longer emits that note.
133
91
  - Removed stale synthetic user-message tag filters from OpenAI remote compaction output preservation; developer messages are now dropped by role instead.
134
92
  - Tool executions now receive the active turn `AbortSignal` unconditionally.
93
+ - Removed the local-model `summarizeShakeRegions` compressor and related shake-summary prompt/types; shake now only provides mechanical artifact-backed elision primitives.
135
94
 
136
- ## [15.10.2] - 2026-06-08
95
+ ## [15.13.0] - 2026-06-14
137
96
 
138
- ### Fixed
97
+ ## [15.12.6] - 2026-06-14
139
98
 
140
- - Fixed proxy stream silently returning a zero-token success response when the server disconnects without sending a `done` or `error` terminal SSE event. The stream now throws an error, surfacing the disconnect as an `error` event with `stopReason: "error"` and resolving `finalResultPromise`, instead of defaulting to `stopReason: "stop"` with empty content and leaving `stream.result()` callers hanging indefinitely.
99
+ ## [15.12.4] - 2026-06-13
141
100
 
142
- ## [15.10.1] - 2026-06-07
101
+ ## [15.12.1] - 2026-06-12
143
102
 
144
- ### Added
103
+ ## [15.11.4] - 2026-06-12
145
104
 
146
- - Added optional `promptCacheKey` support to `AgentOptions` and `Agent` via a new `promptCacheKey` property so providers can receive a caller-provided prompt cache key
147
- - Added optional `ApiKeyResolveContext` parameter to `getApiKey` in `AgentOptions` and `AgentLoopConfig` so key resolvers can receive retry context
105
+ ## [15.11.2] - 2026-06-11
148
106
 
149
- ### Changed
107
+ ## [15.11.0] - 2026-06-10
150
108
 
151
- - Enabled streaming API calls to re-resolve credentials through the `getApiKey` callback when retries occur after authentication-related errors
152
- - `Agent.abort(reason?)` now forwards `reason` to the underlying `AbortController`, and the synthesized aborted assistant message carries that reason on `errorMessage` (string or non-`AbortError` `Error` message) instead of always defaulting to `"Request was aborted"`. Bare `abort()` is unchanged.
109
+ ## [15.10.12] - 2026-06-10
153
110
 
154
- ### Fixed
111
+ ## [15.10.11] - 2026-06-10
155
112
 
156
- - Fixed handling of short-lived API keys so that expired tokens are retried with a refreshed value during 401/usage-limit failures
157
- - Ensured fallback API key resolution uses the initially configured static `apiKey` when `getApiKey` is present
158
- - Wrapped oneshot LLM completions (`instrumentedCompleteSimple`: handoff, compaction/branch summaries) in an `EventLoopKeepalive`. These run outside the agent `#runLoop`, so without the keepalive Bun's event loop stopped servicing timers while parked on the completion promise — freezing host spinners (e.g. the `/handoff` loader) until an unrelated terminal resize poked the loop into rendering again.
113
+ ## [15.10.8] - 2026-06-09
159
114
 
160
- ## [15.9.5] - 2026-06-05
115
+ ## [15.10.5] - 2026-06-08
161
116
 
162
- ### Fixed
117
+ ## [15.10.3] - 2026-06-08
163
118
 
164
- - Surfaced Anthropic stream failures whose message starts with `Output blocked by conten` as normal assistant error lifecycle events, so interactive clients render content-filter blocks instead of silently dropping the streaming bubble at `agent_end`.
119
+ ## [15.10.2] - 2026-06-08
165
120
 
166
- ## [15.8.3] - 2026-06-03
121
+ ## [15.10.1] - 2026-06-07
167
122
 
168
- ### Added
123
+ ## [15.9.5] - 2026-06-05
169
124
 
170
- - Added `getReadToolPath(context)` to `@oh-my-pi/pi-agent-core/compaction/tool-protection` to extract a paired `read` tool call's `path` for embedders building read-targeted protection matchers
171
- - Added `getReadToolPath(context)` to `@oh-my-pi/pi-agent-core/compaction/tool-protection`: the shared primitive that extracts a paired `read` tool call's `path` argument, so embedders can build their own read-targeted compaction protection matchers (e.g. plan-file reads) the same way `isSkillReadToolResult` does.
125
+ ## [15.8.3] - 2026-06-03
172
126
 
173
127
  ## [15.8.2] - 2026-06-03
174
128
 
175
- ### Added
176
-
177
- - Added optional `AgentTool.matcherDigest(args)` hook: tools whose streamed arguments encode content in a wire grammar (patch formats, escaped strings) can expose the real content they introduce, so stream-content matchers (e.g. TTSR rules) run against plain source text instead of the wire format.
178
-
179
- ### Fixed
180
-
181
- - Fixed the agent loop wedging the model when a `write`/`edit` tool call is truncated by `stop_reason: length` (e.g. an OpenCode Zen / Claude-3.5-Haiku turn that emits >~1000 lines of code, blowing past the 8K `max_tokens` output cap). The skipped tool result now surfaces an actionable hint — naming `stop_reason: length` and telling the model to split the payload into multiple smaller calls — instead of the generic "Tool call was not executed because the assistant ended its turn" placeholder, which left the auto-continue loop re-emitting the same oversized payload until the user gave up. Tools are still NOT executed when the arguments are truncated. ([#1785](https://github.com/can1357/oh-my-pi/issues/1785))
182
-
183
129
  ## [15.8.0] - 2026-06-02
184
130
 
185
- ### Fixed
186
-
187
- - Engaged GPT-5 Harmony leak detection on the committed assistant message (openai-codex only). `detectHarmonyLeakInAssistantMessage` now runs on the streamed `done`/`error` result and the trailing fallback, so a leaked final response is aborted-and-retried by the existing mitigation instead of being committed as-is. Tool-argument (`tool_arg`) scanning is gated on the trailing-garbage `T` co-signal and only fires when a caller supplies a parse boundary via `detectHarmonyLeakInAssistantMessage`'s new optional `toolArgParseEnd` resolver. The agent loop passes none — it cannot bound a streamed tool DSL — so that surface stays inert and a legitimate codex tool call whose content legitimately carries `to=functions.*` next to a channel word or non-Latin script (e.g. editing the harmony fixtures) is never hard-aborted.
188
-
189
131
  ## [15.7.4] - 2026-05-31
190
132
 
191
- ### Removed
192
-
193
- - Removed the local-model `summarizeShakeRegions` compressor and related shake-summary prompt/types; shake now only provides mechanical artifact-backed elision primitives.
194
-
195
133
  ## [15.7.3] - 2026-05-31
196
134
 
197
- ### Added
198
-
199
- - Added `shake` compaction primitives (`collectShakeRegions`, `applyShakeRegion`, `applyShakeRegions`, `summarizeShakeRegions`, `DEFAULT_SHAKE_CONFIG`, `AGGRESSIVE_SHAKE_CONFIG`, plus the `ShakeRegion`/`ShakeConfig`/`ShakeSummaryItem`/`ShakeSummaryComplete`/`ProtectedToolMatcher` types) under `@oh-my-pi/pi-agent-core/compaction`. These detect heavy context regions — whole tool-call results plus large fenced/XML blocks — and either elide them with placeholders or extractively compress them through an injected completion backend (no LLM summary cut-point). The compressor is provider-agnostic: callers wire it to a local on-device model. Pure detection/mutation; no I/O.
200
-
201
- ### Fixed
202
-
203
- - Fixed tool-output pruning and shake protection for `read`: ordinary file/URL reads are now eligible for compaction, while `read` calls whose `path` starts with `skill://` remain protected like native `skill` results.
204
-
205
135
  ## [15.5.15] - 2026-05-30
206
136
 
207
137
  ### Added
@@ -224,10 +154,6 @@
224
154
 
225
155
  - Fixed compaction summarizer throws losing the provider's HTTP status. `generateSummary`, `generateHandoff`, `generateShortSummary`, and `generateTurnPrefixSummary` now route their `stopReason === "error"` throws through a `createSummarizationError` helper that copies `AssistantMessage.errorStatus` onto the thrown `Error` as `.status`, letting downstream consumers (e.g. `AgentSession.#isCompactionAuthFailure` in `@oh-my-pi/pi-coding-agent`) branch on real provider 401/403s without regex-scraping the message body.
226
156
 
227
- ### Changed
228
-
229
- - Changed `Agent.appendMessage`, `popMessage`, `clearMessages`, and `reset` to mutate `state.messages` and `state.pendingToolCalls` in place instead of allocating a fresh array/Set on every transition. Subscribers that capture `state.messages` by reference now observe updates without needing to re-read `state` after each event. The public type signature is unchanged (always `AgentMessage[]` / `Set<string>`).
230
-
231
157
  ## [15.5.0] - 2026-05-26
232
158
 
233
159
  ### Added
@@ -729,4 +655,4 @@ Initial release under @oh-my-pi scope. See previous releases at [badlogic/pi-mon
729
655
 
730
656
  - `Agent` constructor now has all options optional (empty options use defaults).
731
657
 
732
- - `queueMessage()` is now synchronous (no longer returns a Promise).
658
+ - `queueMessage()` is now synchronous (no longer returns a Promise).
@@ -308,6 +308,7 @@ export declare class Agent {
308
308
  getInterruptMode(): "immediate" | "wait";
309
309
  setTools(t: AgentTool<any>[]): void;
310
310
  replaceMessages(ms: AgentMessage[]): void;
311
+ replaceQueues(steering: AgentMessage[], followUp: AgentMessage[]): void;
311
312
  appendMessage(m: AgentMessage): void;
312
313
  popMessage(): AgentMessage | undefined;
313
314
  /**
@@ -324,6 +325,15 @@ export declare class Agent {
324
325
  clearFollowUpQueue(): void;
325
326
  clearAllQueues(): void;
326
327
  hasQueuedMessages(): boolean;
328
+ /** Non-consuming view of the pending steering queue (insertion order, newest
329
+ * last). The session layer derives its queued-message display/count from
330
+ * this live view instead of a mirror, so the agent-core queue stays the
331
+ * single source of truth. */
332
+ peekSteeringQueue(): readonly AgentMessage[];
333
+ /** Non-consuming view of the pending follow-up queue. See
334
+ * {@link peekSteeringQueue}. */
335
+ peekFollowUpQueue(): readonly AgentMessage[];
336
+ get isAborting(): boolean;
327
337
  /**
328
338
  * Remove and return the last steering message from the queue (LIFO).
329
339
  * Used by dequeue keybinding.
@@ -13,6 +13,15 @@
13
13
  */
14
14
  import type { FetchImpl, Message, Model } from "@oh-my-pi/pi-ai/types";
15
15
  export declare const OPENAI_REMOTE_COMPACTION_PRESERVE_KEY = "openaiRemoteCompaction";
16
+ /**
17
+ * Hard ceiling on remote compaction HTTP requests. Unlike every provider
18
+ * stream (guarded by first-event/idle watchdogs in pi-ai), these are raw
19
+ * fetches awaiting one non-streamed JSON body — a connection silently dropped
20
+ * by a middlebox would otherwise hang the whole compaction pipeline forever
21
+ * (frozen "Auto context-full maintenance…" spinner, manual /compact queueing
22
+ * behind it). On timeout the caller falls back to local summarization.
23
+ */
24
+ export declare const REMOTE_COMPACTION_TIMEOUT_MS = 180000;
16
25
  export type OpenAiRemoteCompactionItem = {
17
26
  type: "compaction" | "compaction_summary";
18
27
  encrypted_content?: string;
@@ -56,7 +65,9 @@ export declare function withOpenAiRemoteCompactionPreserveData(preserveData: Rec
56
65
  export declare function buildOpenAiNativeHistory(messages: Message[], model: Model, previousReplacementHistory?: Array<Record<string, unknown>>): Array<Record<string, unknown>>;
57
66
  export declare function requestOpenAiRemoteCompaction(model: Model, apiKey: string, compactInput: Array<Record<string, unknown>>, instructions: string, signal?: AbortSignal, opts?: {
58
67
  fetch?: FetchImpl;
68
+ timeoutMs?: number;
59
69
  }): Promise<OpenAiRemoteCompactionResponse>;
60
70
  export declare function requestRemoteCompaction(endpoint: string, request: RemoteCompactionRequest, signal?: AbortSignal, opts?: {
61
71
  fetch?: FetchImpl;
72
+ timeoutMs?: number;
62
73
  }): Promise<RemoteCompactionResponse>;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-agent-core",
4
- "version": "15.12.3",
4
+ "version": "15.13.0",
5
5
  "description": "General-purpose agent with transport abstraction, state management, and attachment support",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -35,11 +35,11 @@
35
35
  "fmt": "biome format --write ."
36
36
  },
37
37
  "dependencies": {
38
- "@oh-my-pi/pi-ai": "15.12.3",
39
- "@oh-my-pi/pi-catalog": "15.12.3",
40
- "@oh-my-pi/pi-natives": "15.12.3",
41
- "@oh-my-pi/pi-utils": "15.12.3",
42
- "@oh-my-pi/snapcompact": "15.12.3",
38
+ "@oh-my-pi/pi-ai": "15.13.0",
39
+ "@oh-my-pi/pi-catalog": "15.13.0",
40
+ "@oh-my-pi/pi-natives": "15.13.0",
41
+ "@oh-my-pi/pi-utils": "15.13.0",
42
+ "@oh-my-pi/snapcompact": "15.13.0",
43
43
  "@opentelemetry/api": "^1.9.1"
44
44
  },
45
45
  "devDependencies": {
package/src/agent-loop.ts CHANGED
@@ -15,7 +15,7 @@ import {
15
15
  validateToolArguments,
16
16
  zodToWireSchema,
17
17
  } from "@oh-my-pi/pi-ai";
18
- import { sanitizeText } from "@oh-my-pi/pi-utils";
18
+ import { logger, sanitizeText } from "@oh-my-pi/pi-utils";
19
19
  import {
20
20
  createHarmonyAuditEvent,
21
21
  detectHarmonyLeakInAssistantMessage,
@@ -708,6 +708,7 @@ async function runLoopBody(
708
708
  });
709
709
  }
710
710
  stream.push({ type: "turn_end", message, toolResults });
711
+
711
712
  stream.push(buildAgentEndEvent(newMessages, telemetry, stepCounter.count));
712
713
  stream.end(newMessages);
713
714
  return;
@@ -917,6 +918,10 @@ async function streamAssistantResponse(
917
918
  ? AbortSignal.any([signal, harmonyAbortController.signal])
918
919
  : harmonyAbortController.signal
919
920
  : signal;
921
+ const repetitionAbortController = new AbortController();
922
+ const finalRequestSignal = requestSignal
923
+ ? AbortSignal.any([requestSignal, repetitionAbortController.signal])
924
+ : repetitionAbortController.signal;
920
925
  const effectiveTemperature =
921
926
  harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
922
927
  const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
@@ -984,7 +989,7 @@ async function streamAssistantResponse(
984
989
  reasoning: effectiveReasoning,
985
990
  disableReasoning: effectiveDisableReasoning,
986
991
  temperature: effectiveTemperature,
987
- signal: requestSignal,
992
+ signal: finalRequestSignal,
988
993
  onResponse: captureOnResponse,
989
994
  });
990
995
 
@@ -1013,6 +1018,56 @@ async function streamAssistantResponse(
1013
1018
  return aborted;
1014
1019
  };
1015
1020
 
1021
+ const finishRepetitionStream = async (
1022
+ kind: "text" | "thinking",
1023
+ pattern: string,
1024
+ count: number,
1025
+ ): Promise<AssistantMessage> => {
1026
+ repetitionAbortController.abort();
1027
+ try {
1028
+ const cleanup = responseIterator.return?.();
1029
+ if (cleanup) void cleanup.catch(() => {});
1030
+ } catch {
1031
+ // ignore
1032
+ }
1033
+ if (partialMessage) {
1034
+ truncateRepetition(partialMessage, kind, pattern);
1035
+ partialMessage.stopReason = "error";
1036
+ partialMessage.errorMessage = `Repetition loop detected: assistant repeated "${pattern.trim()}" ${count} times consecutively.`;
1037
+ }
1038
+ const finalMsg = snapshotAssistantMessage(
1039
+ partialMessage ?? {
1040
+ role: "assistant",
1041
+ content: [],
1042
+ api: config.model.api,
1043
+ provider: config.model.provider,
1044
+ model: config.model.id,
1045
+ usage: {
1046
+ input: 0,
1047
+ output: 0,
1048
+ cacheRead: 0,
1049
+ cacheWrite: 0,
1050
+ totalTokens: 0,
1051
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
1052
+ },
1053
+ stopReason: "error",
1054
+ errorMessage: `Repetition loop detected.`,
1055
+ timestamp: Date.now(),
1056
+ },
1057
+ );
1058
+ if (addedPartial) {
1059
+ context.messages[context.messages.length - 1] = finalMsg;
1060
+ } else {
1061
+ context.messages.push(finalMsg);
1062
+ }
1063
+ if (!addedPartial) {
1064
+ stream.push({ type: "message_start", message: snapshotAssistantMessage(finalMsg) });
1065
+ }
1066
+ stream.push({ type: "message_end", message: snapshotAssistantMessage(finalMsg) });
1067
+ await finishChat(finalMsg);
1068
+ return finalMsg;
1069
+ };
1070
+
1016
1071
  // Set up a single abort race: register the abort listener once for the whole
1017
1072
  // stream and reuse the same race promise for every iterator.next() instead of
1018
1073
  // allocating Promise.withResolvers and add/removeEventListener per event.
@@ -1029,6 +1084,14 @@ async function streamAssistantResponse(
1029
1084
  detachAbortListener = () => requestSignal.removeEventListener("abort", onAbort);
1030
1085
  }
1031
1086
 
1087
+ // Rolling tail of streamed text/thinking used for repetition-loop detection.
1088
+ // Bounded to REPETITION_WINDOW chars and reset when the active block kind
1089
+ // switches (text <-> thinking) so detection stays O(1) per delta and never
1090
+ // miscounts a repeated unit across a thinking/answer boundary.
1091
+ let repetitionTail = "";
1092
+ let repetitionKind: "text" | "thinking" | undefined;
1093
+ const isGeminiModel = config.model.provider.includes("google") || config.model.provider.includes("gemini");
1094
+
1032
1095
  try {
1033
1096
  while (true) {
1034
1097
  let next: IteratorResult<AssistantMessageEvent>;
@@ -1113,6 +1176,27 @@ async function streamAssistantResponse(
1113
1176
  assistantMessageEvent: snapshotAssistantMessageEvent(event),
1114
1177
  message: snapshotAssistantMessage(partialMessage),
1115
1178
  });
1179
+
1180
+ if (isGeminiModel && (event.type === "text_delta" || event.type === "thinking_delta")) {
1181
+ const kind = event.type === "text_delta" ? "text" : "thinking";
1182
+ if (repetitionKind !== kind) {
1183
+ repetitionKind = kind;
1184
+ repetitionTail = "";
1185
+ }
1186
+ repetitionTail += event.delta;
1187
+ if (repetitionTail.length > REPETITION_WINDOW) {
1188
+ repetitionTail = repetitionTail.slice(-REPETITION_WINDOW);
1189
+ }
1190
+ const repetition = detectRepetition(repetitionTail);
1191
+ if (repetition) {
1192
+ const [pattern, count] = repetition;
1193
+ logger.warn("Repetition loop detected during assistant stream, aborting.", {
1194
+ pattern,
1195
+ count,
1196
+ });
1197
+ return await finishRepetitionStream(kind, pattern, count);
1198
+ }
1199
+ }
1116
1200
  }
1117
1201
  break;
1118
1202
  }
@@ -1719,3 +1803,97 @@ function createSkippedToolResult(): AgentToolResult<any> {
1719
1803
  details: {},
1720
1804
  };
1721
1805
  }
1806
+
1807
/** Maximum number of trailing UTF-16 code units inspected for a repetition loop. */
const REPETITION_WINDOW = 250;
/** Minimum total length (unit length x copies) a repeated run must reach to count as a loop. */
const REPETITION_MIN_REPEATED_CHARS = 180;
/** Shortest repeated unit considered, in UTF-16 code units. */
const REPETITION_MIN_UNIT_LENGTH = 2;
/** Longest repeated unit considered, in UTF-16 code units. */
const REPETITION_MAX_UNIT_LENGTH = 60;
/** Minimum number of back-to-back copies of the unit required. */
const REPETITION_MIN_COPIES = 4;
/**
 * Matches a unit that carries real linguistic content (a letter or a pictographic
 * emoji). Hoisted so the loop below does not re-evaluate a regex literal per length.
 * No `g`/`y` flag, so `.test()` is stateless.
 */
const REPETITION_LINGUISTIC_CONTENT = /[\p{L}\p{Extended_Pictographic}]/u;

/**
 * Detect a runaway repetition at the very end of `text`.
 *
 * Only the last {@link REPETITION_WINDOW} code units are inspected. For every
 * candidate unit length (shortest first) the trailing unit is compared against
 * the text immediately before it, counting consecutive identical copies.
 *
 * @param text Rolling tail of streamed text or thinking.
 * @returns `[pattern, count]` for the shortest unit that repeats at least
 *   {@link REPETITION_MIN_COPIES} times and spans at least
 *   {@link REPETITION_MIN_REPEATED_CHARS} code units in total, or `null` when no
 *   such run ends the text.
 */
function detectRepetition(text: string): [pattern: string, count: number] | null {
	if (text.length < REPETITION_MIN_REPEATED_CHARS) return null;

	const searchSpace = text.length > REPETITION_WINDOW ? text.slice(-REPETITION_WINDOW) : text;

	for (let len = REPETITION_MIN_UNIT_LENGTH; len <= REPETITION_MAX_UNIT_LENGTH; len++) {
		// The window cannot hold the minimum number of copies of this unit, and
		// longer units need even more room, so there is nothing left to try.
		if (searchSpace.length < len * REPETITION_MIN_COPIES) break;

		const pattern = searchSpace.slice(-len);
		// Only treat a repeated unit as a pathological loop when it carries real
		// linguistic content. Runs made purely of digits, whitespace or punctuation
		// are legitimate in tabular / hex / numeric output (e.g. "00 00 00",
		// "0, 0, 0", "| -- | -- |") and must not trip.
		if (!REPETITION_LINGUISTIC_CONTENT.test(pattern)) continue;

		// The trailing unit is the first copy; walk backwards while the preceding
		// `len` code units are another copy. `startsWith` compares in place without
		// allocating a substring per step.
		let count = 1;
		let pos = searchSpace.length - len;
		while (pos >= len && searchSpace.startsWith(pattern, pos - len)) {
			count++;
			pos -= len;
		}

		if (count >= REPETITION_MIN_COPIES && len * count >= REPETITION_MIN_REPEATED_CHARS) {
			return [pattern, count];
		}
	}
	return null;
}
1844
+
1845
/**
 * Collapse a runaway repetition at the tail of `message` down to a single copy
 * of `pattern`, mutating the message's content blocks in place.
 *
 * A repetition loop streams into a single growing block (real providers) or a run
 * of same-kind blocks (some transports), always at the tail of the message. The
 * trailing contiguous run of `kind` blocks is treated as one string; repeated
 * trailing copies of `pattern` are removed until only one remains, and the removed
 * characters are trimmed from the blocks back-to-front, so the committed transcript
 * keeps a representative sample instead of the full runaway.
 *
 * For `kind === "thinking"`, every block in the trailing run has its
 * `thinkingSignature` and `itemId` cleared, whether or not its text was trimmed.
 *
 * @param message Assistant message to mutate.
 * @param kind Which block kind the repetition was detected in.
 * @param pattern The repeated unit. An empty pattern collapses nothing.
 */
function truncateRepetition(message: AssistantMessage, kind: "text" | "thinking", pattern: string): void {
	const matches = (block: AssistantContentBlock): boolean =>
		kind === "text" ? block.type === "text" : block.type === "thinking";
	const readBlock = (block: AssistantContentBlock): string =>
		block.type === "text" ? block.text : block.type === "thinking" ? block.thinking : "";
	const clearThinkingReplayAnchors = (block: AssistantContentBlock): void => {
		if (block.type !== "thinking") return;
		block.thinkingSignature = undefined;
		block.itemId = undefined;
	};
	const writeBlock = (block: AssistantContentBlock, value: string): void => {
		if (block.type === "text") {
			block.text = value;
		} else if (block.type === "thinking") {
			block.thinking = value;
			clearThinkingReplayAnchors(block);
		}
	};

	// Find where the trailing run of same-kind blocks starts, then take it in one
	// slice instead of unshifting block by block.
	let start = message.content.length;
	while (start > 0 && matches(message.content[start - 1])) start--;
	const trailing: AssistantContentBlock[] = message.content.slice(start);
	if (trailing.length === 0) return;
	if (kind === "thinking") {
		for (const block of trailing) clearThinkingReplayAnchors(block);
	}

	let joined = "";
	for (const block of trailing) joined += readBlock(block);

	// Drop one trailing copy at a time while the text still ends in two copies.
	// The `unit > 0` guard matters: an empty pattern always "matches" the empty
	// tail and never shortens `kept`, which would otherwise loop forever.
	const unit = pattern.length;
	let kept = joined;
	while (unit > 0 && kept.length >= unit * 2 && kept.endsWith(pattern + pattern)) {
		kept = kept.slice(0, kept.length - unit);
	}

	// Trim the removed characters off the blocks, last block first.
	let remainingToRemove = joined.length - kept.length;
	for (let i = trailing.length - 1; i >= 0 && remainingToRemove > 0; i--) {
		const block = trailing[i];
		const value = readBlock(block);
		if (value.length <= remainingToRemove) {
			remainingToRemove -= value.length;
			writeBlock(block, "");
		} else {
			writeBlock(block, value.slice(0, value.length - remainingToRemove));
			remainingToRemove = 0;
		}
	}
}
package/src/agent.ts CHANGED
@@ -23,6 +23,7 @@ import {
23
23
  type ToolResultMessage,
24
24
  } from "@oh-my-pi/pi-ai";
25
25
  import { getBundledModel } from "@oh-my-pi/pi-catalog/models";
26
+ import { logger } from "@oh-my-pi/pi-utils";
26
27
  import { abortReasonText, agentLoop, agentLoopContinue } from "./agent-loop";
27
28
  import type { AppendOnlyContextManager } from "./append-only-context";
28
29
  import type { HarmonyAuditEvent } from "./harmony-leak";
@@ -706,6 +707,11 @@ export class Agent {
706
707
  this.#state.messages = ms.slice();
707
708
  }
708
709
 
710
/**
 * Replace both pending queues wholesale. Each queue is set to a shallow copy of
 * the given array, so later mutation of the caller's arrays does not affect the
 * agent's queues.
 */
replaceQueues(steering: AgentMessage[], followUp: AgentMessage[]) {
	const [nextSteering, nextFollowUp] = [steering.slice(), followUp.slice()];
	this.#steeringQueue = nextSteering;
	this.#followUpQueue = nextFollowUp;
}
714
+
709
715
  appendMessage(m: AgentMessage) {
710
716
  this.#state.messages.push(m);
711
717
  }
@@ -751,6 +757,24 @@ export class Agent {
751
757
  return this.#steeringQueue.length > 0 || this.#followUpQueue.length > 0;
752
758
  }
753
759
 
760
+ /** Non-consuming view of the pending steering queue (insertion order, newest
761
+ * last). The session layer derives its queued-message display/count from
762
+ * this live view instead of a mirror, so the agent-core queue stays the
763
+ * single source of truth. */
764
+ peekSteeringQueue(): readonly AgentMessage[] {
765
+ return this.#steeringQueue;
766
+ }
767
+
768
+ /** Non-consuming view of the pending follow-up queue. See
769
+ * {@link peekSteeringQueue}. */
770
+ peekFollowUpQueue(): readonly AgentMessage[] {
771
+ return this.#followUpQueue;
772
+ }
773
+
774
/**
 * True only while the agent state is still marked as streaming and the current
 * abort controller's signal has already been aborted. False when there is no
 * abort controller.
 */
get isAborting(): boolean {
	const abortRequested = this.#abortController?.signal.aborted === true;
	return abortRequested && this.#state.isStreaming;
}
777
+
754
778
  #dequeueSteeringMessages(): AgentMessage[] {
755
779
  if (this.#steeringMode === "one-at-a-time") {
756
780
  if (this.#steeringQueue.length > 0) {
@@ -950,8 +974,13 @@ export class Agent {
950
974
  }
951
975
  : undefined;
952
976
 
953
- const getToolChoice = () =>
954
- this.#getToolChoice?.() ?? refreshToolChoiceForActiveTools(options?.toolChoice, this.#state.tools);
977
+ const getToolChoice = () => {
978
+ const queuedToolChoice = this.#getToolChoice?.();
979
+ if (queuedToolChoice !== undefined) {
980
+ return refreshToolChoiceForActiveTools(queuedToolChoice, this.#state.tools);
981
+ }
982
+ return refreshToolChoiceForActiveTools(options?.toolChoice, this.#state.tools);
983
+ };
955
984
 
956
985
  const config: AgentLoopConfig = {
957
986
  model,
@@ -1153,11 +1182,15 @@ export class Agent {
1153
1182
  const result = listener(e) as unknown;
1154
1183
  if (isPromise(result)) {
1155
1184
  result.catch(err => {
1156
- console.error("Agent listener rejected:", err instanceof Error ? err.message : err);
1185
+ logger.warn("Agent listener rejected", {
1186
+ error: err instanceof Error ? err.message : String(err),
1187
+ });
1157
1188
  });
1158
1189
  }
1159
1190
  } catch (err) {
1160
- console.error("Agent listener threw:", err instanceof Error ? err.message : err);
1191
+ logger.warn("Agent listener threw", {
1192
+ error: err instanceof Error ? err.message : String(err),
1193
+ });
1161
1194
  }
1162
1195
  }
1163
1196
  }
@@ -807,6 +807,7 @@ async function generateShortSummary(
807
807
  prompt: promptText,
808
808
  },
809
809
  signal,
810
+ { fetch: options?.fetch },
810
811
  );
811
812
  return remote.summary;
812
813
  }
@@ -1047,6 +1048,10 @@ export async function compact(
1047
1048
  );
1048
1049
  preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, remote);
1049
1050
  } catch (err) {
1051
+ // A user/session abort is a cancellation, not a remote failure —
1052
+ // swallowing it here would downgrade Esc into "fall back to local
1053
+ // summarization" and keep compaction running on an aborted signal.
1054
+ if (signal?.aborted) throw err;
1050
1055
  logger.warn("OpenAI remote compaction failed, falling back to local summarization", {
1051
1056
  error: err instanceof Error ? err.message : String(err),
1052
1057
  model: model.id,
@@ -1114,6 +1119,7 @@ export async function compact(
1114
1119
  // Same propagation as summaryOptions above — generateShortSummary
1115
1120
  // resolves its own reasoning via resolveCompactionEffort.
1116
1121
  thinkingLevel: options?.thinkingLevel,
1122
+ fetch: summaryOptions.fetch,
1117
1123
  },
1118
1124
  );
1119
1125
 
@@ -35,6 +35,23 @@ import { logger } from "@oh-my-pi/pi-utils";
35
35
 
36
36
  export const OPENAI_REMOTE_COMPACTION_PRESERVE_KEY = "openaiRemoteCompaction";
37
37
 
38
+ /**
39
+ * Hard ceiling on remote compaction HTTP requests. Unlike every provider
40
+ * stream (guarded by first-event/idle watchdogs in pi-ai), these are raw
41
+ * fetches awaiting one non-streamed JSON body — a connection silently dropped
42
+ * by a middlebox would otherwise hang the whole compaction pipeline forever
43
+ * (frozen "Auto context-full maintenance…" spinner, manual /compact queueing
44
+ * behind it). On timeout the caller falls back to local summarization.
45
+ */
46
+ export const REMOTE_COMPACTION_TIMEOUT_MS = 180_000;
47
+
48
/**
 * Race the caller's signal against a request timeout.
 *
 * @param signal Caller-supplied abort signal, if any.
 * @param timeoutMs Timeout in milliseconds. `timeoutMs <= 0` or `Infinity`
 *   disables the watchdog and returns `signal` unchanged. Fractional values are
 *   rounded up to the next whole millisecond.
 * @returns A signal that aborts when either the caller's signal aborts or the
 *   timeout elapses; the bare timeout signal when no caller signal is given; or
 *   `signal` itself when the watchdog is disabled.
 */
function withRequestTimeout(signal: AbortSignal | undefined, timeoutMs: number): AbortSignal | undefined {
	// `AbortSignal.timeout` rejects Infinity, so treat "no upper bound" the same
	// as an explicit opt-out instead of throwing while building the fetch init.
	if (timeoutMs <= 0 || timeoutMs === Number.POSITIVE_INFINITY) return signal;
	// Node requires an integer delay; round up so a sub-millisecond value still
	// yields a real (1 ms) timeout rather than an error.
	const timeout = AbortSignal.timeout(Math.ceil(timeoutMs));
	return signal ? AbortSignal.any([signal, timeout]) : timeout;
}
54
+
38
55
  export type OpenAiRemoteCompactionItem = {
39
56
  type: "compaction" | "compaction_summary";
40
57
  encrypted_content?: string;
@@ -147,14 +164,6 @@ export function withOpenAiRemoteCompactionPreserveData(
147
164
  // Input/output filtering for OpenAI compact endpoint
148
165
  // ============================================================================
149
166
 
150
- function estimateOpenAiCompactInputTokens(input: Array<Record<string, unknown>>, instructions: string): number {
151
- let chars = instructions.length;
152
- for (const item of input) {
153
- chars += JSON.stringify(item).length;
154
- }
155
- return Math.ceil(chars / 4);
156
- }
157
-
158
167
  function shouldTrimOpenAiCompactInputItem(item: Record<string, unknown>): boolean {
159
168
  return item.type === "function_call_output" || (item.type === "message" && item.role === "developer");
160
169
  }
@@ -171,16 +180,29 @@ function trimOpenAiCompactInput(
171
180
  instructions: string,
172
181
  ): Array<Record<string, unknown>> {
173
182
  const trimmed = [...input];
174
- while (trimmed.length > 0 && estimateOpenAiCompactInputTokens(trimmed, instructions) > contextWindow) {
183
+ // Per-item serialized sizes are cached and decremented on removal.
184
+ // Re-stringifying the whole input per popped item was O(N²) in total chars
185
+ // — hundreds of MB of stringify churn on a 200k-token codex history,
186
+ // blocking the event loop for seconds (same class as the addOpenAiCallIds
187
+ // fix above).
188
+ const sizes = trimmed.map(item => JSON.stringify(item).length);
189
+ let chars = instructions.length;
190
+ for (const size of sizes) chars += size;
191
+ const removeAt = (index: number): void => {
192
+ chars -= sizes[index] ?? 0;
193
+ trimmed.splice(index, 1);
194
+ sizes.splice(index, 1);
195
+ };
196
+ while (trimmed.length > 0 && Math.ceil(chars / 4) > contextWindow) {
175
197
  const last = trimmed[trimmed.length - 1];
176
198
  if (last?.type === "function_call_output" || last?.type === "custom_tool_call_output") {
177
199
  const callId = typeof last.call_id === "string" ? last.call_id : undefined;
178
200
  const callType = last.type === "custom_tool_call_output" ? "custom_tool_call" : "function_call";
179
- trimmed.pop();
201
+ removeAt(trimmed.length - 1);
180
202
  if (callId) {
181
203
  const matchingCallIndex = trimmed.findLastIndex(item => item.type === callType && item.call_id === callId);
182
204
  if (matchingCallIndex >= 0) {
183
- trimmed.splice(matchingCallIndex, 1);
205
+ removeAt(matchingCallIndex);
184
206
  }
185
207
  }
186
208
  continue;
@@ -188,7 +210,7 @@ function trimOpenAiCompactInput(
188
210
  if (!last || !shouldTrimOpenAiCompactInputItem(last)) {
189
211
  break;
190
212
  }
191
- trimmed.pop();
213
+ removeAt(trimmed.length - 1);
192
214
  }
193
215
  return trimmed;
194
216
  }
@@ -429,12 +451,12 @@ export async function requestOpenAiRemoteCompaction(
429
451
  compactInput: Array<Record<string, unknown>>,
430
452
  instructions: string,
431
453
  signal?: AbortSignal,
432
- opts?: { fetch?: FetchImpl },
454
+ opts?: { fetch?: FetchImpl; timeoutMs?: number },
433
455
  ): Promise<OpenAiRemoteCompactionResponse> {
434
456
  const endpoint = resolveOpenAiCompactEndpoint(model);
435
457
  const request: OpenAiRemoteCompactionRequest = {
436
458
  model: model.id,
437
- input: trimOpenAiCompactInput(compactInput, model.contextWindow, instructions),
459
+ input: trimOpenAiCompactInput(compactInput, model.contextWindow ?? Number.POSITIVE_INFINITY, instructions),
438
460
  instructions,
439
461
  };
440
462
  const headers: Record<string, string> = {
@@ -457,7 +479,7 @@ export async function requestOpenAiRemoteCompaction(
457
479
  method: "POST",
458
480
  headers,
459
481
  body: JSON.stringify(request),
460
- signal,
482
+ signal: withRequestTimeout(signal, opts?.timeoutMs ?? REMOTE_COMPACTION_TIMEOUT_MS),
461
483
  });
462
484
 
463
485
  if (!response.ok) {
@@ -509,13 +531,13 @@ export async function requestRemoteCompaction(
509
531
  endpoint: string,
510
532
  request: RemoteCompactionRequest,
511
533
  signal?: AbortSignal,
512
- opts?: { fetch?: FetchImpl },
534
+ opts?: { fetch?: FetchImpl; timeoutMs?: number },
513
535
  ): Promise<RemoteCompactionResponse> {
514
536
  const response = await (opts?.fetch ?? fetch)(endpoint, {
515
537
  method: "POST",
516
538
  headers: { "content-type": "application/json" },
517
539
  body: JSON.stringify(request),
518
- signal,
540
+ signal: withRequestTimeout(signal, opts?.timeoutMs ?? REMOTE_COMPACTION_TIMEOUT_MS),
519
541
  });
520
542
 
521
543
  if (!response.ok) {