@prometheus-ai/agent-core 0.5.4 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,20 +12,21 @@
12
12
  * with `{ summary, shortSummary? }`.
13
13
  */
14
14
 
15
- import {
16
- CODEX_BASE_URL,
17
- getCodexAccountId,
18
- OPENAI_HEADER_VALUES,
19
- OPENAI_HEADERS,
20
- } from "@prometheus-ai/ai/providers/openai-codex/constants";
15
+ import { ProviderHttpError } from "@prometheus-ai/ai/errors";
21
16
  import { parseTextSignature } from "@prometheus-ai/ai/providers/openai-responses-shared";
22
17
  import { transformMessages } from "@prometheus-ai/ai/providers/transform-messages";
23
- import type { AssistantMessage, Message, Model } from "@prometheus-ai/ai/types";
18
+ import type { AssistantMessage, FetchImpl, Message, Model } from "@prometheus-ai/ai/types";
24
19
  import {
25
20
  getOpenAIResponsesHistoryItems,
26
21
  getOpenAIResponsesHistoryPayload,
27
22
  normalizeResponsesToolCallId,
28
23
  } from "@prometheus-ai/ai/utils";
24
+ import {
25
+ CODEX_BASE_URL,
26
+ getCodexAccountId,
27
+ OPENAI_HEADER_VALUES,
28
+ OPENAI_HEADERS,
29
+ } from "@prometheus-ai/catalog/wire/codex";
29
30
  import { logger } from "@prometheus-ai/utils";
30
31
 
31
32
  // ============================================================================
@@ -34,6 +35,23 @@ import { logger } from "@prometheus-ai/utils";
34
35
 
35
36
  export const OPENAI_REMOTE_COMPACTION_PRESERVE_KEY = "openaiRemoteCompaction";
36
37
 
38
+ /**
39
+ * Hard ceiling on remote compaction HTTP requests. Unlike every provider
40
+ * stream (guarded by first-event/idle watchdogs in pi-ai), these are raw
41
+ * fetches awaiting one non-streamed JSON body — a connection silently dropped
42
+ * by a middlebox would otherwise hang the whole compaction pipeline forever
43
+ * (frozen "Auto context-full maintenance…" spinner, manual /compact queueing
44
+ * behind it). On timeout the caller falls back to local summarization.
45
+ */
46
+ export const REMOTE_COMPACTION_TIMEOUT_MS = 180_000;
47
+
48
+ /** Race the caller's signal against the request timeout; `timeoutMs <= 0` disables the watchdog. */
49
+ function withRequestTimeout(signal: AbortSignal | undefined, timeoutMs: number): AbortSignal | undefined {
50
+ if (timeoutMs <= 0) return signal;
51
+ const timeout = AbortSignal.timeout(timeoutMs);
52
+ return signal ? AbortSignal.any([signal, timeout]) : timeout;
53
+ }
54
+
37
55
  export type OpenAiRemoteCompactionItem = {
38
56
  type: "compaction" | "compaction_summary";
39
57
  encrypted_content?: string;
@@ -146,50 +164,14 @@ export function withOpenAiRemoteCompactionPreserveData(
146
164
  // Input/output filtering for OpenAI compact endpoint
147
165
  // ============================================================================
148
166
 
149
- function estimateOpenAiCompactInputTokens(input: Array<Record<string, unknown>>, instructions: string): number {
150
- let chars = instructions.length;
151
- for (const item of input) {
152
- chars += JSON.stringify(item).length;
153
- }
154
- return Math.ceil(chars / 4);
155
- }
156
-
157
167
  function shouldTrimOpenAiCompactInputItem(item: Record<string, unknown>): boolean {
158
168
  return item.type === "function_call_output" || (item.type === "message" && item.role === "developer");
159
169
  }
160
170
 
161
- function shouldKeepOpenAiCompactOutputUserMessage(item: Record<string, unknown>): boolean {
162
- if (item.role !== "user") return false;
163
- const content = item.content;
164
- if (!Array.isArray(content) || content.length === 0) return false;
165
- const contextualFragmentPatterns = [
166
- [/^<system-reminder>[\s\S]*<\/system-reminder>$/i, /<system-reminder>/i],
167
- [/^#\s*AGENTS\.md instructions for\b[\s\S]*<\/INSTRUCTIONS>$/i, /# AGENTS.md instructions/],
168
- [/^<environment-context>[\s\S]*<\/environment-context>$/i, /<environment-context>/i],
169
- [/^<skill>[\s\S]*<\/skill>$/i, /<skill>/i],
170
- [/^<user-shell-command>[\s\S]*<\/user-shell-command>$/i, /<user-shell-command>/i],
171
- [/^<turn-aborted>[\s\S]*<\/turn-aborted>$/i, /<turn-aborted>/i],
172
- [/^<subagent-notification>[\s\S]*<\/subagent-notification>$/i, /<subagent-notification>/i],
173
- ] as const;
174
- return content.every(part => {
175
- if (!part || typeof part !== "object") return false;
176
- const candidate = part as { type?: unknown; text?: unknown };
177
- if (candidate.type === "input_image") return true;
178
- if (candidate.type !== "input_text" || typeof candidate.text !== "string") return false;
179
- const trimmed = candidate.text.trim();
180
- if (trimmed.length === 0) return false;
181
- return !contextualFragmentPatterns.some(([strictPattern, markerPattern]) => {
182
- return strictPattern.test(trimmed) || markerPattern.test(trimmed);
183
- });
184
- });
185
- }
186
-
187
171
  function shouldKeepOpenAiCompactOutputItem(item: Record<string, unknown>): boolean {
188
172
  if (item.type === "compaction" || item.type === "compaction_summary") return true;
189
173
  if (item.type !== "message") return false;
190
- if (item.role === "developer") return false;
191
- if (item.role === "assistant") return true;
192
- return shouldKeepOpenAiCompactOutputUserMessage(item);
174
+ return item.role === "assistant" || item.role === "user";
193
175
  }
194
176
 
195
177
  function trimOpenAiCompactInput(
@@ -198,16 +180,29 @@ function trimOpenAiCompactInput(
198
180
  instructions: string,
199
181
  ): Array<Record<string, unknown>> {
200
182
  const trimmed = [...input];
201
- while (trimmed.length > 0 && estimateOpenAiCompactInputTokens(trimmed, instructions) > contextWindow) {
183
+ // Per-item serialized sizes are cached and decremented on removal.
184
+ // Re-stringifying the whole input per popped item was O(N²) in total chars
185
+ // — hundreds of MB of stringify churn on a 200k-token codex history,
186
+ // blocking the event loop for seconds (same class as the addOpenAiCallIds
187
+ // fix below).
188
+ const sizes = trimmed.map(item => JSON.stringify(item).length);
189
+ let chars = instructions.length;
190
+ for (const size of sizes) chars += size;
191
+ const removeAt = (index: number): void => {
192
+ chars -= sizes[index] ?? 0;
193
+ trimmed.splice(index, 1);
194
+ sizes.splice(index, 1);
195
+ };
196
+ while (trimmed.length > 0 && Math.ceil(chars / 4) > contextWindow) {
202
197
  const last = trimmed[trimmed.length - 1];
203
198
  if (last?.type === "function_call_output" || last?.type === "custom_tool_call_output") {
204
199
  const callId = typeof last.call_id === "string" ? last.call_id : undefined;
205
200
  const callType = last.type === "custom_tool_call_output" ? "custom_tool_call" : "function_call";
206
- trimmed.pop();
201
+ removeAt(trimmed.length - 1);
207
202
  if (callId) {
208
203
  const matchingCallIndex = trimmed.findLastIndex(item => item.type === callType && item.call_id === callId);
209
204
  if (matchingCallIndex >= 0) {
210
- trimmed.splice(matchingCallIndex, 1);
205
+ removeAt(matchingCallIndex);
211
206
  }
212
207
  }
213
208
  continue;
@@ -215,29 +210,32 @@ function trimOpenAiCompactInput(
215
210
  if (!last || !shouldTrimOpenAiCompactInputItem(last)) {
216
211
  break;
217
212
  }
218
- trimmed.pop();
213
+ removeAt(trimmed.length - 1);
219
214
  }
220
215
  return trimmed;
221
216
  }
222
217
 
223
- function collectKnownOpenAiCallIds(items: Array<Record<string, unknown>>): Set<string> {
224
- const knownCallIds = new Set<string>();
218
+ // Register every tool-call id in `items` (and the subset using the custom-tool
219
+ // wire shape) into the running sets. The history builder maintains both sets
220
+ // incrementally as native history is appended, so this only scans the
221
+ // newly-added items (or, after a full-snapshot replace, the fresh input) rather
222
+ // than re-scanning the whole growing history per message — the latter was
223
+ // O(N²) and blocked the event loop for seconds while compacting large codex
224
+ // contexts (frozen spinner until the next forced render).
225
+ function addOpenAiCallIds(
226
+ items: Array<Record<string, unknown>>,
227
+ knownCallIds: Set<string>,
228
+ customCallIds: Set<string>,
229
+ ): void {
225
230
  for (const item of items) {
226
- if ((item.type === "function_call" || item.type === "custom_tool_call") && typeof item.call_id === "string") {
231
+ if (typeof item.call_id !== "string") continue;
232
+ if (item.type === "function_call") {
233
+ knownCallIds.add(item.call_id);
234
+ } else if (item.type === "custom_tool_call") {
227
235
  knownCallIds.add(item.call_id);
228
- }
229
- }
230
- return knownCallIds;
231
- }
232
-
233
- function collectCustomOpenAiCallIds(items: Array<Record<string, unknown>>): Set<string> {
234
- const customCallIds = new Set<string>();
235
- for (const item of items) {
236
- if (item.type === "custom_tool_call" && typeof item.call_id === "string") {
237
236
  customCallIds.add(item.call_id);
238
237
  }
239
238
  }
240
- return customCallIds;
241
239
  }
242
240
 
243
241
  // ============================================================================
@@ -265,16 +263,16 @@ export function buildOpenAiNativeHistory(
265
263
  const transformedMessages = transformMessages(messages, model, id => normalizeOpenAiCompactionToolCallId(id));
266
264
 
267
265
  let msgIndex = 0;
268
- let knownCallIds = collectKnownOpenAiCallIds(input);
269
- let customCallIds = collectCustomOpenAiCallIds(input);
266
+ const knownCallIds = new Set<string>();
267
+ const customCallIds = new Set<string>();
268
+ addOpenAiCallIds(input, knownCallIds, customCallIds);
270
269
  for (const message of transformedMessages) {
271
270
  if (message.role === "user" || message.role === "developer") {
272
271
  const providerPayload = (message as { providerPayload?: AssistantMessage["providerPayload"] }).providerPayload;
273
272
  const historyItems = getOpenAIResponsesHistoryItems(providerPayload, model.provider);
274
273
  if (historyItems) {
275
274
  input.push(...historyItems);
276
- knownCallIds = collectKnownOpenAiCallIds(input);
277
- customCallIds = collectCustomOpenAiCallIds(input);
275
+ addOpenAiCallIds(historyItems, knownCallIds, customCallIds);
278
276
  msgIndex++;
279
277
  continue;
280
278
  }
@@ -317,11 +315,13 @@ export function buildOpenAiNativeHistory(
317
315
  if (providerPayload) {
318
316
  if (providerPayload.dt) {
319
317
  input.push(...providerPayload.items);
318
+ addOpenAiCallIds(providerPayload.items, knownCallIds, customCallIds);
320
319
  } else {
321
320
  input.splice(0, input.length, ...providerPayload.items);
321
+ knownCallIds.clear();
322
+ customCallIds.clear();
323
+ addOpenAiCallIds(input, knownCallIds, customCallIds);
322
324
  }
323
- knownCallIds = collectKnownOpenAiCallIds(input);
324
- customCallIds = collectCustomOpenAiCallIds(input);
325
325
  msgIndex++;
326
326
  continue;
327
327
  }
@@ -451,11 +451,12 @@ export async function requestOpenAiRemoteCompaction(
451
451
  compactInput: Array<Record<string, unknown>>,
452
452
  instructions: string,
453
453
  signal?: AbortSignal,
454
+ opts?: { fetch?: FetchImpl; timeoutMs?: number },
454
455
  ): Promise<OpenAiRemoteCompactionResponse> {
455
456
  const endpoint = resolveOpenAiCompactEndpoint(model);
456
457
  const request: OpenAiRemoteCompactionRequest = {
457
458
  model: model.id,
458
- input: trimOpenAiCompactInput(compactInput, model.contextWindow, instructions),
459
+ input: trimOpenAiCompactInput(compactInput, model.contextWindow ?? Number.POSITIVE_INFINITY, instructions),
459
460
  instructions,
460
461
  };
461
462
  const headers: Record<string, string> = {
@@ -474,11 +475,11 @@ export async function requestOpenAiRemoteCompaction(
474
475
  headers[OPENAI_HEADERS.ORIGINATOR] = OPENAI_HEADER_VALUES.ORIGINATOR_CODEX;
475
476
  }
476
477
 
477
- const response = await fetch(endpoint, {
478
+ const response = await (opts?.fetch ?? fetch)(endpoint, {
478
479
  method: "POST",
479
480
  headers,
480
481
  body: JSON.stringify(request),
481
- signal,
482
+ signal: withRequestTimeout(signal, opts?.timeoutMs ?? REMOTE_COMPACTION_TIMEOUT_MS),
482
483
  });
483
484
 
484
485
  if (!response.ok) {
@@ -489,7 +490,13 @@ export async function requestOpenAiRemoteCompaction(
489
490
  statusText: response.statusText,
490
491
  errorText,
491
492
  });
492
- throw new Error(`Remote compaction failed (${response.status} ${response.statusText})`);
493
+ throw new ProviderHttpError(
494
+ `Remote compaction failed (${response.status} ${response.statusText})`,
495
+ response.status,
496
+ {
497
+ headers: response.headers,
498
+ },
499
+ );
493
500
  }
494
501
 
495
502
  const data = (await response.json()) as { output?: unknown[] } | undefined;
@@ -524,12 +531,13 @@ export async function requestRemoteCompaction(
524
531
  endpoint: string,
525
532
  request: RemoteCompactionRequest,
526
533
  signal?: AbortSignal,
534
+ opts?: { fetch?: FetchImpl; timeoutMs?: number },
527
535
  ): Promise<RemoteCompactionResponse> {
528
- const response = await fetch(endpoint, {
536
+ const response = await (opts?.fetch ?? fetch)(endpoint, {
529
537
  method: "POST",
530
538
  headers: { "content-type": "application/json" },
531
539
  body: JSON.stringify(request),
532
- signal,
540
+ signal: withRequestTimeout(signal, opts?.timeoutMs ?? REMOTE_COMPACTION_TIMEOUT_MS),
533
541
  });
534
542
 
535
543
  if (!response.ok) {
@@ -540,7 +548,13 @@ export async function requestRemoteCompaction(
540
548
  statusText: response.statusText,
541
549
  errorText,
542
550
  });
543
- throw new Error(`Remote compaction failed (${response.status} ${response.statusText})`);
551
+ throw new ProviderHttpError(
552
+ `Remote compaction failed (${response.status} ${response.statusText})`,
553
+ response.status,
554
+ {
555
+ headers: response.headers,
556
+ },
557
+ );
544
558
  }
545
559
 
546
560
  const data = (await response.json()) as RemoteCompactionResponse | undefined;
@@ -4,7 +4,7 @@ You MUST use EXACT format:
4
4
 
5
5
  ## Goal
6
6
 
7
- [What user trying to accomplish in this branch?]
7
+ [What is the user trying to accomplish in this branch?]
8
8
 
9
9
  ## Constraints & Preferences
10
10
  - [Constraints, preferences, requirements mentioned]
@@ -1,4 +1,4 @@
1
- Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. You MUST use this to build on the work that has already been done and NEVER duplicate work. Here is the summary produced by the other language model; you MUST use the information in this summary to assist with your own analysis:
1
+ Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that model used. You MUST build on the work already done and NEVER duplicate it. Here is that summary:
2
2
 
3
3
  <summary>
4
4
  {{summary}}
@@ -1,6 +1,6 @@
1
- You MUST summarize the conversation above into a structured context checkpoint handoff summary for another LLM to resume task.
1
+ You MUST summarize the conversation above into a structured handoff summary for another LLM to resume the task.
2
2
 
3
- IMPORTANT: If conversation ends with unanswered question to user or imperative/request awaiting user response (e.g., "Please run command and paste output"), you MUST preserve that exact question/request.
3
+ IMPORTANT: If the conversation ends with an unanswered question or a request awaiting user response (e.g., "Please run command and paste output"), you MUST preserve that exact question/request.
4
4
 
5
5
  You MUST use this format (sections can be omitted if not applicable):
6
6
 
@@ -1,13 +1,13 @@
1
- You MUST incorporate new messages above into the existing handoff summary in <previous-summary> tags, used by another LLM to resume task.
1
+ You MUST incorporate the new messages above into the existing handoff summary in <previous-summary> tags, used by another LLM to resume the task.
2
2
  RULES:
3
- - MUST preserve all information from previous summary
3
+ - MUST preserve all information from the previous summary
4
4
  - MUST add new progress, decisions, and context from new messages
5
5
  - MUST update Progress: move items from "In Progress" to "Done" when completed
6
6
  - MUST update "Next Steps" based on what was accomplished
7
7
  - MUST preserve exact file paths, function names, and error messages
8
8
  - You MAY remove anything no longer relevant
9
9
 
10
- IMPORTANT: If new messages end with unanswered question or request to user, you MUST add it to Critical Context (replacing any previous pending question if answered).
10
+ IMPORTANT: If the new messages end with an unanswered question or request to the user, you MUST add it to Critical Context (replacing any previous pending question if answered).
11
11
 
12
12
  You MUST use this format (omit sections if not applicable):
13
13
 
@@ -1,10 +1,5 @@
1
- {{#if readFiles.length}}
2
- {{#xml "read-files"}}
3
- {{join readFiles "\n"}}
4
- {{/xml}}
5
- {{/if}}
6
- {{#if modifiedFiles.length}}
7
- {{#xml "modified-files"}}
8
- {{join modifiedFiles "\n"}}
1
+ {{#if files}}
2
+ {{#xml "files"}}
3
+ {{files}}
9
4
  {{/xml}}
10
5
  {{/if}}
@@ -1,3 +1,3 @@
1
1
  Summarize conversations between users and AI coding assistants. Produce structured summaries in the exact specified format.
2
2
 
3
- Do NOT continue the conversation. Do NOT respond to questions in the conversation. Output ONLY the structured summary.
3
+ NEVER continue the conversation. NEVER respond to questions in it. Output ONLY the structured summary.
@@ -3,7 +3,7 @@
3
3
  */
4
4
 
5
5
  import type { ToolResultMessage } from "@prometheus-ai/ai";
6
- import type { AgentMessage } from "../types";
6
+ import type { AgentMessage, AgentToolCall } from "../types";
7
7
  import { estimateTokens } from "./compaction";
8
8
  import type { SessionEntry, SessionMessageEntry } from "./entries";
9
9
  import {
@@ -12,6 +12,7 @@ import {
12
12
  isSkillReadToolResult,
13
13
  type ProtectedToolMatcher,
14
14
  } from "./tool-protection";
15
+ import { splitReadSelector } from "./utils";
15
16
 
16
17
  export interface PruneConfig {
17
18
  /** Keep the most recent tool output tokens intact. */
@@ -20,12 +21,22 @@ export interface PruneConfig {
20
21
  minimumSavings: number;
21
22
  /** Tool-result protection matchers. String entries protect every result from that tool; predicates may inspect the paired tool call. */
22
23
  protectedTools: ProtectedToolMatcher[];
24
+ /**
25
+ * Optional supersede key function (see {@link SupersedePruneConfig.supersedeKey}).
26
+ * When provided, superseded tool results are pruned first — even inside the
27
+ * `protectTokens` window — before age-based victims. Absent, behavior is
28
+ * unchanged.
29
+ */
30
+ supersedeKey?: SupersedeKeyFn;
31
+ /** Useless-flagged results bypass the protect window (see {@link USELESS_NOTICE}). Default true. */
32
+ pruneUseless?: boolean;
23
33
  }
24
34
 
25
35
  export const DEFAULT_PRUNE_CONFIG: PruneConfig = {
26
36
  protectTokens: 40_000,
27
37
  minimumSavings: 20_000,
28
38
  protectedTools: ["skill", isSkillReadToolResult],
39
+ pruneUseless: true,
29
40
  };
30
41
 
31
42
  export interface PruneResult {
@@ -33,6 +44,39 @@ export interface PruneResult {
33
44
  tokensSaved: number;
34
45
  }
35
46
 
47
+ /** Exact placeholder written over a superseded tool result. */
48
+ export const SUPERSEDED_NOTICE = "[Superseded by a newer read of this file]";
49
+
50
+ /** Exact placeholder written over an elided useless tool result. */
51
+ export const USELESS_NOTICE = "[Uneventful result elided]";
52
+
53
+ /**
54
+ * Maps a tool call to a supersede key. Results sharing a key form a group in
55
+ * which every result except the newest is a supersede candidate. A key `K`
56
+ * additionally supersedes keys with prefix `K + "\u0000"` (selector-free read
57
+ * supersedes selector-carrying reads of the same base path). Return
58
+ * `undefined` to exempt a call from supersede grouping.
59
+ */
60
+ export type SupersedeKeyFn = (toolName: string, args: Record<string, unknown>) => string | undefined;
61
+
62
+ export interface SupersedePruneConfig {
63
+ /** Supersede key function; results sharing a key supersede older ones. */
64
+ supersedeKey?: SupersedeKeyFn;
65
+ /** Also prune results flagged useless by their tool. Default false. */
66
+ pruneUseless?: boolean;
67
+ /** Prune a candidate now when all messages after it total at most this many estimated tokens. Default 8 000. */
68
+ suffixTokenLimit?: number;
69
+ /** Prune all candidates when the last message is at least this old (prompt cache is cold anyway). Default 30 min. */
70
+ idleFlushMs?: number;
71
+ /** Clock override for tests. */
72
+ now?: number;
73
+ /** Tool-result protection matchers (same contract as {@link PruneConfig.protectedTools}). */
74
+ protectedTools: ProtectedToolMatcher[];
75
+ }
76
+
77
+ const DEFAULT_SUFFIX_TOKEN_LIMIT = 8_000;
78
+ const DEFAULT_IDLE_FLUSH_MS = 30 * 60_000;
79
+
36
80
  function createPrunedNotice(tokens: number): string {
37
81
  return `[Output truncated - ${tokens} tokens]`;
38
82
  }
@@ -44,18 +88,169 @@ function getToolResultMessage(entry: SessionEntry): ToolResultMessage | undefine
44
88
  return message as ToolResultMessage;
45
89
  }
46
90
 
47
- function estimatePrunedSavings(tokens: number): number {
48
- const noticeTokens = Math.ceil(createPrunedNotice(tokens).length / 4);
91
+ function estimatePrunedSavings(tokens: number, notice: string): number {
92
+ const noticeTokens = Math.ceil(notice.length / 4);
49
93
  return Math.max(0, tokens - noticeTokens);
50
94
  }
51
95
 
96
+ interface SupersedeCandidate {
97
+ entry: SessionMessageEntry;
98
+ message: ToolResultMessage;
99
+ /** Index of the entry within the `entries` array. */
100
+ index: number;
101
+ tokens: number;
102
+ /** Placeholder text written over the blanked result. */
103
+ notice: string;
104
+ }
105
+
106
+ /**
107
+ * Collect superseded tool results: for every unpruned, unprotected tool result
108
+ * whose paired call resolves a supersede key, a LATER result with the same key
109
+ * — or with a key that is the `"\u0000"`-prefix parent of this one — marks it
110
+ * superseded. Returned in message order.
111
+ */
112
+ function collectSupersededResults(
113
+ entries: readonly SessionEntry[],
114
+ toolCallsById: ReadonlyMap<string, AgentToolCall>,
115
+ supersedeKey: SupersedeKeyFn,
116
+ protectedTools: readonly ProtectedToolMatcher[],
117
+ ): SupersedeCandidate[] {
118
+ const candidates: SupersedeCandidate[] = [];
119
+ const seenKeys = new Set<string>();
120
+ for (let i = entries.length - 1; i >= 0; i--) {
121
+ const entry = entries[i];
122
+ const message = getToolResultMessage(entry);
123
+ if (!message || message.prunedAt !== undefined) continue;
124
+ const toolCall = toolCallsById.get(message.toolCallId);
125
+ if (!toolCall) continue;
126
+ if (isProtectedToolResult(message, toolCall, protectedTools)) continue;
127
+ const key = supersedeKey(toolCall.name, toolCall.arguments as Record<string, unknown>);
128
+ if (key === undefined) continue;
129
+ const separator = key.indexOf("\u0000");
130
+ const superseded = seenKeys.has(key) || (separator >= 0 && seenKeys.has(key.slice(0, separator)));
131
+ seenKeys.add(key);
132
+ if (!superseded) continue;
133
+ candidates.push({
134
+ entry: entry as SessionMessageEntry,
135
+ message,
136
+ index: i,
137
+ tokens: estimateTokens(message as AgentMessage),
138
+ notice: SUPERSEDED_NOTICE,
139
+ });
140
+ }
141
+ return candidates.reverse();
142
+ }
143
+
144
+ /**
145
+ * Collect tool results that their tool flagged as contextually useless (zero matches,
146
+ * elapsed wait): unpruned, non-error, unprotected, not in `exclude`, and large
147
+ * enough that blanking to {@link USELESS_NOTICE} actually saves tokens.
148
+ * Returned in message order.
149
+ */
150
+ function collectUselessResults(
151
+ entries: readonly SessionEntry[],
152
+ toolCallsById: ReadonlyMap<string, AgentToolCall>,
153
+ protectedTools: readonly ProtectedToolMatcher[],
154
+ exclude: ReadonlySet<ToolResultMessage>,
155
+ ): SupersedeCandidate[] {
156
+ const candidates: SupersedeCandidate[] = [];
157
+ for (let i = 0; i < entries.length; i++) {
158
+ const entry = entries[i];
159
+ const message = getToolResultMessage(entry);
160
+ if (message?.useless !== true || message.prunedAt !== undefined || message.isError === true) continue;
161
+ if (exclude.has(message)) continue;
162
+ if (isProtectedToolResult(message, toolCallsById.get(message.toolCallId), protectedTools)) continue;
163
+ const tokens = estimateTokens(message as AgentMessage);
164
+ if (estimatePrunedSavings(tokens, USELESS_NOTICE) <= 0) continue;
165
+ candidates.push({ entry: entry as SessionMessageEntry, message, index: i, tokens, notice: USELESS_NOTICE });
166
+ }
167
+ return candidates;
168
+ }
169
+
170
+ /**
171
+ * Prune superseded tool results (e.g. stale `read` outputs replaced by a newer
172
+ * read of the same file) and, when `pruneUseless` is set, results their tool
173
+ * flagged contextually useless. Cheap, incremental, and prompt-cache-aware: a
174
+ * candidate is pruned now only when the suffix after it is small (tail case —
175
+ * the read→edit→read loop) or when the context has been idle long enough that
176
+ * the provider cache is cold anyway (then ALL candidates flush).
177
+ */
178
+ export function pruneSupersededToolResults(entries: SessionEntry[], config: SupersedePruneConfig): PruneResult {
179
+ const toolCallsById = collectToolCallsById(entries);
180
+ const candidates = config.supersedeKey
181
+ ? collectSupersededResults(entries, toolCallsById, config.supersedeKey, config.protectedTools)
182
+ : [];
183
+ if (config.pruneUseless) {
184
+ const exclude = new Set(candidates.map(candidate => candidate.message));
185
+ candidates.push(...collectUselessResults(entries, toolCallsById, config.protectedTools, exclude));
186
+ candidates.sort((a, b) => a.index - b.index);
187
+ }
188
+ if (candidates.length === 0) return { prunedCount: 0, tokensSaved: 0 };
189
+
190
+ const now = config.now ?? Date.now();
191
+ let lastMessageTimestamp: number | undefined;
192
+ for (let i = entries.length - 1; i >= 0; i--) {
193
+ const entry = entries[i];
194
+ if (entry.type !== "message") continue;
195
+ const timestamp = (entry.message as AgentMessage).timestamp;
196
+ if (typeof timestamp === "number") lastMessageTimestamp = timestamp;
197
+ break;
198
+ }
199
+ const idle =
200
+ lastMessageTimestamp !== undefined && now - lastMessageTimestamp >= (config.idleFlushMs ?? DEFAULT_IDLE_FLUSH_MS);
201
+
202
+ let toPrune: SupersedeCandidate[];
203
+ if (idle) {
204
+ toPrune = candidates;
205
+ } else {
206
+ const suffixTokenLimit = config.suffixTokenLimit ?? DEFAULT_SUFFIX_TOKEN_LIMIT;
207
+ // suffixTokens[i] = estimated tokens of all messages strictly after entry i.
208
+ const suffixTokens = new Array<number>(entries.length);
209
+ let accumulated = 0;
210
+ for (let i = entries.length - 1; i >= 0; i--) {
211
+ suffixTokens[i] = accumulated;
212
+ const entry = entries[i];
213
+ if (entry.type === "message") accumulated += estimateTokens(entry.message as AgentMessage);
214
+ }
215
+ toPrune = candidates.filter(candidate => suffixTokens[candidate.index] <= suffixTokenLimit);
216
+ }
217
+ if (toPrune.length === 0) return { prunedCount: 0, tokensSaved: 0 };
218
+
219
+ const prunedAt = Date.now();
220
+ let tokensSaved = 0;
221
+ for (const candidate of toPrune) {
222
+ candidate.message.content = [{ type: "text", text: candidate.notice }];
223
+ candidate.message.prunedAt = prunedAt;
224
+ tokensSaved += estimatePrunedSavings(candidate.tokens, candidate.notice);
225
+ }
226
+ return { prunedCount: toPrune.length, tokensSaved };
227
+ }
228
+
52
229
  export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig = DEFAULT_PRUNE_CONFIG): PruneResult {
53
230
  let accumulatedTokens = 0;
54
231
  let tokensSaved = 0;
55
232
  let prunedCount = 0;
56
233
 
57
- const candidates: Array<{ entry: SessionMessageEntry; tokens: number }> = [];
234
+ const candidates: Array<{ entry: SessionMessageEntry; tokens: number; superseded: boolean; useless: boolean }> = [];
58
235
  const toolCallsById = collectToolCallsById(entries);
236
+ const supersededMessages = config.supersedeKey
237
+ ? new Set(
238
+ collectSupersededResults(entries, toolCallsById, config.supersedeKey, config.protectedTools).map(
239
+ candidate => candidate.message,
240
+ ),
241
+ )
242
+ : undefined;
243
+ const uselessMessages =
244
+ config.pruneUseless !== false
245
+ ? new Set(
246
+ collectUselessResults(
247
+ entries,
248
+ toolCallsById,
249
+ config.protectedTools,
250
+ supersededMessages ?? new Set(),
251
+ ).map(candidate => candidate.message),
252
+ )
253
+ : undefined;
59
254
 
60
255
  for (let i = entries.length - 1; i >= 0; i--) {
61
256
  const entry = entries[i];
@@ -70,17 +265,30 @@ export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig =
70
265
  continue;
71
266
  }
72
267
 
73
- if (accumulatedTokens < config.protectTokens || isProtected) {
268
+ // Superseded and useless results are pruned first: they bypass the
269
+ // protect window (a stale copy of re-read content — or a result the
270
+ // tool itself flagged as carrying no information — is dead weight at
271
+ // any age).
272
+ const superseded = supersededMessages?.has(message) ?? false;
273
+ const useless = uselessMessages?.has(message) ?? false;
274
+ if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected)) {
74
275
  accumulatedTokens += tokens;
75
276
  continue;
76
277
  }
77
278
 
78
- candidates.push({ entry: entry as SessionMessageEntry, tokens });
279
+ candidates.push({ entry: entry as SessionMessageEntry, tokens, superseded, useless });
79
280
  accumulatedTokens += tokens;
80
281
  }
81
282
 
82
283
  for (const candidate of candidates) {
83
- tokensSaved += estimatePrunedSavings(candidate.tokens);
284
+ tokensSaved += estimatePrunedSavings(
285
+ candidate.tokens,
286
+ candidate.superseded
287
+ ? SUPERSEDED_NOTICE
288
+ : candidate.useless
289
+ ? USELESS_NOTICE
290
+ : createPrunedNotice(candidate.tokens),
291
+ );
84
292
  }
85
293
 
86
294
  if (tokensSaved < config.minimumSavings || candidates.length === 0) {
@@ -90,10 +298,34 @@ export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig =
90
298
  const prunedAt = Date.now();
91
299
  for (const candidate of candidates) {
92
300
  const message = candidate.entry.message as ToolResultMessage;
93
- message.content = [{ type: "text", text: createPrunedNotice(candidate.tokens) }];
301
+ const notice = candidate.superseded
302
+ ? SUPERSEDED_NOTICE
303
+ : candidate.useless
304
+ ? USELESS_NOTICE
305
+ : createPrunedNotice(candidate.tokens);
306
+ message.content = [{ type: "text", text: notice }];
94
307
  message.prunedAt = prunedAt;
95
308
  prunedCount++;
96
309
  }
97
310
 
98
311
  return { prunedCount, tokensSaved };
99
312
  }
313
+
314
+ /**
315
+ * Supersede key for the `read` tool: the file path with the trailing line/raw
316
+ * selector stripped (the read tool's own splitter grammar via
317
+ * {@link splitReadSelector}, e.g. `src/foo.ts:50-200`, `:2-4:raw`).
318
+ * Internal/URL-scheme paths (`skill://…`, `https://…`) are exempt.
319
+ * Selector-free reads key on the bare path; selector-carrying reads key on
320
+ * `path + "\u0000" + selector`, so two reads collide only when the newer is
321
+ * selector-free or the selectors are identical (the pass's prefix rule lets a
322
+ * bare-path read supersede selector-carrying reads of the same file).
323
+ */
324
+ export function readToolSupersedeKey(toolName: string, args: Record<string, unknown>): string | undefined {
325
+ if (toolName !== "read") return undefined;
326
+ const path = args.path;
327
+ if (typeof path !== "string" || path.length === 0) return undefined;
328
+ if (path.includes("://")) return undefined;
329
+ const { path: base, sel } = splitReadSelector(path);
330
+ return sel === undefined ? base : `${base}\u0000${sel}`;
331
+ }