@prometheus-ai/agent-core 0.5.3 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/agent-loop.d.ts +7 -0
- package/dist/types/agent.d.ts +41 -13
- package/dist/types/compaction/branch-summarization.d.ts +3 -3
- package/dist/types/compaction/compaction.d.ts +11 -9
- package/dist/types/compaction/messages.d.ts +14 -2
- package/dist/types/compaction/openai.d.ts +18 -3
- package/dist/types/compaction/pruning.d.ts +55 -0
- package/dist/types/compaction/shake.d.ts +3 -1
- package/dist/types/compaction/utils.d.ts +18 -2
- package/dist/types/proxy.d.ts +4 -3
- package/dist/types/telemetry.d.ts +59 -57
- package/dist/types/types.d.ts +60 -16
- package/package.json +6 -4
- package/src/agent-loop.ts +660 -181
- package/src/agent.ts +103 -30
- package/src/compaction/branch-summarization.ts +8 -7
- package/src/compaction/compaction.ts +69 -34
- package/src/compaction/messages.ts +78 -64
- package/src/compaction/openai.ts +88 -74
- package/src/compaction/prompts/branch-summary.md +1 -1
- package/src/compaction/prompts/compaction-summary-context.md +1 -1
- package/src/compaction/prompts/compaction-summary.md +2 -2
- package/src/compaction/prompts/compaction-update-summary.md +3 -3
- package/src/compaction/prompts/file-operations.md +3 -8
- package/src/compaction/prompts/summarization-system.md +1 -1
- package/src/compaction/pruning.ts +240 -8
- package/src/compaction/shake.ts +7 -3
- package/src/compaction/utils.ts +97 -19
- package/src/proxy.ts +13 -7
- package/src/telemetry.ts +126 -113
- package/src/types.ts +65 -16
package/src/compaction/openai.ts
CHANGED
|
@@ -12,20 +12,21 @@
|
|
|
12
12
|
* with `{ summary, shortSummary? }`.
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
|
-
import {
|
|
16
|
-
CODEX_BASE_URL,
|
|
17
|
-
getCodexAccountId,
|
|
18
|
-
OPENAI_HEADER_VALUES,
|
|
19
|
-
OPENAI_HEADERS,
|
|
20
|
-
} from "@prometheus-ai/ai/providers/openai-codex/constants";
|
|
15
|
+
import { ProviderHttpError } from "@prometheus-ai/ai/errors";
|
|
21
16
|
import { parseTextSignature } from "@prometheus-ai/ai/providers/openai-responses-shared";
|
|
22
17
|
import { transformMessages } from "@prometheus-ai/ai/providers/transform-messages";
|
|
23
|
-
import type { AssistantMessage, Message, Model } from "@prometheus-ai/ai/types";
|
|
18
|
+
import type { AssistantMessage, FetchImpl, Message, Model } from "@prometheus-ai/ai/types";
|
|
24
19
|
import {
|
|
25
20
|
getOpenAIResponsesHistoryItems,
|
|
26
21
|
getOpenAIResponsesHistoryPayload,
|
|
27
22
|
normalizeResponsesToolCallId,
|
|
28
23
|
} from "@prometheus-ai/ai/utils";
|
|
24
|
+
import {
|
|
25
|
+
CODEX_BASE_URL,
|
|
26
|
+
getCodexAccountId,
|
|
27
|
+
OPENAI_HEADER_VALUES,
|
|
28
|
+
OPENAI_HEADERS,
|
|
29
|
+
} from "@prometheus-ai/catalog/wire/codex";
|
|
29
30
|
import { logger } from "@prometheus-ai/utils";
|
|
30
31
|
|
|
31
32
|
// ============================================================================
|
|
@@ -34,6 +35,23 @@ import { logger } from "@prometheus-ai/utils";
|
|
|
34
35
|
|
|
35
36
|
export const OPENAI_REMOTE_COMPACTION_PRESERVE_KEY = "openaiRemoteCompaction";
|
|
36
37
|
|
|
38
|
+
/**
|
|
39
|
+
* Hard ceiling on remote compaction HTTP requests. Unlike every provider
|
|
40
|
+
* stream (guarded by first-event/idle watchdogs in pi-ai), these are raw
|
|
41
|
+
* fetches awaiting one non-streamed JSON body — a connection silently dropped
|
|
42
|
+
* by a middlebox would otherwise hang the whole compaction pipeline forever
|
|
43
|
+
* (frozen "Auto context-full maintenance…" spinner, manual /compact queueing
|
|
44
|
+
* behind it). On timeout the caller falls back to local summarization.
|
|
45
|
+
*/
|
|
46
|
+
export const REMOTE_COMPACTION_TIMEOUT_MS = 180_000;
|
|
47
|
+
|
|
48
|
+
/** Race the caller's signal against the request timeout; `timeoutMs <= 0` disables the watchdog. */
|
|
49
|
+
function withRequestTimeout(signal: AbortSignal | undefined, timeoutMs: number): AbortSignal | undefined {
|
|
50
|
+
if (timeoutMs <= 0) return signal;
|
|
51
|
+
const timeout = AbortSignal.timeout(timeoutMs);
|
|
52
|
+
return signal ? AbortSignal.any([signal, timeout]) : timeout;
|
|
53
|
+
}
|
|
54
|
+
|
|
37
55
|
export type OpenAiRemoteCompactionItem = {
|
|
38
56
|
type: "compaction" | "compaction_summary";
|
|
39
57
|
encrypted_content?: string;
|
|
@@ -146,50 +164,14 @@ export function withOpenAiRemoteCompactionPreserveData(
|
|
|
146
164
|
// Input/output filtering for OpenAI compact endpoint
|
|
147
165
|
// ============================================================================
|
|
148
166
|
|
|
149
|
-
function estimateOpenAiCompactInputTokens(input: Array<Record<string, unknown>>, instructions: string): number {
|
|
150
|
-
let chars = instructions.length;
|
|
151
|
-
for (const item of input) {
|
|
152
|
-
chars += JSON.stringify(item).length;
|
|
153
|
-
}
|
|
154
|
-
return Math.ceil(chars / 4);
|
|
155
|
-
}
|
|
156
|
-
|
|
157
167
|
function shouldTrimOpenAiCompactInputItem(item: Record<string, unknown>): boolean {
|
|
158
168
|
return item.type === "function_call_output" || (item.type === "message" && item.role === "developer");
|
|
159
169
|
}
|
|
160
170
|
|
|
161
|
-
function shouldKeepOpenAiCompactOutputUserMessage(item: Record<string, unknown>): boolean {
|
|
162
|
-
if (item.role !== "user") return false;
|
|
163
|
-
const content = item.content;
|
|
164
|
-
if (!Array.isArray(content) || content.length === 0) return false;
|
|
165
|
-
const contextualFragmentPatterns = [
|
|
166
|
-
[/^<system-reminder>[\s\S]*<\/system-reminder>$/i, /<system-reminder>/i],
|
|
167
|
-
[/^#\s*AGENTS\.md instructions for\b[\s\S]*<\/INSTRUCTIONS>$/i, /# AGENTS.md instructions/],
|
|
168
|
-
[/^<environment-context>[\s\S]*<\/environment-context>$/i, /<environment-context>/i],
|
|
169
|
-
[/^<skill>[\s\S]*<\/skill>$/i, /<skill>/i],
|
|
170
|
-
[/^<user-shell-command>[\s\S]*<\/user-shell-command>$/i, /<user-shell-command>/i],
|
|
171
|
-
[/^<turn-aborted>[\s\S]*<\/turn-aborted>$/i, /<turn-aborted>/i],
|
|
172
|
-
[/^<subagent-notification>[\s\S]*<\/subagent-notification>$/i, /<subagent-notification>/i],
|
|
173
|
-
] as const;
|
|
174
|
-
return content.every(part => {
|
|
175
|
-
if (!part || typeof part !== "object") return false;
|
|
176
|
-
const candidate = part as { type?: unknown; text?: unknown };
|
|
177
|
-
if (candidate.type === "input_image") return true;
|
|
178
|
-
if (candidate.type !== "input_text" || typeof candidate.text !== "string") return false;
|
|
179
|
-
const trimmed = candidate.text.trim();
|
|
180
|
-
if (trimmed.length === 0) return false;
|
|
181
|
-
return !contextualFragmentPatterns.some(([strictPattern, markerPattern]) => {
|
|
182
|
-
return strictPattern.test(trimmed) || markerPattern.test(trimmed);
|
|
183
|
-
});
|
|
184
|
-
});
|
|
185
|
-
}
|
|
186
|
-
|
|
187
171
|
function shouldKeepOpenAiCompactOutputItem(item: Record<string, unknown>): boolean {
|
|
188
172
|
if (item.type === "compaction" || item.type === "compaction_summary") return true;
|
|
189
173
|
if (item.type !== "message") return false;
|
|
190
|
-
|
|
191
|
-
if (item.role === "assistant") return true;
|
|
192
|
-
return shouldKeepOpenAiCompactOutputUserMessage(item);
|
|
174
|
+
return item.role === "assistant" || item.role === "user";
|
|
193
175
|
}
|
|
194
176
|
|
|
195
177
|
function trimOpenAiCompactInput(
|
|
@@ -198,16 +180,29 @@ function trimOpenAiCompactInput(
|
|
|
198
180
|
instructions: string,
|
|
199
181
|
): Array<Record<string, unknown>> {
|
|
200
182
|
const trimmed = [...input];
|
|
201
|
-
|
|
183
|
+
// Per-item serialized sizes are cached and decremented on removal.
|
|
184
|
+
// Re-stringifying the whole input per popped item was O(N²) in total chars
|
|
185
|
+
// — hundreds of MB of stringify churn on a 200k-token codex history,
|
|
186
|
+
// blocking the event loop for seconds (same class as the addOpenAiCallIds
|
|
187
|
+
// fix above).
|
|
188
|
+
const sizes = trimmed.map(item => JSON.stringify(item).length);
|
|
189
|
+
let chars = instructions.length;
|
|
190
|
+
for (const size of sizes) chars += size;
|
|
191
|
+
const removeAt = (index: number): void => {
|
|
192
|
+
chars -= sizes[index] ?? 0;
|
|
193
|
+
trimmed.splice(index, 1);
|
|
194
|
+
sizes.splice(index, 1);
|
|
195
|
+
};
|
|
196
|
+
while (trimmed.length > 0 && Math.ceil(chars / 4) > contextWindow) {
|
|
202
197
|
const last = trimmed[trimmed.length - 1];
|
|
203
198
|
if (last?.type === "function_call_output" || last?.type === "custom_tool_call_output") {
|
|
204
199
|
const callId = typeof last.call_id === "string" ? last.call_id : undefined;
|
|
205
200
|
const callType = last.type === "custom_tool_call_output" ? "custom_tool_call" : "function_call";
|
|
206
|
-
trimmed.
|
|
201
|
+
removeAt(trimmed.length - 1);
|
|
207
202
|
if (callId) {
|
|
208
203
|
const matchingCallIndex = trimmed.findLastIndex(item => item.type === callType && item.call_id === callId);
|
|
209
204
|
if (matchingCallIndex >= 0) {
|
|
210
|
-
|
|
205
|
+
removeAt(matchingCallIndex);
|
|
211
206
|
}
|
|
212
207
|
}
|
|
213
208
|
continue;
|
|
@@ -215,29 +210,32 @@ function trimOpenAiCompactInput(
|
|
|
215
210
|
if (!last || !shouldTrimOpenAiCompactInputItem(last)) {
|
|
216
211
|
break;
|
|
217
212
|
}
|
|
218
|
-
trimmed.
|
|
213
|
+
removeAt(trimmed.length - 1);
|
|
219
214
|
}
|
|
220
215
|
return trimmed;
|
|
221
216
|
}
|
|
222
217
|
|
|
223
|
-
|
|
224
|
-
|
|
218
|
+
// Register every tool-call id in `items` (and the subset using the custom-tool
|
|
219
|
+
// wire shape) into the running sets. The history builder maintains both sets
|
|
220
|
+
// incrementally as native history is appended, so this only scans the
|
|
221
|
+
// newly-added items (or, after a full-snapshot replace, the fresh input) rather
|
|
222
|
+
// than re-scanning the whole growing history per message — the latter was
|
|
223
|
+
// O(N²) and blocked the event loop for seconds while compacting large codex
|
|
224
|
+
// contexts (frozen spinner until the next forced render).
|
|
225
|
+
function addOpenAiCallIds(
|
|
226
|
+
items: Array<Record<string, unknown>>,
|
|
227
|
+
knownCallIds: Set<string>,
|
|
228
|
+
customCallIds: Set<string>,
|
|
229
|
+
): void {
|
|
225
230
|
for (const item of items) {
|
|
226
|
-
if (
|
|
231
|
+
if (typeof item.call_id !== "string") continue;
|
|
232
|
+
if (item.type === "function_call") {
|
|
233
|
+
knownCallIds.add(item.call_id);
|
|
234
|
+
} else if (item.type === "custom_tool_call") {
|
|
227
235
|
knownCallIds.add(item.call_id);
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
return knownCallIds;
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
function collectCustomOpenAiCallIds(items: Array<Record<string, unknown>>): Set<string> {
|
|
234
|
-
const customCallIds = new Set<string>();
|
|
235
|
-
for (const item of items) {
|
|
236
|
-
if (item.type === "custom_tool_call" && typeof item.call_id === "string") {
|
|
237
236
|
customCallIds.add(item.call_id);
|
|
238
237
|
}
|
|
239
238
|
}
|
|
240
|
-
return customCallIds;
|
|
241
239
|
}
|
|
242
240
|
|
|
243
241
|
// ============================================================================
|
|
@@ -265,16 +263,16 @@ export function buildOpenAiNativeHistory(
|
|
|
265
263
|
const transformedMessages = transformMessages(messages, model, id => normalizeOpenAiCompactionToolCallId(id));
|
|
266
264
|
|
|
267
265
|
let msgIndex = 0;
|
|
268
|
-
|
|
269
|
-
|
|
266
|
+
const knownCallIds = new Set<string>();
|
|
267
|
+
const customCallIds = new Set<string>();
|
|
268
|
+
addOpenAiCallIds(input, knownCallIds, customCallIds);
|
|
270
269
|
for (const message of transformedMessages) {
|
|
271
270
|
if (message.role === "user" || message.role === "developer") {
|
|
272
271
|
const providerPayload = (message as { providerPayload?: AssistantMessage["providerPayload"] }).providerPayload;
|
|
273
272
|
const historyItems = getOpenAIResponsesHistoryItems(providerPayload, model.provider);
|
|
274
273
|
if (historyItems) {
|
|
275
274
|
input.push(...historyItems);
|
|
276
|
-
knownCallIds
|
|
277
|
-
customCallIds = collectCustomOpenAiCallIds(input);
|
|
275
|
+
addOpenAiCallIds(historyItems, knownCallIds, customCallIds);
|
|
278
276
|
msgIndex++;
|
|
279
277
|
continue;
|
|
280
278
|
}
|
|
@@ -317,11 +315,13 @@ export function buildOpenAiNativeHistory(
|
|
|
317
315
|
if (providerPayload) {
|
|
318
316
|
if (providerPayload.dt) {
|
|
319
317
|
input.push(...providerPayload.items);
|
|
318
|
+
addOpenAiCallIds(providerPayload.items, knownCallIds, customCallIds);
|
|
320
319
|
} else {
|
|
321
320
|
input.splice(0, input.length, ...providerPayload.items);
|
|
321
|
+
knownCallIds.clear();
|
|
322
|
+
customCallIds.clear();
|
|
323
|
+
addOpenAiCallIds(input, knownCallIds, customCallIds);
|
|
322
324
|
}
|
|
323
|
-
knownCallIds = collectKnownOpenAiCallIds(input);
|
|
324
|
-
customCallIds = collectCustomOpenAiCallIds(input);
|
|
325
325
|
msgIndex++;
|
|
326
326
|
continue;
|
|
327
327
|
}
|
|
@@ -451,11 +451,12 @@ export async function requestOpenAiRemoteCompaction(
|
|
|
451
451
|
compactInput: Array<Record<string, unknown>>,
|
|
452
452
|
instructions: string,
|
|
453
453
|
signal?: AbortSignal,
|
|
454
|
+
opts?: { fetch?: FetchImpl; timeoutMs?: number },
|
|
454
455
|
): Promise<OpenAiRemoteCompactionResponse> {
|
|
455
456
|
const endpoint = resolveOpenAiCompactEndpoint(model);
|
|
456
457
|
const request: OpenAiRemoteCompactionRequest = {
|
|
457
458
|
model: model.id,
|
|
458
|
-
input: trimOpenAiCompactInput(compactInput, model.contextWindow, instructions),
|
|
459
|
+
input: trimOpenAiCompactInput(compactInput, model.contextWindow ?? Number.POSITIVE_INFINITY, instructions),
|
|
459
460
|
instructions,
|
|
460
461
|
};
|
|
461
462
|
const headers: Record<string, string> = {
|
|
@@ -474,11 +475,11 @@ export async function requestOpenAiRemoteCompaction(
|
|
|
474
475
|
headers[OPENAI_HEADERS.ORIGINATOR] = OPENAI_HEADER_VALUES.ORIGINATOR_CODEX;
|
|
475
476
|
}
|
|
476
477
|
|
|
477
|
-
const response = await fetch(endpoint, {
|
|
478
|
+
const response = await (opts?.fetch ?? fetch)(endpoint, {
|
|
478
479
|
method: "POST",
|
|
479
480
|
headers,
|
|
480
481
|
body: JSON.stringify(request),
|
|
481
|
-
signal,
|
|
482
|
+
signal: withRequestTimeout(signal, opts?.timeoutMs ?? REMOTE_COMPACTION_TIMEOUT_MS),
|
|
482
483
|
});
|
|
483
484
|
|
|
484
485
|
if (!response.ok) {
|
|
@@ -489,7 +490,13 @@ export async function requestOpenAiRemoteCompaction(
|
|
|
489
490
|
statusText: response.statusText,
|
|
490
491
|
errorText,
|
|
491
492
|
});
|
|
492
|
-
throw new
|
|
493
|
+
throw new ProviderHttpError(
|
|
494
|
+
`Remote compaction failed (${response.status} ${response.statusText})`,
|
|
495
|
+
response.status,
|
|
496
|
+
{
|
|
497
|
+
headers: response.headers,
|
|
498
|
+
},
|
|
499
|
+
);
|
|
493
500
|
}
|
|
494
501
|
|
|
495
502
|
const data = (await response.json()) as { output?: unknown[] } | undefined;
|
|
@@ -524,12 +531,13 @@ export async function requestRemoteCompaction(
|
|
|
524
531
|
endpoint: string,
|
|
525
532
|
request: RemoteCompactionRequest,
|
|
526
533
|
signal?: AbortSignal,
|
|
534
|
+
opts?: { fetch?: FetchImpl; timeoutMs?: number },
|
|
527
535
|
): Promise<RemoteCompactionResponse> {
|
|
528
|
-
const response = await fetch(endpoint, {
|
|
536
|
+
const response = await (opts?.fetch ?? fetch)(endpoint, {
|
|
529
537
|
method: "POST",
|
|
530
538
|
headers: { "content-type": "application/json" },
|
|
531
539
|
body: JSON.stringify(request),
|
|
532
|
-
signal,
|
|
540
|
+
signal: withRequestTimeout(signal, opts?.timeoutMs ?? REMOTE_COMPACTION_TIMEOUT_MS),
|
|
533
541
|
});
|
|
534
542
|
|
|
535
543
|
if (!response.ok) {
|
|
@@ -540,7 +548,13 @@ export async function requestRemoteCompaction(
|
|
|
540
548
|
statusText: response.statusText,
|
|
541
549
|
errorText,
|
|
542
550
|
});
|
|
543
|
-
throw new
|
|
551
|
+
throw new ProviderHttpError(
|
|
552
|
+
`Remote compaction failed (${response.status} ${response.statusText})`,
|
|
553
|
+
response.status,
|
|
554
|
+
{
|
|
555
|
+
headers: response.headers,
|
|
556
|
+
},
|
|
557
|
+
);
|
|
544
558
|
}
|
|
545
559
|
|
|
546
560
|
const data = (await response.json()) as RemoteCompactionResponse | undefined;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that
|
|
1
|
+
Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that model used. You MUST build on the work already done and NEVER duplicate it. Here is that summary:
|
|
2
2
|
|
|
3
3
|
<summary>
|
|
4
4
|
{{summary}}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
You MUST summarize the conversation above into a structured
|
|
1
|
+
You MUST summarize the conversation above into a structured handoff summary for another LLM to resume the task.
|
|
2
2
|
|
|
3
|
-
IMPORTANT: If conversation ends with unanswered question
|
|
3
|
+
IMPORTANT: If the conversation ends with an unanswered question or a request awaiting user response (e.g., "Please run command and paste output"), you MUST preserve that exact question/request.
|
|
4
4
|
|
|
5
5
|
You MUST use this format (sections can be omitted if not applicable):
|
|
6
6
|
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
You MUST incorporate new messages above into the existing handoff summary in <previous-summary> tags, used by another LLM to resume task.
|
|
1
|
+
You MUST incorporate the new messages above into the existing handoff summary in <previous-summary> tags, used by another LLM to resume the task.
|
|
2
2
|
RULES:
|
|
3
|
-
- MUST preserve all information from previous summary
|
|
3
|
+
- MUST preserve all information from the previous summary
|
|
4
4
|
- MUST add new progress, decisions, and context from new messages
|
|
5
5
|
- MUST update Progress: move items from "In Progress" to "Done" when completed
|
|
6
6
|
- MUST update "Next Steps" based on what was accomplished
|
|
7
7
|
- MUST preserve exact file paths, function names, and error messages
|
|
8
8
|
- You MAY remove anything no longer relevant
|
|
9
9
|
|
|
10
|
-
IMPORTANT: If new messages end with unanswered question or request to user, you MUST add it to Critical Context (replacing any previous pending question if answered).
|
|
10
|
+
IMPORTANT: If the new messages end with an unanswered question or request to the user, you MUST add it to Critical Context (replacing any previous pending question if answered).
|
|
11
11
|
|
|
12
12
|
You MUST use this format (omit sections if not applicable):
|
|
13
13
|
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
Summarize conversations between users and AI coding assistants. Produce structured summaries in the exact specified format.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
NEVER continue the conversation. NEVER respond to questions in it. Output ONLY the structured summary.
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import type { ToolResultMessage } from "@prometheus-ai/ai";
|
|
6
|
-
import type { AgentMessage } from "../types";
|
|
6
|
+
import type { AgentMessage, AgentToolCall } from "../types";
|
|
7
7
|
import { estimateTokens } from "./compaction";
|
|
8
8
|
import type { SessionEntry, SessionMessageEntry } from "./entries";
|
|
9
9
|
import {
|
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
isSkillReadToolResult,
|
|
13
13
|
type ProtectedToolMatcher,
|
|
14
14
|
} from "./tool-protection";
|
|
15
|
+
import { splitReadSelector } from "./utils";
|
|
15
16
|
|
|
16
17
|
export interface PruneConfig {
|
|
17
18
|
/** Keep the most recent tool output tokens intact. */
|
|
@@ -20,12 +21,22 @@ export interface PruneConfig {
|
|
|
20
21
|
minimumSavings: number;
|
|
21
22
|
/** Tool-result protection matchers. String entries protect every result from that tool; predicates may inspect the paired tool call. */
|
|
22
23
|
protectedTools: ProtectedToolMatcher[];
|
|
24
|
+
/**
|
|
25
|
+
* Optional supersede key function (see {@link SupersedePruneConfig.supersedeKey}).
|
|
26
|
+
* When provided, superseded tool results are pruned first — even inside the
|
|
27
|
+
* `protectTokens` window — before age-based victims. Absent, behavior is
|
|
28
|
+
* unchanged.
|
|
29
|
+
*/
|
|
30
|
+
supersedeKey?: SupersedeKeyFn;
|
|
31
|
+
/** Useless-flagged results bypass the protect window (see {@link USELESS_NOTICE}). Default true. */
|
|
32
|
+
pruneUseless?: boolean;
|
|
23
33
|
}
|
|
24
34
|
|
|
25
35
|
export const DEFAULT_PRUNE_CONFIG: PruneConfig = {
|
|
26
36
|
protectTokens: 40_000,
|
|
27
37
|
minimumSavings: 20_000,
|
|
28
38
|
protectedTools: ["skill", isSkillReadToolResult],
|
|
39
|
+
pruneUseless: true,
|
|
29
40
|
};
|
|
30
41
|
|
|
31
42
|
export interface PruneResult {
|
|
@@ -33,6 +44,39 @@ export interface PruneResult {
|
|
|
33
44
|
tokensSaved: number;
|
|
34
45
|
}
|
|
35
46
|
|
|
47
|
+
/** Exact placeholder written over a superseded tool result. */
|
|
48
|
+
export const SUPERSEDED_NOTICE = "[Superseded by a newer read of this file]";
|
|
49
|
+
|
|
50
|
+
/** Exact placeholder written over an elided useless tool result. */
|
|
51
|
+
export const USELESS_NOTICE = "[Uneventful result elided]";
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Maps a tool call to a supersede key. Results sharing a key form a group in
|
|
55
|
+
* which every result except the newest is a supersede candidate. A key `K`
|
|
56
|
+
* additionally supersedes keys with prefix `K + "\u0000"` (selector-free read
|
|
57
|
+
* supersedes selector-carrying reads of the same base path). Return
|
|
58
|
+
* `undefined` to exempt a call from supersede grouping.
|
|
59
|
+
*/
|
|
60
|
+
export type SupersedeKeyFn = (toolName: string, args: Record<string, unknown>) => string | undefined;
|
|
61
|
+
|
|
62
|
+
export interface SupersedePruneConfig {
|
|
63
|
+
/** Supersede key function; results sharing a key supersede older ones. */
|
|
64
|
+
supersedeKey?: SupersedeKeyFn;
|
|
65
|
+
/** Also prune results flagged useless by their tool. Default false. */
|
|
66
|
+
pruneUseless?: boolean;
|
|
67
|
+
/** Prune a candidate now when all messages after it total at most this many estimated tokens. Default 8 000. */
|
|
68
|
+
suffixTokenLimit?: number;
|
|
69
|
+
/** Prune all candidates when the last message is at least this old (prompt cache is cold anyway). Default 30 min. */
|
|
70
|
+
idleFlushMs?: number;
|
|
71
|
+
/** Clock override for tests. */
|
|
72
|
+
now?: number;
|
|
73
|
+
/** Tool-result protection matchers (same contract as {@link PruneConfig.protectedTools}). */
|
|
74
|
+
protectedTools: ProtectedToolMatcher[];
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const DEFAULT_SUFFIX_TOKEN_LIMIT = 8_000;
|
|
78
|
+
const DEFAULT_IDLE_FLUSH_MS = 30 * 60_000;
|
|
79
|
+
|
|
36
80
|
function createPrunedNotice(tokens: number): string {
|
|
37
81
|
return `[Output truncated - ${tokens} tokens]`;
|
|
38
82
|
}
|
|
@@ -44,18 +88,169 @@ function getToolResultMessage(entry: SessionEntry): ToolResultMessage | undefine
|
|
|
44
88
|
return message as ToolResultMessage;
|
|
45
89
|
}
|
|
46
90
|
|
|
47
|
-
function estimatePrunedSavings(tokens: number): number {
|
|
48
|
-
const noticeTokens = Math.ceil(
|
|
91
|
+
function estimatePrunedSavings(tokens: number, notice: string): number {
|
|
92
|
+
const noticeTokens = Math.ceil(notice.length / 4);
|
|
49
93
|
return Math.max(0, tokens - noticeTokens);
|
|
50
94
|
}
|
|
51
95
|
|
|
96
|
+
interface SupersedeCandidate {
|
|
97
|
+
entry: SessionMessageEntry;
|
|
98
|
+
message: ToolResultMessage;
|
|
99
|
+
/** Index of the entry within the `entries` array. */
|
|
100
|
+
index: number;
|
|
101
|
+
tokens: number;
|
|
102
|
+
/** Placeholder text written over the blanked result. */
|
|
103
|
+
notice: string;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Collect superseded tool results: for every unpruned, unprotected tool result
|
|
108
|
+
* whose paired call resolves a supersede key, a LATER result with the same key
|
|
109
|
+
* — or with a key that is the `"\u0000"`-prefix parent of this one — marks it
|
|
110
|
+
* superseded. Returned in message order.
|
|
111
|
+
*/
|
|
112
|
+
function collectSupersededResults(
|
|
113
|
+
entries: readonly SessionEntry[],
|
|
114
|
+
toolCallsById: ReadonlyMap<string, AgentToolCall>,
|
|
115
|
+
supersedeKey: SupersedeKeyFn,
|
|
116
|
+
protectedTools: readonly ProtectedToolMatcher[],
|
|
117
|
+
): SupersedeCandidate[] {
|
|
118
|
+
const candidates: SupersedeCandidate[] = [];
|
|
119
|
+
const seenKeys = new Set<string>();
|
|
120
|
+
for (let i = entries.length - 1; i >= 0; i--) {
|
|
121
|
+
const entry = entries[i];
|
|
122
|
+
const message = getToolResultMessage(entry);
|
|
123
|
+
if (!message || message.prunedAt !== undefined) continue;
|
|
124
|
+
const toolCall = toolCallsById.get(message.toolCallId);
|
|
125
|
+
if (!toolCall) continue;
|
|
126
|
+
if (isProtectedToolResult(message, toolCall, protectedTools)) continue;
|
|
127
|
+
const key = supersedeKey(toolCall.name, toolCall.arguments as Record<string, unknown>);
|
|
128
|
+
if (key === undefined) continue;
|
|
129
|
+
const separator = key.indexOf("\u0000");
|
|
130
|
+
const superseded = seenKeys.has(key) || (separator >= 0 && seenKeys.has(key.slice(0, separator)));
|
|
131
|
+
seenKeys.add(key);
|
|
132
|
+
if (!superseded) continue;
|
|
133
|
+
candidates.push({
|
|
134
|
+
entry: entry as SessionMessageEntry,
|
|
135
|
+
message,
|
|
136
|
+
index: i,
|
|
137
|
+
tokens: estimateTokens(message as AgentMessage),
|
|
138
|
+
notice: SUPERSEDED_NOTICE,
|
|
139
|
+
});
|
|
140
|
+
}
|
|
141
|
+
return candidates.reverse();
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Collect tool results their tool flagged contextually useless (zero matches,
|
|
146
|
+
* elapsed wait): unpruned, non-error, unprotected, not in `exclude`, and large
|
|
147
|
+
* enough that blanking to {@link USELESS_NOTICE} actually saves tokens.
|
|
148
|
+
* Returned in message order.
|
|
149
|
+
*/
|
|
150
|
+
function collectUselessResults(
|
|
151
|
+
entries: readonly SessionEntry[],
|
|
152
|
+
toolCallsById: ReadonlyMap<string, AgentToolCall>,
|
|
153
|
+
protectedTools: readonly ProtectedToolMatcher[],
|
|
154
|
+
exclude: ReadonlySet<ToolResultMessage>,
|
|
155
|
+
): SupersedeCandidate[] {
|
|
156
|
+
const candidates: SupersedeCandidate[] = [];
|
|
157
|
+
for (let i = 0; i < entries.length; i++) {
|
|
158
|
+
const entry = entries[i];
|
|
159
|
+
const message = getToolResultMessage(entry);
|
|
160
|
+
if (message?.useless !== true || message.prunedAt !== undefined || message.isError === true) continue;
|
|
161
|
+
if (exclude.has(message)) continue;
|
|
162
|
+
if (isProtectedToolResult(message, toolCallsById.get(message.toolCallId), protectedTools)) continue;
|
|
163
|
+
const tokens = estimateTokens(message as AgentMessage);
|
|
164
|
+
if (estimatePrunedSavings(tokens, USELESS_NOTICE) <= 0) continue;
|
|
165
|
+
candidates.push({ entry: entry as SessionMessageEntry, message, index: i, tokens, notice: USELESS_NOTICE });
|
|
166
|
+
}
|
|
167
|
+
return candidates;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* Prune superseded tool results (e.g. stale `read` outputs replaced by a newer
|
|
172
|
+
* read of the same file) and, when `pruneUseless` is set, results their tool
|
|
173
|
+
* flagged contextually useless. Cheap, incremental, and prompt-cache-aware: a
|
|
174
|
+
* candidate is pruned now only when the suffix after it is small (tail case —
|
|
175
|
+
* the read→edit→read loop) or when the context has been idle long enough that
|
|
176
|
+
* the provider cache is cold anyway (then ALL candidates flush).
|
|
177
|
+
*/
|
|
178
|
+
export function pruneSupersededToolResults(entries: SessionEntry[], config: SupersedePruneConfig): PruneResult {
|
|
179
|
+
const toolCallsById = collectToolCallsById(entries);
|
|
180
|
+
const candidates = config.supersedeKey
|
|
181
|
+
? collectSupersededResults(entries, toolCallsById, config.supersedeKey, config.protectedTools)
|
|
182
|
+
: [];
|
|
183
|
+
if (config.pruneUseless) {
|
|
184
|
+
const exclude = new Set(candidates.map(candidate => candidate.message));
|
|
185
|
+
candidates.push(...collectUselessResults(entries, toolCallsById, config.protectedTools, exclude));
|
|
186
|
+
candidates.sort((a, b) => a.index - b.index);
|
|
187
|
+
}
|
|
188
|
+
if (candidates.length === 0) return { prunedCount: 0, tokensSaved: 0 };
|
|
189
|
+
|
|
190
|
+
const now = config.now ?? Date.now();
|
|
191
|
+
let lastMessageTimestamp: number | undefined;
|
|
192
|
+
for (let i = entries.length - 1; i >= 0; i--) {
|
|
193
|
+
const entry = entries[i];
|
|
194
|
+
if (entry.type !== "message") continue;
|
|
195
|
+
const timestamp = (entry.message as AgentMessage).timestamp;
|
|
196
|
+
if (typeof timestamp === "number") lastMessageTimestamp = timestamp;
|
|
197
|
+
break;
|
|
198
|
+
}
|
|
199
|
+
const idle =
|
|
200
|
+
lastMessageTimestamp !== undefined && now - lastMessageTimestamp >= (config.idleFlushMs ?? DEFAULT_IDLE_FLUSH_MS);
|
|
201
|
+
|
|
202
|
+
let toPrune: SupersedeCandidate[];
|
|
203
|
+
if (idle) {
|
|
204
|
+
toPrune = candidates;
|
|
205
|
+
} else {
|
|
206
|
+
const suffixTokenLimit = config.suffixTokenLimit ?? DEFAULT_SUFFIX_TOKEN_LIMIT;
|
|
207
|
+
// suffixTokens[i] = estimated tokens of all messages strictly after entry i.
|
|
208
|
+
const suffixTokens = new Array<number>(entries.length);
|
|
209
|
+
let accumulated = 0;
|
|
210
|
+
for (let i = entries.length - 1; i >= 0; i--) {
|
|
211
|
+
suffixTokens[i] = accumulated;
|
|
212
|
+
const entry = entries[i];
|
|
213
|
+
if (entry.type === "message") accumulated += estimateTokens(entry.message as AgentMessage);
|
|
214
|
+
}
|
|
215
|
+
toPrune = candidates.filter(candidate => suffixTokens[candidate.index] <= suffixTokenLimit);
|
|
216
|
+
}
|
|
217
|
+
if (toPrune.length === 0) return { prunedCount: 0, tokensSaved: 0 };
|
|
218
|
+
|
|
219
|
+
const prunedAt = Date.now();
|
|
220
|
+
let tokensSaved = 0;
|
|
221
|
+
for (const candidate of toPrune) {
|
|
222
|
+
candidate.message.content = [{ type: "text", text: candidate.notice }];
|
|
223
|
+
candidate.message.prunedAt = prunedAt;
|
|
224
|
+
tokensSaved += estimatePrunedSavings(candidate.tokens, candidate.notice);
|
|
225
|
+
}
|
|
226
|
+
return { prunedCount: toPrune.length, tokensSaved };
|
|
227
|
+
}
|
|
228
|
+
|
|
52
229
|
export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig = DEFAULT_PRUNE_CONFIG): PruneResult {
|
|
53
230
|
let accumulatedTokens = 0;
|
|
54
231
|
let tokensSaved = 0;
|
|
55
232
|
let prunedCount = 0;
|
|
56
233
|
|
|
57
|
-
const candidates: Array<{ entry: SessionMessageEntry; tokens: number }> = [];
|
|
234
|
+
const candidates: Array<{ entry: SessionMessageEntry; tokens: number; superseded: boolean; useless: boolean }> = [];
|
|
58
235
|
const toolCallsById = collectToolCallsById(entries);
|
|
236
|
+
const supersededMessages = config.supersedeKey
|
|
237
|
+
? new Set(
|
|
238
|
+
collectSupersededResults(entries, toolCallsById, config.supersedeKey, config.protectedTools).map(
|
|
239
|
+
candidate => candidate.message,
|
|
240
|
+
),
|
|
241
|
+
)
|
|
242
|
+
: undefined;
|
|
243
|
+
const uselessMessages =
|
|
244
|
+
config.pruneUseless !== false
|
|
245
|
+
? new Set(
|
|
246
|
+
collectUselessResults(
|
|
247
|
+
entries,
|
|
248
|
+
toolCallsById,
|
|
249
|
+
config.protectedTools,
|
|
250
|
+
supersededMessages ?? new Set(),
|
|
251
|
+
).map(candidate => candidate.message),
|
|
252
|
+
)
|
|
253
|
+
: undefined;
|
|
59
254
|
|
|
60
255
|
for (let i = entries.length - 1; i >= 0; i--) {
|
|
61
256
|
const entry = entries[i];
|
|
@@ -70,17 +265,30 @@ export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig =
|
|
|
70
265
|
continue;
|
|
71
266
|
}
|
|
72
267
|
|
|
73
|
-
|
|
268
|
+
// Superseded and useless results are pruned first: they bypass the
|
|
269
|
+
// protect window (a stale copy of re-read content — or a result the
|
|
270
|
+
// tool itself flagged as carrying no information — is dead weight at
|
|
271
|
+
// any age).
|
|
272
|
+
const superseded = supersededMessages?.has(message) ?? false;
|
|
273
|
+
const useless = uselessMessages?.has(message) ?? false;
|
|
274
|
+
if (!superseded && !useless && (accumulatedTokens < config.protectTokens || isProtected)) {
|
|
74
275
|
accumulatedTokens += tokens;
|
|
75
276
|
continue;
|
|
76
277
|
}
|
|
77
278
|
|
|
78
|
-
candidates.push({ entry: entry as SessionMessageEntry, tokens });
|
|
279
|
+
candidates.push({ entry: entry as SessionMessageEntry, tokens, superseded, useless });
|
|
79
280
|
accumulatedTokens += tokens;
|
|
80
281
|
}
|
|
81
282
|
|
|
82
283
|
for (const candidate of candidates) {
|
|
83
|
-
tokensSaved += estimatePrunedSavings(
|
|
284
|
+
tokensSaved += estimatePrunedSavings(
|
|
285
|
+
candidate.tokens,
|
|
286
|
+
candidate.superseded
|
|
287
|
+
? SUPERSEDED_NOTICE
|
|
288
|
+
: candidate.useless
|
|
289
|
+
? USELESS_NOTICE
|
|
290
|
+
: createPrunedNotice(candidate.tokens),
|
|
291
|
+
);
|
|
84
292
|
}
|
|
85
293
|
|
|
86
294
|
if (tokensSaved < config.minimumSavings || candidates.length === 0) {
|
|
@@ -90,10 +298,34 @@ export function pruneToolOutputs(entries: SessionEntry[], config: PruneConfig =
|
|
|
90
298
|
const prunedAt = Date.now();
|
|
91
299
|
for (const candidate of candidates) {
|
|
92
300
|
const message = candidate.entry.message as ToolResultMessage;
|
|
93
|
-
|
|
301
|
+
const notice = candidate.superseded
|
|
302
|
+
? SUPERSEDED_NOTICE
|
|
303
|
+
: candidate.useless
|
|
304
|
+
? USELESS_NOTICE
|
|
305
|
+
: createPrunedNotice(candidate.tokens);
|
|
306
|
+
message.content = [{ type: "text", text: notice }];
|
|
94
307
|
message.prunedAt = prunedAt;
|
|
95
308
|
prunedCount++;
|
|
96
309
|
}
|
|
97
310
|
|
|
98
311
|
return { prunedCount, tokensSaved };
|
|
99
312
|
}
|
|
313
|
+
|
|
314
|
+
/**
|
|
315
|
+
* Supersede key for the `read` tool: the file path with the trailing line/raw
|
|
316
|
+
* selector stripped (the read tool's own splitter grammar via
|
|
317
|
+
* {@link splitReadSelector}, e.g. `src/foo.ts:50-200`, `:2-4:raw`).
|
|
318
|
+
* Internal/URL-scheme paths (`skill://…`, `https://…`) are exempt.
|
|
319
|
+
* Selector-free reads key on the bare path; selector-carrying reads key on
|
|
320
|
+
* `path + "\u0000" + selector`, so two reads collide only when the newer is
|
|
321
|
+
* selector-free or the selectors are identical (the pass's prefix rule lets a
|
|
322
|
+
* bare-path read supersede selector-carrying reads of the same file).
|
|
323
|
+
*/
|
|
324
|
+
export function readToolSupersedeKey(toolName: string, args: Record<string, unknown>): string | undefined {
|
|
325
|
+
if (toolName !== "read") return undefined;
|
|
326
|
+
const path = args.path;
|
|
327
|
+
if (typeof path !== "string" || path.length === 0) return undefined;
|
|
328
|
+
if (path.includes("://")) return undefined;
|
|
329
|
+
const { path: base, sel } = splitReadSelector(path);
|
|
330
|
+
return sel === undefined ? base : `${base}\u0000${sel}`;
|
|
331
|
+
}
|