@opengeni/runtime 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-2PO56VAL.js → chunk-D5KU3QUC.js} +240 -23
- package/dist/chunk-D5KU3QUC.js.map +1 -0
- package/dist/index.d.ts +106 -178
- package/dist/index.js +427 -161
- package/dist/index.js.map +1 -1
- package/dist/sandbox/index.d.ts +54 -6
- package/dist/sandbox/index.js +11 -1
- package/package.json +3 -3
- package/src/context-compaction.ts +217 -348
- package/src/image-history.ts +149 -0
- package/src/index.ts +195 -38
- package/src/sandbox/display-stack.ts +96 -12
- package/src/sandbox/index.ts +72 -12
- package/src/sandbox/providers/modal.ts +225 -0
- package/src/sandbox/routing/routing-session.ts +2 -2
- package/src/sandbox/selfhosted/session.ts +21 -5
- package/src/sandbox-computer.ts +88 -26
- package/dist/chunk-2PO56VAL.js.map +0 -1
|
@@ -1,60 +1,47 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Client-side conversation context compaction (the Azure path).
|
|
2
|
+
* Client-side conversation context compaction (the Azure/client path).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
* This module is the Azure-safe replacement. It is built from two pure pieces
|
|
11
|
-
* plus one impure step the caller wires in:
|
|
12
|
-
*
|
|
13
|
-
* 1. `planCompaction` — given the active history items, the last turn's actual
|
|
14
|
-
* input-token count, and the token budget, decide WHETHER to compact and,
|
|
15
|
-
* if so, WHERE the orphan-safe cut boundary is (the prefix to summarize vs
|
|
16
|
-
* the recent tail to keep verbatim). Pure, exhaustively testable.
|
|
17
|
-
* 2. (caller) summarize the prefix into ONE plain user `message` item via a
|
|
18
|
-
* model call — see `buildCompactionMessages` / `SUMMARY_PREFIX`.
|
|
19
|
-
* 3. `applyCompaction` shape — the storage write the caller performs:
|
|
20
|
-
* supersede the prefix rows, insert the summary at the boundary position.
|
|
21
|
-
*
|
|
22
|
-
* Design constraints (non-negotiable):
|
|
23
|
-
* - The summary is a PLAIN user message, NOT the SDK `compaction` item type
|
|
24
|
-
* (that requires server-minted `encrypted_content`; a hand-rolled one risks
|
|
25
|
-
* an Azure 400).
|
|
26
|
-
* - ORPHAN SAFETY: the cut lands only at a clean turn boundary (start of a
|
|
27
|
-
* user message). No tool call_id may straddle the cut — for every
|
|
28
|
-
* `function_call` dropped, its `function_call_result` is also dropped, and
|
|
29
|
-
* vice versa. Reasoning items drop/keep with their whole turn.
|
|
30
|
-
* - SINGLE LIVE SUMMARY: each compaction folds the prior summary forward
|
|
31
|
-
* (summarize [prior summary] + [items since]); prior summaries are excluded
|
|
32
|
-
* from re-collection so drift stays bounded.
|
|
4
|
+
* This mirrors Codex CLI's compaction model: the checkpoint model sees the
|
|
5
|
+
* current active history plus one fixed checkpoint prompt, then the active
|
|
6
|
+
* history is rebuilt as all real user messages plus one summary message.
|
|
7
|
+
* Assistant messages, tool calls/results, reasoning, and images are removed
|
|
8
|
+
* from the active model-facing history; the database audit rows remain.
|
|
33
9
|
*/
|
|
34
10
|
|
|
35
11
|
export type CompactionItem = Record<string, unknown>;
|
|
36
12
|
|
|
37
13
|
/**
|
|
38
|
-
* Marker stored on the synthetic summary item so
|
|
39
|
-
* next
|
|
40
|
-
* lives in the item JSON, not a DB column, so it survives verbatim replay.
|
|
14
|
+
* Marker stored on the synthetic summary item so the UI can render it and the
|
|
15
|
+
* next rebuild can exclude old summaries from the retained user-message set.
|
|
41
16
|
*/
|
|
42
17
|
export const COMPACTION_SUMMARY_MARKER = "opengeni_context_summary";
|
|
43
18
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
export const
|
|
51
|
-
"
|
|
52
|
-
"
|
|
53
|
-
"
|
|
19
|
+
export const SUMMARY_BUFFER_TOKENS = 20_000;
|
|
20
|
+
export const COMPACT_USER_MESSAGE_MAX_TOKENS = 20_000;
|
|
21
|
+
export const CLIENT_COMPACTION_TRIGGER_FRACTION = 0.9;
|
|
22
|
+
|
|
23
|
+
// Verbatim from Codex CLI:
|
|
24
|
+
// codex-rs/prompts/templates/compact/prompt.md
|
|
25
|
+
export const COMPACTION_PROMPT = [
|
|
26
|
+
"You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.",
|
|
27
|
+
"",
|
|
28
|
+
"Include:",
|
|
29
|
+
"- Current progress and key decisions made",
|
|
30
|
+
"- Important context, constraints, or user preferences",
|
|
31
|
+
"- What remains to be done (clear next steps)",
|
|
32
|
+
"- Any critical data, examples, or references needed to continue",
|
|
54
33
|
"",
|
|
55
|
-
"
|
|
34
|
+
"Be concise, structured, and focused on helping the next LLM seamlessly continue the work.",
|
|
56
35
|
].join("\n");
|
|
57
36
|
|
|
37
|
+
// Verbatim from Codex CLI:
|
|
38
|
+
// codex-rs/prompts/templates/compact/summary_prefix.md
|
|
39
|
+
export const SUMMARY_PREFIX =
|
|
40
|
+
"Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:";
|
|
41
|
+
|
|
42
|
+
export const USER_MESSAGE_TRUNCATION_MARKER =
|
|
43
|
+
"\n[... middle truncated for context compaction ...]\n";
|
|
44
|
+
|
|
58
45
|
const RESULT_TYPE_BY_CALL_TYPE: Record<string, string> = {
|
|
59
46
|
function_call: "function_call_result",
|
|
60
47
|
computer_call: "computer_call_result",
|
|
@@ -87,15 +74,14 @@ export function isUserMessage(item: unknown): boolean {
|
|
|
87
74
|
/** True for our synthetic compaction summary item. */
|
|
88
75
|
export function isCompactionSummary(item: unknown): boolean {
|
|
89
76
|
return (
|
|
90
|
-
isUserMessage(item)
|
|
91
|
-
(item as Record<string, unknown>)[COMPACTION_SUMMARY_MARKER] === true
|
|
77
|
+
isUserMessage(item)
|
|
78
|
+
&& (item as Record<string, unknown>)[COMPACTION_SUMMARY_MARKER] === true
|
|
92
79
|
);
|
|
93
80
|
}
|
|
94
81
|
|
|
95
82
|
/**
|
|
96
|
-
* Rough token estimate for an item: char/4 over its serialized text. Used
|
|
97
|
-
*
|
|
98
|
-
* token count, falling back to this when that is unavailable.
|
|
83
|
+
* Rough token estimate for an item: char/4 over its serialized text. Used for
|
|
84
|
+
* the pre-first-call fallback, per-user-message cap, and read-path airbag.
|
|
99
85
|
*/
|
|
100
86
|
export function estimateItemTokens(item: CompactionItem): number {
|
|
101
87
|
let text: string;
|
|
@@ -115,19 +101,94 @@ export function estimateTokens(items: readonly CompactionItem[]): number {
|
|
|
115
101
|
return total;
|
|
116
102
|
}
|
|
117
103
|
|
|
104
|
+
export function clientCompactionThresholdTokens(input: {
|
|
105
|
+
contextWindowTokens: number;
|
|
106
|
+
contextReservedOutputTokens: number;
|
|
107
|
+
}): number {
|
|
108
|
+
const available = Math.max(
|
|
109
|
+
0,
|
|
110
|
+
input.contextWindowTokens - input.contextReservedOutputTokens - SUMMARY_BUFFER_TOKENS,
|
|
111
|
+
);
|
|
112
|
+
return Math.floor(available * CLIENT_COMPACTION_TRIGGER_FRACTION);
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
export type ClientCompactionDecision = {
|
|
116
|
+
shouldCompact: boolean;
|
|
117
|
+
reason: "force" | "above_threshold" | "below_threshold" | "no_history";
|
|
118
|
+
signalTokens: number;
|
|
119
|
+
thresholdTokens: number;
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
export function decideClientCompaction(input: {
|
|
123
|
+
items: readonly CompactionItem[];
|
|
124
|
+
lastInputTokens?: number | null;
|
|
125
|
+
contextWindowTokens: number;
|
|
126
|
+
contextReservedOutputTokens: number;
|
|
127
|
+
force?: boolean;
|
|
128
|
+
}): ClientCompactionDecision {
|
|
129
|
+
const thresholdTokens = clientCompactionThresholdTokens(input);
|
|
130
|
+
const recorded =
|
|
131
|
+
typeof input.lastInputTokens === "number" && input.lastInputTokens > 0
|
|
132
|
+
? input.lastInputTokens
|
|
133
|
+
: 0;
|
|
134
|
+
const signalTokens = recorded > 0 ? recorded : estimateTokens(input.items);
|
|
135
|
+
if (input.items.length === 0) {
|
|
136
|
+
return { shouldCompact: false, reason: "no_history", signalTokens, thresholdTokens };
|
|
137
|
+
}
|
|
138
|
+
if (input.force) {
|
|
139
|
+
return { shouldCompact: true, reason: "force", signalTokens, thresholdTokens };
|
|
140
|
+
}
|
|
141
|
+
if (signalTokens > thresholdTokens) {
|
|
142
|
+
return { shouldCompact: true, reason: "above_threshold", signalTokens, thresholdTokens };
|
|
143
|
+
}
|
|
144
|
+
return { shouldCompact: false, reason: "below_threshold", signalTokens, thresholdTokens };
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
export class CompactionNeededError extends Error {
|
|
148
|
+
readonly signalTokens: number;
|
|
149
|
+
readonly thresholdTokens: number;
|
|
150
|
+
readonly signalSource: "provider" | "estimate";
|
|
151
|
+
|
|
152
|
+
constructor(input: {
|
|
153
|
+
signalTokens: number;
|
|
154
|
+
thresholdTokens: number;
|
|
155
|
+
signalSource: "provider" | "estimate";
|
|
156
|
+
}) {
|
|
157
|
+
super(
|
|
158
|
+
`Context compaction needed: signal ${input.signalTokens} tokens exceeded threshold ${input.thresholdTokens}`,
|
|
159
|
+
);
|
|
160
|
+
this.name = "CompactionNeededError";
|
|
161
|
+
this.signalTokens = input.signalTokens;
|
|
162
|
+
this.thresholdTokens = input.thresholdTokens;
|
|
163
|
+
this.signalSource = input.signalSource;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
export function findCompactionNeededError(error: unknown, seen = new WeakSet<object>()): CompactionNeededError | null {
|
|
168
|
+
if (error instanceof CompactionNeededError) {
|
|
169
|
+
return error;
|
|
170
|
+
}
|
|
171
|
+
if (!error || typeof error !== "object") {
|
|
172
|
+
return null;
|
|
173
|
+
}
|
|
174
|
+
if (seen.has(error)) {
|
|
175
|
+
return null;
|
|
176
|
+
}
|
|
177
|
+
seen.add(error);
|
|
178
|
+
const record = error as Record<string, unknown>;
|
|
179
|
+
return (
|
|
180
|
+
findCompactionNeededError(record.cause, seen)
|
|
181
|
+
?? findCompactionNeededError(record.error, seen)
|
|
182
|
+
);
|
|
183
|
+
}
|
|
184
|
+
|
|
118
185
|
/**
|
|
119
186
|
* Walk backwards from the end of `items` keeping whole turns until the kept
|
|
120
187
|
* tail would exceed `keepRecentTokens`, and return the index of the first kept
|
|
121
|
-
* item.
|
|
122
|
-
*
|
|
123
|
-
*
|
|
124
|
-
* Returns `items.length` when nothing fits within the budget yet a boundary is
|
|
125
|
-
* required (degenerate); callers treat an index of 0 or length as "no useful
|
|
126
|
-
* cut".
|
|
188
|
+
* item. Retained for the read-path budget guard only; the client compaction
|
|
189
|
+
* rebuild no longer uses a keep-recent tail.
|
|
127
190
|
*/
|
|
128
191
|
export function findKeepBoundary(items: readonly CompactionItem[], keepRecentTokens: number): number {
|
|
129
|
-
// Indices that begin a user message (candidate boundaries). The first item is
|
|
130
|
-
// a boundary even if it is not a user message, so a cut at 0 is meaningful.
|
|
131
192
|
const boundaries: number[] = [];
|
|
132
193
|
for (let i = 0; i < items.length; i += 1) {
|
|
133
194
|
if (isUserMessage(items[i])) {
|
|
@@ -135,15 +196,8 @@ export function findKeepBoundary(items: readonly CompactionItem[], keepRecentTok
|
|
|
135
196
|
}
|
|
136
197
|
}
|
|
137
198
|
if (boundaries.length === 0) {
|
|
138
|
-
// No user-message boundary at all — cannot cut safely.
|
|
139
199
|
return 0;
|
|
140
200
|
}
|
|
141
|
-
// keepRecentTokens is a CAP on how much recent history is kept verbatim. We
|
|
142
|
-
// keep as much as fits: the EARLIEST user-message boundary whose tail
|
|
143
|
-
// [boundary, end) is still within the cap. Walking earliest -> latest, the
|
|
144
|
-
// first boundary that fits is that earliest one (tails only shrink as the
|
|
145
|
-
// boundary moves later). If even the last boundary's tail exceeds the cap we
|
|
146
|
-
// fall back to it (keep at least the final turn).
|
|
147
201
|
for (let b = 0; b < boundaries.length; b += 1) {
|
|
148
202
|
const boundary = boundaries[b]!;
|
|
149
203
|
if (estimateTokens(items.slice(boundary)) <= keepRecentTokens) {
|
|
@@ -156,26 +210,9 @@ export function findKeepBoundary(items: readonly CompactionItem[], keepRecentTok
|
|
|
156
210
|
/**
|
|
157
211
|
* READ-PATH BUDGET GUARD (last-resort backstop).
|
|
158
212
|
*
|
|
159
|
-
*
|
|
160
|
-
*
|
|
161
|
-
*
|
|
162
|
-
* model context window. The #61 orphan sanitizer is purely structural — it has
|
|
163
|
-
* NO size awareness — so without this guard an over-budget input is sent and
|
|
164
|
-
* 400s every turn, re-bricking the session.
|
|
165
|
-
*
|
|
166
|
-
* `enforceInputBudget` drops the OLDEST history at a clean turn boundary until
|
|
167
|
-
* the estimated input fits `maxTokens`, ALWAYS keeping the most recent turn(s).
|
|
168
|
-
* It is orphan-safe by construction: it only ever cuts at the start of a user
|
|
169
|
-
* message (via `findKeepBoundary`), so no tool-call pair is split. It is a
|
|
170
|
-
* crude data-loss fallback (no summary is generated) that exists solely so a
|
|
171
|
-
* single over-budget assembled input is never put on the wire — real context
|
|
172
|
-
* preservation is the summarizing pre-turn path; this is the airbag.
|
|
173
|
-
*
|
|
174
|
-
* Pure: returns a new array (same item references, in order) with an oldest
|
|
175
|
-
* prefix omitted, or the input unchanged when it already fits. The provided
|
|
176
|
-
* `trailingTokens` accounts for the un-stored part of the assembled input (the
|
|
177
|
-
* new user/continuation message + fixed system/tool overhead) so the cap is
|
|
178
|
-
* measured against the WHOLE request, not just the stored history.
|
|
213
|
+
* Drops the oldest history at a clean user-message boundary until an assembled
|
|
214
|
+
* input fits the request budget. This remains a request-local safety rail; it
|
|
215
|
+
* is not the compaction strategy.
|
|
179
216
|
*/
|
|
180
217
|
export function enforceInputBudget<T extends CompactionItem>(
|
|
181
218
|
items: readonly T[],
|
|
@@ -186,13 +223,7 @@ export function enforceInputBudget<T extends CompactionItem>(
|
|
|
186
223
|
if (items.length === 0 || total <= maxTokens) {
|
|
187
224
|
return { items: items.slice(), trimmed: false, droppedCount: 0, estimatedTokens: total };
|
|
188
225
|
}
|
|
189
|
-
// Budget left for the stored history once the fixed trailing cost is paid.
|
|
190
226
|
const historyBudget = Math.max(0, maxTokens - Math.max(0, trailingTokens));
|
|
191
|
-
// Find the EARLIEST user-message boundary whose tail fits historyBudget; that
|
|
192
|
-
// boundary's prefix is the oldest history we drop. findKeepBoundary already
|
|
193
|
-
// returns a clean turn boundary (start of a user message), so the cut is
|
|
194
|
-
// orphan-safe. A boundary of 0 means nothing could be dropped safely (no
|
|
195
|
-
// earlier boundary) — we leave the input as-is rather than orphan a pair.
|
|
196
227
|
const boundary = findKeepBoundary(items, historyBudget);
|
|
197
228
|
if (boundary <= 0) {
|
|
198
229
|
return { items: items.slice(), trimmed: false, droppedCount: 0, estimatedTokens: total };
|
|
@@ -206,291 +237,142 @@ export function enforceInputBudget<T extends CompactionItem>(
|
|
|
206
237
|
};
|
|
207
238
|
}
|
|
208
239
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
* was stale-low. The boundary walk is run with a SHRUNK keep-recent budget in
|
|
224
|
-
* this case so an over-budget history always yields a non-empty prefix to
|
|
225
|
-
* summarize (the everything-is-"recent" deadlock can't strand it un-compacted).
|
|
226
|
-
*/
|
|
227
|
-
hardForced: boolean;
|
|
228
|
-
/** Index (into the active items) where the kept tail begins. */
|
|
229
|
-
boundaryIndex: number;
|
|
230
|
-
/**
|
|
231
|
-
* The prefix items to summarize: active[0, boundaryIndex), EXCLUDING any
|
|
232
|
-
* prior compaction summary (which is folded forward via `priorSummaryItem`).
|
|
233
|
-
*/
|
|
234
|
-
prefixItems: CompactionItem[];
|
|
235
|
-
/** The prior live summary item folded into this compaction, if any. */
|
|
236
|
-
priorSummaryItem: CompactionItem | null;
|
|
237
|
-
/** Items kept verbatim: active[boundaryIndex, end). */
|
|
238
|
-
tailItems: CompactionItem[];
|
|
239
|
-
};
|
|
240
|
+
/**
|
|
241
|
+
* The exact checkpoint input shape: current active history followed by Codex's
|
|
242
|
+
* checkpoint prompt as a synthesized user message.
|
|
243
|
+
*/
|
|
244
|
+
export function buildCompactionPromptInput(items: readonly CompactionItem[]): CompactionItem[] {
|
|
245
|
+
return [
|
|
246
|
+
...items,
|
|
247
|
+
{
|
|
248
|
+
type: "message",
|
|
249
|
+
role: "user",
|
|
250
|
+
content: COMPACTION_PROMPT,
|
|
251
|
+
},
|
|
252
|
+
];
|
|
253
|
+
}
|
|
240
254
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
force?: boolean;
|
|
261
|
-
};
|
|
255
|
+
/**
|
|
256
|
+
* Build the active history after compaction:
|
|
257
|
+
* all real user messages (prior summaries excluded, images removed, each
|
|
258
|
+
* message capped) plus one marked summary item.
|
|
259
|
+
*/
|
|
260
|
+
export function buildCompactionReplacementHistory(
|
|
261
|
+
items: readonly CompactionItem[],
|
|
262
|
+
summaryBody: string,
|
|
263
|
+
): CompactionItem[] {
|
|
264
|
+
const history: CompactionItem[] = [];
|
|
265
|
+
for (const item of items) {
|
|
266
|
+
if (!isUserMessage(item) || isCompactionSummary(item)) {
|
|
267
|
+
continue;
|
|
268
|
+
}
|
|
269
|
+
history.push(compactUserMessage(item));
|
|
270
|
+
}
|
|
271
|
+
history.push(buildSummaryItem(summaryBody));
|
|
272
|
+
return history;
|
|
273
|
+
}
|
|
262
274
|
|
|
263
275
|
/**
|
|
264
|
-
*
|
|
265
|
-
*
|
|
266
|
-
* Trigger: signal tokens >= softFraction*B (soft) or hardFraction*B (hard).
|
|
267
|
-
* Signal = MAX(actual last-turn input tokens, char/4 estimate of the active
|
|
268
|
-
* items). The max — not "trust the recorded count, estimate only when it's
|
|
269
|
-
* null" — is the self-heal fix: `sessions.last_input_tokens` is written ONLY
|
|
270
|
-
* when a model response reports usage, so a turn that OVERFLOWS on its first
|
|
271
|
-
* model call records NOTHING and the column keeps a STALE-POSITIVE value from
|
|
272
|
-
* the last good turn (e.g. ~600k). Trusting that stale-low number let an
|
|
273
|
-
* actually-over-budget history (>1.05M) slip under the soft limit and overflow
|
|
274
|
-
* again, re-bricking with no self-heal. Taking the max means a bloated history
|
|
275
|
-
* triggers compaction regardless of a stale recorded count.
|
|
276
|
-
*
|
|
277
|
-
* Hard force (hardFraction*B): at/over the hard ceiling we compact even if the
|
|
278
|
-
* recorded count was stale-low, AND we run the boundary walk with a shrunk
|
|
279
|
-
* keep-recent budget so an over-budget history always yields a non-empty prefix
|
|
280
|
-
* — otherwise a history where the whole thing reads as "recent" (tail within
|
|
281
|
-
* keepRecentTokens) would find no prefix and strand the session over budget.
|
|
282
|
-
*
|
|
283
|
-
* Boundary: the earliest user-message boundary whose kept tail fits the
|
|
284
|
-
* (possibly shrunk) keep-recent budget. The prefix before it (minus any prior
|
|
285
|
-
* summary, which is folded forward) is what gets summarized.
|
|
276
|
+
* Build the synthetic summary item (a plain user message) appended to the
|
|
277
|
+
* rebuilt active history.
|
|
286
278
|
*/
|
|
287
|
-
export function
|
|
288
|
-
const
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
typeof input.lastInputTokens === "number" && input.lastInputTokens > 0
|
|
295
|
-
? input.lastInputTokens
|
|
296
|
-
: 0;
|
|
297
|
-
const signalTokens = Math.max(recorded, estimateTokens(input.items));
|
|
298
|
-
const hardForced = signalTokens >= hardLimit;
|
|
299
|
-
|
|
300
|
-
const empty: CompactionPlan = {
|
|
301
|
-
shouldCompact: false,
|
|
302
|
-
reason: "below_threshold",
|
|
303
|
-
signalTokens,
|
|
304
|
-
hardForced,
|
|
305
|
-
boundaryIndex: input.items.length,
|
|
306
|
-
prefixItems: [],
|
|
307
|
-
priorSummaryItem: null,
|
|
308
|
-
tailItems: [...input.items],
|
|
279
|
+
export function buildSummaryItem(summaryBody: string): CompactionItem {
|
|
280
|
+
const trimmed = summaryBody.trim();
|
|
281
|
+
return {
|
|
282
|
+
type: "message",
|
|
283
|
+
role: "user",
|
|
284
|
+
content: `${SUMMARY_PREFIX}\n${trimmed}`,
|
|
285
|
+
[COMPACTION_SUMMARY_MARKER]: true,
|
|
309
286
|
};
|
|
287
|
+
}
|
|
310
288
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
return empty;
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
// Under hard pressure, cap the verbatim tail well below B so the boundary walk
|
|
321
|
-
// is forced to leave a summarizable prefix even when last_input_tokens was
|
|
322
|
-
// stale-low and the history exceeds the window. We keep at most HALF the
|
|
323
|
-
// configured keep-recent budget (and never more than a quarter of B) — enough
|
|
324
|
-
// recent context to stay coherent, little enough that a real prefix always
|
|
325
|
-
// remains to compact. Soft compactions keep the full configured budget.
|
|
326
|
-
const effectiveKeepRecent = hardForced
|
|
327
|
-
? Math.min(
|
|
328
|
-
Math.floor(input.keepRecentTokens / 2),
|
|
329
|
-
Math.floor(input.inputBudgetTokens / 4),
|
|
330
|
-
)
|
|
331
|
-
: input.keepRecentTokens;
|
|
332
|
-
|
|
333
|
-
const boundaryIndex = findKeepBoundary(input.items, effectiveKeepRecent);
|
|
334
|
-
if (boundaryIndex <= 0) {
|
|
335
|
-
// No prefix to summarize (cut at the very start) — nothing to do.
|
|
336
|
-
return { ...empty, reason: "no_boundary", boundaryIndex };
|
|
289
|
+
function compactUserMessage(item: CompactionItem): CompactionItem {
|
|
290
|
+
const text = messageText(item);
|
|
291
|
+
const next = { ...item };
|
|
292
|
+
if (estimatedTextTokens(text) > COMPACT_USER_MESSAGE_MAX_TOKENS) {
|
|
293
|
+
next.content = truncateMiddleByEstimatedTokens(text, COMPACT_USER_MESSAGE_MAX_TOKENS);
|
|
294
|
+
return next;
|
|
337
295
|
}
|
|
296
|
+
next.content = contentWithoutImages(item);
|
|
297
|
+
return next;
|
|
298
|
+
}
|
|
338
299
|
|
|
339
|
-
|
|
340
|
-
|
|
300
|
+
function estimatedTextTokens(text: string): number {
|
|
301
|
+
return Math.ceil(text.length / 4);
|
|
302
|
+
}
|
|
341
303
|
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
for (const item of prefix) {
|
|
347
|
-
if (isCompactionSummary(item)) {
|
|
348
|
-
priorSummaryItem = item;
|
|
349
|
-
continue;
|
|
350
|
-
}
|
|
351
|
-
prefixItems.push(item);
|
|
304
|
+
function truncateMiddleByEstimatedTokens(text: string, maxTokens: number): string {
|
|
305
|
+
const maxChars = Math.max(0, maxTokens * 4);
|
|
306
|
+
if (text.length <= maxChars) {
|
|
307
|
+
return text;
|
|
352
308
|
}
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
// empty AND when the prefix contains ONLY a prior summary (boundary landed
|
|
356
|
-
// immediately after it): folding a summary forward over zero new items would
|
|
357
|
-
// burn a summarizer call to re-wrap identical content, emit a spurious
|
|
358
|
-
// compaction event, and — if the next turn is still above the soft threshold
|
|
359
|
-
// — loop. The single live summary already sits at the boundary, so leaving it
|
|
360
|
-
// in place is correct.
|
|
361
|
-
if (prefixItems.length === 0) {
|
|
362
|
-
return { ...empty, reason: "nothing_to_summarize", boundaryIndex };
|
|
309
|
+
if (maxChars <= USER_MESSAGE_TRUNCATION_MARKER.length) {
|
|
310
|
+
return USER_MESSAGE_TRUNCATION_MARKER.slice(0, maxChars);
|
|
363
311
|
}
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
signalTokens,
|
|
369
|
-
hardForced,
|
|
370
|
-
boundaryIndex,
|
|
371
|
-
prefixItems,
|
|
372
|
-
priorSummaryItem,
|
|
373
|
-
tailItems,
|
|
374
|
-
};
|
|
312
|
+
const keepChars = maxChars - USER_MESSAGE_TRUNCATION_MARKER.length;
|
|
313
|
+
const headChars = Math.ceil(keepChars / 2);
|
|
314
|
+
const tailChars = Math.floor(keepChars / 2);
|
|
315
|
+
return `${text.slice(0, headChars)}${USER_MESSAGE_TRUNCATION_MARKER}${text.slice(text.length - tailChars)}`;
|
|
375
316
|
}
|
|
376
317
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
if (!
|
|
380
|
-
return
|
|
318
|
+
function contentWithoutImages(item: CompactionItem): unknown {
|
|
319
|
+
const content = (item as { content?: unknown }).content;
|
|
320
|
+
if (!Array.isArray(content)) {
|
|
321
|
+
return content;
|
|
381
322
|
}
|
|
323
|
+
return content.filter((part) => {
|
|
324
|
+
if (!part || typeof part !== "object") {
|
|
325
|
+
return true;
|
|
326
|
+
}
|
|
327
|
+
const type = (part as { type?: unknown }).type;
|
|
328
|
+
return type !== "input_image" && type !== "image_url";
|
|
329
|
+
});
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
function messageText(item: CompactionItem): string {
|
|
382
333
|
const content = (item as { content?: unknown }).content;
|
|
383
334
|
if (typeof content === "string") {
|
|
384
|
-
return
|
|
335
|
+
return content;
|
|
385
336
|
}
|
|
386
337
|
if (Array.isArray(content)) {
|
|
387
|
-
|
|
338
|
+
return content
|
|
388
339
|
.map((part) => {
|
|
389
340
|
if (part && typeof part === "object") {
|
|
390
|
-
const
|
|
391
|
-
|
|
341
|
+
const record = part as { text?: unknown; content?: unknown };
|
|
342
|
+
if (typeof record.text === "string") {
|
|
343
|
+
return record.text;
|
|
344
|
+
}
|
|
345
|
+
if (typeof record.content === "string") {
|
|
346
|
+
return record.content;
|
|
347
|
+
}
|
|
392
348
|
}
|
|
393
349
|
return "";
|
|
394
350
|
})
|
|
395
351
|
.join("");
|
|
396
|
-
return stripSummaryPrefix(text);
|
|
397
352
|
}
|
|
398
353
|
return "";
|
|
399
354
|
}
|
|
400
355
|
|
|
401
|
-
function
|
|
402
|
-
|
|
403
|
-
const idx = text.indexOf(marker);
|
|
404
|
-
return idx >= 0 ? text.slice(idx + marker.length) : text;
|
|
405
|
-
}
|
|
406
|
-
|
|
407
|
-
/**
|
|
408
|
-
* Build the synthetic summary item (a plain user message) to insert at the
|
|
409
|
-
* boundary. `summaryBody` is the model-generated working-memory bridge.
|
|
410
|
-
*/
|
|
411
|
-
export function buildSummaryItem(summaryBody: string): CompactionItem {
|
|
412
|
-
return {
|
|
413
|
-
type: "message",
|
|
414
|
-
role: "user",
|
|
415
|
-
content: `${SUMMARY_PREFIX}${summaryBody}`,
|
|
416
|
-
[COMPACTION_SUMMARY_MARKER]: true,
|
|
417
|
-
};
|
|
418
|
-
}
|
|
419
|
-
|
|
420
|
-
/**
|
|
421
|
-
* Instruction prompt for the summarizer model call. Leans on OpenGeni's durable
|
|
422
|
-
* structured memory (the notebook) so the summary stays a light working-memory
|
|
423
|
-
* bridge, never a place secret values get copied.
|
|
424
|
-
*/
|
|
425
|
-
export const SUMMARY_INSTRUCTIONS = [
|
|
426
|
-
"You are compacting the earlier part of a long-running agent conversation into a compact working-memory checkpoint so the agent can continue past the model's context limit.",
|
|
427
|
-
"Durable facts already live in the workspace notebook and document bases (via MCP). Do NOT re-derive or copy those; summarize POINTERS, not contents.",
|
|
428
|
-
"Capture, concisely and factually:",
|
|
429
|
-
"- The current objective and the key decisions made so far.",
|
|
430
|
-
"- Open blockers and anything in-progress.",
|
|
431
|
-
"- Deployed / infrastructure state that has changed (what exists now).",
|
|
432
|
-
"- Environment and credential facts BY REFERENCE ONLY — name the env var keys, secret names, or notebook/document ids; NEVER copy a secret value, token, key, or password.",
|
|
433
|
-
"- Concrete next steps.",
|
|
434
|
-
"Say explicitly that durable facts are in the notebook and that this summary lists pointers, not contents.",
|
|
435
|
-
"Output only the summary body — no preamble, no markdown headers, plain prose or terse bullets.",
|
|
436
|
-
].join("\n");
|
|
437
|
-
|
|
438
|
-
/**
|
|
439
|
-
* Render the prefix items into a transcript the summarizer reads. Keeps it
|
|
440
|
-
* bounded by truncating individual items; the model call itself is what
|
|
441
|
-
* produces the compact result.
|
|
442
|
-
*/
|
|
443
|
-
export function renderPrefixTranscript(items: readonly CompactionItem[], priorSummaryText: string): string {
|
|
444
|
-
const lines: string[] = [];
|
|
445
|
-
if (priorSummaryText.trim().length > 0) {
|
|
446
|
-
lines.push("PRIOR CHECKPOINT SUMMARY (fold this forward; it already replaced even older history):");
|
|
447
|
-
lines.push(priorSummaryText.trim());
|
|
448
|
-
lines.push("");
|
|
449
|
-
lines.push("CONVERSATION SINCE THAT CHECKPOINT:");
|
|
450
|
-
} else {
|
|
451
|
-
lines.push("CONVERSATION TO SUMMARIZE:");
|
|
452
|
-
}
|
|
453
|
-
for (const item of items) {
|
|
454
|
-
lines.push(renderItem(item));
|
|
455
|
-
}
|
|
456
|
-
return lines.join("\n");
|
|
356
|
+
export function renderCompactionPromptInputForChat(input: readonly CompactionItem[]): string {
|
|
357
|
+
return input.map(renderItem).join("\n");
|
|
457
358
|
}
|
|
458
359
|
|
|
459
360
|
function renderItem(item: CompactionItem): string {
|
|
460
361
|
const type = itemType(item) ?? "unknown";
|
|
461
362
|
if (type === "message") {
|
|
462
363
|
const role = itemRole(item) ?? "assistant";
|
|
463
|
-
return `[${role}] ${
|
|
364
|
+
return `[${role}] ${truncateForTranscript(messageText(item), 4000)}`;
|
|
464
365
|
}
|
|
465
366
|
if (type === "reasoning") {
|
|
466
367
|
return "[reasoning] (omitted)";
|
|
467
368
|
}
|
|
468
369
|
if (RESULT_TYPES.has(type)) {
|
|
469
|
-
return `[tool_result] ${
|
|
370
|
+
return `[tool_result] ${truncateForTranscript(resultText(item), 2000)}`;
|
|
470
371
|
}
|
|
471
372
|
if (RESULT_TYPE_BY_CALL_TYPE[type]) {
|
|
472
|
-
return `[tool_call ${type}] ${
|
|
373
|
+
return `[tool_call ${type}] ${truncateForTranscript(callText(item), 1000)}`;
|
|
473
374
|
}
|
|
474
|
-
return `[${type}] ${
|
|
475
|
-
}
|
|
476
|
-
|
|
477
|
-
function messageText(item: CompactionItem): string {
|
|
478
|
-
const content = (item as { content?: unknown }).content;
|
|
479
|
-
if (typeof content === "string") {
|
|
480
|
-
return content;
|
|
481
|
-
}
|
|
482
|
-
if (Array.isArray(content)) {
|
|
483
|
-
return content
|
|
484
|
-
.map((part) => {
|
|
485
|
-
if (part && typeof part === "object") {
|
|
486
|
-
const t = (part as { text?: unknown }).text;
|
|
487
|
-
return typeof t === "string" ? t : "";
|
|
488
|
-
}
|
|
489
|
-
return "";
|
|
490
|
-
})
|
|
491
|
-
.join("");
|
|
492
|
-
}
|
|
493
|
-
return "";
|
|
375
|
+
return `[${type}] ${truncateForTranscript(safeStringify(item), 1000)}`;
|
|
494
376
|
}
|
|
495
377
|
|
|
496
378
|
function resultText(item: CompactionItem): string {
|
|
@@ -517,22 +399,9 @@ function safeStringify(value: unknown): string {
|
|
|
517
399
|
}
|
|
518
400
|
}
|
|
519
401
|
|
|
520
|
-
function
|
|
402
|
+
function truncateForTranscript(text: string, max: number): string {
|
|
521
403
|
if (text.length <= max) {
|
|
522
404
|
return text;
|
|
523
405
|
}
|
|
524
|
-
return `${text.slice(0, max)}
|
|
525
|
-
}
|
|
526
|
-
|
|
527
|
-
/**
|
|
528
|
-
* The summarizer model call payload: a system instruction plus the rendered
|
|
529
|
-
* prefix transcript. The caller turns this into a single model request (no
|
|
530
|
-
* tools, no streaming) and feeds the text result into `buildSummaryItem`.
|
|
531
|
-
*/
|
|
532
|
-
export function buildCompactionMessages(plan: CompactionPlan): { system: string; user: string } {
|
|
533
|
-
const priorText = compactionSummaryText(plan.priorSummaryItem);
|
|
534
|
-
return {
|
|
535
|
-
system: SUMMARY_INSTRUCTIONS,
|
|
536
|
-
user: renderPrefixTranscript(plan.prefixItems, priorText),
|
|
537
|
-
};
|
|
406
|
+
return `${text.slice(0, max)}... (${text.length - max} more chars)`;
|
|
538
407
|
}
|