@opengeni/runtime 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2PO56VAL.js +3478 -0
- package/dist/chunk-2PO56VAL.js.map +1 -0
- package/dist/index.d.ts +912 -0
- package/dist/index.js +3663 -0
- package/dist/index.js.map +1 -0
- package/dist/sandbox/index.d.ts +1738 -0
- package/dist/sandbox/index.js +187 -0
- package/dist/sandbox/index.js.map +1 -0
- package/package.json +49 -0
- package/src/bundled_hashicorp_terraform_skills/LICENSE +373 -0
- package/src/bundled_hashicorp_terraform_skills/README.md +18 -0
- package/src/bundled_hashicorp_terraform_skills/UPSTREAM_GIT_SHA +1 -0
- package/src/bundled_hashicorp_terraform_skills/azure-verified-modules/SKILL.md +613 -0
- package/src/bundled_hashicorp_terraform_skills/checkov/SKILL.md +43 -0
- package/src/bundled_hashicorp_terraform_skills/refactor-module/SKILL.md +538 -0
- package/src/bundled_hashicorp_terraform_skills/social-media-marketing/SKILL.md +35 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-search-import/SKILL.md +372 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-search-import/references/MANUAL-IMPORT.md +113 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-search-import/scripts/list_resources.sh +38 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/SKILL.md +480 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/api-monitoring.md +543 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/component-blocks.md +476 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/deployment-blocks.md +391 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/examples.md +1529 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/linked-stacks.md +187 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/troubleshooting.md +671 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-style-guide/SKILL.md +353 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/SKILL.md +451 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/references/CI_CD.md +80 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/references/EXAMPLES.md +314 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/references/MOCK_PROVIDERS.md +171 -0
- package/src/codex-tool-search.ts +267 -0
- package/src/context-compaction.ts +538 -0
- package/src/history-sanitizer.ts +719 -0
- package/src/index.ts +3299 -0
- package/src/sandbox/capabilities.ts +69 -0
- package/src/sandbox/channel-a.ts +1031 -0
- package/src/sandbox/display-stack.ts +231 -0
- package/src/sandbox/errors.ts +34 -0
- package/src/sandbox/index.ts +832 -0
- package/src/sandbox/providers/blaxel.ts +35 -0
- package/src/sandbox/providers/cloudflare.ts +24 -0
- package/src/sandbox/providers/daytona.ts +34 -0
- package/src/sandbox/providers/docker.ts +17 -0
- package/src/sandbox/providers/e2b.ts +36 -0
- package/src/sandbox/providers/index.ts +107 -0
- package/src/sandbox/providers/local.ts +13 -0
- package/src/sandbox/providers/modal.ts +55 -0
- package/src/sandbox/providers/none.ts +13 -0
- package/src/sandbox/providers/runloop.ts +32 -0
- package/src/sandbox/providers/selfhosted.ts +96 -0
- package/src/sandbox/providers/types.ts +38 -0
- package/src/sandbox/providers/vercel.ts +29 -0
- package/src/sandbox/recording.ts +286 -0
- package/src/sandbox/routing/backend-resolver.ts +189 -0
- package/src/sandbox/routing/routing-session.ts +455 -0
- package/src/sandbox/select.ts +371 -0
- package/src/sandbox/selfhosted/capabilities.ts +255 -0
- package/src/sandbox/selfhosted/control-rpc.ts +351 -0
- package/src/sandbox/selfhosted/session.ts +930 -0
- package/src/sandbox/selfhosted/testing.ts +230 -0
- package/src/sandbox/stream-port.ts +185 -0
- package/src/sandbox/stream-token.ts +90 -0
- package/src/sandbox/terminal-server.ts +203 -0
- package/src/sandbox-computer.ts +835 -0
|
@@ -0,0 +1,538 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Client-side conversation context compaction (the Azure path).
|
|
3
|
+
*
|
|
4
|
+
* OpenGeni runs long-lived agent sessions whose conversation truth
|
|
5
|
+
* (`session_history_items`) grows unbounded. On the OpenAI platform the
|
|
6
|
+
* Responses API compacts server-side (the SDK's `compaction()` capability). On
|
|
7
|
+
* Azure that capability 400s (`unsupported_parameter`), so the session
|
|
8
|
+
* eventually overflows the model context window and hard-fails every turn.
|
|
9
|
+
*
|
|
10
|
+
* This module is the Azure-safe replacement. It is built from two pure pieces
|
|
11
|
+
* plus one impure step the caller wires in:
|
|
12
|
+
*
|
|
13
|
+
* 1. `planCompaction` — given the active history items, the last turn's actual
|
|
14
|
+
* input-token count, and the token budget, decide WHETHER to compact and,
|
|
15
|
+
* if so, WHERE the orphan-safe cut boundary is (the prefix to summarize vs
|
|
16
|
+
* the recent tail to keep verbatim). Pure, exhaustively testable.
|
|
17
|
+
* 2. (caller) summarize the prefix into ONE plain user `message` item via a
|
|
18
|
+
* model call — see `buildCompactionMessages` / `SUMMARY_PREFIX`.
|
|
19
|
+
* 3. `applyCompaction` shape — the storage write the caller performs:
|
|
20
|
+
* supersede the prefix rows, insert the summary at the boundary position.
|
|
21
|
+
*
|
|
22
|
+
* Design constraints (non-negotiable):
|
|
23
|
+
* - The summary is a PLAIN user message, NOT the SDK `compaction` item type
|
|
24
|
+
* (that requires server-minted `encrypted_content`; a hand-rolled one risks
|
|
25
|
+
* an Azure 400).
|
|
26
|
+
* - ORPHAN SAFETY: the cut lands only at a clean turn boundary (start of a
|
|
27
|
+
* user message). No tool call_id may straddle the cut — for every
|
|
28
|
+
* `function_call` dropped, its `function_call_result` is also dropped, and
|
|
29
|
+
* vice versa. Reasoning items drop/keep with their whole turn.
|
|
30
|
+
* - SINGLE LIVE SUMMARY: each compaction folds the prior summary forward
|
|
31
|
+
* (summarize [prior summary] + [items since]); prior summaries are excluded
|
|
32
|
+
* from re-collection so drift stays bounded.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
/**
 * A single conversation-history item, handled as an opaque JSON-like record.
 * The helpers in this module read fields such as `type`, `role`, `content`,
 * `output`, `name` and `arguments` defensively at runtime instead of modeling
 * them in the type.
 */
export type CompactionItem = Record<string, unknown>;
|
|
36
|
+
|
|
37
|
+
/**
 * Property name set to `true` on the synthetic summary item so it can be
 * recognized on the next compaction (to fold it forward) and excluded from
 * re-summarization. It lives in the item JSON, not a DB column, so it survives
 * verbatim replay. `isCompactionSummary` tests for this key being strictly
 * `true` on a user message.
 */
export const COMPACTION_SUMMARY_MARKER = "opengeni_context_summary";
|
|
43
|
+
|
|
44
|
+
/**
 * Bridge text prepended to the summary body in the synthetic user message. It
 * tells the model the preceding conversation was compacted and that durable
 * facts live in the notebook — so it treats the summary as a working-memory
 * pointer, not the whole truth.
 *
 * The joined string ends with the literal "SUMMARY:" (no trailing newline);
 * `buildSummaryItem` appends the body directly after it, and
 * `stripSummaryPrefix` searches for that same literal to recover the body.
 */
export const SUMMARY_PREFIX = [
  "[CONTEXT CHECKPOINT] The earlier part of this conversation was automatically compacted to stay within the model context window.",
  "Durable facts already live in the workspace notebook / document bases (via MCP) — the summary below is a light working-memory bridge, not a full transcript.",
  "Trust it for current objective, decisions, blockers, deployed/infra state, and next steps; re-read the notebook for anything authoritative.",
  // Blank line separating the preamble from the summary label.
  "",
  "SUMMARY:",
].join("\n");
|
|
57
|
+
|
|
58
|
+
/**
 * Maps each tool-call item `type` to the `type` of the item carrying its
 * result. `renderItem` uses the keys to recognize tool calls.
 */
const RESULT_TYPE_BY_CALL_TYPE: Record<string, string> = {
  function_call: "function_call_result",
  computer_call: "computer_call_result",
  shell_call: "shell_call_output",
  apply_patch_call: "apply_patch_call_output",
};
/** The set of all tool-result item types (the values of the map above). */
const RESULT_TYPES = new Set(Object.values(RESULT_TYPE_BY_CALL_TYPE));
|
|
65
|
+
|
|
66
|
+
/**
 * Read an item's `type` discriminator defensively.
 *
 * @returns the `type` field when `item` is a non-null object whose `type` is a
 *   string; `undefined` in every other case.
 */
function itemType(item: unknown): string | undefined {
  if (typeof item === "object" && item !== null) {
    const candidate = (item as { type?: unknown }).type;
    if (typeof candidate === "string") {
      return candidate;
    }
  }
  return undefined;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Read an item's `role` defensively.
 *
 * @returns the `role` field when `item` is a non-null object whose `role` is a
 *   string; `undefined` in every other case.
 */
function itemRole(item: unknown): string | undefined {
  if (typeof item === "object" && item !== null) {
    const candidate = (item as { role?: unknown }).role;
    if (typeof candidate === "string") {
      return candidate;
    }
  }
  return undefined;
}
|
|
81
|
+
|
|
82
|
+
/**
 * True when `item` is a `message` item whose role is `user`. Such items are
 * the only positions this module treats as legal turn boundaries.
 */
export function isUserMessage(item: unknown): boolean {
  if (itemType(item) !== "message") {
    return false;
  }
  return itemRole(item) === "user";
}
|
|
86
|
+
|
|
87
|
+
/**
 * True for the synthetic compaction summary item: a user message whose
 * `COMPACTION_SUMMARY_MARKER` property is strictly `true`.
 */
export function isCompactionSummary(item: unknown): boolean {
  if (!isUserMessage(item)) {
    return false;
  }
  const flag = (item as Record<string, unknown>)[COMPACTION_SUMMARY_MARKER];
  return flag === true;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Rough token estimate for an item: ceil(chars / 4) over its JSON
 * serialization. Used for the tail-budget walk and as one input to the
 * compaction trigger signal.
 *
 * Never throws on serialization problems: when `JSON.stringify` throws (for
 * example on a circular structure) or yields `undefined` (for example for a
 * top-level `undefined` or function), the estimate falls back to the length of
 * `String(item)`.
 *
 * @param item the history item to size.
 * @returns a non-negative integer token estimate.
 */
export function estimateItemTokens(item: CompactionItem): number {
  let serialized: string | undefined;
  try {
    // JSON.stringify is typed as returning string but can return undefined at
    // runtime for non-serializable top-level values.
    serialized = JSON.stringify(item);
  } catch {
    serialized = undefined;
  }
  const text = serialized ?? String(item);
  return Math.ceil(text.length / 4);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Sum of the per-item token estimates for `items` (0 for an empty list).
 */
export function estimateTokens(items: readonly CompactionItem[]): number {
  return items.reduce((sum, entry) => sum + estimateItemTokens(entry), 0);
}
|
|
117
|
+
|
|
118
|
+
/**
 * Choose the index at which the verbatim "recent tail" of `items` begins.
 *
 * `keepRecentTokens` is a CAP on how much recent history is kept. The result
 * is the EARLIEST user-message index whose tail `[index, end)` has an
 * estimated size within the cap, so as many whole turns as fit are kept. The
 * returned index is always the start of a user message (a clean turn
 * boundary), so the prefix `[0, index)` never splits a turn.
 *
 * Special cases:
 * - No user message anywhere: returns 0 (no safe cut exists).
 * - Even the last user message's tail exceeds the cap: returns that last
 *   user-message index, so at least the final turn is kept.
 *
 * Callers treat a result of 0 as "no useful cut".
 *
 * The tail size is accumulated in one backward pass, so each item is
 * serialized once instead of once per candidate boundary.
 *
 * @param items active history items in position order.
 * @param keepRecentTokens maximum estimated tokens to keep verbatim.
 * @returns the index of the first kept item.
 */
export function findKeepBoundary(items: readonly CompactionItem[], keepRecentTokens: number): number {
  let tailTokens = 0;
  let latestBoundary = -1;
  let earliestFitting = -1;
  for (let i = items.length - 1; i >= 0; i -= 1) {
    const item = items[i]!;
    tailTokens += estimateItemTokens(item);
    // Written as `<=` so a NaN cap never "fits", matching a direct comparison.
    const fits = tailTokens <= keepRecentTokens;
    if (isUserMessage(item)) {
      if (latestBoundary < 0) {
        latestBoundary = i;
      }
      if (fits) {
        earliestFitting = i;
      }
    }
    // Tails only grow as we move earlier, so once the cap is exceeded and the
    // fallback boundary is known, no earlier boundary can fit.
    if (!fits && latestBoundary >= 0) {
      break;
    }
  }
  if (latestBoundary < 0) {
    // No user-message boundary at all — cannot cut safely.
    return 0;
  }
  return earliestFitting >= 0 ? earliestFitting : latestBoundary;
}
|
|
155
|
+
|
|
156
|
+
/**
 * READ-PATH BUDGET GUARD (last-resort backstop).
 *
 * Pre-turn compaction is best-effort and can leave an assembled input that is
 * still larger than the model context window. This guard drops the OLDEST
 * history at a clean turn boundary (via `findKeepBoundary`) so that the
 * estimated request size is brought toward `maxTokens`, always keeping at
 * least the most recent turn. No summary is produced for the dropped prefix —
 * it is a crude data-loss fallback, not the normal preservation path.
 *
 * Pure: returns a new array holding the same item references in order.
 *
 * @param items stored history items in position order.
 * @param maxTokens cap for the whole estimated request.
 * @param trailingTokens estimated size of the un-stored part of the request
 *   (new message plus fixed overhead); negative values count as 0.
 * @returns the kept items, whether anything was dropped, how many items were
 *   dropped, and the estimated request size after the decision. When no user
 *   message boundary after index 0 exists, the input is returned untrimmed.
 */
export function enforceInputBudget<T extends CompactionItem>(
  items: readonly T[],
  maxTokens: number,
  trailingTokens = 0,
): { items: T[]; trimmed: boolean; droppedCount: number; estimatedTokens: number } {
  const trailing = Math.max(0, trailingTokens);
  const estimatedBefore = estimateTokens(items) + trailing;
  const untouched = () => ({
    items: items.slice(),
    trimmed: false,
    droppedCount: 0,
    estimatedTokens: estimatedBefore,
  });

  if (items.length === 0 || estimatedBefore <= maxTokens) {
    return untouched();
  }

  // What remains for stored history once the fixed trailing cost is paid.
  const historyBudget = Math.max(0, maxTokens - trailing);
  const cut = findKeepBoundary(items, historyBudget);
  if (cut <= 0) {
    // No boundary after the start: leave the input as-is rather than cut
    // mid-turn.
    return untouched();
  }

  const survivors = items.slice(cut);
  return {
    items: survivors,
    trimmed: true,
    droppedCount: cut,
    estimatedTokens: estimateTokens(survivors) + trailing,
  };
}
|
|
208
|
+
|
|
209
|
+
/** The decision produced by `planCompaction`. */
export type CompactionPlan = {
  /** Whether a compaction should run this turn. */
  shouldCompact: boolean;
  /**
   * Outcome label (for logs/tests): "compact" when `shouldCompact` is true,
   * otherwise the reason compaction was skipped.
   */
  reason: "below_threshold" | "no_boundary" | "nothing_to_summarize" | "compact";
  /**
   * The signal-token count the trigger decision was made on:
   * max(actual last-turn input tokens, char/4 estimate of the active items).
   * Recorded for logging / metrics and so a caller can reason about pressure.
   */
  signalTokens: number;
  /**
   * True when the signal reached hardFraction*B — the session is at/over the
   * hard ceiling and compaction was forced even if the recorded last-turn count
   * was stale-low. The boundary walk is run with a SHRUNK keep-recent budget in
   * this case so an over-budget history always yields a non-empty prefix to
   * summarize (the everything-is-"recent" deadlock can't strand it un-compacted).
   */
  hardForced: boolean;
  /**
   * Index (into the active items) where the kept tail begins. On the
   * "below_threshold" path this is `items.length`.
   */
  boundaryIndex: number;
  /**
   * The prefix items to summarize: active[0, boundaryIndex), EXCLUDING any
   * prior compaction summary (which is folded forward via `priorSummaryItem`).
   * Empty whenever `shouldCompact` is false.
   */
  prefixItems: CompactionItem[];
  /** The prior live summary item folded into this compaction, if any. */
  priorSummaryItem: CompactionItem | null;
  /**
   * Items kept verbatim: active[boundaryIndex, end). When `shouldCompact` is
   * false this is a copy of all active items.
   */
  tailItems: CompactionItem[];
};
|
|
240
|
+
|
|
241
|
+
/** Inputs to `planCompaction`. */
export type PlanCompactionInput = {
  /** Active history items in position order (already excludes superseded rows). */
  items: readonly CompactionItem[];
  /**
   * Actual input tokens reported for the last model call of the previous turn.
   * Null/undefined (or any non-positive value) contributes 0 to the signal, so
   * the char/4 estimate over `items` decides.
   */
  lastInputTokens?: number | null;
  /** Usable input budget B = window - reserved output. */
  inputBudgetTokens: number;
  /** Soft trigger: compaction is considered once the signal reaches floor(B * softFraction). */
  softFraction: number;
  /** Hard ceiling: the signal reaching floor(B * hardFraction) sets `hardForced`. */
  hardFraction: number;
  /**
   * Cap on the estimated tokens kept verbatim as the recent tail. Under hard
   * pressure the effective cap is min(keepRecentTokens / 2, B / 4).
   */
  keepRecentTokens: number;
  /**
   * Operator-forced compaction (the /compact command): bypass the soft-limit
   * token trigger and compact now if there is anything to summarize. The
   * boundary / nothing-to-summarize guards still apply — force never invents a
   * cut that would orphan a tool-call pair or summarize an empty prefix.
   */
  force?: boolean;
};
|
|
262
|
+
|
|
263
|
+
/**
 * Decide whether to compact this turn and, if so, where to cut. Pure.
 *
 * Trigger signal: MAX(recorded last-turn input tokens, char/4 estimate of
 * `items`). Taking the max — rather than trusting the recorded count and only
 * estimating when it is missing — means a bloated history still triggers
 * compaction when the recorded count is stale-low. A non-number or
 * non-positive `lastInputTokens` counts as 0.
 *
 * - Signal below floor(B * softFraction) and not `force`d: no compaction
 *   ("below_threshold").
 * - Signal at or above floor(B * hardFraction): `hardForced` is set and the
 *   verbatim tail is capped at min(keepRecentTokens / 2, B / 4), so the
 *   boundary walk leaves a prefix to summarize even when the whole history
 *   would otherwise read as "recent". Otherwise the cap is `keepRecentTokens`.
 * - `force` bypasses only the soft trigger; the structural guards still run.
 *
 * After the trigger: "no_boundary" when `findKeepBoundary` yields 0;
 * "nothing_to_summarize" when the prefix contains nothing besides a prior
 * summary (re-wrapping identical content would waste a summarizer call);
 * otherwise "compact", with the prior summary (if any) pulled out of
 * `prefixItems` and returned as `priorSummaryItem`.
 */
export function planCompaction(input: PlanCompactionInput): CompactionPlan {
  const { items, inputBudgetTokens, keepRecentTokens } = input;
  const softLimit = Math.floor(inputBudgetTokens * input.softFraction);
  const hardLimit = Math.floor(inputBudgetTokens * input.hardFraction);

  const reported = input.lastInputTokens;
  const recorded = typeof reported === "number" && reported > 0 ? reported : 0;
  const signalTokens = Math.max(recorded, estimateTokens(items));
  const hardForced = signalTokens >= hardLimit;

  // Shared shape for every "do not compact" outcome: nothing to summarize,
  // the whole history kept as the tail.
  const skip = (reason: CompactionPlan["reason"], boundaryIndex: number): CompactionPlan => ({
    shouldCompact: false,
    reason,
    signalTokens,
    hardForced,
    boundaryIndex,
    prefixItems: [],
    priorSummaryItem: null,
    tailItems: [...items],
  });

  if (!input.force && signalTokens < softLimit) {
    return skip("below_threshold", items.length);
  }

  // Under hard pressure shrink the verbatim tail so a summarizable prefix
  // remains; soft compactions keep the full configured budget.
  let tailBudget = keepRecentTokens;
  if (hardForced) {
    tailBudget = Math.min(Math.floor(keepRecentTokens / 2), Math.floor(inputBudgetTokens / 4));
  }

  const boundaryIndex = findKeepBoundary(items, tailBudget);
  if (boundaryIndex <= 0) {
    // The cut sits at the very start — there is no prefix.
    return skip("no_boundary", boundaryIndex);
  }

  // Split the prefix: the prior live summary is handed to the summarizer as
  // context instead of being re-summarized verbatim.
  const prefixItems: CompactionItem[] = [];
  let priorSummaryItem: CompactionItem | null = null;
  for (let i = 0; i < boundaryIndex; i += 1) {
    const candidate = items[i]!;
    if (isCompactionSummary(candidate)) {
      priorSummaryItem = candidate;
    } else {
      prefixItems.push(candidate);
    }
  }

  if (prefixItems.length === 0) {
    // Only a prior summary (or nothing) precedes the boundary; leave it be.
    return skip("nothing_to_summarize", boundaryIndex);
  }

  return {
    shouldCompact: true,
    reason: "compact",
    signalTokens,
    hardForced,
    boundaryIndex,
    prefixItems,
    priorSummaryItem,
    tailItems: items.slice(boundaryIndex),
  };
}
|
|
376
|
+
|
|
377
|
+
/**
 * Plain-text body of a prior summary item with the bridge prefix removed.
 *
 * The item's content is flattened the same way as any other message (a string
 * as-is, or the concatenated `text` of array parts) and everything up to and
 * including the "SUMMARY:" label is stripped.
 *
 * @param item the prior summary item, or `null` when there is none.
 * @returns the summary body, or "" when `item` is null or has no text content.
 */
export function compactionSummaryText(item: CompactionItem | null): string {
  return item ? stripSummaryPrefix(messageText(item)) : "";
}
|
|
400
|
+
|
|
401
|
+
/**
 * Return the text following the first "SUMMARY:" label, or `text` unchanged
 * when the label does not occur.
 */
function stripSummaryPrefix(text: string): string {
  const label = "SUMMARY:";
  const at = text.indexOf(label);
  if (at < 0) {
    return text;
  }
  return text.slice(at + label.length);
}
|
|
406
|
+
|
|
407
|
+
/**
 * Create the synthetic summary item inserted at the compaction boundary: a
 * plain user `message` whose content is `SUMMARY_PREFIX` immediately followed
 * by `summaryBody`, tagged with `COMPACTION_SUMMARY_MARKER` so later
 * compactions can recognize it.
 *
 * @param summaryBody the model-generated working-memory summary.
 */
export function buildSummaryItem(summaryBody: string): CompactionItem {
  const content = `${SUMMARY_PREFIX}${summaryBody}`;
  const summary: CompactionItem = { type: "message", role: "user", content };
  summary[COMPACTION_SUMMARY_MARKER] = true;
  return summary;
}
|
|
419
|
+
|
|
420
|
+
/**
 * Instruction prompt for the summarizer model call (returned as the `system`
 * text by `buildCompactionMessages`). Leans on OpenGeni's durable structured
 * memory (the notebook) so the summary stays a light working-memory bridge,
 * never a place secret values get copied. Lines are joined with "\n".
 */
export const SUMMARY_INSTRUCTIONS = [
  "You are compacting the earlier part of a long-running agent conversation into a compact working-memory checkpoint so the agent can continue past the model's context limit.",
  "Durable facts already live in the workspace notebook and document bases (via MCP). Do NOT re-derive or copy those; summarize POINTERS, not contents.",
  "Capture, concisely and factually:",
  "- The current objective and the key decisions made so far.",
  "- Open blockers and anything in-progress.",
  "- Deployed / infrastructure state that has changed (what exists now).",
  // Secrets are referenced by name only; values must never reach the summary.
  "- Environment and credential facts BY REFERENCE ONLY — name the env var keys, secret names, or notebook/document ids; NEVER copy a secret value, token, key, or password.",
  "- Concrete next steps.",
  "Say explicitly that durable facts are in the notebook and that this summary lists pointers, not contents.",
  "Output only the summary body — no preamble, no markdown headers, plain prose or terse bullets.",
].join("\n");
|
|
437
|
+
|
|
438
|
+
/**
 * Render the items to be summarized as a line-per-item transcript for the
 * summarizer. When a non-blank prior summary is supplied it is placed first
 * under its own heading so the model folds it forward. Individual items are
 * truncated by `renderItem`; the transcript as a whole is not capped here.
 *
 * @param items the prefix items to summarize (prior summary already removed).
 * @param priorSummaryText body of the prior summary, or "" when none.
 */
export function renderPrefixTranscript(items: readonly CompactionItem[], priorSummaryText: string): string {
  const prior = priorSummaryText.trim();
  const heading: string[] =
    prior.length > 0
      ? [
          "PRIOR CHECKPOINT SUMMARY (fold this forward; it already replaced even older history):",
          prior,
          "",
          "CONVERSATION SINCE THAT CHECKPOINT:",
        ]
      : ["CONVERSATION TO SUMMARIZE:"];
  const body = items.map((entry) => renderItem(entry));
  return [...heading, ...body].join("\n");
}
|
|
458
|
+
|
|
459
|
+
/**
 * Render one history item as a single tagged transcript line, truncating the
 * payload: messages to 4000 chars, tool results to 2000, tool calls and
 * unrecognized items to 1000. Reasoning items are replaced by a placeholder.
 */
function renderItem(item: CompactionItem): string {
  const type = itemType(item) ?? "unknown";
  switch (type) {
    case "message": {
      const speaker = itemRole(item) ?? "assistant";
      return `[${speaker}] ${truncate(messageText(item), 4000)}`;
    }
    case "reasoning":
      return "[reasoning] (omitted)";
    default:
      break;
  }
  if (RESULT_TYPES.has(type)) {
    return `[tool_result] ${truncate(resultText(item), 2000)}`;
  }
  if (RESULT_TYPE_BY_CALL_TYPE[type]) {
    return `[tool_call ${type}] ${truncate(callText(item), 1000)}`;
  }
  // Anything else is dumped as JSON under its own type tag.
  return `[${type}] ${truncate(safeStringify(item), 1000)}`;
}
|
|
476
|
+
|
|
477
|
+
/**
 * Flatten a message item's `content` to plain text: a string is returned
 * as-is; an array yields the concatenation (no separator) of every object
 * part's string `text`; anything else yields "".
 */
function messageText(item: CompactionItem): string {
  const { content } = item as { content?: unknown };
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }
  let joined = "";
  for (const part of content) {
    if (part && typeof part === "object") {
      const text = (part as { text?: unknown }).text;
      if (typeof text === "string") {
        joined += text;
      }
    }
  }
  return joined;
}
|
|
495
|
+
|
|
496
|
+
/**
 * Text form of a tool-result item: its `output` when that is a string,
 * otherwise the JSON of `output`, or of the whole item when `output` is
 * null/undefined.
 */
function resultText(item: CompactionItem): string {
  const { output } = item as { output?: unknown };
  return typeof output === "string" ? output : safeStringify(output ?? item);
}
|
|
503
|
+
|
|
504
|
+
/**
 * Text form of a tool-call item: "<name> <payload>", trimmed.
 *
 * The payload is the call's `arguments` when present. Tool-call kinds that do
 * not carry `name`/`arguments` would otherwise render as a bare "{}" and tell
 * the summarizer nothing, so the payload falls back to `action`, then
 * `operation`, and only then to an empty object.
 * NOTE(review): `action` / `operation` are assumed to be the payload fields of
 * computer/shell/apply-patch calls — confirm against the SDK item shapes.
 *
 * @param item a tool-call history item.
 * @returns the call rendered as text; a string payload is used verbatim,
 *   anything else is JSON-serialized.
 */
function callText(item: CompactionItem): string {
  const call = item as { name?: unknown; arguments?: unknown; action?: unknown; operation?: unknown };
  const namePart = typeof call.name === "string" ? call.name : "";
  const payload = call.arguments ?? call.action ?? call.operation ?? {};
  const argPart = typeof payload === "string" ? payload : safeStringify(payload);
  return `${namePart} ${argPart}`.trim();
}
|
|
511
|
+
|
|
512
|
+
/**
 * JSON-serialize `value` without ever throwing or returning a non-string.
 *
 * Falls back to `String(value)` both when `JSON.stringify` throws (for example
 * on circular structures or BigInt) and when it returns `undefined` (for
 * example for `undefined`, functions and symbols), so callers can rely on the
 * declared `string` return type.
 */
function safeStringify(value: unknown): string {
  try {
    // Typed as string by the standard library, but undefined at runtime for
    // non-serializable top-level values.
    const json: string | undefined = JSON.stringify(value);
    return json ?? String(value);
  } catch {
    return String(value);
  }
}
|
|
519
|
+
|
|
520
|
+
/**
 * Shorten `text` to at most `max` UTF-16 code units, appending a note with the
 * number of code units omitted. Text that already fits is returned unchanged.
 *
 * The cut never lands in the middle of a surrogate pair: if the last kept code
 * unit is a high surrogate, it is dropped too, so the output does not contain
 * a lone surrogate introduced by truncation.
 *
 * @param text the text to shorten.
 * @param max maximum number of code units to keep.
 */
function truncate(text: string, max: number): string {
  if (text.length <= max) {
    return text;
  }
  let cut = max;
  if (cut > 0) {
    const lastKept = text.charCodeAt(cut - 1);
    // 0xD800–0xDBFF is the high-surrogate range; its low half would be cut off.
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
      cut -= 1;
    }
  }
  return `${text.slice(0, cut)}… (${text.length - cut} more chars)`;
}
|
|
526
|
+
|
|
527
|
+
/**
 * Assemble the summarizer request for a plan: `system` is the fixed
 * `SUMMARY_INSTRUCTIONS`, `user` is the rendered transcript of
 * `plan.prefixItems` preceded by the prior summary body when the plan carries
 * one. The caller sends this as a single model request and feeds the text
 * result into `buildSummaryItem`.
 */
export function buildCompactionMessages(plan: CompactionPlan): { system: string; user: string } {
  const { prefixItems, priorSummaryItem } = plan;
  const user = renderPrefixTranscript(prefixItems, compactionSummaryText(priorSummaryItem));
  return { system: SUMMARY_INSTRUCTIONS, user };
}
|