opencode-lore 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +123 -0
- package/package.json +47 -0
- package/src/config.ts +54 -0
- package/src/curator.ts +154 -0
- package/src/db.ts +198 -0
- package/src/distillation.ts +426 -0
- package/src/gradient.ts +541 -0
- package/src/index.ts +324 -0
- package/src/ltm.ts +186 -0
- package/src/markdown.ts +81 -0
- package/src/prompt.ts +294 -0
- package/src/reflect.ts +153 -0
- package/src/temporal.ts +230 -0
package/src/gradient.ts
ADDED
|
@@ -0,0 +1,541 @@
|
|
|
1
|
+
import type { Message, Part } from "@opencode-ai/sdk";
|
|
2
|
+
import { db, ensureProject } from "./db";
|
|
3
|
+
import { config } from "./config";
|
|
4
|
+
import { formatDistillations } from "./prompt";
|
|
5
|
+
import { normalize } from "./markdown";
|
|
6
|
+
|
|
7
|
+
// A message paired with its content parts, as delivered by the OpenCode SDK.
type MessageWithParts = { info: Message; parts: Part[] };
|
|
8
|
+
|
|
9
|
+
// Rough token estimate: ~4 chars per token
|
|
10
|
+
function estimate(text: string): number {
|
|
11
|
+
return Math.ceil(text.length / 4);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
function estimateParts(parts: Part[]): number {
|
|
15
|
+
let total = 0;
|
|
16
|
+
for (const part of parts) {
|
|
17
|
+
if (part.type === "text") total += estimate(part.text);
|
|
18
|
+
else if (part.type === "reasoning" && part.text)
|
|
19
|
+
total += estimate(part.text);
|
|
20
|
+
else if (part.type === "tool" && part.state.status === "completed")
|
|
21
|
+
total += estimate(part.state.output) + estimate(part.tool) + 50;
|
|
22
|
+
else total += 20; // metadata overhead for other part types
|
|
23
|
+
}
|
|
24
|
+
return total;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
function estimateMessage(msg: MessageWithParts): number {
|
|
28
|
+
return estimateParts(msg.parts) + 20; // role/metadata overhead
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
// Cached model context limit — set by system transform hook, used by message transform
let contextLimit = 200_000; // sensible default
// Tokens reserved for the model's own output (capped in setModelLimits).
let outputReserved = 32_000;

// Conservative overhead reserve for first-turn (before calibration):
// accounts for provider system prompt + AGENTS.md + tool definitions + env info
const FIRST_TURN_OVERHEAD = 15_000;

// Calibrated overhead: actual tokens used minus our message estimate.
// Null = not yet calibrated (first turn). Updated after every assistant response.
let calibratedOverhead: number | null = null;
|
|
42
|
+
|
|
43
|
+
export function setModelLimits(limits: { context: number; output: number }) {
|
|
44
|
+
contextLimit = limits.context || 200_000;
|
|
45
|
+
outputReserved = Math.min(limits.output || 32_000, 32_000);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Called after each assistant message completes with real token usage data.
|
|
49
|
+
// actualInput = tokens.input + tokens.cache.read (all tokens that went into the model)
|
|
50
|
+
// messageEstimate = our chars/4 estimate of the messages we sent
|
|
51
|
+
export function calibrate(actualInput: number, messageEstimate: number) {
|
|
52
|
+
const overhead = Math.max(0, actualInput - messageEstimate);
|
|
53
|
+
// Smooth with EMA (alpha=0.3) once calibrated, or set directly on first call
|
|
54
|
+
calibratedOverhead =
|
|
55
|
+
calibratedOverhead === null
|
|
56
|
+
? overhead
|
|
57
|
+
: Math.round(calibratedOverhead * 0.7 + overhead * 0.3);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
export function getOverhead(): number {
|
|
61
|
+
return calibratedOverhead ?? FIRST_TURN_OVERHEAD;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
// For testing only — reset calibration state
|
|
65
|
+
export function resetCalibration() {
|
|
66
|
+
calibratedOverhead = null;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// One distilled-history row from the `distillations` table.
type Distillation = {
  id: string;
  observations: string; // distilled observation text (may contain "Date:" headers and "(meaning DATE)" annotations)
  generation: number; // NOTE(review): presumably the distillation generation/pass — confirm at the write site
  token_count: number; // NOTE(review): presumably the estimated token size of `observations` — confirm at the write site
  created_at: number; // creation timestamp; rows are read ORDER BY created_at ASC
  session_id: string; // owning session (loadDistillations can filter on it)
};
|
|
77
|
+
|
|
78
|
+
function loadDistillations(
|
|
79
|
+
projectPath: string,
|
|
80
|
+
sessionID?: string,
|
|
81
|
+
): Distillation[] {
|
|
82
|
+
const pid = ensureProject(projectPath);
|
|
83
|
+
const query = sessionID
|
|
84
|
+
? "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC"
|
|
85
|
+
: "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? ORDER BY created_at ASC";
|
|
86
|
+
const params = sessionID ? [pid, sessionID] : [pid];
|
|
87
|
+
return db()
|
|
88
|
+
.query(query)
|
|
89
|
+
.all(...params) as Distillation[];
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Strip all <system-reminder>...</system-reminder> blocks from message text.
|
|
93
|
+
// For the user-message wrapper pattern, extracts the actual user text.
|
|
94
|
+
// For all other reminders (build-switch, plan reminders, etc.), drops them entirely.
|
|
95
|
+
// These tags are added by OpenCode in-memory or persisted as synthetic parts —
|
|
96
|
+
// leaving them in the raw window causes the model to echo the format.
|
|
97
|
+
function stripSystemReminders(text: string): string {
|
|
98
|
+
return text
|
|
99
|
+
.replace(/<system-reminder>[\s\S]*?<\/system-reminder>\n?/g, (match) => {
|
|
100
|
+
const inner = match.match(
|
|
101
|
+
/The user sent the following message:\n([\s\S]*?)\n\nPlease address/,
|
|
102
|
+
);
|
|
103
|
+
return inner ? inner[1].trim() + "\n" : "";
|
|
104
|
+
})
|
|
105
|
+
.replace(/\n{3,}/g, "\n\n")
|
|
106
|
+
.trim();
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
function cleanParts(parts: Part[]): Part[] {
|
|
110
|
+
const cleaned = parts.map((part) => {
|
|
111
|
+
if (part.type !== "text") return part;
|
|
112
|
+
const text = stripSystemReminders(part.text);
|
|
113
|
+
if (text === part.text) return part;
|
|
114
|
+
return { ...part, text } as Part;
|
|
115
|
+
});
|
|
116
|
+
// Filter out text parts that became empty after stripping
|
|
117
|
+
const filtered = cleaned.filter(
|
|
118
|
+
(part) =>
|
|
119
|
+
part.type !== "text" ||
|
|
120
|
+
(part as Extract<Part, { type: "text" }>).text.trim().length > 0,
|
|
121
|
+
);
|
|
122
|
+
// If all parts were stripped (e.g. a user message that was purely build-switch synthetic
|
|
123
|
+
// content), keep a minimal placeholder so the message survives toModelMessages.
|
|
124
|
+
// Without this, the message gets dropped and the conversation ends with an assistant message,
|
|
125
|
+
// causing Anthropic's "does not support assistant message prefill" error.
|
|
126
|
+
if (filtered.length === 0 && parts.length > 0) {
|
|
127
|
+
const first = parts[0];
|
|
128
|
+
if (first.type === "text") {
|
|
129
|
+
return [{ ...first, text: "..." } as Part];
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
return filtered.length > 0 ? filtered : parts;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
function stripToolOutputs(parts: Part[]): Part[] {
|
|
136
|
+
return parts.map((part) => {
|
|
137
|
+
if (part.type !== "tool") return part;
|
|
138
|
+
if (part.state.status !== "completed") return part;
|
|
139
|
+
return {
|
|
140
|
+
...part,
|
|
141
|
+
state: {
|
|
142
|
+
...part.state,
|
|
143
|
+
output: "[output omitted — use recall for details]",
|
|
144
|
+
},
|
|
145
|
+
} as Part;
|
|
146
|
+
});
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
function stripToTextOnly(parts: Part[]): Part[] {
|
|
150
|
+
const stripped = parts
|
|
151
|
+
.filter((p) => p.type === "text")
|
|
152
|
+
.map((p) => ({
|
|
153
|
+
...p,
|
|
154
|
+
text: normalize(stripSystemReminders(p.text)),
|
|
155
|
+
}))
|
|
156
|
+
.filter((p) => p.text.trim().length > 0) as Part[];
|
|
157
|
+
// Guard against empty result — keep a placeholder so the message survives
|
|
158
|
+
// toModelMessages and the conversation doesn't end with an assistant message.
|
|
159
|
+
if (stripped.length === 0 && parts.length > 0) {
|
|
160
|
+
const first = parts.find((p) => p.type === "text");
|
|
161
|
+
if (first) return [{ ...first, text: "..." } as Part];
|
|
162
|
+
}
|
|
163
|
+
return stripped;
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
// --- Phase 2: Temporal anchoring at read time ---
|
|
167
|
+
|
|
168
|
+
function formatRelativeTime(date: Date, now: Date): string {
|
|
169
|
+
const diffMs = now.getTime() - date.getTime();
|
|
170
|
+
const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));
|
|
171
|
+
if (diffDays === 0) return "today";
|
|
172
|
+
if (diffDays === 1) return "yesterday";
|
|
173
|
+
if (diffDays < 7) return `${diffDays} days ago`;
|
|
174
|
+
if (diffDays < 14) return "1 week ago";
|
|
175
|
+
if (diffDays < 30) return `${Math.floor(diffDays / 7)} weeks ago`;
|
|
176
|
+
if (diffDays < 60) return "1 month ago";
|
|
177
|
+
if (diffDays < 365) return `${Math.floor(diffDays / 30)} months ago`;
|
|
178
|
+
return `${Math.floor(diffDays / 365)} year${Math.floor(diffDays / 365) > 1 ? "s" : ""} ago`;
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
function parseDateFromContent(s: string): Date | null {
|
|
182
|
+
// "Month Day, Year" e.g. "January 15, 2026"
|
|
183
|
+
const simple = s.match(/([A-Z][a-z]+)\s+(\d{1,2}),?\s+(\d{4})/);
|
|
184
|
+
if (simple) {
|
|
185
|
+
const d = new Date(`${simple[1]} ${simple[2]}, ${simple[3]}`);
|
|
186
|
+
if (!isNaN(d.getTime())) return d;
|
|
187
|
+
}
|
|
188
|
+
// "Month D-D, Year" range — use start
|
|
189
|
+
const range = s.match(/([A-Z][a-z]+)\s+(\d{1,2})-\d{1,2},?\s+(\d{4})/);
|
|
190
|
+
if (range) {
|
|
191
|
+
const d = new Date(`${range[1]} ${range[2]}, ${range[3]}`);
|
|
192
|
+
if (!isNaN(d.getTime())) return d;
|
|
193
|
+
}
|
|
194
|
+
// "late/early/mid Month Year"
|
|
195
|
+
const vague = s.match(/(late|early|mid)[- ]?([A-Z][a-z]+)\s+(\d{4})/i);
|
|
196
|
+
if (vague) {
|
|
197
|
+
const day =
|
|
198
|
+
vague[1].toLowerCase() === "early"
|
|
199
|
+
? 7
|
|
200
|
+
: vague[1].toLowerCase() === "late"
|
|
201
|
+
? 23
|
|
202
|
+
: 15;
|
|
203
|
+
const d = new Date(`${vague[2]} ${day}, ${vague[3]}`);
|
|
204
|
+
if (!isNaN(d.getTime())) return d;
|
|
205
|
+
}
|
|
206
|
+
return null;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// Expand "(meaning DATE)" and "(estimated DATE)" annotations with a relative offset.
|
|
210
|
+
// Past future-intent lines get "(likely already happened)" appended.
|
|
211
|
+
function expandInlineEstimatedDates(text: string, now: Date): string {
|
|
212
|
+
return text.replace(
|
|
213
|
+
/\(((?:meaning|estimated)\s+)([^)]+\d{4})\)/gi,
|
|
214
|
+
(match, prefix: string, dateContent: string) => {
|
|
215
|
+
const d = parseDateFromContent(dateContent);
|
|
216
|
+
if (!d) return match;
|
|
217
|
+
const rel = formatRelativeTime(d, now);
|
|
218
|
+
// Detect future-intent by looking backwards on the same line
|
|
219
|
+
const matchIdx = text.indexOf(match);
|
|
220
|
+
const lineStart = text.lastIndexOf("\n", matchIdx) + 1;
|
|
221
|
+
const linePrefix = text.slice(lineStart, matchIdx);
|
|
222
|
+
const isFutureIntent =
|
|
223
|
+
/\b(?:will|plans?\s+to|planning\s+to|going\s+to|intends?\s+to)\b/i.test(
|
|
224
|
+
linePrefix,
|
|
225
|
+
);
|
|
226
|
+
if (d < now && isFutureIntent)
|
|
227
|
+
return `(${prefix}${dateContent} — ${rel}, likely already happened)`;
|
|
228
|
+
return `(${prefix}${dateContent} — ${rel})`;
|
|
229
|
+
},
|
|
230
|
+
);
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// Add relative time annotations to "Date: Month D, Year" section headers
// and gap markers between non-consecutive dates.
function addRelativeTimeToObservations(text: string, now: Date): string {
  // First pass: expand inline "(meaning DATE)" annotations
  const withInline = expandInlineEstimatedDates(text, now);

  // Second pass: annotate date headers and add gap markers.
  // Collect every parseable "Date: Month D, Year" header (line-anchored,
  // multiline) with enough position info to splice annotations back in.
  const dateHeaderRe = /^(Date:\s*)([A-Z][a-z]+ \d{1,2}, \d{4})$/gm;
  const found: Array<{
    index: number; // match start offset in withInline
    date: Date; // parsed header date
    full: string; // full matched header line
    prefix: string; // the "Date: " portion
    ds: string; // the date string portion
  }> = [];
  let m: RegExpExecArray | null;
  while ((m = dateHeaderRe.exec(withInline)) !== null) {
    const d = new Date(m[2]);
    // Skip headers whose date string fails to parse.
    if (!isNaN(d.getTime()))
      found.push({
        index: m.index,
        date: d,
        full: m[0],
        prefix: m[1],
        ds: m[2],
      });
  }
  if (!found.length) return withInline;

  // Rebuild the text, copying untouched spans verbatim and rewriting each
  // header with a relative-time suffix (plus a gap marker when needed).
  let result = "";
  let last = 0; // end offset of the previously copied span
  for (let i = 0; i < found.length; i++) {
    const curr = found[i];
    const prev = found[i - 1];
    result += withInline.slice(last, curr.index);
    // Gap marker between non-consecutive dates
    if (prev) {
      const gapDays = Math.floor(
        (curr.date.getTime() - prev.date.getTime()) / 86400000,
      );
      if (gapDays > 1) {
        // Bucket the gap into days / weeks / months, mirroring
        // formatRelativeTime's thresholds.
        const gap =
          gapDays < 7
            ? `[${gapDays} days later]`
            : gapDays < 14
              ? "[1 week later]"
              : gapDays < 30
                ? `[${Math.floor(gapDays / 7)} weeks later]`
                : gapDays < 60
                  ? "[1 month later]"
                  : `[${Math.floor(gapDays / 30)} months later]`;
        result += `\n${gap}\n\n`;
      }
    }
    // Re-emit the header with "(N days ago)"-style annotation appended.
    result += `${curr.prefix}${curr.ds} (${formatRelativeTime(curr.date, now)})`;
    last = curr.index + curr.full.length;
  }
  result += withInline.slice(last);
  return result;
}
|
|
293
|
+
|
|
294
|
+
// Build a synthetic message pair containing the distilled history
function distilledPrefix(distillations: Distillation[]): MessageWithParts[] {
  if (!distillations.length) return [];
  const now = new Date();
  // Annotate each distillation's observations with relative-time hints
  // (header suffixes, gap markers, inline "(meaning DATE)" expansion).
  const annotated = distillations.map((d) => ({
    ...d,
    observations: addRelativeTimeToObservations(d.observations, now),
  }));
  const formatted = formatDistillations(annotated);
  if (!formatted) return [];
  // Return a user/assistant pair: the user message introduces the memory
  // block, the assistant message carries the distilled content. Both use
  // fixed synthetic IDs and zeroed timestamps/token counts.
  return [
    {
      info: {
        id: "lore-distilled-user",
        sessionID: "",
        role: "user" as const,
        time: { created: 0 },
        agent: "",
        model: { providerID: "", modelID: "" },
      },
      parts: [
        {
          id: "lore-distilled-user-part",
          sessionID: "",
          messageID: "lore-distilled-user",
          type: "text" as const,
          text: "[Memory context follows — do not reference this format in your responses]",
          time: { start: 0, end: 0 },
        },
      ],
    },
    {
      info: {
        id: "lore-distilled-assistant",
        sessionID: "",
        role: "assistant" as const,
        time: { created: 0 },
        parentID: "lore-distilled-user",
        modelID: "",
        providerID: "",
        mode: "memory",
        path: { cwd: "", root: "" },
        cost: 0,
        // Zeroed token accounting — this message was never actually generated.
        tokens: {
          input: 0,
          output: 0,
          reasoning: 0,
          cache: { read: 0, write: 0 },
        },
      },
      parts: [
        {
          id: "lore-distilled-assistant-part",
          sessionID: "",
          messageID: "lore-distilled-assistant",
          type: "text" as const,
          text: formatted + "\n\nI'm ready to continue.",
          time: { start: 0, end: 0 },
        },
      ],
    },
  ];
}
|
|
357
|
+
|
|
358
|
+
// Degradation level chosen by transform():
// 1 = normal budgets; 2 = tool outputs stripped from older messages;
// 3 = all tool outputs stripped + distillations trimmed; 4 = emergency
// (last 2 distillations, last 3 raw messages).
export type SafetyLayer = 1 | 2 | 3 | 4;

export type TransformResult = {
  messages: MessageWithParts[]; // distilled prefix + surviving raw messages
  layer: SafetyLayer; // which fallback layer produced this result
  distilledTokens: number; // estimated tokens in the distilled prefix
  rawTokens: number; // estimated tokens in the raw message window
  totalTokens: number; // distilledTokens + rawTokens
  // Budget context (for display in context inspector)
  usable: number;
  distilledBudget: number;
  rawBudget: number;
};
|
|
371
|
+
|
|
372
|
+
// Signal that we need urgent distillation
|
|
373
|
+
let urgentDistillation = false;
|
|
374
|
+
export function needsUrgentDistillation(): boolean {
|
|
375
|
+
const v = urgentDistillation;
|
|
376
|
+
urgentDistillation = false;
|
|
377
|
+
return v;
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
// Fit the conversation into the model's context window: a distilled-history
// prefix plus as many recent raw messages as budget allows, degrading
// through four safety layers until something fits.
export function transform(input: {
  messages: MessageWithParts[];
  projectPath: string;
  sessionID?: string;
}): TransformResult {
  const cfg = config();
  const overhead = getOverhead();
  // Usable = full context minus output reservation minus fixed overhead (system + tools)
  const usable = contextLimit - outputReserved - overhead;
  const distilledBudget = Math.floor(usable * cfg.budget.distilled);
  const rawBudget = Math.floor(usable * cfg.budget.raw);

  // Find the session ID from messages
  const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
  const distillations = sid ? loadDistillations(input.projectPath, sid) : [];
  const prefix = distilledPrefix(distillations);
  const prefixTokens = prefix.reduce((sum, m) => sum + estimateMessage(m), 0);

  // Layer 1: Normal budget allocation
  const layer1 = tryFit({
    messages: input.messages,
    prefix,
    prefixTokens,
    distilledBudget,
    rawBudget,
    strip: "none",
  });
  if (layer1) return { ...layer1, layer: 1, usable, distilledBudget, rawBudget };

  // Layer 2: Strip tool outputs from older messages, keep last 2 turns
  const layer2 = tryFit({
    messages: input.messages,
    prefix,
    prefixTokens,
    distilledBudget,
    rawBudget: Math.floor(usable * 0.5), // give raw more room
    strip: "old-tools",
    protectedTurns: 2,
  });
  if (layer2) {
    // Layers 2+ mean we are over comfortable budget — request distillation.
    urgentDistillation = true;
    return { ...layer2, layer: 2, usable, distilledBudget, rawBudget };
  }

  // Layer 3: Strip ALL tool outputs, drop oldest distillations
  const trimmedDistillations = distillations.slice(-5);
  const trimmedPrefix = distilledPrefix(trimmedDistillations);
  const trimmedPrefixTokens = trimmedPrefix.reduce(
    (sum, m) => sum + estimateMessage(m),
    0,
  );
  const layer3 = tryFit({
    messages: input.messages,
    prefix: trimmedPrefix,
    prefixTokens: trimmedPrefixTokens,
    distilledBudget: Math.floor(usable * 0.15),
    rawBudget: Math.floor(usable * 0.55),
    strip: "all-tools",
  });
  if (layer3) {
    urgentDistillation = true;
    return { ...layer3, layer: 3, usable, distilledBudget, rawBudget };
  }

  // Layer 4: Emergency — last 2 distillations, last 3 raw messages with tool parts intact.
  // We do NOT strip tool parts here: doing so would cause an infinite tool-call loop because
  // the model would lose sight of its own in-progress tool calls and re-invoke them endlessly.
  // Instead, we aggressively drop old messages and rely on the `recall` tool (which the model
  // is always instructed to use) to retrieve any older details it needs.
  urgentDistillation = true;
  const nuclearDistillations = distillations.slice(-2);
  const nuclearPrefix = distilledPrefix(nuclearDistillations);
  const nuclearPrefixTokens = nuclearPrefix.reduce(
    (sum, m) => sum + estimateMessage(m),
    0,
  );
  const nuclearRaw = input.messages.slice(-3).map((m) => ({
    info: m.info,
    parts: cleanParts(m.parts),
  }));
  const nuclearRawTokens = nuclearRaw.reduce(
    (sum, m) => sum + estimateMessage(m),
    0,
  );

  return {
    messages: [...nuclearPrefix, ...nuclearRaw],
    layer: 4,
    distilledTokens: nuclearPrefixTokens,
    rawTokens: nuclearRawTokens,
    totalTokens: nuclearPrefixTokens + nuclearRawTokens,
    usable,
    distilledBudget,
    rawBudget,
  };
}
|
|
476
|
+
|
|
477
|
+
// Compute our message-only estimate for a set of messages (for calibration use)
|
|
478
|
+
export function estimateMessages(messages: MessageWithParts[]): number {
|
|
479
|
+
return messages.reduce((sum, m) => sum + estimateMessage(m), 0);
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
function tryFit(input: {
|
|
483
|
+
messages: MessageWithParts[];
|
|
484
|
+
prefix: MessageWithParts[];
|
|
485
|
+
prefixTokens: number;
|
|
486
|
+
distilledBudget: number;
|
|
487
|
+
rawBudget: number;
|
|
488
|
+
strip: "none" | "old-tools" | "all-tools";
|
|
489
|
+
protectedTurns?: number;
|
|
490
|
+
}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
|
|
491
|
+
// If distilled prefix exceeds its budget, fail this layer
|
|
492
|
+
if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
|
|
493
|
+
return null;
|
|
494
|
+
|
|
495
|
+
// Walk backwards through messages, accumulating tokens within raw budget
|
|
496
|
+
let rawTokens = 0;
|
|
497
|
+
let cutoff = input.messages.length;
|
|
498
|
+
const protectedTurns = input.protectedTurns ?? 0;
|
|
499
|
+
let turns = 0;
|
|
500
|
+
|
|
501
|
+
for (let i = input.messages.length - 1; i >= 0; i--) {
|
|
502
|
+
const msg = input.messages[i];
|
|
503
|
+
if (msg.info.role === "user") turns++;
|
|
504
|
+
const tokens = estimateMessage(msg);
|
|
505
|
+
if (rawTokens + tokens > input.rawBudget) {
|
|
506
|
+
cutoff = i + 1;
|
|
507
|
+
break;
|
|
508
|
+
}
|
|
509
|
+
rawTokens += tokens;
|
|
510
|
+
if (i === 0) cutoff = 0;
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
const raw = input.messages.slice(cutoff);
|
|
514
|
+
// Must keep at least 1 raw message — otherwise this layer fails
|
|
515
|
+
if (!raw.length) return null;
|
|
516
|
+
|
|
517
|
+
// Apply system-reminder stripping + optional tool output stripping
|
|
518
|
+
const processed = raw.map((msg, idx) => {
|
|
519
|
+
const fromEnd = raw.length - idx;
|
|
520
|
+
const isProtected =
|
|
521
|
+
input.strip === "none" ||
|
|
522
|
+
(input.strip === "old-tools" && fromEnd <= protectedTurns * 2);
|
|
523
|
+
const parts = isProtected
|
|
524
|
+
? cleanParts(msg.parts)
|
|
525
|
+
: cleanParts(
|
|
526
|
+
input.strip === "all-tools"
|
|
527
|
+
? stripToolOutputs(msg.parts)
|
|
528
|
+
: stripToolOutputs(msg.parts),
|
|
529
|
+
);
|
|
530
|
+
const changed = parts !== msg.parts;
|
|
531
|
+
return changed ? { info: msg.info, parts } : msg;
|
|
532
|
+
});
|
|
533
|
+
|
|
534
|
+
const total = input.prefixTokens + rawTokens;
|
|
535
|
+
return {
|
|
536
|
+
messages: [...input.prefix, ...processed],
|
|
537
|
+
distilledTokens: input.prefixTokens,
|
|
538
|
+
rawTokens,
|
|
539
|
+
totalTokens: total,
|
|
540
|
+
};
|
|
541
|
+
}
|