opencode-lore 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/distillation.ts +26 -0
- package/src/index.ts +65 -32
package/package.json
CHANGED
package/src/distillation.ts
CHANGED
|
@@ -117,6 +117,32 @@ export type Distillation = {
|
|
|
117
117
|
created_at: number;
|
|
118
118
|
};
|
|
119
119
|
|
|
120
|
+
/** Load all distillations for a session, oldest first. */
|
|
121
|
+
export function loadForSession(
|
|
122
|
+
projectPath: string,
|
|
123
|
+
sessionID: string,
|
|
124
|
+
): Distillation[] {
|
|
125
|
+
const pid = ensureProject(projectPath);
|
|
126
|
+
const rows = db()
|
|
127
|
+
.query(
|
|
128
|
+
"SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC",
|
|
129
|
+
)
|
|
130
|
+
.all(pid, sessionID) as Array<{
|
|
131
|
+
id: string;
|
|
132
|
+
project_id: string;
|
|
133
|
+
session_id: string;
|
|
134
|
+
observations: string;
|
|
135
|
+
source_ids: string;
|
|
136
|
+
generation: number;
|
|
137
|
+
token_count: number;
|
|
138
|
+
created_at: number;
|
|
139
|
+
}>;
|
|
140
|
+
return rows.map((r) => ({
|
|
141
|
+
...r,
|
|
142
|
+
source_ids: JSON.parse(r.source_ids) as string[],
|
|
143
|
+
}));
|
|
144
|
+
}
|
|
145
|
+
|
|
120
146
|
function storeDistillation(input: {
|
|
121
147
|
projectPath: string;
|
|
122
148
|
sessionID: string;
|
package/src/index.ts
CHANGED
|
@@ -188,7 +188,12 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
188
188
|
if (
|
|
189
189
|
msg.role === "assistant" &&
|
|
190
190
|
msg.tokens &&
|
|
191
|
-
|
|
191
|
+
// Include cache.write: tokens written to cache were fully sent to the
|
|
192
|
+
// model (they were processed, just not read from a prior cache slot).
|
|
193
|
+
// Omitting cache.write causes a dramatic undercount on cold-cache turns
|
|
194
|
+
// where cache.read=0 but 150K+ tokens were written — leading the gradient
|
|
195
|
+
// to think only 3 tokens went in and passing the full session as layer 0.
|
|
196
|
+
(msg.tokens.input > 0 || msg.tokens.cache.read > 0 || msg.tokens.cache.write > 0)
|
|
192
197
|
) {
|
|
193
198
|
const pending = temporal.undistilledCount(projectPath, msg.sessionID);
|
|
194
199
|
if (pending >= config().distillation.maxSegment) {
|
|
@@ -201,6 +206,9 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
201
206
|
// Calibrate overhead estimate using real token counts.
|
|
202
207
|
// Also store the exact input count + message count for the proactive
|
|
203
208
|
// layer-0 decision (avoids full chars/4 re-estimation each turn).
|
|
209
|
+
// actualInput = all tokens the model processed as input, regardless of
|
|
210
|
+
// whether they were new (input), read from cache (cache.read), or newly
|
|
211
|
+
// written to cache (cache.write). All three contribute to the context window.
|
|
204
212
|
const allMsgs = await ctx.client.session.messages({
|
|
205
213
|
path: { id: msg.sessionID },
|
|
206
214
|
});
|
|
@@ -209,7 +217,8 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
209
217
|
.filter((m) => m.info.id !== msg.id)
|
|
210
218
|
.map((m) => ({ info: m.info, parts: m.parts }));
|
|
211
219
|
const msgEstimate = estimateMessages(withParts);
|
|
212
|
-
const actualInput =
|
|
220
|
+
const actualInput =
|
|
221
|
+
msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
|
|
213
222
|
calibrate(actualInput, msgEstimate, msg.sessionID, withParts.length);
|
|
214
223
|
}
|
|
215
224
|
}
|
|
@@ -224,43 +233,44 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
224
233
|
// 1. Force the gradient transform to escalate on the next call (skip layer 0/1)
|
|
225
234
|
// 2. Force distillation to capture all temporal data before compaction
|
|
226
235
|
// 3. Trigger compaction so the session recovers without user intervention
|
|
227
|
-
const
|
|
228
|
-
|
|
236
|
+
const rawError = (event.properties as Record<string, unknown>).error;
|
|
237
|
+
// Diagnostic: log the full error shape so we can verify our detection matches
|
|
238
|
+
console.error("[lore] session.error received:", JSON.stringify(rawError, null, 2));
|
|
239
|
+
|
|
240
|
+
const error = rawError as
|
|
241
|
+
| { name?: string; message?: string; data?: { message?: string } }
|
|
229
242
|
| undefined;
|
|
243
|
+
// Match both shapes: error.data.message (APIError wrapper) and error.message (direct)
|
|
244
|
+
const errorMessage = error?.data?.message ?? error?.message ?? "";
|
|
230
245
|
const isPromptTooLong =
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
246
|
+
typeof errorMessage === "string" &&
|
|
247
|
+
(errorMessage.includes("prompt is too long") ||
|
|
248
|
+
errorMessage.includes("context length exceeded") ||
|
|
249
|
+
errorMessage.includes("maximum context length") ||
|
|
250
|
+
errorMessage.includes("ContextWindowExceededError") ||
|
|
251
|
+
errorMessage.includes("too many tokens"));
|
|
252
|
+
|
|
253
|
+
console.error(
|
|
254
|
+
`[lore] session.error isPromptTooLong=${isPromptTooLong} (name=${error?.name}, message=${errorMessage.substring(0, 120)})`,
|
|
255
|
+
);
|
|
236
256
|
|
|
237
257
|
if (isPromptTooLong) {
|
|
238
258
|
const sessionID = (event.properties as Record<string, unknown>).sessionID as
|
|
239
259
|
| string
|
|
240
260
|
| undefined;
|
|
241
261
|
console.error(
|
|
242
|
-
`[lore] detected 'prompt too long' error — forcing distillation +
|
|
262
|
+
`[lore] detected 'prompt too long' error — forcing distillation + layer escalation (session: ${sessionID?.substring(0, 16)})`,
|
|
243
263
|
);
|
|
244
264
|
// Force layer 2 on next transform — layers 0 and 1 were already too large.
|
|
265
|
+
// The gradient at layers 2-4 will compress the context enough for the next turn.
|
|
266
|
+
// Do NOT call session.summarize() here — it sends all messages to the model,
|
|
267
|
+
// which would overflow again and create a stuck compaction loop.
|
|
245
268
|
setForceMinLayer(2);
|
|
246
269
|
|
|
247
270
|
if (sessionID) {
|
|
248
|
-
// Force distillation to capture all undistilled messages
|
|
249
|
-
//
|
|
271
|
+
// Force distillation to capture all undistilled messages into the temporal
|
|
272
|
+
// store so they're preserved even if the session is later compacted manually.
|
|
250
273
|
await backgroundDistill(sessionID, true);
|
|
251
|
-
|
|
252
|
-
// Trigger compaction automatically — the compacting hook will inject
|
|
253
|
-
// Lore's custom distillation-aware prompt.
|
|
254
|
-
try {
|
|
255
|
-
const sessions = await ctx.client.session.list();
|
|
256
|
-
const session = sessions.data?.find((s) => s.id.startsWith(sessionID));
|
|
257
|
-
if (session) {
|
|
258
|
-
// providerID/modelID are optional — omit to use the session's current model
|
|
259
|
-
await ctx.client.session.summarize({ path: { id: session.id } });
|
|
260
|
-
}
|
|
261
|
-
} catch (e) {
|
|
262
|
-
console.error("[lore] auto-compaction failed:", e);
|
|
263
|
-
}
|
|
264
274
|
}
|
|
265
275
|
}
|
|
266
276
|
}
|
|
@@ -401,17 +411,25 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
401
411
|
}
|
|
402
412
|
},
|
|
403
413
|
|
|
404
|
-
// Replace compaction prompt with distillation-aware prompt when
|
|
405
|
-
//
|
|
406
|
-
//
|
|
414
|
+
// Replace compaction prompt with distillation-aware prompt when /compact is used.
|
|
415
|
+
// Strategy: run chunked distillation first so all messages are captured in segments
|
|
416
|
+
// that each fit within the model's context, then inject the pre-computed summaries
|
|
417
|
+
// as context so the model consolidates them rather than re-reading all raw messages.
|
|
418
|
+
// This prevents the overflow→compaction→overflow stuck loop.
|
|
407
419
|
"experimental.session.compacting": async (input, output) => {
|
|
408
|
-
//
|
|
409
|
-
//
|
|
410
|
-
//
|
|
420
|
+
// Chunked distillation: split all undistilled messages into segments that each
|
|
421
|
+
// fit within the model's context window and distill them independently.
|
|
422
|
+
// This is safe even when the full session exceeds the context limit.
|
|
411
423
|
if (input.sessionID && activeSessions.has(input.sessionID)) {
|
|
412
424
|
await backgroundDistill(input.sessionID, true);
|
|
413
425
|
}
|
|
414
426
|
|
|
427
|
+
// Load all distillation summaries produced for this session (oldest first).
|
|
428
|
+
// These are the chunked observations — the model will consolidate them.
|
|
429
|
+
const distillations = input.sessionID
|
|
430
|
+
? distillation.loadForSession(projectPath, input.sessionID)
|
|
431
|
+
: [];
|
|
432
|
+
|
|
415
433
|
const entries = ltm.forProject(projectPath, config().crossProject);
|
|
416
434
|
const knowledge = entries.length
|
|
417
435
|
? formatKnowledge(
|
|
@@ -423,9 +441,24 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
423
441
|
)
|
|
424
442
|
: "";
|
|
425
443
|
|
|
444
|
+
// Inject each distillation chunk as a context string so the model has access
|
|
445
|
+
// to pre-computed summaries. Even if the raw messages overflow context, these
|
|
446
|
+
// summaries are compact and will fit.
|
|
447
|
+
if (distillations.length > 0) {
|
|
448
|
+
output.context.push(
|
|
449
|
+
`## Lore Pre-computed Session Summaries\n\nThe following ${distillations.length} summary chunk(s) were pre-computed from the conversation history. Use these as the authoritative source — do not re-summarize the raw messages above if they conflict.\n\n` +
|
|
450
|
+
distillations
|
|
451
|
+
.map(
|
|
452
|
+
(d, i) =>
|
|
453
|
+
`### Chunk ${i + 1}${d.generation > 0 ? " (consolidated)" : ""}\n${d.observations}`,
|
|
454
|
+
)
|
|
455
|
+
.join("\n\n"),
|
|
456
|
+
);
|
|
457
|
+
}
|
|
458
|
+
|
|
426
459
|
output.prompt = `You are creating a distilled memory summary for an AI coding agent. This summary will be the ONLY context available in the next part of the conversation.
|
|
427
460
|
|
|
428
|
-
Structure your response as follows:
|
|
461
|
+
${distillations.length > 0 ? "Lore has pre-computed chunked summaries of the session history (injected above as context). Consolidate those summaries into a single coherent narrative. Do NOT re-read or re-summarize the raw conversation messages — trust the pre-computed summaries.\n\n" : ""}Structure your response as follows:
|
|
429
462
|
|
|
430
463
|
## Session History
|
|
431
464
|
|