opencode-lore 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/distillation.ts +26 -0
- package/src/gradient.ts +25 -6
- package/src/index.ts +68 -34
package/package.json
CHANGED
package/src/distillation.ts
CHANGED
|
@@ -117,6 +117,32 @@ export type Distillation = {
|
|
|
117
117
|
created_at: number;
|
|
118
118
|
};
|
|
119
119
|
|
|
120
|
+
/** Load all distillations for a session, oldest first. */
|
|
121
|
+
export function loadForSession(
|
|
122
|
+
projectPath: string,
|
|
123
|
+
sessionID: string,
|
|
124
|
+
): Distillation[] {
|
|
125
|
+
const pid = ensureProject(projectPath);
|
|
126
|
+
const rows = db()
|
|
127
|
+
.query(
|
|
128
|
+
"SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC",
|
|
129
|
+
)
|
|
130
|
+
.all(pid, sessionID) as Array<{
|
|
131
|
+
id: string;
|
|
132
|
+
project_id: string;
|
|
133
|
+
session_id: string;
|
|
134
|
+
observations: string;
|
|
135
|
+
source_ids: string;
|
|
136
|
+
generation: number;
|
|
137
|
+
token_count: number;
|
|
138
|
+
created_at: number;
|
|
139
|
+
}>;
|
|
140
|
+
return rows.map((r) => ({
|
|
141
|
+
...r,
|
|
142
|
+
source_ids: JSON.parse(r.source_ids) as string[],
|
|
143
|
+
}));
|
|
144
|
+
}
|
|
145
|
+
|
|
120
146
|
function storeDistillation(input: {
|
|
121
147
|
projectPath: string;
|
|
122
148
|
sessionID: string;
|
package/src/gradient.ts
CHANGED
|
@@ -722,8 +722,27 @@ export function transform(input: {
|
|
|
722
722
|
const maxInput = contextLimit - outputReserved;
|
|
723
723
|
const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
|
|
724
724
|
|
|
725
|
+
// True when we have real API token data from a previous turn in this session.
|
|
726
|
+
// When false (first turn / session change), chars/4 estimates can undercount by
|
|
727
|
+
// up to 1.8x — so tryFit output must be validated with a safety multiplier before
|
|
728
|
+
// being used, to prevent sending an apparently-fitting window that actually overflows.
|
|
729
|
+
const calibrated = lastKnownInput > 0 && sid === lastKnownSessionID;
|
|
730
|
+
|
|
731
|
+
// On uncalibrated turns, apply this multiplier to tryFit's estimated total to
|
|
732
|
+
// approximate the real token count. 1.5 is conservative but not so aggressive
|
|
733
|
+
// that it forces layer 4 on modestly-sized sessions.
|
|
734
|
+
const UNCALIBRATED_SAFETY = 1.5;
|
|
735
|
+
|
|
736
|
+
// Returns true if the tryFit result is safe to use: either we have calibrated
|
|
737
|
+
// data (exact) or the estimated total * safety factor fits within maxInput.
|
|
738
|
+
function fitsWithSafetyMargin(result: { totalTokens: number } | null): boolean {
|
|
739
|
+
if (!result) return false;
|
|
740
|
+
if (calibrated) return true;
|
|
741
|
+
return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
|
|
742
|
+
}
|
|
743
|
+
|
|
725
744
|
let expectedInput: number;
|
|
726
|
-
if (
|
|
745
|
+
if (calibrated) {
|
|
727
746
|
// Exact approach: prior API count + estimate of only the new messages.
|
|
728
747
|
const newMsgCount = Math.max(0, input.messages.length - lastKnownMessageCount);
|
|
729
748
|
const newMsgTokens = newMsgCount > 0
|
|
@@ -793,7 +812,7 @@ export function transform(input: {
|
|
|
793
812
|
rawBudget,
|
|
794
813
|
strip: "none",
|
|
795
814
|
});
|
|
796
|
-
if (layer1) return { ...layer1
|
|
815
|
+
if (fitsWithSafetyMargin(layer1)) return { ...layer1!, layer: 1, usable, distilledBudget, rawBudget };
|
|
797
816
|
}
|
|
798
817
|
|
|
799
818
|
// Layer 1 didn't fit (or was force-skipped) — reset the raw window cache.
|
|
@@ -812,9 +831,9 @@ export function transform(input: {
|
|
|
812
831
|
strip: "old-tools",
|
|
813
832
|
protectedTurns: 2,
|
|
814
833
|
});
|
|
815
|
-
if (layer2) {
|
|
834
|
+
if (fitsWithSafetyMargin(layer2)) {
|
|
816
835
|
urgentDistillation = true;
|
|
817
|
-
return { ...layer2
|
|
836
|
+
return { ...layer2!, layer: 2, usable, distilledBudget, rawBudget };
|
|
818
837
|
}
|
|
819
838
|
}
|
|
820
839
|
|
|
@@ -833,9 +852,9 @@ export function transform(input: {
|
|
|
833
852
|
rawBudget: Math.floor(usable * 0.55),
|
|
834
853
|
strip: "all-tools",
|
|
835
854
|
});
|
|
836
|
-
if (layer3) {
|
|
855
|
+
if (fitsWithSafetyMargin(layer3)) {
|
|
837
856
|
urgentDistillation = true;
|
|
838
|
-
return { ...layer3
|
|
857
|
+
return { ...layer3!, layer: 3, usable, distilledBudget, rawBudget };
|
|
839
858
|
}
|
|
840
859
|
|
|
841
860
|
// Layer 4: Emergency — last 2 distillations, last 3 raw messages with tool parts intact.
|
package/src/index.ts
CHANGED
|
@@ -188,7 +188,12 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
188
188
|
if (
|
|
189
189
|
msg.role === "assistant" &&
|
|
190
190
|
msg.tokens &&
|
|
191
|
-
|
|
191
|
+
// Include cache.write: tokens written to cache were fully sent to the
|
|
192
|
+
// model (they were processed, just not read from a prior cache slot).
|
|
193
|
+
// Omitting cache.write causes a dramatic undercount on cold-cache turns
|
|
194
|
+
// where cache.read=0 but 150K+ tokens were written — leading the gradient
|
|
195
|
+
// to think only 3 tokens went in and to pass the full session as layer 0.
|
|
196
|
+
(msg.tokens.input > 0 || msg.tokens.cache.read > 0 || msg.tokens.cache.write > 0)
|
|
192
197
|
) {
|
|
193
198
|
const pending = temporal.undistilledCount(projectPath, msg.sessionID);
|
|
194
199
|
if (pending >= config().distillation.maxSegment) {
|
|
@@ -201,6 +206,9 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
201
206
|
// Calibrate overhead estimate using real token counts.
|
|
202
207
|
// Also store the exact input count + message count for the proactive
|
|
203
208
|
// layer-0 decision (avoids full chars/4 re-estimation each turn).
|
|
209
|
+
// actualInput = all tokens the model processed as input, regardless of
|
|
210
|
+
// whether they were new (input), read from cache (cache.read), or newly
|
|
211
|
+
// written to cache (cache.write). All three contribute to the context window.
|
|
204
212
|
const allMsgs = await ctx.client.session.messages({
|
|
205
213
|
path: { id: msg.sessionID },
|
|
206
214
|
});
|
|
@@ -209,7 +217,8 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
209
217
|
.filter((m) => m.info.id !== msg.id)
|
|
210
218
|
.map((m) => ({ info: m.info, parts: m.parts }));
|
|
211
219
|
const msgEstimate = estimateMessages(withParts);
|
|
212
|
-
const actualInput =
|
|
220
|
+
const actualInput =
|
|
221
|
+
msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
|
|
213
222
|
calibrate(actualInput, msgEstimate, msg.sessionID, withParts.length);
|
|
214
223
|
}
|
|
215
224
|
}
|
|
@@ -224,43 +233,44 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
224
233
|
// 1. Force the gradient transform to escalate on the next call (skip layer 0/1)
|
|
225
234
|
// 2. Force distillation to capture all temporal data before compaction
|
|
226
235
|
// 3. Trigger compaction so the session recovers without user intervention
|
|
227
|
-
const
|
|
228
|
-
|
|
236
|
+
const rawError = (event.properties as Record<string, unknown>).error;
|
|
237
|
+
// Diagnostic: log the full error shape so we can verify our detection matches
|
|
238
|
+
console.error("[lore] session.error received:", JSON.stringify(rawError, null, 2));
|
|
239
|
+
|
|
240
|
+
const error = rawError as
|
|
241
|
+
| { name?: string; message?: string; data?: { message?: string } }
|
|
229
242
|
| undefined;
|
|
243
|
+
// Match both shapes: error.data.message (APIError wrapper) and error.message (direct)
|
|
244
|
+
const errorMessage = error?.data?.message ?? error?.message ?? "";
|
|
230
245
|
const isPromptTooLong =
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
246
|
+
typeof errorMessage === "string" &&
|
|
247
|
+
(errorMessage.includes("prompt is too long") ||
|
|
248
|
+
errorMessage.includes("context length exceeded") ||
|
|
249
|
+
errorMessage.includes("maximum context length") ||
|
|
250
|
+
errorMessage.includes("ContextWindowExceededError") ||
|
|
251
|
+
errorMessage.includes("too many tokens"));
|
|
252
|
+
|
|
253
|
+
console.error(
|
|
254
|
+
`[lore] session.error isPromptTooLong=${isPromptTooLong} (name=${error?.name}, message=${errorMessage.substring(0, 120)})`,
|
|
255
|
+
);
|
|
236
256
|
|
|
237
257
|
if (isPromptTooLong) {
|
|
238
258
|
const sessionID = (event.properties as Record<string, unknown>).sessionID as
|
|
239
259
|
| string
|
|
240
260
|
| undefined;
|
|
241
261
|
console.error(
|
|
242
|
-
`[lore] detected 'prompt too long' error — forcing distillation +
|
|
262
|
+
`[lore] detected 'prompt too long' error — forcing distillation + layer escalation (session: ${sessionID?.substring(0, 16)})`,
|
|
243
263
|
);
|
|
244
264
|
// Force layer 2 on next transform — layers 0 and 1 were already too large.
|
|
265
|
+
// The gradient at layers 2-4 will compress the context enough for the next turn.
|
|
266
|
+
// Do NOT call session.summarize() here — it sends all messages to the model,
|
|
267
|
+
// which would overflow again and create a stuck compaction loop.
|
|
245
268
|
setForceMinLayer(2);
|
|
246
269
|
|
|
247
270
|
if (sessionID) {
|
|
248
|
-
// Force distillation to capture all undistilled messages
|
|
249
|
-
//
|
|
271
|
+
// Force distillation to capture all undistilled messages into the temporal
|
|
272
|
+
// store so they're preserved even if the session is later compacted manually.
|
|
250
273
|
await backgroundDistill(sessionID, true);
|
|
251
|
-
|
|
252
|
-
// Trigger compaction automatically — the compacting hook will inject
|
|
253
|
-
// Lore's custom distillation-aware prompt.
|
|
254
|
-
try {
|
|
255
|
-
const sessions = await ctx.client.session.list();
|
|
256
|
-
const session = sessions.data?.find((s) => s.id.startsWith(sessionID));
|
|
257
|
-
if (session) {
|
|
258
|
-
// providerID/modelID are optional — omit to use the session's current model
|
|
259
|
-
await ctx.client.session.summarize({ path: { id: session.id } });
|
|
260
|
-
}
|
|
261
|
-
} catch (e) {
|
|
262
|
-
console.error("[lore] auto-compaction failed:", e);
|
|
263
|
-
}
|
|
264
274
|
}
|
|
265
275
|
}
|
|
266
276
|
}
|
|
@@ -379,12 +389,13 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
379
389
|
// Layer 0 means all messages fit within the context budget — leave them alone
|
|
380
390
|
// so the append-only sequence stays intact for prompt caching.
|
|
381
391
|
if (result.layer > 0) {
|
|
392
|
+
// The API requires the conversation to end with a user message.
|
|
393
|
+
// Always drop trailing non-user messages — even assistant messages with
|
|
394
|
+
// tool parts. A hard API error is worse than the model re-invoking a tool.
|
|
382
395
|
while (
|
|
383
396
|
result.messages.length > 0 &&
|
|
384
397
|
result.messages.at(-1)!.info.role !== "user"
|
|
385
398
|
) {
|
|
386
|
-
const last = result.messages.at(-1)!;
|
|
387
|
-
if (last.parts.some((p) => p.type === "tool")) break;
|
|
388
399
|
const dropped = result.messages.pop()!;
|
|
389
400
|
console.error(
|
|
390
401
|
"[lore] WARN: dropping trailing",
|
|
@@ -401,17 +412,25 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
401
412
|
}
|
|
402
413
|
},
|
|
403
414
|
|
|
404
|
-
// Replace compaction prompt with distillation-aware prompt when
|
|
405
|
-
//
|
|
406
|
-
//
|
|
415
|
+
// Replace compaction prompt with distillation-aware prompt when /compact is used.
|
|
416
|
+
// Strategy: run chunked distillation first so all messages are captured in segments
|
|
417
|
+
// that each fit within the model's context, then inject the pre-computed summaries
|
|
418
|
+
// as context so the model consolidates them rather than re-reading all raw messages.
|
|
419
|
+
// This prevents the overflow→compaction→overflow stuck loop.
|
|
407
420
|
"experimental.session.compacting": async (input, output) => {
|
|
408
|
-
//
|
|
409
|
-
//
|
|
410
|
-
//
|
|
421
|
+
// Chunked distillation: split all undistilled messages into segments that each
|
|
422
|
+
// fit within the model's context window and distill them independently.
|
|
423
|
+
// This is safe even when the full session exceeds the context limit.
|
|
411
424
|
if (input.sessionID && activeSessions.has(input.sessionID)) {
|
|
412
425
|
await backgroundDistill(input.sessionID, true);
|
|
413
426
|
}
|
|
414
427
|
|
|
428
|
+
// Load all distillation summaries produced for this session (oldest first).
|
|
429
|
+
// These are the chunked observations — the model will consolidate them.
|
|
430
|
+
const distillations = input.sessionID
|
|
431
|
+
? distillation.loadForSession(projectPath, input.sessionID)
|
|
432
|
+
: [];
|
|
433
|
+
|
|
415
434
|
const entries = ltm.forProject(projectPath, config().crossProject);
|
|
416
435
|
const knowledge = entries.length
|
|
417
436
|
? formatKnowledge(
|
|
@@ -423,9 +442,24 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
423
442
|
)
|
|
424
443
|
: "";
|
|
425
444
|
|
|
445
|
+
// Inject each distillation chunk as a context string so the model has access
|
|
446
|
+
// to pre-computed summaries. Even if the raw messages overflow context, these
|
|
447
|
+
// summaries are compact and will fit.
|
|
448
|
+
if (distillations.length > 0) {
|
|
449
|
+
output.context.push(
|
|
450
|
+
`## Lore Pre-computed Session Summaries\n\nThe following ${distillations.length} summary chunk(s) were pre-computed from the conversation history. Use these as the authoritative source — do not re-summarize the raw messages above if they conflict.\n\n` +
|
|
451
|
+
distillations
|
|
452
|
+
.map(
|
|
453
|
+
(d, i) =>
|
|
454
|
+
`### Chunk ${i + 1}${d.generation > 0 ? " (consolidated)" : ""}\n${d.observations}`,
|
|
455
|
+
)
|
|
456
|
+
.join("\n\n"),
|
|
457
|
+
);
|
|
458
|
+
}
|
|
459
|
+
|
|
426
460
|
output.prompt = `You are creating a distilled memory summary for an AI coding agent. This summary will be the ONLY context available in the next part of the conversation.
|
|
427
461
|
|
|
428
|
-
Structure your response as follows:
|
|
462
|
+
${distillations.length > 0 ? "Lore has pre-computed chunked summaries of the session history (injected above as context). Consolidate those summaries into a single coherent narrative. Do NOT re-read or re-summarize the raw conversation messages — trust the pre-computed summaries.\n\n" : ""}Structure your response as follows:
|
|
429
463
|
|
|
430
464
|
## Session History
|
|
431
465
|
|