opencode-lore 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-lore",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
@@ -117,6 +117,32 @@ export type Distillation = {
117
117
  created_at: number;
118
118
  };
119
119
 
120
+ /** Load all distillations for a session, oldest first. */
121
+ export function loadForSession(
122
+ projectPath: string,
123
+ sessionID: string,
124
+ ): Distillation[] {
125
+ const pid = ensureProject(projectPath);
126
+ const rows = db()
127
+ .query(
128
+ "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC",
129
+ )
130
+ .all(pid, sessionID) as Array<{
131
+ id: string;
132
+ project_id: string;
133
+ session_id: string;
134
+ observations: string;
135
+ source_ids: string;
136
+ generation: number;
137
+ token_count: number;
138
+ created_at: number;
139
+ }>;
140
+ return rows.map((r) => ({
141
+ ...r,
142
+ source_ids: JSON.parse(r.source_ids) as string[],
143
+ }));
144
+ }
145
+
120
146
  function storeDistillation(input: {
121
147
  projectPath: string;
122
148
  sessionID: string;
package/src/index.ts CHANGED
@@ -188,7 +188,12 @@ export const LorePlugin: Plugin = async (ctx) => {
188
188
  if (
189
189
  msg.role === "assistant" &&
190
190
  msg.tokens &&
191
- (msg.tokens.input > 0 || msg.tokens.cache.read > 0)
191
+ // Include cache.write: tokens written to cache were fully sent to the
192
+ // model (they were processed, just not read from a prior cache slot).
193
+ // Omitting cache.write causes a dramatic undercount on cold-cache turns
194
+ // where cache.read=0 but 150K+ tokens were written — leading the gradient
195
+ // to think only 3 tokens went in and passing the full session as layer 0.
196
+ (msg.tokens.input > 0 || msg.tokens.cache.read > 0 || msg.tokens.cache.write > 0)
192
197
  ) {
193
198
  const pending = temporal.undistilledCount(projectPath, msg.sessionID);
194
199
  if (pending >= config().distillation.maxSegment) {
@@ -201,6 +206,9 @@ export const LorePlugin: Plugin = async (ctx) => {
201
206
  // Calibrate overhead estimate using real token counts.
202
207
  // Also store the exact input count + message count for the proactive
203
208
  // layer-0 decision (avoids full chars/4 re-estimation each turn).
209
+ // actualInput = all tokens the model processed as input, regardless of
210
+ // whether they were new (input), read from cache (cache.read), or newly
211
+ // written to cache (cache.write). All three contribute to the context window.
204
212
  const allMsgs = await ctx.client.session.messages({
205
213
  path: { id: msg.sessionID },
206
214
  });
@@ -209,7 +217,8 @@ export const LorePlugin: Plugin = async (ctx) => {
209
217
  .filter((m) => m.info.id !== msg.id)
210
218
  .map((m) => ({ info: m.info, parts: m.parts }));
211
219
  const msgEstimate = estimateMessages(withParts);
212
- const actualInput = msg.tokens.input + msg.tokens.cache.read;
220
+ const actualInput =
221
+ msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
213
222
  calibrate(actualInput, msgEstimate, msg.sessionID, withParts.length);
214
223
  }
215
224
  }
@@ -224,43 +233,44 @@ export const LorePlugin: Plugin = async (ctx) => {
224
233
  // 1. Force the gradient transform to escalate on the next call (skip layer 0/1)
225
234
  // 2. Force distillation to capture all temporal data before compaction
226
235
  // 3. Trigger compaction so the session recovers without user intervention
227
- const error = (event.properties as Record<string, unknown>).error as
228
- | { name?: string; data?: { message?: string } }
236
+ const rawError = (event.properties as Record<string, unknown>).error;
237
+ // Diagnostic: log the full error shape so we can verify our detection matches
238
+ console.error("[lore] session.error received:", JSON.stringify(rawError, null, 2));
239
+
240
+ const error = rawError as
241
+ | { name?: string; message?: string; data?: { message?: string } }
229
242
  | undefined;
243
+ // Match both shapes: error.data.message (APIError wrapper) and error.message (direct)
244
+ const errorMessage = error?.data?.message ?? error?.message ?? "";
230
245
  const isPromptTooLong =
231
- error?.name === "APIError" &&
232
- typeof error?.data?.message === "string" &&
233
- (error.data.message.includes("prompt is too long") ||
234
- error.data.message.includes("context length exceeded") ||
235
- error.data.message.includes("maximum context length"));
246
+ typeof errorMessage === "string" &&
247
+ (errorMessage.includes("prompt is too long") ||
248
+ errorMessage.includes("context length exceeded") ||
249
+ errorMessage.includes("maximum context length") ||
250
+ errorMessage.includes("ContextWindowExceededError") ||
251
+ errorMessage.includes("too many tokens"));
252
+
253
+ console.error(
254
+ `[lore] session.error isPromptTooLong=${isPromptTooLong} (name=${error?.name}, message=${errorMessage.substring(0, 120)})`,
255
+ );
236
256
 
237
257
  if (isPromptTooLong) {
238
258
  const sessionID = (event.properties as Record<string, unknown>).sessionID as
239
259
  | string
240
260
  | undefined;
241
261
  console.error(
242
- `[lore] detected 'prompt too long' error — forcing distillation + compaction (session: ${sessionID?.substring(0, 16)})`,
262
+ `[lore] detected 'prompt too long' error — forcing distillation + layer escalation (session: ${sessionID?.substring(0, 16)})`,
243
263
  );
244
264
  // Force layer 2 on next transform — layers 0 and 1 were already too large.
265
+ // The gradient at layers 2-4 will compress the context enough for the next turn.
266
+ // Do NOT call session.summarize() here — it sends all messages to the model,
267
+ // which would overflow again and create a stuck compaction loop.
245
268
  setForceMinLayer(2);
246
269
 
247
270
  if (sessionID) {
248
- // Force distillation to capture all undistilled messages before
249
- // compaction replaces the session message history.
271
+ // Force distillation to capture all undistilled messages into the temporal
272
+ // store so they're preserved even if the session is later compacted manually.
250
273
  await backgroundDistill(sessionID, true);
251
-
252
- // Trigger compaction automatically — the compacting hook will inject
253
- // Lore's custom distillation-aware prompt.
254
- try {
255
- const sessions = await ctx.client.session.list();
256
- const session = sessions.data?.find((s) => s.id.startsWith(sessionID));
257
- if (session) {
258
- // providerID/modelID are optional — omit to use the session's current model
259
- await ctx.client.session.summarize({ path: { id: session.id } });
260
- }
261
- } catch (e) {
262
- console.error("[lore] auto-compaction failed:", e);
263
- }
264
274
  }
265
275
  }
266
276
  }
@@ -401,17 +411,25 @@ export const LorePlugin: Plugin = async (ctx) => {
401
411
  }
402
412
  },
403
413
 
404
- // Replace compaction prompt with distillation-aware prompt when manual /compact is used.
405
- // Also force distillation first so all temporal data is captured before compaction
406
- // replaces the session message history.
414
+ // Replace compaction prompt with distillation-aware prompt when /compact is used.
415
+ // Strategy: run chunked distillation first so all messages are captured in segments
416
+ // that each fit within the model's context, then inject the pre-computed summaries
417
+ // as context so the model consolidates them rather than re-reading all raw messages.
418
+ // This prevents the overflow→compaction→overflow stuck loop.
407
419
  "experimental.session.compacting": async (input, output) => {
408
- // Force distillation to capture any undistilled messages. This is critical:
409
- // compaction will replace all messages with a summary, so we must persist
410
- // everything to Lore's temporal store before that happens.
420
+ // Chunked distillation: split all undistilled messages into segments that each
421
+ // fit within the model's context window and distill them independently.
422
+ // This is safe even when the full session exceeds the context limit.
411
423
  if (input.sessionID && activeSessions.has(input.sessionID)) {
412
424
  await backgroundDistill(input.sessionID, true);
413
425
  }
414
426
 
427
+ // Load all distillation summaries produced for this session (oldest first).
428
+ // These are the chunked observations — the model will consolidate them.
429
+ const distillations = input.sessionID
430
+ ? distillation.loadForSession(projectPath, input.sessionID)
431
+ : [];
432
+
415
433
  const entries = ltm.forProject(projectPath, config().crossProject);
416
434
  const knowledge = entries.length
417
435
  ? formatKnowledge(
@@ -423,9 +441,24 @@ export const LorePlugin: Plugin = async (ctx) => {
423
441
  )
424
442
  : "";
425
443
 
444
+ // Inject each distillation chunk as a context string so the model has access
445
+ // to pre-computed summaries. Even if the raw messages overflow context, these
446
+ // summaries are compact and will fit.
447
+ if (distillations.length > 0) {
448
+ output.context.push(
449
+ `## Lore Pre-computed Session Summaries\n\nThe following ${distillations.length} summary chunk(s) were pre-computed from the conversation history. Use these as the authoritative source — do not re-summarize the raw messages above if they conflict.\n\n` +
450
+ distillations
451
+ .map(
452
+ (d, i) =>
453
+ `### Chunk ${i + 1}${d.generation > 0 ? " (consolidated)" : ""}\n${d.observations}`,
454
+ )
455
+ .join("\n\n"),
456
+ );
457
+ }
458
+
426
459
  output.prompt = `You are creating a distilled memory summary for an AI coding agent. This summary will be the ONLY context available in the next part of the conversation.
427
460
 
428
- Structure your response as follows:
461
+ ${distillations.length > 0 ? "Lore has pre-computed chunked summaries of the session history (injected above as context). Consolidate those summaries into a single coherent narrative. Do NOT re-read or re-summarize the raw conversation messages — trust the pre-computed summaries.\n\n" : ""}Structure your response as follows:
429
462
 
430
463
  ## Session History
431
464