opencode-lore 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/agents-file.ts +31 -15
- package/src/config.ts +8 -0
- package/src/db.ts +24 -2
- package/src/gradient.ts +406 -39
- package/src/index.ts +134 -57
- package/src/ltm.ts +27 -0
- package/src/prompt.ts +16 -8
- package/src/temporal.ts +91 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "opencode-lore",
|
|
3
|
-
"version": "0.1.3",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
],
|
|
35
35
|
"repository": {
|
|
36
36
|
"type": "git",
|
|
37
|
-
"url": "https://github.com/BYK/opencode-lore.git"
|
|
37
|
+
"url": "git+https://github.com/BYK/opencode-lore.git"
|
|
38
38
|
},
|
|
39
39
|
"keywords": [
|
|
40
40
|
"opencode",
|
package/src/agents-file.ts
CHANGED
|
@@ -11,8 +11,7 @@
|
|
|
11
11
|
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
|
|
12
12
|
import { dirname } from "path";
|
|
13
13
|
import * as ltm from "./ltm";
|
|
14
|
-
import {
|
|
15
|
-
import { unescapeMarkdown } from "./markdown";
|
|
14
|
+
import { serialize, inline, h, ul, liph, strong, t, root, unescapeMarkdown } from "./markdown";
|
|
16
15
|
|
|
17
16
|
// ---------------------------------------------------------------------------
|
|
18
17
|
// Constants
|
|
@@ -158,23 +157,40 @@ function buildSection(projectPath: string): string {
|
|
|
158
157
|
if (!entries.length) {
|
|
159
158
|
return "\n";
|
|
160
159
|
}
|
|
161
|
-
const formatted = formatKnowledge(
|
|
162
|
-
entries.map((e) => ({ category: e.category, title: e.title, content: e.content })),
|
|
163
|
-
);
|
|
164
|
-
if (!formatted) return "\n";
|
|
165
160
|
|
|
166
|
-
//
|
|
167
|
-
const
|
|
168
|
-
const
|
|
161
|
+
// Group entries by category, preserving DB order (confidence DESC, updated_at DESC).
|
|
162
|
+
const grouped = new Map<string, typeof entries>();
|
|
163
|
+
for (const e of entries) {
|
|
164
|
+
const group = grouped.get(e.category) ?? [];
|
|
165
|
+
group.push(e);
|
|
166
|
+
grouped.set(e.category, group);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
// Build the section body by iterating entries directly, emitting each entry
|
|
170
|
+
// with its own <!-- lore:UUID --> marker. This avoids the title-based Map
|
|
171
|
+
// deduplication bug where multiple entries with the same title all got the
|
|
172
|
+
// same UUID marker from the last Map.set() winner.
|
|
169
173
|
const out: string[] = [""];
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
174
|
+
|
|
175
|
+
// Section heading
|
|
176
|
+
out.push("## Long-term Knowledge");
|
|
177
|
+
|
|
178
|
+
for (const [category, items] of grouped) {
|
|
179
|
+
out.push("");
|
|
180
|
+
out.push(`### ${category.charAt(0).toUpperCase() + category.slice(1)}`);
|
|
181
|
+
out.push("");
|
|
182
|
+
for (const entry of items) {
|
|
183
|
+
out.push(`<!-- lore:${entry.id} -->`);
|
|
184
|
+
// Render the bullet using remark serializer for proper markdown escaping.
|
|
185
|
+
// serialize(root(ul([liph(...)]))) produces "* **Title**: content\n".
|
|
186
|
+
// Trim the trailing newline since we join with \n ourselves.
|
|
187
|
+
const bullet = serialize(
|
|
188
|
+
root(ul([liph(strong(inline(entry.title)), t(": " + inline(entry.content)))]))
|
|
189
|
+
).trimEnd();
|
|
190
|
+
out.push(bullet);
|
|
175
191
|
}
|
|
176
|
-
out.push(line);
|
|
177
192
|
}
|
|
193
|
+
|
|
178
194
|
out.push("");
|
|
179
195
|
return out.join("\n");
|
|
180
196
|
}
|
package/src/config.ts
CHANGED
|
@@ -30,6 +30,14 @@ export const LoreConfig = z.object({
|
|
|
30
30
|
afterTurns: z.number().min(1).default(10),
|
|
31
31
|
})
|
|
32
32
|
.default({}),
|
|
33
|
+
pruning: z
|
|
34
|
+
.object({
|
|
35
|
+
/** Days to keep distilled temporal messages before pruning. Default: 120. */
|
|
36
|
+
retention: z.number().min(1).default(120),
|
|
37
|
+
/** Max total temporal_messages storage in MB before emergency pruning. Default: 1024 (1 GB). */
|
|
38
|
+
maxStorage: z.number().min(50).default(1024),
|
|
39
|
+
})
|
|
40
|
+
.default({}),
|
|
33
41
|
crossProject: z.boolean().default(true),
|
|
34
42
|
agentsFile: z
|
|
35
43
|
.object({
|
package/src/db.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { Database } from "bun:sqlite";
|
|
|
2
2
|
import { join } from "path";
|
|
3
3
|
import { mkdirSync } from "fs";
|
|
4
4
|
|
|
5
|
-
const SCHEMA_VERSION =
|
|
5
|
+
const SCHEMA_VERSION = 3;
|
|
6
6
|
|
|
7
7
|
const MIGRATIONS: string[] = [
|
|
8
8
|
`
|
|
@@ -124,6 +124,12 @@ const MIGRATIONS: string[] = [
|
|
|
124
124
|
-- Version 2: Replace narrative+facts with observations text
|
|
125
125
|
ALTER TABLE distillations ADD COLUMN observations TEXT NOT NULL DEFAULT '';
|
|
126
126
|
`,
|
|
127
|
+
`
|
|
128
|
+
-- Version 3: One-time vacuum to reclaim accumulated free pages, and enable
|
|
129
|
+
-- incremental auto-vacuum so future deletes return pages to the OS.
|
|
130
|
+
-- VACUUM must run outside a transaction and cannot be in a multi-statement
|
|
131
|
+
-- exec, so it is handled specially in the migrate() function.
|
|
132
|
+
`,
|
|
127
133
|
];
|
|
128
134
|
|
|
129
135
|
function dataDir() {
|
|
@@ -142,10 +148,17 @@ export function db(): Database {
|
|
|
142
148
|
instance = new Database(path, { create: true });
|
|
143
149
|
instance.exec("PRAGMA journal_mode = WAL");
|
|
144
150
|
instance.exec("PRAGMA foreign_keys = ON");
|
|
151
|
+
// Enable incremental auto-vacuum: freed pages are tracked and handed back to
|
|
152
|
+
// the OS when PRAGMA incremental_vacuum runs (not automatically on commit).
|
|
153
|
+
instance.exec("PRAGMA auto_vacuum = INCREMENTAL");
|
|
145
154
|
migrate(instance);
|
|
146
155
|
return instance;
|
|
147
156
|
}
|
|
148
157
|
|
|
158
|
+
// Index of the migration that performs a one-time VACUUM.
|
|
159
|
+
// VACUUM cannot run inside a transaction, so migrate() handles it specially.
|
|
160
|
+
const VACUUM_MIGRATION_INDEX = 2; // 0-based index of version-3 migration
|
|
161
|
+
|
|
149
162
|
function migrate(database: Database) {
|
|
150
163
|
const row = database
|
|
151
164
|
.query(
|
|
@@ -161,7 +174,16 @@ function migrate(database: Database) {
|
|
|
161
174
|
: 0;
|
|
162
175
|
if (current >= MIGRATIONS.length) return;
|
|
163
176
|
for (let i = current; i < MIGRATIONS.length; i++) {
|
|
164
|
-
|
|
177
|
+
if (i === VACUUM_MIGRATION_INDEX) {
|
|
178
|
+
// VACUUM cannot run inside a transaction. Run it directly.
|
|
179
|
+
// auto_vacuum mode must be set *before* VACUUM — SQLite bakes it into
|
|
180
|
+
// the file header during the rebuild. After this, every subsequent
|
|
181
|
+
// startup's "PRAGMA auto_vacuum = INCREMENTAL" is a no-op (already set).
|
|
182
|
+
database.exec("PRAGMA auto_vacuum = INCREMENTAL");
|
|
183
|
+
database.exec("VACUUM");
|
|
184
|
+
} else {
|
|
185
|
+
database.exec(MIGRATIONS[i]);
|
|
186
|
+
}
|
|
165
187
|
}
|
|
166
188
|
// Update version to latest. Migration 0 inserts version=1 via its own INSERT,
|
|
167
189
|
// but subsequent migrations don't update it, so always normalize to MIGRATIONS.length.
|
package/src/gradient.ts
CHANGED
|
@@ -40,12 +40,35 @@ const FIRST_TURN_OVERHEAD = 15_000;
|
|
|
40
40
|
// Null = not yet calibrated (first turn). Updated after every assistant response.
|
|
41
41
|
let calibratedOverhead: number | null = null;
|
|
42
42
|
|
|
43
|
+
// --- Exact token tracking ---
|
|
44
|
+
// Stores the real input token count from the last successful API response.
|
|
45
|
+
// Used for the layer 0 passthrough decision: instead of estimating the full
|
|
46
|
+
// message array with chars/4, we take the exact count from the previous turn
|
|
47
|
+
// and only estimate the small delta (new messages). 99%+ of the count is
|
|
48
|
+
// exact from the API's own tokenizer, virtually eliminating overflow errors.
|
|
49
|
+
let lastKnownInput = 0;
|
|
50
|
+
let lastKnownLtm = 0;
|
|
51
|
+
let lastKnownSessionID: string | null = null;
|
|
52
|
+
let lastKnownMessageCount = 0;
|
|
53
|
+
|
|
54
|
+
// --- Force escalation ---
|
|
55
|
+
// Set when the API returns "prompt is too long" — forces the transform to skip
|
|
56
|
+
// layer 0 (and optionally layer 1) on the next call to ensure the context is
|
|
57
|
+
// trimmed enough to fit. Cleared after one use (one-shot).
|
|
58
|
+
let forceMinLayer: SafetyLayer = 0;
|
|
59
|
+
|
|
43
60
|
// LTM tokens injected via system transform hook this turn.
|
|
44
61
|
// Set by setLtmTokens() after the system hook runs; consumed by transform().
|
|
45
62
|
let ltmTokens = 0;
|
|
46
63
|
|
|
47
64
|
export function setModelLimits(limits: { context: number; output: number }) {
|
|
48
65
|
contextLimit = limits.context || 200_000;
|
|
66
|
+
// NOTE: this cap of 32K matches what @ai-sdk/anthropic sends as max_tokens for
|
|
67
|
+
// claude-opus-4-6 (the SDK doesn't recognise the -6 variant and falls back to
|
|
68
|
+
// the generic claude-opus-4- pattern with maxOutputTokens=32K). If the SDK is
|
|
69
|
+
// updated to send the model's actual limit (128K for opus-4-6), this cap will
|
|
70
|
+
// become wrong — the effective max input would drop from 168K to 72K but our
|
|
71
|
+
// budget would still assume 168K. At that point, remove the cap.
|
|
49
72
|
outputReserved = Math.min(limits.output || 32_000, 32_000);
|
|
50
73
|
}
|
|
51
74
|
|
|
@@ -72,9 +95,22 @@ export function getLtmBudget(ltmFraction: number): number {
|
|
|
72
95
|
}
|
|
73
96
|
|
|
74
97
|
// Called after each assistant message completes with real token usage data.
|
|
75
|
-
// actualInput
|
|
98
|
+
// actualInput = tokens.input + tokens.cache.read (all tokens the model saw)
|
|
76
99
|
// messageEstimate = our chars/4 estimate of the messages we sent
|
|
77
|
-
|
|
100
|
+
// sessionID = session that produced this response (for exact-tracking validity)
|
|
101
|
+
// messageCount = number of messages that were sent (for delta estimation)
|
|
102
|
+
export function calibrate(
|
|
103
|
+
actualInput: number,
|
|
104
|
+
messageEstimate: number,
|
|
105
|
+
sessionID?: string,
|
|
106
|
+
messageCount?: number,
|
|
107
|
+
) {
|
|
108
|
+
// Store exact counts for the proactive layer 0 decision.
|
|
109
|
+
lastKnownInput = actualInput;
|
|
110
|
+
lastKnownLtm = ltmTokens;
|
|
111
|
+
if (sessionID !== undefined) lastKnownSessionID = sessionID;
|
|
112
|
+
if (messageCount !== undefined) lastKnownMessageCount = messageCount;
|
|
113
|
+
|
|
78
114
|
const overhead = Math.max(0, actualInput - messageEstimate);
|
|
79
115
|
// Smooth with EMA (alpha=0.3) once calibrated, or set directly on first call
|
|
80
116
|
calibratedOverhead =
|
|
@@ -87,9 +123,23 @@ export function getOverhead(): number {
|
|
|
87
123
|
return calibratedOverhead ?? FIRST_TURN_OVERHEAD;
|
|
88
124
|
}
|
|
89
125
|
|
|
90
|
-
|
|
126
|
+
/**
|
|
127
|
+
* Force the next transform() call to use at least the given layer.
|
|
128
|
+
* Called when the API returns "prompt is too long" so the next attempt
|
|
129
|
+
* trims the context enough to fit within the model's context window.
|
|
130
|
+
*/
|
|
131
|
+
export function setForceMinLayer(layer: SafetyLayer) {
|
|
132
|
+
forceMinLayer = layer;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// For testing only — reset all calibration and force-escalation state
|
|
91
136
|
export function resetCalibration() {
|
|
92
137
|
calibratedOverhead = null;
|
|
138
|
+
lastKnownInput = 0;
|
|
139
|
+
lastKnownLtm = 0;
|
|
140
|
+
lastKnownSessionID = null;
|
|
141
|
+
lastKnownMessageCount = 0;
|
|
142
|
+
forceMinLayer = 0;
|
|
93
143
|
}
|
|
94
144
|
|
|
95
145
|
type Distillation = {
|
|
@@ -317,16 +367,9 @@ function addRelativeTimeToObservations(text: string, now: Date): string {
|
|
|
317
367
|
return result;
|
|
318
368
|
}
|
|
319
369
|
|
|
320
|
-
// Build
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
const now = new Date();
|
|
324
|
-
const annotated = distillations.map((d) => ({
|
|
325
|
-
...d,
|
|
326
|
-
observations: addRelativeTimeToObservations(d.observations, now),
|
|
327
|
-
}));
|
|
328
|
-
const formatted = formatDistillations(annotated);
|
|
329
|
-
if (!formatted) return [];
|
|
370
|
+
// Build synthetic user/assistant message pair wrapping formatted distillation text.
|
|
371
|
+
// Shared by the cached and non-cached prefix paths.
|
|
372
|
+
function buildPrefixMessages(formatted: string): MessageWithParts[] {
|
|
330
373
|
return [
|
|
331
374
|
{
|
|
332
375
|
info: {
|
|
@@ -381,7 +424,252 @@ function distilledPrefix(distillations: Distillation[]): MessageWithParts[] {
|
|
|
381
424
|
];
|
|
382
425
|
}
|
|
383
426
|
|
|
384
|
-
|
|
427
|
+
// Build a synthetic message pair containing the distilled history.
|
|
428
|
+
// Non-cached path — used by layers 2-4 which already cause full cache invalidation.
|
|
429
|
+
function distilledPrefix(distillations: Distillation[]): MessageWithParts[] {
|
|
430
|
+
if (!distillations.length) return [];
|
|
431
|
+
const now = new Date();
|
|
432
|
+
const annotated = distillations.map((d) => ({
|
|
433
|
+
...d,
|
|
434
|
+
observations: addRelativeTimeToObservations(d.observations, now),
|
|
435
|
+
}));
|
|
436
|
+
const formatted = formatDistillations(annotated);
|
|
437
|
+
if (!formatted) return [];
|
|
438
|
+
return buildPrefixMessages(formatted);
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
// --- Approach C: Append-only distillation prefix cache ---
|
|
442
|
+
//
|
|
443
|
+
// Caches the rendered prefix text per session. When new distillations arrive,
|
|
444
|
+
// only renders the new rows and appends them to the cached text. This keeps
|
|
445
|
+
// the prefix byte-identical between distillation runs, preserving the prompt
|
|
446
|
+
// cache. Only meta-distillation (which rewrites gen-0 rows into gen-1) causes
|
|
447
|
+
// a full re-render — and that happens roughly every 80-100 turns.
|
|
448
|
+
|
|
449
|
+
type PrefixCache = {
|
|
450
|
+
/** The session this cache belongs to */
|
|
451
|
+
sessionID: string;
|
|
452
|
+
/** ID of the last distillation row included in the cached text */
|
|
453
|
+
lastDistillationID: string;
|
|
454
|
+
/** Number of rows that produced the cached text */
|
|
455
|
+
rowCount: number;
|
|
456
|
+
/** The rendered text (used to build delta appends) */
|
|
457
|
+
cachedText: string;
|
|
458
|
+
/** Ready-to-use message pair */
|
|
459
|
+
prefixMessages: MessageWithParts[];
|
|
460
|
+
/** Token estimate of prefixMessages */
|
|
461
|
+
prefixTokens: number;
|
|
462
|
+
};
|
|
463
|
+
|
|
464
|
+
let prefixCache: PrefixCache | null = null;
|
|
465
|
+
|
|
466
|
+
/**
|
|
467
|
+
* Return the distilled prefix messages, reusing cached content when possible.
|
|
468
|
+
*
|
|
469
|
+
* Cache hit — no new rows: returns the exact same prefixMessages object
|
|
470
|
+
* (byte-identical content, prompt cache preserved).
|
|
471
|
+
* Cache miss — new rows appended: renders only the delta, appends to cached
|
|
472
|
+
* text, updates cache.
|
|
473
|
+
* Full reset — session changed, or rows were rewritten by meta-distillation:
|
|
474
|
+
* renders everything from scratch.
|
|
475
|
+
*/
|
|
476
|
+
function distilledPrefixCached(
|
|
477
|
+
distillations: Distillation[],
|
|
478
|
+
sessionID: string,
|
|
479
|
+
): { messages: MessageWithParts[]; tokens: number } {
|
|
480
|
+
if (!distillations.length) {
|
|
481
|
+
prefixCache = null;
|
|
482
|
+
return { messages: [], tokens: 0 };
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
const lastRow = distillations[distillations.length - 1];
|
|
486
|
+
|
|
487
|
+
// Cache is valid when: same session, row count did not shrink (no rewrites),
|
|
488
|
+
// and the last previously-cached row still exists at the same position.
|
|
489
|
+
const cacheValid =
|
|
490
|
+
prefixCache !== null &&
|
|
491
|
+
prefixCache.sessionID === sessionID &&
|
|
492
|
+
prefixCache.rowCount <= distillations.length &&
|
|
493
|
+
(prefixCache.rowCount === 0 ||
|
|
494
|
+
distillations[prefixCache.rowCount - 1]?.id ===
|
|
495
|
+
prefixCache.lastDistillationID);
|
|
496
|
+
|
|
497
|
+
if (cacheValid) {
|
|
498
|
+
if (prefixCache!.lastDistillationID === lastRow.id) {
|
|
499
|
+
// No new rows — return cached prefix as-is (byte-identical for prompt cache)
|
|
500
|
+
return {
|
|
501
|
+
messages: prefixCache!.prefixMessages,
|
|
502
|
+
tokens: prefixCache!.prefixTokens,
|
|
503
|
+
};
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
// New rows appended — render only the delta and append to cached text
|
|
507
|
+
const newRows = distillations.slice(prefixCache!.rowCount);
|
|
508
|
+
const now = new Date();
|
|
509
|
+
const annotated = newRows.map((d) => ({
|
|
510
|
+
...d,
|
|
511
|
+
observations: addRelativeTimeToObservations(d.observations, now),
|
|
512
|
+
}));
|
|
513
|
+
const deltaText = formatDistillations(annotated);
|
|
514
|
+
|
|
515
|
+
if (deltaText) {
|
|
516
|
+
const fullText = prefixCache!.cachedText + "\n\n" + deltaText;
|
|
517
|
+
const messages = buildPrefixMessages(fullText);
|
|
518
|
+
const tokens = messages.reduce((sum, m) => sum + estimateMessage(m), 0);
|
|
519
|
+
prefixCache = {
|
|
520
|
+
sessionID,
|
|
521
|
+
lastDistillationID: lastRow.id,
|
|
522
|
+
rowCount: distillations.length,
|
|
523
|
+
cachedText: fullText,
|
|
524
|
+
prefixMessages: messages,
|
|
525
|
+
prefixTokens: tokens,
|
|
526
|
+
};
|
|
527
|
+
return { messages, tokens };
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
// Full re-render: first call, session change, or meta-distillation rewrote rows
|
|
532
|
+
const now = new Date();
|
|
533
|
+
const annotated = distillations.map((d) => ({
|
|
534
|
+
...d,
|
|
535
|
+
observations: addRelativeTimeToObservations(d.observations, now),
|
|
536
|
+
}));
|
|
537
|
+
const fullText = formatDistillations(annotated);
|
|
538
|
+
if (!fullText) {
|
|
539
|
+
prefixCache = null;
|
|
540
|
+
return { messages: [], tokens: 0 };
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
const messages = buildPrefixMessages(fullText);
|
|
544
|
+
const tokens = messages.reduce((sum, m) => sum + estimateMessage(m), 0);
|
|
545
|
+
prefixCache = {
|
|
546
|
+
sessionID,
|
|
547
|
+
lastDistillationID: lastRow.id,
|
|
548
|
+
rowCount: distillations.length,
|
|
549
|
+
cachedText: fullText,
|
|
550
|
+
prefixMessages: messages,
|
|
551
|
+
prefixTokens: tokens,
|
|
552
|
+
};
|
|
553
|
+
return { messages, tokens };
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
// For testing only — reset prefix cache state
|
|
557
|
+
export function resetPrefixCache() {
|
|
558
|
+
prefixCache = null;
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
// --- Approach B: Lazy raw window eviction ---
|
|
562
|
+
//
|
|
563
|
+
// Tracks the ID of the first (oldest) message in the previous raw window.
|
|
564
|
+
// On the next turn, if the window starting at that message still fits within
|
|
565
|
+
// the raw budget, the cutoff is pinned — no messages are evicted and the raw
|
|
566
|
+
// window stays byte-identical for caching purposes. Only when the pinned
|
|
567
|
+
// window no longer fits (e.g. a large tool response pushed us over) is the
|
|
568
|
+
// cutoff recomputed by the normal backward scan (tryFit).
|
|
569
|
+
//
|
|
570
|
+
// This eliminates the "window sliding on every turn" problem that was the
|
|
571
|
+
// dominant source of cache misses in gradient mode: each new turn appends a
|
|
572
|
+
// message to the conversation, but the start of the raw window only moves
|
|
573
|
+
// when it must.
|
|
574
|
+
//
|
|
575
|
+
// Reset conditions: session changes, or layer escalates to 2+ (the pinned
|
|
576
|
+
// window was too large even with stripping — something genuinely changed).
|
|
577
|
+
|
|
578
|
+
type RawWindowCache = {
|
|
579
|
+
sessionID: string;
|
|
580
|
+
/** ID of the first message in the pinned raw window */
|
|
581
|
+
firstMessageID: string;
|
|
582
|
+
};
|
|
583
|
+
|
|
584
|
+
let rawWindowCache: RawWindowCache | null = null;
|
|
585
|
+
|
|
586
|
+
export function resetRawWindowCache() {
|
|
587
|
+
rawWindowCache = null;
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
/**
|
|
591
|
+
* Layer-1 tryFit with lazy eviction.
|
|
592
|
+
*
|
|
593
|
+
* Attempts to reuse the previous raw window cutoff before falling back to a
|
|
594
|
+
* full backward scan. If the pinned window fits, returns it unchanged (same
|
|
595
|
+
* message objects, byte-identical for prompt caching). If it doesn't fit,
|
|
596
|
+
* delegates to the normal tryFit which finds the new minimal cutoff and
|
|
597
|
+
* updates the cache.
|
|
598
|
+
*/
|
|
599
|
+
function tryFitStable(input: {
|
|
600
|
+
messages: MessageWithParts[];
|
|
601
|
+
prefix: MessageWithParts[];
|
|
602
|
+
prefixTokens: number;
|
|
603
|
+
distilledBudget: number;
|
|
604
|
+
rawBudget: number;
|
|
605
|
+
sessionID: string;
|
|
606
|
+
}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
|
|
607
|
+
// If the prefix already overflows its budget there's no point trying.
|
|
608
|
+
if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
|
|
609
|
+
return null;
|
|
610
|
+
|
|
611
|
+
const cacheValid =
|
|
612
|
+
rawWindowCache !== null && rawWindowCache.sessionID === input.sessionID;
|
|
613
|
+
|
|
614
|
+
if (cacheValid) {
|
|
615
|
+
const pinnedIdx = input.messages.findIndex(
|
|
616
|
+
(m) => m.info.id === rawWindowCache!.firstMessageID,
|
|
617
|
+
);
|
|
618
|
+
|
|
619
|
+
if (pinnedIdx !== -1) {
|
|
620
|
+
// Measure the token cost of the pinned window.
|
|
621
|
+
const pinnedWindow = input.messages.slice(pinnedIdx);
|
|
622
|
+
const pinnedTokens = pinnedWindow.reduce(
|
|
623
|
+
(sum, m) => sum + estimateMessage(m),
|
|
624
|
+
0,
|
|
625
|
+
);
|
|
626
|
+
|
|
627
|
+
if (pinnedTokens <= input.rawBudget) {
|
|
628
|
+
// Pinned window still fits — keep it. Apply system-reminder cleanup
|
|
629
|
+
// only (strip:"none" is the layer-1 mode), returning the same message
|
|
630
|
+
// object references wherever nothing changed.
|
|
631
|
+
const processed = pinnedWindow.map((msg) => {
|
|
632
|
+
const parts = cleanParts(msg.parts);
|
|
633
|
+
return parts !== msg.parts ? { info: msg.info, parts } : msg;
|
|
634
|
+
});
|
|
635
|
+
const total = input.prefixTokens + pinnedTokens;
|
|
636
|
+
return {
|
|
637
|
+
messages: [...input.prefix, ...processed],
|
|
638
|
+
distilledTokens: input.prefixTokens,
|
|
639
|
+
rawTokens: pinnedTokens,
|
|
640
|
+
totalTokens: total,
|
|
641
|
+
};
|
|
642
|
+
}
|
|
643
|
+
// Pinned window is too large — fall through to the normal scan below.
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
// Normal backward scan to find the tightest fitting cutoff.
|
|
648
|
+
const result = tryFit({
|
|
649
|
+
messages: input.messages,
|
|
650
|
+
prefix: input.prefix,
|
|
651
|
+
prefixTokens: input.prefixTokens,
|
|
652
|
+
distilledBudget: input.distilledBudget,
|
|
653
|
+
rawBudget: input.rawBudget,
|
|
654
|
+
strip: "none",
|
|
655
|
+
});
|
|
656
|
+
|
|
657
|
+
if (result) {
|
|
658
|
+
// Update the raw window cache: the first non-prefix message is the oldest
|
|
659
|
+
// raw message in the new window. Pin to its ID for the next turn.
|
|
660
|
+
const rawStart = result.messages[input.prefix.length];
|
|
661
|
+
if (rawStart) {
|
|
662
|
+
rawWindowCache = {
|
|
663
|
+
sessionID: input.sessionID,
|
|
664
|
+
firstMessageID: rawStart.info.id,
|
|
665
|
+
};
|
|
666
|
+
}
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
return result;
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
export type SafetyLayer = 0 | 1 | 2 | 3 | 4;
|
|
385
673
|
|
|
386
674
|
export type TransformResult = {
|
|
387
675
|
messages: MessageWithParts[];
|
|
@@ -419,36 +707,115 @@ export function transform(input: {
|
|
|
419
707
|
const distilledBudget = Math.floor(usable * cfg.budget.distilled);
|
|
420
708
|
const rawBudget = Math.floor(usable * cfg.budget.raw);
|
|
421
709
|
|
|
422
|
-
//
|
|
710
|
+
// --- Force escalation (reactive error recovery) ---
|
|
711
|
+
// When the API previously rejected with "prompt is too long", skip layers
|
|
712
|
+
// below the forced minimum to ensure enough trimming on the next attempt.
|
|
713
|
+
// One-shot: consumed here and reset to 0.
|
|
714
|
+
const effectiveMinLayer = forceMinLayer;
|
|
715
|
+
forceMinLayer = 0;
|
|
716
|
+
|
|
717
|
+
// --- Approach A: Cache-preserving passthrough ---
|
|
718
|
+
// Use exact token count from the previous API response when available.
|
|
719
|
+
// Only the delta (messages added since last call) uses chars/4 estimation,
|
|
720
|
+
// making the layer-0 decision 99%+ accurate from the API's own tokenizer.
|
|
721
|
+
// maxInput = absolute ceiling the API enforces: input_tokens + max_tokens <= context
|
|
722
|
+
const maxInput = contextLimit - outputReserved;
|
|
423
723
|
const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
|
|
724
|
+
|
|
725
|
+
let expectedInput: number;
|
|
726
|
+
if (lastKnownInput > 0 && sid === lastKnownSessionID) {
|
|
727
|
+
// Exact approach: prior API count + estimate of only the new messages.
|
|
728
|
+
const newMsgCount = Math.max(0, input.messages.length - lastKnownMessageCount);
|
|
729
|
+
const newMsgTokens = newMsgCount > 0
|
|
730
|
+
? input.messages.slice(-newMsgCount).reduce((s, m) => s + estimateMessage(m), 0)
|
|
731
|
+
: 0;
|
|
732
|
+
const ltmDelta = ltmTokens - lastKnownLtm;
|
|
733
|
+
expectedInput = lastKnownInput + newMsgTokens + ltmDelta;
|
|
734
|
+
} else {
|
|
735
|
+
// First turn or session change: fall back to chars/4 + overhead.
|
|
736
|
+
const messageTokens = input.messages.reduce((s, m) => s + estimateMessage(m), 0);
|
|
737
|
+
expectedInput = messageTokens + overhead + ltmTokens;
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
if (effectiveMinLayer === 0 && expectedInput <= maxInput) {
|
|
741
|
+
// All messages fit — return unmodified to preserve append-only prompt-cache pattern.
|
|
742
|
+
// Raw messages are strictly better context than lossy distilled summaries.
|
|
743
|
+
const messageTokens = lastKnownInput > 0 && sid === lastKnownSessionID
|
|
744
|
+
? expectedInput - (ltmTokens - lastKnownLtm) // approximate raw portion
|
|
745
|
+
: expectedInput - overhead - ltmTokens;
|
|
746
|
+
return {
|
|
747
|
+
messages: input.messages,
|
|
748
|
+
layer: 0,
|
|
749
|
+
distilledTokens: 0,
|
|
750
|
+
rawTokens: Math.max(0, messageTokens),
|
|
751
|
+
totalTokens: Math.max(0, messageTokens),
|
|
752
|
+
usable,
|
|
753
|
+
distilledBudget,
|
|
754
|
+
rawBudget,
|
|
755
|
+
};
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
// --- Gradient mode: context exhausted (or force-escalated), compress older messages ---
|
|
759
|
+
|
|
424
760
|
const distillations = sid ? loadDistillations(input.projectPath, sid) : [];
|
|
425
|
-
const prefix = distilledPrefix(distillations);
|
|
426
|
-
const prefixTokens = prefix.reduce((sum, m) => sum + estimateMessage(m), 0);
|
|
427
761
|
|
|
428
|
-
// Layer 1
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
762
|
+
// Layer 1 uses the append-only cached prefix (Approach C) to keep the
|
|
763
|
+
// distilled content byte-identical between distillation runs, preserving
|
|
764
|
+
// the prompt cache. Layers 2-4 already cause full cache invalidation via
|
|
765
|
+
// tool stripping / message restructuring, so they use the non-cached path.
|
|
766
|
+
const cached = sid
|
|
767
|
+
? distilledPrefixCached(distillations, sid)
|
|
768
|
+
: (() => {
|
|
769
|
+
const msgs = distilledPrefix(distillations);
|
|
770
|
+
return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
|
|
771
|
+
})();
|
|
772
|
+
|
|
773
|
+
// Layer 1: Normal budget allocation with lazy raw window eviction (Approach B).
|
|
774
|
+
// tryFitStable reuses the previous cutoff when it still fits, keeping the raw
|
|
775
|
+
// window byte-identical across turns for prompt caching. Only advances the
|
|
776
|
+
// cutoff when a genuinely oversized message forces eviction.
|
|
777
|
+
// Skipped when force-escalated to layer 2+ (previous attempt already failed at this level).
|
|
778
|
+
if (effectiveMinLayer <= 1) {
|
|
779
|
+
const layer1 = sid
|
|
780
|
+
? tryFitStable({
|
|
781
|
+
messages: input.messages,
|
|
782
|
+
prefix: cached.messages,
|
|
783
|
+
prefixTokens: cached.tokens,
|
|
784
|
+
distilledBudget,
|
|
785
|
+
rawBudget,
|
|
786
|
+
sessionID: sid,
|
|
787
|
+
})
|
|
788
|
+
: tryFit({
|
|
789
|
+
messages: input.messages,
|
|
790
|
+
prefix: cached.messages,
|
|
791
|
+
prefixTokens: cached.tokens,
|
|
792
|
+
distilledBudget,
|
|
793
|
+
rawBudget,
|
|
794
|
+
strip: "none",
|
|
795
|
+
});
|
|
796
|
+
if (layer1) return { ...layer1, layer: 1, usable, distilledBudget, rawBudget };
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
// Layer 1 didn't fit (or was force-skipped) — reset the raw window cache.
|
|
800
|
+
// Layers 2-4 use full scans and already break the prompt cache.
|
|
801
|
+
rawWindowCache = null;
|
|
438
802
|
|
|
439
803
|
// Layer 2: Strip tool outputs from older messages, keep last 2 turns
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
804
|
+
// Skipped when force-escalated to layer 3+.
|
|
805
|
+
if (effectiveMinLayer <= 2) {
|
|
806
|
+
const layer2 = tryFit({
|
|
807
|
+
messages: input.messages,
|
|
808
|
+
prefix: cached.messages,
|
|
809
|
+
prefixTokens: cached.tokens,
|
|
810
|
+
distilledBudget,
|
|
811
|
+
rawBudget: Math.floor(usable * 0.5), // give raw more room
|
|
812
|
+
strip: "old-tools",
|
|
813
|
+
protectedTurns: 2,
|
|
814
|
+
});
|
|
815
|
+
if (layer2) {
|
|
816
|
+
urgentDistillation = true;
|
|
817
|
+
return { ...layer2, layer: 2, usable, distilledBudget, rawBudget };
|
|
818
|
+
}
|
|
452
819
|
}
|
|
453
820
|
|
|
454
821
|
// Layer 3: Strip ALL tool outputs, drop oldest distillations
|
package/src/index.ts
CHANGED
|
@@ -13,9 +13,11 @@ import {
|
|
|
13
13
|
estimateMessages,
|
|
14
14
|
setLtmTokens,
|
|
15
15
|
getLtmBudget,
|
|
16
|
+
setForceMinLayer,
|
|
16
17
|
} from "./gradient";
|
|
17
18
|
import { formatKnowledge } from "./prompt";
|
|
18
19
|
import { createRecallTool } from "./reflect";
|
|
20
|
+
import { shouldImport, importFromFile, exportToFile } from "./agents-file";
|
|
19
21
|
|
|
20
22
|
export const LorePlugin: Plugin = async (ctx) => {
|
|
21
23
|
const projectPath = ctx.worktree || ctx.directory;
|
|
@@ -33,6 +35,23 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
33
35
|
}).catch(() => {});
|
|
34
36
|
}
|
|
35
37
|
|
|
38
|
+
// Import from AGENTS.md at startup if it has changed since last export
|
|
39
|
+
// (hand-written entries, edits from other machines, or merge conflicts).
|
|
40
|
+
{
|
|
41
|
+
const cfg = config();
|
|
42
|
+
if (cfg.agentsFile.enabled) {
|
|
43
|
+
const filePath = `${projectPath}/${cfg.agentsFile.path}`;
|
|
44
|
+
if (shouldImport({ projectPath, filePath })) {
|
|
45
|
+
try {
|
|
46
|
+
importFromFile({ projectPath, filePath });
|
|
47
|
+
console.error("[lore] imported knowledge from", cfg.agentsFile.path);
|
|
48
|
+
} catch (e) {
|
|
49
|
+
console.error("[lore] agents-file import error:", e);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
36
55
|
// Prune any corrupted/oversized knowledge entries left by the AGENTS.md
|
|
37
56
|
// backslash-escaping bug or curator hallucinations. Sets confidence → 0
|
|
38
57
|
// (below the 0.2 query threshold) so they stop polluting the context.
|
|
@@ -179,7 +198,9 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
179
198
|
backgroundDistill(msg.sessionID);
|
|
180
199
|
}
|
|
181
200
|
|
|
182
|
-
// Calibrate overhead estimate using real token counts
|
|
201
|
+
// Calibrate overhead estimate using real token counts.
|
|
202
|
+
// Also store the exact input count + message count for the proactive
|
|
203
|
+
// layer-0 decision (avoids full chars/4 re-estimation each turn).
|
|
183
204
|
const allMsgs = await ctx.client.session.messages({
|
|
184
205
|
path: { id: msg.sessionID },
|
|
185
206
|
});
|
|
@@ -189,7 +210,7 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
189
210
|
.map((m) => ({ info: m.info, parts: m.parts }));
|
|
190
211
|
const msgEstimate = estimateMessages(withParts);
|
|
191
212
|
const actualInput = msg.tokens.input + msg.tokens.cache.read;
|
|
192
|
-
calibrate(actualInput, msgEstimate);
|
|
213
|
+
calibrate(actualInput, msgEstimate, msg.sessionID, withParts.length);
|
|
193
214
|
}
|
|
194
215
|
}
|
|
195
216
|
}
|
|
@@ -198,13 +219,59 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
198
219
|
}
|
|
199
220
|
}
|
|
200
221
|
|
|
222
|
+
if (event.type === "session.error") {
|
|
223
|
+
// Detect "prompt is too long" API errors and auto-recover:
|
|
224
|
+
// 1. Force the gradient transform to escalate on the next call (skip layer 0/1)
|
|
225
|
+
// 2. Force distillation to capture all temporal data before compaction
|
|
226
|
+
// 3. Trigger compaction so the session recovers without user intervention
|
|
227
|
+
const error = (event.properties as Record<string, unknown>).error as
|
|
228
|
+
| { name?: string; data?: { message?: string } }
|
|
229
|
+
| undefined;
|
|
230
|
+
const isPromptTooLong =
|
|
231
|
+
error?.name === "APIError" &&
|
|
232
|
+
typeof error?.data?.message === "string" &&
|
|
233
|
+
(error.data.message.includes("prompt is too long") ||
|
|
234
|
+
error.data.message.includes("context length exceeded") ||
|
|
235
|
+
error.data.message.includes("maximum context length"));
|
|
236
|
+
|
|
237
|
+
if (isPromptTooLong) {
|
|
238
|
+
const sessionID = (event.properties as Record<string, unknown>).sessionID as
|
|
239
|
+
| string
|
|
240
|
+
| undefined;
|
|
241
|
+
console.error(
|
|
242
|
+
`[lore] detected 'prompt too long' error — forcing distillation + compaction (session: ${sessionID?.substring(0, 16)})`,
|
|
243
|
+
);
|
|
244
|
+
// Force layer 2 on next transform — layers 0 and 1 were already too large.
|
|
245
|
+
setForceMinLayer(2);
|
|
246
|
+
|
|
247
|
+
if (sessionID) {
|
|
248
|
+
// Force distillation to capture all undistilled messages before
|
|
249
|
+
// compaction replaces the session message history.
|
|
250
|
+
await backgroundDistill(sessionID, true);
|
|
251
|
+
|
|
252
|
+
// Trigger compaction automatically — the compacting hook will inject
|
|
253
|
+
// Lore's custom distillation-aware prompt.
|
|
254
|
+
try {
|
|
255
|
+
const sessions = await ctx.client.session.list();
|
|
256
|
+
const session = sessions.data?.find((s) => s.id.startsWith(sessionID));
|
|
257
|
+
if (session) {
|
|
258
|
+
// providerID/modelID are optional — omit to use the session's current model
|
|
259
|
+
await ctx.client.session.summarize({ path: { id: session.id } });
|
|
260
|
+
}
|
|
261
|
+
} catch (e) {
|
|
262
|
+
console.error("[lore] auto-compaction failed:", e);
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
|
|
201
268
|
if (event.type === "session.idle") {
|
|
202
269
|
const sessionID = event.properties.sessionID;
|
|
203
270
|
if (await shouldSkip(sessionID)) return;
|
|
204
271
|
if (!activeSessions.has(sessionID)) return;
|
|
205
272
|
|
|
206
273
|
// Run background distillation for any remaining undistilled messages
|
|
207
|
-
backgroundDistill(sessionID);
|
|
274
|
+
await backgroundDistill(sessionID);
|
|
208
275
|
|
|
209
276
|
// Run curator periodically
|
|
210
277
|
const cfg = config();
|
|
@@ -212,9 +279,39 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
212
279
|
cfg.curator.onIdle ||
|
|
213
280
|
turnsSinceCuration >= cfg.curator.afterTurns
|
|
214
281
|
) {
|
|
215
|
-
backgroundCurate(sessionID);
|
|
282
|
+
await backgroundCurate(sessionID);
|
|
216
283
|
turnsSinceCuration = 0;
|
|
217
284
|
}
|
|
285
|
+
|
|
286
|
+
// Prune temporal messages after distillation and curation have run.
|
|
287
|
+
// Pass 1: TTL — remove distilled messages older than retention period.
|
|
288
|
+
// Pass 2: Size cap — evict oldest distilled messages if over the limit.
|
|
289
|
+
// Undistilled messages are never touched.
|
|
290
|
+
try {
|
|
291
|
+
const { ttlDeleted, capDeleted } = temporal.prune({
|
|
292
|
+
projectPath,
|
|
293
|
+
retentionDays: cfg.pruning.retention,
|
|
294
|
+
maxStorageMB: cfg.pruning.maxStorage,
|
|
295
|
+
});
|
|
296
|
+
if (ttlDeleted > 0 || capDeleted > 0) {
|
|
297
|
+
console.error(
|
|
298
|
+
`[lore] pruned temporal messages: ${ttlDeleted} by TTL, ${capDeleted} by size cap`,
|
|
299
|
+
);
|
|
300
|
+
}
|
|
301
|
+
} catch (e) {
|
|
302
|
+
console.error("[lore] pruning error:", e);
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
// Export curated knowledge to AGENTS.md after distillation + curation.
|
|
306
|
+
try {
|
|
307
|
+
const agentsCfg = cfg.agentsFile;
|
|
308
|
+
if (agentsCfg.enabled) {
|
|
309
|
+
const filePath = `${projectPath}/${agentsCfg.path}`;
|
|
310
|
+
exportToFile({ projectPath, filePath });
|
|
311
|
+
}
|
|
312
|
+
} catch (e) {
|
|
313
|
+
console.error("[lore] agents-file export error:", e);
|
|
314
|
+
}
|
|
218
315
|
}
|
|
219
316
|
},
|
|
220
317
|
|
|
@@ -264,7 +361,9 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
264
361
|
}
|
|
265
362
|
},
|
|
266
363
|
|
|
267
|
-
// Transform message history: distilled prefix + raw recent
|
|
364
|
+
// Transform message history: distilled prefix + raw recent.
|
|
365
|
+
// Layer 0 = passthrough (messages fit without compression) — output.messages
|
|
366
|
+
// is left untouched to preserve the append-only pattern for prompt caching.
|
|
268
367
|
"experimental.chat.messages.transform": async (_input, output) => {
|
|
269
368
|
if (!output.messages.length) return;
|
|
270
369
|
|
|
@@ -275,66 +374,44 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
275
374
|
projectPath,
|
|
276
375
|
sessionID,
|
|
277
376
|
});
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
dropped.
|
|
290
|
-
|
|
377
|
+
|
|
378
|
+
// Only restructure messages when the gradient transform is active (layers 1-4).
|
|
379
|
+
// Layer 0 means all messages fit within the context budget — leave them alone
|
|
380
|
+
// so the append-only sequence stays intact for prompt caching.
|
|
381
|
+
if (result.layer > 0) {
|
|
382
|
+
while (
|
|
383
|
+
result.messages.length > 0 &&
|
|
384
|
+
result.messages.at(-1)!.info.role !== "user"
|
|
385
|
+
) {
|
|
386
|
+
const last = result.messages.at(-1)!;
|
|
387
|
+
if (last.parts.some((p) => p.type === "tool")) break;
|
|
388
|
+
const dropped = result.messages.pop()!;
|
|
389
|
+
console.error(
|
|
390
|
+
"[lore] WARN: dropping trailing",
|
|
391
|
+
dropped.info.role,
|
|
392
|
+
"message to prevent prefill error. id:",
|
|
393
|
+
dropped.info.id,
|
|
394
|
+
);
|
|
395
|
+
}
|
|
396
|
+
output.messages.splice(0, output.messages.length, ...result.messages);
|
|
291
397
|
}
|
|
292
|
-
output.messages.splice(0, output.messages.length, ...result.messages);
|
|
293
398
|
|
|
294
399
|
if (result.layer >= 2 && sessionID) {
|
|
295
400
|
backgroundDistill(sessionID);
|
|
296
401
|
}
|
|
297
|
-
|
|
298
|
-
// Look up statsPart AFTER the transform so the PATCHed text is clean
|
|
299
|
-
// (system-reminder wrappers stripped). Looking up before would persist
|
|
300
|
-
// ephemeral system-reminder content, making it visible in the UI.
|
|
301
|
-
const lastUserMsg = [...output.messages].reverse().find((m) => m.info.role === "user");
|
|
302
|
-
const statsPart = lastUserMsg?.parts.find((p) => p.type === "text");
|
|
303
|
-
|
|
304
|
-
if (sessionID && statsPart && lastUserMsg) {
|
|
305
|
-
const loreMeta = {
|
|
306
|
-
layer: result.layer,
|
|
307
|
-
distilledTokens: result.distilledTokens,
|
|
308
|
-
rawTokens: result.rawTokens,
|
|
309
|
-
totalTokens: result.totalTokens,
|
|
310
|
-
usable: result.usable,
|
|
311
|
-
distilledBudget: result.distilledBudget,
|
|
312
|
-
rawBudget: result.rawBudget,
|
|
313
|
-
updatedAt: Date.now(),
|
|
314
|
-
};
|
|
315
|
-
const url = new URL(
|
|
316
|
-
`/session/${sessionID}/message/${lastUserMsg.info.id}/part/${statsPart.id}`,
|
|
317
|
-
ctx.serverUrl,
|
|
318
|
-
);
|
|
319
|
-
const updatedPart = {
|
|
320
|
-
...(statsPart as Record<string, unknown>),
|
|
321
|
-
metadata: {
|
|
322
|
-
...((statsPart as { metadata?: Record<string, unknown> }).metadata ?? {}),
|
|
323
|
-
lore: loreMeta,
|
|
324
|
-
},
|
|
325
|
-
};
|
|
326
|
-
fetch(url, {
|
|
327
|
-
method: "PATCH",
|
|
328
|
-
headers: { "Content-Type": "application/json" },
|
|
329
|
-
body: JSON.stringify(updatedPart),
|
|
330
|
-
}).catch((e: unknown) => {
|
|
331
|
-
console.error("[lore] failed to write gradient stats to part metadata:", e);
|
|
332
|
-
});
|
|
333
|
-
}
|
|
334
402
|
},
|
|
335
403
|
|
|
336
|
-
// Replace compaction prompt with distillation-aware prompt when manual /compact is used
|
|
404
|
+
// Replace compaction prompt with distillation-aware prompt when manual /compact is used.
|
|
405
|
+
// Also force distillation first so all temporal data is captured before compaction
|
|
406
|
+
// replaces the session message history.
|
|
337
407
|
"experimental.session.compacting": async (input, output) => {
|
|
408
|
+
// Force distillation to capture any undistilled messages. This is critical:
|
|
409
|
+
// compaction will replace all messages with a summary, so we must persist
|
|
410
|
+
// everything to Lore's temporal store before that happens.
|
|
411
|
+
if (input.sessionID && activeSessions.has(input.sessionID)) {
|
|
412
|
+
await backgroundDistill(input.sessionID, true);
|
|
413
|
+
}
|
|
414
|
+
|
|
338
415
|
const entries = ltm.forProject(projectPath, config().crossProject);
|
|
339
416
|
const knowledge = entries.length
|
|
340
417
|
? formatKnowledge(
|
package/src/ltm.ts
CHANGED
|
@@ -36,6 +36,33 @@ export function create(input: {
|
|
|
36
36
|
input.scope === "project" && input.projectPath
|
|
37
37
|
? ensureProject(input.projectPath)
|
|
38
38
|
: null;
|
|
39
|
+
|
|
40
|
+
// Dedup guard: if an entry with the same project_id + title already exists,
|
|
41
|
+
// update its content instead of inserting a duplicate. This prevents the
|
|
42
|
+
// curator from creating multiple entries for the same concept across sessions.
|
|
43
|
+
// Note: when an explicit id is provided (cross-machine import), skip dedup —
|
|
44
|
+
// the caller (importFromFile) already handles duplicate detection by UUID.
|
|
45
|
+
if (!input.id) {
|
|
46
|
+
const existing = (
|
|
47
|
+
pid !== null
|
|
48
|
+
? db()
|
|
49
|
+
.query(
|
|
50
|
+
"SELECT id FROM knowledge WHERE project_id = ? AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1",
|
|
51
|
+
)
|
|
52
|
+
.get(pid, input.title)
|
|
53
|
+
: db()
|
|
54
|
+
.query(
|
|
55
|
+
"SELECT id FROM knowledge WHERE project_id IS NULL AND LOWER(title) = LOWER(?) AND confidence > 0 LIMIT 1",
|
|
56
|
+
)
|
|
57
|
+
.get(input.title)
|
|
58
|
+
) as { id: string } | null;
|
|
59
|
+
|
|
60
|
+
if (existing) {
|
|
61
|
+
update(existing.id, { content: input.content });
|
|
62
|
+
return existing.id;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
39
66
|
const id = input.id ?? uuidv7();
|
|
40
67
|
const now = Date.now();
|
|
41
68
|
db()
|
package/src/prompt.ts
CHANGED
|
@@ -176,18 +176,23 @@ ${entries.join("\n\n---\n\n")}`;
|
|
|
176
176
|
|
|
177
177
|
export const CURATOR_SYSTEM = `You are a long-term memory curator. Your job is to extract durable knowledge from a conversation that should persist across sessions.
|
|
178
178
|
|
|
179
|
-
Focus on knowledge that
|
|
180
|
-
-
|
|
181
|
-
-
|
|
182
|
-
-
|
|
183
|
-
- Environment setup details
|
|
184
|
-
-
|
|
185
|
-
-
|
|
179
|
+
Focus ONLY on knowledge that helps a coding agent work effectively on THIS codebase:
|
|
180
|
+
- Architectural decisions and their rationale (why something was built a certain way)
|
|
181
|
+
- Non-obvious implementation patterns and conventions specific to the project
|
|
182
|
+
- Recurring gotchas, constraints, or traps in the codebase
|
|
183
|
+
- Environment/tooling setup details that affect development
|
|
184
|
+
- Important relationships between components that aren't obvious from reading the code
|
|
185
|
+
- User preferences and working style specific to how they use this project
|
|
186
186
|
|
|
187
187
|
Do NOT extract:
|
|
188
188
|
- Task-specific details (file currently being edited, current bug being fixed)
|
|
189
189
|
- Temporary state (current branch, in-progress work)
|
|
190
190
|
- Information that will change frequently
|
|
191
|
+
- Ecosystem descriptions, product announcements, or marketing content
|
|
192
|
+
- Business strategy, roadmap, or organizational information
|
|
193
|
+
- Information that's readily available in public documentation or READMEs
|
|
194
|
+
- Knowledge about unrelated projects or repositories unless explicitly cross-project
|
|
195
|
+
- Restatements of what the code obviously does (e.g. "the auth module handles authentication")
|
|
191
196
|
|
|
192
197
|
BREVITY IS CRITICAL — each entry must be concise:
|
|
193
198
|
- content MUST be under 500 words (roughly 2000 characters)
|
|
@@ -244,7 +249,10 @@ export function curatorUser(input: {
|
|
|
244
249
|
---
|
|
245
250
|
Recent conversation to extract knowledge from:
|
|
246
251
|
|
|
247
|
-
${input.messages}
|
|
252
|
+
${input.messages}
|
|
253
|
+
|
|
254
|
+
---
|
|
255
|
+
IMPORTANT: If any new entries you would create are semantically duplicative of existing entries (same concept, different wording), prefer updating the existing entry rather than creating a new one. Only create new entries for genuinely distinct knowledge.`;
|
|
248
256
|
}
|
|
249
257
|
|
|
250
258
|
// Format distillations for injection into the message context.
|
package/src/temporal.ts
CHANGED
|
@@ -228,3 +228,94 @@ export function undistilledCount(
|
|
|
228
228
|
.get(...params) as { count: number }
|
|
229
229
|
).count;
|
|
230
230
|
}
|
|
231
|
+
|
|
232
|
+
export type PruneResult = {
  /** Rows deleted by the TTL pass (distilled=1 AND older than retention period). */
  ttlDeleted: number;
  /** Rows deleted by the size-cap pass (distilled=1, oldest-first, to get under maxStorage). */
  capDeleted: number;
};

/**
 * Prune temporal messages for a project in two passes.
 *
 * Pass 1 — TTL: delete messages where distilled=1 AND created_at is older than
 * `retentionDays`.
 *
 * Pass 2 — Size cap: if the summed content length of the project's temporal
 * messages still exceeds `maxStorageMB`, delete the oldest distilled=1 messages
 * (regardless of age) until enough has been freed to get under the cap, or
 * until no distilled messages remain.
 *
 * Invariant: undistilled messages (distilled=0) are NEVER deleted by either
 * pass, so the project can stay above the cap when the excess is undistilled.
 *
 * @param input.projectPath   Project whose temporal messages are pruned.
 * @param input.retentionDays Age in days after which distilled messages are removed.
 * @param input.maxStorageMB  Storage cap for the project, in MiB (1024 * 1024).
 * @returns How many rows each pass removed.
 */
export function prune(input: {
  projectPath: string;
  retentionDays: number;
  maxStorageMB: number;
}): PruneResult {
  // Upper bound on ids bound to a single DELETE. SQLite rejects statements with
  // more host parameters than SQLITE_MAX_VARIABLE_NUMBER (999 in older builds),
  // so an unbounded IN (...) list fails exactly when the most rows need evicting.
  const maxIdsPerDelete = 500;

  const database = db();
  const pid = ensureProject(input.projectPath);
  const cutoff = Date.now() - input.retentionDays * 24 * 60 * 60 * 1000;

  // Pass 1: TTL — delete distilled messages older than the retention window.
  // Note: result.changes is inflated by FTS trigger side-effects, so we count
  // eligible rows before deletion to get the accurate number deleted.
  const ttlEligible = (
    database
      .query(
        "SELECT COUNT(*) as c FROM temporal_messages WHERE project_id = ? AND distilled = 1 AND created_at < ?",
      )
      .get(pid, cutoff) as { c: number }
  ).c;
  if (ttlEligible > 0) {
    database
      .query(
        "DELETE FROM temporal_messages WHERE project_id = ? AND distilled = 1 AND created_at < ?",
      )
      .run(pid, cutoff);
  }
  const ttlDeleted = ttlEligible;

  // Pass 2: Size cap — measured after the TTL pass, over ALL of the project's
  // messages (distilled or not).
  // NOTE(review): LENGTH() counts characters for TEXT values, so this may
  // under-count bytes for non-ASCII content — confirm the column type.
  const maxBytes = input.maxStorageMB * 1024 * 1024;
  const totalBytes =
    (
      database
        .query("SELECT SUM(LENGTH(content)) as b FROM temporal_messages WHERE project_id = ?")
        .get(pid) as { b: number | null }
    ).b ?? 0;

  let capDeleted = 0;
  if (totalBytes > maxBytes) {
    // Walk distilled messages oldest-first, collecting ids until the collected
    // size covers the excess over the cap.
    const candidates = database
      .query(
        "SELECT id, LENGTH(content) as size FROM temporal_messages WHERE project_id = ? AND distilled = 1 ORDER BY created_at ASC",
      )
      .all(pid) as { id: string; size: number }[];

    const toDelete: string[] = [];
    let freed = 0;
    const excess = totalBytes - maxBytes;
    for (const row of candidates) {
      if (freed >= excess) break;
      toDelete.push(row.id);
      freed += row.size;
    }

    // Delete in bounded batches so the placeholder count never exceeds the
    // SQLite host-parameter limit, however far over the cap the project is.
    for (let start = 0; start < toDelete.length; start += maxIdsPerDelete) {
      const batch = toDelete.slice(start, start + maxIdsPerDelete);
      const placeholders = batch.map(() => "?").join(",");
      database
        .query(`DELETE FROM temporal_messages WHERE id IN (${placeholders})`)
        .run(...batch);
    }
    // toDelete.length is the accurate count — result.changes is inflated by FTS triggers.
    capDeleted = toDelete.length;
  }

  return { ttlDeleted, capDeleted };
}
|