opencode-lore 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/package.json +2 -1
- package/src/agents-file.ts +318 -0
- package/src/config.ts +15 -8
- package/src/curator.ts +22 -2
- package/src/db.ts +11 -0
- package/src/gradient.ts +31 -1
- package/src/index.ts +50 -4
- package/src/ltm.ts +177 -1
- package/src/markdown.ts +29 -0
- package/src/prompt.ts +39 -3
package/README.md
CHANGED
|
@@ -94,6 +94,17 @@ To use a local clone instead of the published package:
|
|
|
94
94
|
}
|
|
95
95
|
```
|
|
96
96
|
|
|
97
|
+
## What to expect
|
|
98
|
+
|
|
99
|
+
Once Lore is active, you should notice several changes:
|
|
100
|
+
|
|
101
|
+
- **Higher cache reuse** — Lore keeps your context stable across turns, so the provider cache hits more often. You'll see higher cache read rates and lower costs.
|
|
102
|
+
- **No more compactions** — Lore disables the built-in compaction system and replaces it with incremental distillation. Your context never gets wiped and rebuilt from a lossy summary.
|
|
103
|
+
- **Steady context usage around 70–80%** — the gradient context manager dynamically balances distilled history, raw messages, and knowledge to keep you in the sweet spot — enough room for the model to work, but no wasted context.
|
|
104
|
+
- **Agent doesn't degrade in long sessions** — instead of getting progressively dumber as compaction loses details, the agent stays sharp because distillation preserves the operational facts that matter.
|
|
105
|
+
- **Better recall across and within sessions** — the agent remembers specific details from earlier in the conversation and from previous sessions, including file paths, decisions, error messages, and why things were done a certain way.
|
|
106
|
+
- **Automatic `AGENTS.md` export** — Lore periodically exports curated knowledge to an `AGENTS.md` file in your repo. This is the [universal format](https://agenticaistandard.org/) read by 16+ AI coding tools (Codex, Jules, Cursor, Copilot, Windsurf, and more), so the knowledge benefits every tool — not just OpenCode.
|
|
107
|
+
|
|
97
108
|
## What gets stored
|
|
98
109
|
|
|
99
110
|
All data lives locally in `~/.local/share/opencode-lore/lore.db`:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "opencode-lore",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
},
|
|
18
18
|
"dependencies": {
|
|
19
19
|
"remark": "^15.0.1",
|
|
20
|
+
"uuidv7": "^1.1.0",
|
|
20
21
|
"zod": "^3.25.0"
|
|
21
22
|
},
|
|
22
23
|
"devDependencies": {
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* agents-file.ts — AGENTS.md export/import/sync for lore.
|
|
3
|
+
*
|
|
4
|
+
* Lore owns a clearly delimited section inside the file, bounded by HTML
|
|
5
|
+
* comment markers. Everything outside those markers is preserved verbatim.
|
|
6
|
+
* Each knowledge entry is preceded by a hidden <!-- lore:UUID --> comment so
|
|
7
|
+
* the same entry can be tracked across machines and merge conflicts resolved
|
|
8
|
+
* without duplication.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
|
|
12
|
+
import { dirname } from "path";
|
|
13
|
+
import * as ltm from "./ltm";
|
|
14
|
+
import { formatKnowledge } from "./prompt";
|
|
15
|
+
import { unescapeMarkdown } from "./markdown";
|
|
16
|
+
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Constants
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
export const LORE_SECTION_START =
|
|
22
|
+
"<!-- This section is auto-maintained by lore (https://github.com/BYK/opencode-lore) -->";
|
|
23
|
+
export const LORE_SECTION_END = "<!-- End lore-managed section -->";
|
|
24
|
+
|
|
25
|
+
/** Regex matching a valid UUID (v4 or v7) — 8-4-4-4-12 hex groups. */
|
|
26
|
+
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/;
|
|
27
|
+
|
|
28
|
+
/** Matches `<!-- lore:UUID -->` tracking markers. */
|
|
29
|
+
const MARKER_RE = /^<!--\s*lore:([0-9a-f-]+)\s*-->$/;
|
|
30
|
+
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Types
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
export type ParsedFileEntry = {
|
|
36
|
+
/** UUID from `<!-- lore:UUID -->` marker, or null for hand-written entries. */
|
|
37
|
+
id: string | null;
|
|
38
|
+
category: string;
|
|
39
|
+
title: string;
|
|
40
|
+
content: string;
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Section extraction helpers
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
/**
 * Partition the agents file into the text before the lore-managed section,
 * the section body between the two markers, and the text after it.
 *
 * The end marker is looked up only *after* the start marker. Searching from
 * the beginning of the file would let an earlier, stray copy of the end marker
 * (e.g. quoted in hand-written text) hide a perfectly valid section, which
 * in turn would make callers treat the file as having no lore section at all.
 *
 * @param fileContent Full text of the agents file.
 * @returns `before` / `section` / `after` slices (markers themselves are
 *   excluded). When no start marker followed by an end marker exists,
 *   `section` is null, `before` is the whole input and `after` is empty.
 */
function splitFile(fileContent: string): {
  before: string;
  section: string | null;
  after: string;
} {
  const startIdx = fileContent.indexOf(LORE_SECTION_START);
  if (startIdx === -1) {
    return { before: fileContent, section: null, after: "" };
  }

  // Only an end marker that follows the start marker can close the section.
  const bodyStart = startIdx + LORE_SECTION_START.length;
  const endIdx = fileContent.indexOf(LORE_SECTION_END, bodyStart);
  if (endIdx === -1) {
    return { before: fileContent, section: null, after: "" };
  }

  return {
    before: fileContent.slice(0, startIdx),
    section: fileContent.slice(bodyStart, endIdx),
    after: fileContent.slice(endIdx + LORE_SECTION_END.length),
  };
}
|
|
71
|
+
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
// Parse entries from a lore section body (or any markdown block)
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
/**
 * Parse knowledge entries out of a block of markdown.
 *
 * Line-by-line rules (each line is trimmed first):
 *   - `### Heading` sets the category (lower-cased) for following bullets and
 *     discards any pending marker.
 *   - `<!-- lore:ID -->` stores ID as the pending id when it is a well-formed
 *     UUID; otherwise the pending id is cleared.
 *   - `* **Title**: Content` produces an entry carrying the pending id (or
 *     null) and then consumes that id.
 *   - Any other non-empty line that is neither a `##…` heading nor an HTML
 *     comment clears the pending id.
 *
 * Entries found before any `###` heading get the category "pattern".
 * Duplicate ids are returned as-is; de-duplication is left to the caller.
 *
 * @param section Markdown text to scan.
 * @returns Entries in file order.
 */
export function parseEntriesFromSection(section: string): ParsedFileEntry[] {
  const headingPattern = /^###\s+(.+)$/;
  const bulletPattern = /^\*\s+\*\*(.+?)\*\*:\s*(.+)$/;

  const found: ParsedFileEntry[] = [];
  let category = "pattern";
  let markerId: string | null = null;

  const trimmedLines = section.split("\n").map((rawLine) => rawLine.trim());

  for (const text of trimmedLines) {
    const heading = headingPattern.exec(text);
    if (heading !== null) {
      category = heading[1].toLowerCase();
      markerId = null;
      continue;
    }

    const marker = MARKER_RE.exec(text);
    if (marker !== null) {
      const rawId = marker[1];
      markerId = UUID_RE.test(rawId) ? rawId : null;
      continue;
    }

    const bullet = bulletPattern.exec(text);
    if (bullet !== null) {
      // Titles/content are unescaped so that markdown backslash-escapes do
      // not accumulate across repeated export/import cycles.
      found.push({
        id: markerId,
        category,
        title: unescapeMarkdown(bullet[1].trim()),
        content: unescapeMarkdown(bullet[2].trim()),
      });
      markerId = null; // the marker belongs to exactly one bullet
      continue;
    }

    // Blank lines, `##…` headings and other HTML comments may sit between a
    // marker and its bullet; anything else breaks the association.
    const keepsMarker =
      text === "" || text.startsWith("##") || text.startsWith("<!--");
    if (!keepsMarker) {
      markerId = null;
    }
  }

  return found;
}
|
|
134
|
+
|
|
135
|
+
// ---------------------------------------------------------------------------
|
|
136
|
+
// Content hash (for change detection)
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
/**
 * Cheap 32-bit polynomial hash (multiplier 31) over the UTF-16 code units of
 * `section`, used for change detection only — not cryptographic.
 *
 * @param section Text to fingerprint.
 * @returns The hash as an 8-character, zero-padded, lowercase hex string.
 */
function hashSection(section: string): string {
  let acc = 0;
  let pos = 0;
  while (pos < section.length) {
    // (acc << 5) - acc equals 31 * acc; `| 0` wraps the sum to a signed int32.
    acc = ((acc << 5) - acc + section.charCodeAt(pos)) | 0;
    pos += 1;
  }
  // Reinterpret as unsigned so the hex form never carries a minus sign.
  const unsigned = acc >>> 0;
  return unsigned.toString(16).padStart(8, "0");
}
|
|
147
|
+
|
|
148
|
+
// ---------------------------------------------------------------------------
|
|
149
|
+
// Build the lore section body from DB entries
|
|
150
|
+
// ---------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
/**
 * Render the body of the lore-managed section for a project.
 *
 * Only project-specific entries are exported (`ltm.forProject(path, false)`);
 * cross-project knowledge lives in the shared DB and would bloat a
 * per-project file with unrelated content.
 *
 * The formatted markdown is post-processed so that every bullet whose title
 * maps to a known entry is preceded by a `<!-- lore:ID -->` tracking marker.
 *
 * @param projectPath Project whose entries should be rendered.
 * @returns The section body, starting and ending with a newline. A lone
 *   "\n" is returned when there is nothing to export.
 */
function buildSection(projectPath: string): string {
  const entries = ltm.forProject(projectPath, false);
  if (!entries.length) {
    return "\n";
  }

  const formatted = formatKnowledge(
    entries.map((e) => ({ category: e.category, title: e.title, content: e.content })),
  );
  if (!formatted) return "\n";

  // NOTE(review): entries sharing a title collapse onto the last id here;
  // titles are assumed to be unique within a project.
  const idByTitle = new Map(entries.map((e) => [e.title, e.id]));

  const out: string[] = [""];
  for (const line of formatted.split("\n")) {
    const bulletMatch = line.match(/^\*\s+\*\*(.+?)\*\*/);
    if (bulletMatch) {
      const renderedTitle = bulletMatch[1];
      // The rendered title may carry markdown backslash-escapes that the
      // stored title does not (the import path unescapes them), so retry the
      // lookup with the unescaped form instead of silently dropping the marker.
      const id =
        idByTitle.get(renderedTitle) ??
        idByTitle.get(unescapeMarkdown(renderedTitle));
      if (id) out.push(`<!-- lore:${id} -->`);
    }
    out.push(line);
  }
  out.push("");
  return out.join("\n");
}
|
|
181
|
+
|
|
182
|
+
// ---------------------------------------------------------------------------
|
|
183
|
+
// Export
|
|
184
|
+
// ---------------------------------------------------------------------------
|
|
185
|
+
|
|
186
|
+
/**
 * Write the project's current knowledge entries into the agents file.
 *
 * The lore-managed section (markers included) is regenerated; text outside
 * the markers is kept, with surrounding whitespace normalised to a blank
 * line before the section and a single newline after it. When the file has
 * no lore section yet, the section is appended after the existing content.
 * Missing files and parent directories are created.
 *
 * @param input.projectPath Project whose entries are exported.
 * @param input.filePath    Destination path of the agents file.
 */
export function exportToFile(input: {
  projectPath: string;
  filePath: string;
}): void {
  const managedBlock = `${LORE_SECTION_START}${buildSection(input.projectPath)}${LORE_SECTION_END}\n`;

  const current = existsSync(input.filePath)
    ? readFileSync(input.filePath, "utf8")
    : "";
  const { before, after } = splitFile(current);

  const head = before.trimEnd();
  const tail = after.trimStart();

  let result = "";
  if (head.length > 0) {
    // Blank line between hand-written content and the managed section.
    result += head + "\n\n";
  }
  result += managedBlock;
  if (tail.length > 0) {
    result += "\n" + tail;
  }

  mkdirSync(dirname(input.filePath), { recursive: true });
  writeFileSync(input.filePath, result, "utf8");
}
|
|
216
|
+
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
// shouldImport
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
/**
 * Decide whether the agents file contains changes worth importing.
 *
 * @param input.projectPath Project the file belongs to.
 * @param input.filePath    Path of the agents file.
 * @returns
 *   - false when the file does not exist;
 *   - for a file without lore markers: true iff it has non-whitespace content;
 *   - otherwise: true iff the hash of the file's lore section differs from
 *     the hash of the section lore would generate right now.
 */
export function shouldImport(input: {
  projectPath: string;
  filePath: string;
}): boolean {
  if (!existsSync(input.filePath)) {
    return false;
  }

  const text = readFileSync(input.filePath, "utf8");
  const managed = splitFile(text).section;

  if (managed !== null) {
    // Section present: import only if it no longer matches our own output.
    const regenerated = buildSection(input.projectPath);
    return hashSection(managed) !== hashSection(regenerated);
  }

  // No markers: a hand-written file that has not been imported yet.
  return text.trim().length > 0;
}
|
|
244
|
+
|
|
245
|
+
// ---------------------------------------------------------------------------
|
|
246
|
+
// Import
|
|
247
|
+
// ---------------------------------------------------------------------------
|
|
248
|
+
|
|
249
|
+
/**
 * Import knowledge entries from the agents file into the local DB.
 *
 * What gets parsed:
 *   - lore markers present → only the lore section body;
 *   - no markers           → the whole file (first import of a hand-written
 *     agents file).
 *
 * Behaviour per entry:
 *   - Known UUID (already in DB)   → content is updated if it differs
 *   - Unknown UUID (other machine) → created with that exact ID
 *   - No UUID (hand-written)       → created with a fresh ID, unless an entry
 *     with the same title (case-insensitive) is already known
 *   - Duplicate UUID in the file   → first occurrence wins, rest ignored
 *
 * Does nothing when the file is missing or contains no entries.
 *
 * @param input.projectPath Project the entries are attached to.
 * @param input.filePath    Path of the agents file to read.
 */
export function importFromFile(input: {
  projectPath: string;
  filePath: string;
}): void {
  if (!existsSync(input.filePath)) return;

  const fileContent = readFileSync(input.filePath, "utf8");
  const { section } = splitFile(fileContent);

  const fileEntries = parseEntriesFromSection(section ?? fileContent);
  if (!fileEntries.length) return;

  const seenIds = new Set<string>();

  // Lower-cased titles already known for this project. Loaded from the DB at
  // most once (on the first hand-written entry) and kept current as entries
  // are created, instead of re-querying every entry for each hand-written
  // bullet.
  let knownTitles: Set<string> | null = null;

  for (const entry of fileEntries) {
    if (entry.id !== null) {
      // Deduplicate: if the same UUID appears twice in the file, first wins.
      if (seenIds.has(entry.id)) continue;
      seenIds.add(entry.id);

      const existing = ltm.get(entry.id);
      if (existing) {
        // Known entry — only a changed content (manual edit) is written back.
        if (existing.content !== entry.content) {
          ltm.update(entry.id, { content: entry.content });
        }
        continue;
      }

      // Unknown UUID — entry came from another machine, preserve its ID.
      ltm.create({
        projectPath: input.projectPath,
        category: entry.category,
        title: entry.title,
        content: entry.content,
        scope: "project",
        id: entry.id,
      });
      // If the title cache is not loaded yet, the later DB load picks this up.
      knownTitles?.add(entry.title.toLowerCase());
      continue;
    }

    // Hand-written entry: skip when the title is already known, so re-runs
    // do not import the same bullet twice.
    if (knownTitles === null) {
      knownTitles = new Set(
        ltm
          .forProject(input.projectPath, true)
          .map((e) => e.title.toLowerCase()),
      );
    }
    const titleKey = entry.title.toLowerCase();
    if (knownTitles.has(titleKey)) continue;

    ltm.create({
      projectPath: input.projectPath,
      category: entry.category,
      title: entry.title,
      content: entry.content,
      scope: "project",
    });
    knownTitles.add(titleKey);
  }
}
|
package/src/config.ts
CHANGED
|
@@ -12,6 +12,8 @@ export const LoreConfig = z.object({
|
|
|
12
12
|
distilled: z.number().min(0.05).max(0.5).default(0.25),
|
|
13
13
|
raw: z.number().min(0.1).max(0.7).default(0.4),
|
|
14
14
|
output: z.number().min(0.1).max(0.5).default(0.25),
|
|
15
|
+
/** Max fraction of usable context reserved for LTM system-prompt injection. Default: 0.10 (10%). */
|
|
16
|
+
ltm: z.number().min(0.02).max(0.3).default(0.10),
|
|
15
17
|
})
|
|
16
18
|
.default({}),
|
|
17
19
|
distillation: z
|
|
@@ -29,6 +31,14 @@ export const LoreConfig = z.object({
|
|
|
29
31
|
})
|
|
30
32
|
.default({}),
|
|
31
33
|
crossProject: z.boolean().default(true),
|
|
34
|
+
agentsFile: z
|
|
35
|
+
.object({
|
|
36
|
+
/** Set to false to disable all AGENTS.md export/import behaviour. */
|
|
37
|
+
enabled: z.boolean().default(true),
|
|
38
|
+
/** Path to the agents file, relative to the project root. */
|
|
39
|
+
path: z.string().default("AGENTS.md"),
|
|
40
|
+
})
|
|
41
|
+
.default({}),
|
|
32
42
|
});
|
|
33
43
|
|
|
34
44
|
export type LoreConfig = z.infer<typeof LoreConfig>;
|
|
@@ -40,14 +50,11 @@ export function config(): LoreConfig {
|
|
|
40
50
|
}
|
|
41
51
|
|
|
42
52
|
export async function load(directory: string): Promise<LoreConfig> {
|
|
43
|
-
const
|
|
44
|
-
|
|
45
|
-
const
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
current = LoreConfig.parse(raw);
|
|
49
|
-
return current;
|
|
50
|
-
}
|
|
53
|
+
const file = Bun.file(`${directory}/.lore.json`);
|
|
54
|
+
if (await file.exists()) {
|
|
55
|
+
const raw = await file.json();
|
|
56
|
+
current = LoreConfig.parse(raw);
|
|
57
|
+
return current;
|
|
51
58
|
}
|
|
52
59
|
current = LoreConfig.parse({});
|
|
53
60
|
return current;
|
package/src/curator.ts
CHANGED
|
@@ -5,6 +5,14 @@ import * as ltm from "./ltm";
|
|
|
5
5
|
import { CURATOR_SYSTEM, curatorUser } from "./prompt";
|
|
6
6
|
import { workerSessionIDs } from "./distillation";
|
|
7
7
|
|
|
8
|
+
/**
|
|
9
|
+
* Maximum length (chars) for a single knowledge entry's content.
|
|
10
|
+
* ~500 tokens. Entries exceeding this are truncated with a notice.
|
|
11
|
+
* The curator prompt also instructs the model to stay within this limit,
|
|
12
|
+
* so truncation is a last-resort safety net.
|
|
13
|
+
*/
|
|
14
|
+
const MAX_ENTRY_CONTENT_LENGTH = 2000;
|
|
15
|
+
|
|
8
16
|
type Client = ReturnType<typeof createOpencodeClient>;
|
|
9
17
|
|
|
10
18
|
const workerSessions = new Map<string, string>();
|
|
@@ -120,11 +128,18 @@ export async function run(input: {
|
|
|
120
128
|
|
|
121
129
|
for (const op of ops) {
|
|
122
130
|
if (op.op === "create") {
|
|
131
|
+
// Truncate oversized content — the model should stay within the prompt's
|
|
132
|
+
// 500-word limit, but enforce it here as a hard safety net.
|
|
133
|
+
const content =
|
|
134
|
+
op.content.length > MAX_ENTRY_CONTENT_LENGTH
|
|
135
|
+
? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) +
|
|
136
|
+
" [truncated — entry too long]"
|
|
137
|
+
: op.content;
|
|
123
138
|
ltm.create({
|
|
124
139
|
projectPath: op.scope === "project" ? input.projectPath : undefined,
|
|
125
140
|
category: op.category,
|
|
126
141
|
title: op.title,
|
|
127
|
-
content
|
|
142
|
+
content,
|
|
128
143
|
session: input.sessionID,
|
|
129
144
|
scope: op.scope,
|
|
130
145
|
crossProject: op.crossProject ?? true,
|
|
@@ -133,7 +148,12 @@ export async function run(input: {
|
|
|
133
148
|
} else if (op.op === "update") {
|
|
134
149
|
const entry = ltm.get(op.id);
|
|
135
150
|
if (entry) {
|
|
136
|
-
|
|
151
|
+
const content =
|
|
152
|
+
op.content !== undefined && op.content.length > MAX_ENTRY_CONTENT_LENGTH
|
|
153
|
+
? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) +
|
|
154
|
+
" [truncated — entry too long]"
|
|
155
|
+
: op.content;
|
|
156
|
+
ltm.update(op.id, { content, confidence: op.confidence });
|
|
137
157
|
updated++;
|
|
138
158
|
}
|
|
139
159
|
} else if (op.op === "delete") {
|
package/src/db.ts
CHANGED
|
@@ -196,3 +196,14 @@ export function projectId(path: string): string | undefined {
|
|
|
196
196
|
.get(path) as { id: string } | null;
|
|
197
197
|
return row?.id;
|
|
198
198
|
}
|
|
199
|
+
|
|
200
|
+
/**
 * Reports whether the `projects` table is empty, i.e. Lore has not been used
 * on this machine yet.
 *
 * Call this before `ensureProject()`; per the original note, calling it
 * afterwards no longer gives an accurate answer.
 *
 * @returns true when the project count is zero.
 */
export function isFirstRun(): boolean {
  const { count } = db()
    .query("SELECT COUNT(*) as count FROM projects")
    .get() as { count: number };
  return count === 0;
}
|
package/src/gradient.ts
CHANGED
|
@@ -40,11 +40,37 @@ const FIRST_TURN_OVERHEAD = 15_000;
|
|
|
40
40
|
// Null = not yet calibrated (first turn). Updated after every assistant response.
|
|
41
41
|
let calibratedOverhead: number | null = null;
|
|
42
42
|
|
|
43
|
+
// LTM tokens injected via system transform hook this turn.
|
|
44
|
+
// Set by setLtmTokens() after the system hook runs; consumed by transform().
|
|
45
|
+
let ltmTokens = 0;
|
|
46
|
+
|
|
43
47
|
/**
 * Record the active model's context window and output reservation.
 *
 * A falsy `context` falls back to 200 000 tokens. The output reservation
 * falls back to 32 000 when `output` is falsy and is never allowed to exceed
 * 32 000.
 *
 * @param limits Context and output token limits reported for the model.
 */
export function setModelLimits(limits: { context: number; output: number }) {
  const { context, output } = limits;
  contextLimit = context || 200_000;
  const requestedOutput = output || 32_000;
  outputReserved = Math.min(requestedOutput, 32_000);
}
|
|
47
51
|
|
|
52
|
+
/**
 * Store the number of LTM tokens injected into the system prompt this turn.
 * Intended to be called by the system transform hook once the knowledge
 * block has been formatted.
 *
 * @param tokens Estimated token count of the injected knowledge (0 if none).
 */
export function setLtmTokens(tokens: number) {
  ltmTokens = tokens;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Read back the LTM token count most recently stored via `setLtmTokens()`.
 * Exposed for tests and diagnostics.
 *
 * @returns The stored LTM token count.
 */
export function getLtmTokens(): number {
  return ltmTokens;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Compute how many tokens LTM knowledge may occupy in the system prompt.
 *
 * The usable context is the context limit minus the output reservation and
 * the overhead (the calibrated value when available, otherwise the
 * first-turn default), clamped at zero. The budget is that amount scaled by
 * `ltmFraction` and rounded down.
 *
 * @param ltmFraction Fraction of the usable context to grant to LTM.
 * @returns Token budget for the formatted knowledge block.
 */
export function getLtmBudget(ltmFraction: number): number {
  const remaining =
    contextLimit - outputReserved - (calibratedOverhead ?? FIRST_TURN_OVERHEAD);
  const usableTokens = Math.max(0, remaining);
  return Math.floor(usableTokens * ltmFraction);
}
|
|
73
|
+
|
|
48
74
|
// Called after each assistant message completes with real token usage data.
|
|
49
75
|
// actualInput = tokens.input + tokens.cache.read (all tokens that went into the model)
|
|
50
76
|
// messageEstimate = our chars/4 estimate of the messages we sent
|
|
@@ -385,7 +411,11 @@ export function transform(input: {
|
|
|
385
411
|
const cfg = config();
|
|
386
412
|
const overhead = getOverhead();
|
|
387
413
|
// Usable = full context minus output reservation minus fixed overhead (system + tools)
|
|
388
|
-
|
|
414
|
+
// minus LTM tokens already injected into the system prompt this turn.
|
|
415
|
+
const usable = Math.max(
|
|
416
|
+
0,
|
|
417
|
+
contextLimit - outputReserved - overhead - ltmTokens,
|
|
418
|
+
);
|
|
389
419
|
const distilledBudget = Math.floor(usable * cfg.budget.distilled);
|
|
390
420
|
const rawBudget = Math.floor(usable * cfg.budget.raw);
|
|
391
421
|
|
package/src/index.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Plugin } from "@opencode-ai/plugin";
|
|
2
2
|
import { load, config } from "./config";
|
|
3
|
-
import { ensureProject } from "./db";
|
|
3
|
+
import { ensureProject, isFirstRun } from "./db";
|
|
4
4
|
import * as temporal from "./temporal";
|
|
5
5
|
import * as ltm from "./ltm";
|
|
6
6
|
import * as distillation from "./distillation";
|
|
@@ -11,6 +11,8 @@ import {
|
|
|
11
11
|
needsUrgentDistillation,
|
|
12
12
|
calibrate,
|
|
13
13
|
estimateMessages,
|
|
14
|
+
setLtmTokens,
|
|
15
|
+
getLtmBudget,
|
|
14
16
|
} from "./gradient";
|
|
15
17
|
import { formatKnowledge } from "./prompt";
|
|
16
18
|
import { createRecallTool } from "./reflect";
|
|
@@ -18,8 +20,27 @@ import { createRecallTool } from "./reflect";
|
|
|
18
20
|
export const LorePlugin: Plugin = async (ctx) => {
|
|
19
21
|
const projectPath = ctx.worktree || ctx.directory;
|
|
20
22
|
await load(ctx.directory);
|
|
23
|
+
let firstRun = isFirstRun();
|
|
21
24
|
ensureProject(projectPath);
|
|
22
25
|
|
|
26
|
+
if (firstRun) {
|
|
27
|
+
ctx.client.tui.showToast({
|
|
28
|
+
body: {
|
|
29
|
+
message: "Lore is active — your agent will get smarter every session",
|
|
30
|
+
variant: "success",
|
|
31
|
+
duration: 5000,
|
|
32
|
+
},
|
|
33
|
+
}).catch(() => {});
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// Prune any corrupted/oversized knowledge entries left by the AGENTS.md
|
|
37
|
+
// backslash-escaping bug or curator hallucinations. Sets confidence → 0
|
|
38
|
+
// (below the 0.2 query threshold) so they stop polluting the context.
|
|
39
|
+
const pruned = ltm.pruneOversized(2000);
|
|
40
|
+
if (pruned > 0) {
|
|
41
|
+
console.error(`[lore] pruned ${pruned} oversized knowledge entries (confidence set to 0)`);
|
|
42
|
+
}
|
|
43
|
+
|
|
23
44
|
// Track user turns for periodic curation
|
|
24
45
|
let turnsSinceCuration = 0;
|
|
25
46
|
|
|
@@ -197,14 +218,31 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
197
218
|
}
|
|
198
219
|
},
|
|
199
220
|
|
|
200
|
-
// Inject LTM knowledge into system prompt
|
|
221
|
+
// Inject LTM knowledge into system prompt — relevance-ranked and budget-capped.
|
|
201
222
|
"experimental.chat.system.transform": async (input, output) => {
|
|
223
|
+
// One-time first-run note so the agent acknowledges Lore is active.
|
|
224
|
+
// Cleared after first injection to avoid repeating on subsequent turns.
|
|
225
|
+
if (firstRun) {
|
|
226
|
+
output.system.push(
|
|
227
|
+
"[Lore plugin] This is the first time Lore has been activated. " +
|
|
228
|
+
"Briefly let the user know that Lore is now active and their " +
|
|
229
|
+
"coding agent will get progressively smarter on this codebase " +
|
|
230
|
+
"over time as knowledge accumulates across sessions.",
|
|
231
|
+
);
|
|
232
|
+
firstRun = false;
|
|
233
|
+
}
|
|
234
|
+
|
|
202
235
|
if (input.model?.limit) {
|
|
203
236
|
setModelLimits(input.model.limit);
|
|
204
237
|
}
|
|
205
238
|
|
|
206
|
-
const
|
|
207
|
-
|
|
239
|
+
const cfg = config();
|
|
240
|
+
const ltmBudget = getLtmBudget(cfg.budget.ltm);
|
|
241
|
+
const entries = ltm.forSession(projectPath, input.sessionID, ltmBudget);
|
|
242
|
+
if (!entries.length) {
|
|
243
|
+
setLtmTokens(0);
|
|
244
|
+
return;
|
|
245
|
+
}
|
|
208
246
|
|
|
209
247
|
const formatted = formatKnowledge(
|
|
210
248
|
entries.map((e) => ({
|
|
@@ -212,9 +250,17 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
212
250
|
title: e.title,
|
|
213
251
|
content: e.content,
|
|
214
252
|
})),
|
|
253
|
+
ltmBudget,
|
|
215
254
|
);
|
|
255
|
+
|
|
216
256
|
if (formatted) {
|
|
257
|
+
// Track how many tokens we actually consumed so the gradient manager
|
|
258
|
+
// can deduct them from the usable budget for message injection.
|
|
259
|
+
const ltmTokenCount = Math.ceil(formatted.length / 4);
|
|
260
|
+
setLtmTokens(ltmTokenCount);
|
|
217
261
|
output.system.push(formatted);
|
|
262
|
+
} else {
|
|
263
|
+
setLtmTokens(0);
|
|
218
264
|
}
|
|
219
265
|
},
|
|
220
266
|
|
package/src/ltm.ts
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
1
|
+
import { uuidv7 } from "uuidv7";
|
|
1
2
|
import { db, ensureProject } from "./db";
|
|
2
3
|
import { ftsQuery } from "./temporal";
|
|
3
4
|
|
|
5
|
+
/**
 * Rough token estimate for a piece of text, assuming about four characters
 * per token and rounding up.
 *
 * @param text Text to measure.
 * @returns Estimated token count (0 for the empty string).
 */
function estimateTokens(text: string): number {
  const CHARS_PER_TOKEN = 4;
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}
|
|
9
|
+
|
|
4
10
|
export type KnowledgeEntry = {
|
|
5
11
|
id: string;
|
|
6
12
|
project_id: string | null;
|
|
@@ -23,12 +29,14 @@ export function create(input: {
|
|
|
23
29
|
session?: string;
|
|
24
30
|
scope: "project" | "global";
|
|
25
31
|
crossProject?: boolean;
|
|
32
|
+
/** Explicit ID to use — for cross-machine import via agents-file. Defaults to a new UUIDv7. */
|
|
33
|
+
id?: string;
|
|
26
34
|
}): string {
|
|
27
35
|
const pid =
|
|
28
36
|
input.scope === "project" && input.projectPath
|
|
29
37
|
? ensureProject(input.projectPath)
|
|
30
38
|
: null;
|
|
31
|
-
const id =
|
|
39
|
+
const id = input.id ?? uuidv7();
|
|
32
40
|
const now = Date.now();
|
|
33
41
|
db()
|
|
34
42
|
.query(
|
|
@@ -100,6 +108,155 @@ export function forProject(
|
|
|
100
108
|
.all(pid) as KnowledgeEntry[];
|
|
101
109
|
}
|
|
102
110
|
|
|
111
|
+
/**
|
|
112
|
+
* Build a relevance-ranked, budget-capped list of knowledge entries for injection
|
|
113
|
+
* into the system prompt of a live session.
|
|
114
|
+
*
|
|
115
|
+
* Strategy:
|
|
116
|
+
* 1. Project-specific entries (project_id = current project, cross_project = 0)
|
|
117
|
+
* always get priority — they were curated specifically for this codebase.
|
|
118
|
+
* 2. Cross-project entries are scored for relevance against recent session context
|
|
119
|
+
* (last distillation + recent raw messages). Only entries that match are included.
|
|
120
|
+
* 3. All candidates are ranked by score * confidence, then greedily packed into
|
|
121
|
+
* the token budget (smallest-first within same score band to maximize count).
|
|
122
|
+
* 4. If there's no session context yet (first turn), fall back to top entries by
|
|
123
|
+
* confidence only.
|
|
124
|
+
*
|
|
125
|
+
* @param projectPath Current project path
|
|
126
|
+
* @param sessionID Current session ID (for context extraction)
|
|
127
|
+
* @param maxTokens Hard token budget for the entire formatted block
|
|
128
|
+
*/
|
|
129
|
+
export function forSession(
|
|
130
|
+
projectPath: string,
|
|
131
|
+
sessionID: string | undefined,
|
|
132
|
+
maxTokens: number,
|
|
133
|
+
): KnowledgeEntry[] {
|
|
134
|
+
const pid = ensureProject(projectPath);
|
|
135
|
+
|
|
136
|
+
// --- 1. Load project-specific entries (always relevant) ---
|
|
137
|
+
const projectEntries = db()
|
|
138
|
+
.query(
|
|
139
|
+
`SELECT * FROM knowledge
|
|
140
|
+
WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2
|
|
141
|
+
ORDER BY confidence DESC, updated_at DESC`,
|
|
142
|
+
)
|
|
143
|
+
.all(pid) as KnowledgeEntry[];
|
|
144
|
+
|
|
145
|
+
// --- 2. Load cross-project candidates ---
|
|
146
|
+
const crossEntries = db()
|
|
147
|
+
.query(
|
|
148
|
+
`SELECT * FROM knowledge
|
|
149
|
+
WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
|
|
150
|
+
ORDER BY confidence DESC, updated_at DESC`,
|
|
151
|
+
)
|
|
152
|
+
.all() as KnowledgeEntry[];
|
|
153
|
+
|
|
154
|
+
if (!crossEntries.length && !projectEntries.length) return [];
|
|
155
|
+
|
|
156
|
+
// --- 3. Build session context for relevance scoring ---
|
|
157
|
+
// Combine the most recent distillation text + last ~10 raw messages for this session
|
|
158
|
+
let sessionContext = "";
|
|
159
|
+
if (sessionID) {
|
|
160
|
+
const distRow = db()
|
|
161
|
+
.query(
|
|
162
|
+
`SELECT observations FROM distillations
|
|
163
|
+
WHERE project_id = ? AND session_id = ?
|
|
164
|
+
ORDER BY created_at DESC LIMIT 1`,
|
|
165
|
+
)
|
|
166
|
+
.get(pid, sessionID) as { observations: string } | null;
|
|
167
|
+
if (distRow?.observations) {
|
|
168
|
+
sessionContext += distRow.observations + "\n";
|
|
169
|
+
}
|
|
170
|
+
const recentMsgs = db()
|
|
171
|
+
.query(
|
|
172
|
+
`SELECT content FROM temporal_messages
|
|
173
|
+
WHERE project_id = ? AND session_id = ?
|
|
174
|
+
ORDER BY created_at DESC LIMIT 10`,
|
|
175
|
+
)
|
|
176
|
+
.all(pid, sessionID) as Array<{ content: string }>;
|
|
177
|
+
if (recentMsgs.length) {
|
|
178
|
+
sessionContext += recentMsgs.map((m) => m.content).join("\n");
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// --- 4. Score cross-project entries by relevance ---
|
|
183
|
+
// Use simple term overlap: extract frequent terms from session context and score each entry
|
|
184
|
+
type Scored = { entry: KnowledgeEntry; score: number };
|
|
185
|
+
let scoredCross: Scored[];
|
|
186
|
+
|
|
187
|
+
if (sessionContext.trim().length > 20) {
|
|
188
|
+
// Build a term set from session context (top 30 meaningful words)
|
|
189
|
+
const contextTerms = sessionContext
|
|
190
|
+
.replace(/[^\w\s]/g, " ")
|
|
191
|
+
.toLowerCase()
|
|
192
|
+
.split(/\s+/)
|
|
193
|
+
.filter((w) => w.length > 3)
|
|
194
|
+
.reduce<Map<string, number>>((acc, w) => {
|
|
195
|
+
acc.set(w, (acc.get(w) ?? 0) + 1);
|
|
196
|
+
return acc;
|
|
197
|
+
}, new Map());
|
|
198
|
+
|
|
199
|
+
// Sort by frequency, take top 30 terms
|
|
200
|
+
const topTerms = [...contextTerms.entries()]
|
|
201
|
+
.sort((a, b) => b[1] - a[1])
|
|
202
|
+
.slice(0, 30)
|
|
203
|
+
.map(([w]) => w);
|
|
204
|
+
|
|
205
|
+
scoredCross = crossEntries.map((entry) => {
|
|
206
|
+
const haystack =
|
|
207
|
+
(entry.title + " " + entry.content).replace(/[^\w\s]/g, " ").toLowerCase();
|
|
208
|
+
let hits = 0;
|
|
209
|
+
for (const term of topTerms) {
|
|
210
|
+
// Count how many context terms appear in this entry (simple overlap)
|
|
211
|
+
if (haystack.includes(term)) hits++;
|
|
212
|
+
}
|
|
213
|
+
// Score = fraction of top terms matched, weighted by confidence
|
|
214
|
+
const relevance = topTerms.length > 0 ? hits / topTerms.length : 0;
|
|
215
|
+
return { entry, score: relevance * entry.confidence };
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
// Only keep entries with at least one term match
|
|
219
|
+
scoredCross = scoredCross.filter((s) => s.score > 0);
|
|
220
|
+
} else {
|
|
221
|
+
// No session context yet — take top cross-project entries by confidence
|
|
222
|
+
scoredCross = crossEntries.slice(0, 10).map((entry) => ({
|
|
223
|
+
entry,
|
|
224
|
+
score: entry.confidence,
|
|
225
|
+
}));
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
// Sort cross-project by score desc
|
|
229
|
+
scoredCross.sort((a, b) => b.score - a.score);
|
|
230
|
+
|
|
231
|
+
// --- 5. Pack into token budget ---
|
|
232
|
+
// Project entries get first pick (fully relevant); cross entries fill remaining budget.
|
|
233
|
+
// Use a greedy fit: iterate candidates and include if they fit.
|
|
234
|
+
const HEADER_OVERHEAD_TOKENS = 15; // "## Long-term Knowledge\n"
|
|
235
|
+
let used = HEADER_OVERHEAD_TOKENS;
|
|
236
|
+
const result: KnowledgeEntry[] = [];
|
|
237
|
+
|
|
238
|
+
function tryAdd(entry: KnowledgeEntry): boolean {
|
|
239
|
+
const cost = estimateTokens(entry.title + entry.content) + 10;
|
|
240
|
+
if (used + cost > maxTokens) return false;
|
|
241
|
+
result.push(entry);
|
|
242
|
+
used += cost;
|
|
243
|
+
return true;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// Project-specific first
|
|
247
|
+
for (const entry of projectEntries) {
|
|
248
|
+
tryAdd(entry);
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
// Then cross-project by relevance score
|
|
252
|
+
for (const { entry } of scoredCross) {
|
|
253
|
+
if (used >= maxTokens) break;
|
|
254
|
+
tryAdd(entry);
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
return result;
|
|
258
|
+
}
|
|
259
|
+
|
|
103
260
|
export function all(): KnowledgeEntry[] {
|
|
104
261
|
return db()
|
|
105
262
|
.query(
|
|
@@ -184,3 +341,22 @@ export function get(id: string): KnowledgeEntry | null {
|
|
|
184
341
|
.query("SELECT * FROM knowledge WHERE id = ?")
|
|
185
342
|
.get(id) as KnowledgeEntry | null;
|
|
186
343
|
}
|
|
344
|
+
|
|
345
|
+
/**
|
|
346
|
+
* Prune knowledge entries whose content exceeds maxLength characters.
|
|
347
|
+
* These are typically corrupted entries from AGENTS.md roundtrip escaping bugs
|
|
348
|
+
* or curator hallucinations with full code dumps.
|
|
349
|
+
*
|
|
350
|
+
* Rather than hard-deleting, sets confidence to 0 so they're excluded from
|
|
351
|
+
* queries (confidence > 0.2) but can be inspected for debugging.
|
|
352
|
+
*
|
|
353
|
+
* @returns Number of entries pruned
|
|
354
|
+
*/
|
|
355
|
+
export function pruneOversized(maxLength: number): number {
|
|
356
|
+
const result = db()
|
|
357
|
+
.query(
|
|
358
|
+
"UPDATE knowledge SET confidence = 0, updated_at = ? WHERE LENGTH(content) > ? AND confidence > 0",
|
|
359
|
+
)
|
|
360
|
+
.run(Date.now(), maxLength);
|
|
361
|
+
return result.changes;
|
|
362
|
+
}
|
package/src/markdown.ts
CHANGED
|
@@ -40,6 +40,35 @@ export function normalize(md: string): string {
|
|
|
40
40
|
return processor.stringify(processor.parse(once));
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
+
/**
|
|
44
|
+
* Unescape a markdown-serialized inline string back to plain text.
|
|
45
|
+
*
|
|
46
|
+
* remark's serializer escapes special characters with backslashes
|
|
47
|
+
* (e.g. `<` → `\<`, `*` → `\*`, `\` → `\\`). When we read content
|
|
48
|
+
* back from an AGENTS.md file we must unescape it so it round-trips
|
|
49
|
+
* cleanly — otherwise each export/import cycle doubles the escapes.
|
|
50
|
+
*
|
|
51
|
+
* Uses remark's own parser to extract the text value, which handles
|
|
52
|
+
* all escape sequences correctly.
|
|
53
|
+
*/
|
|
54
|
+
export function unescapeMarkdown(md: string): string {
|
|
55
|
+
const tree = processor.parse(md);
|
|
56
|
+
// Collect all text node values from the first paragraph
|
|
57
|
+
const texts: string[] = [];
|
|
58
|
+
const para = tree.children[0];
|
|
59
|
+
if (para && para.type === "paragraph") {
|
|
60
|
+
for (const child of para.children) {
|
|
61
|
+
if (child.type === "text") texts.push(child.value);
|
|
62
|
+
else if (child.type === "strong" || child.type === "emphasis") {
|
|
63
|
+
for (const gc of child.children) {
|
|
64
|
+
if (gc.type === "text") texts.push(gc.value);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
return texts.join("") || md;
|
|
70
|
+
}
|
|
71
|
+
|
|
43
72
|
// --- Node builders ---
|
|
44
73
|
|
|
45
74
|
export function h(depth: 1 | 2 | 3 | 4 | 5 | 6, value: string): Heading {
|
package/src/prompt.ts
CHANGED
|
@@ -189,20 +189,31 @@ Do NOT extract:
|
|
|
189
189
|
- Temporary state (current branch, in-progress work)
|
|
190
190
|
- Information that will change frequently
|
|
191
191
|
|
|
192
|
+
BREVITY IS CRITICAL — each entry must be concise:
|
|
193
|
+
- content MUST be under 500 words (roughly 2000 characters)
|
|
194
|
+
- Focus on the actionable insight, not the full story behind it
|
|
195
|
+
- If a pattern requires more detail, split into multiple focused entries
|
|
196
|
+
- Omit code examples unless a single short snippet is essential
|
|
197
|
+
- Never include full file contents, large diffs, or complete command outputs
|
|
198
|
+
|
|
199
|
+
crossProject flag:
|
|
200
|
+
- Default is true — most useful knowledge is worth sharing across projects
|
|
201
|
+
- Set crossProject to false for things that are meaningless outside this specific repo (e.g. a config path, a project-local naming convention that conflicts with your usual style)
|
|
202
|
+
|
|
192
203
|
Produce a JSON array of operations:
|
|
193
204
|
[
|
|
194
205
|
{
|
|
195
206
|
"op": "create",
|
|
196
207
|
"category": "decision" | "pattern" | "preference" | "architecture" | "gotcha",
|
|
197
208
|
"title": "Short descriptive title",
|
|
198
|
-
"content": "
|
|
209
|
+
"content": "Concise knowledge entry — under 500 words",
|
|
199
210
|
"scope": "project" | "global",
|
|
200
211
|
"crossProject": true
|
|
201
212
|
},
|
|
202
213
|
{
|
|
203
214
|
"op": "update",
|
|
204
215
|
"id": "existing-entry-id",
|
|
205
|
-
"content": "Updated content",
|
|
216
|
+
"content": "Updated content — under 500 words",
|
|
206
217
|
"confidence": 0.0-1.0
|
|
207
218
|
},
|
|
208
219
|
{
|
|
@@ -267,13 +278,38 @@ export function formatDistillations(
|
|
|
267
278
|
return sections.join("\n\n");
|
|
268
279
|
}
|
|
269
280
|
|
|
281
|
+
// Rough token estimate used for budget-gating knowledge entries.
|
|
282
|
+
// Consistent with gradient.ts: ~4 chars per token.
|
|
283
|
+
function estimateTokens(text: string): number {
|
|
284
|
+
return Math.ceil(text.length / 4);
|
|
285
|
+
}
|
|
286
|
+
|
|
270
287
|
export function formatKnowledge(
|
|
271
288
|
entries: Array<{ category: string; title: string; content: string }>,
|
|
289
|
+
maxTokens?: number,
|
|
272
290
|
): string {
|
|
273
291
|
if (!entries.length) return "";
|
|
274
292
|
|
|
293
|
+
// Apply token budget: greedily include entries (already sorted by confidence
|
|
294
|
+
// DESC from the DB query) until the budget is exhausted. Overhead accounts for
|
|
295
|
+
// the section heading and per-entry markdown scaffolding (~50 chars each).
|
|
296
|
+
let included = entries;
|
|
297
|
+
if (maxTokens !== undefined) {
|
|
298
|
+
const HEADER_OVERHEAD = 50; // "## Long-term Knowledge\n### Category\n"
|
|
299
|
+
let used = HEADER_OVERHEAD;
|
|
300
|
+
const fitting: typeof entries = [];
|
|
301
|
+
for (const e of entries) {
|
|
302
|
+
const cost = estimateTokens(e.title + e.content) + 10; // per-entry bullet overhead
|
|
303
|
+
if (used + cost > maxTokens) continue; // skip; keep trying smaller entries
|
|
304
|
+
fitting.push(e);
|
|
305
|
+
used += cost;
|
|
306
|
+
}
|
|
307
|
+
included = fitting;
|
|
308
|
+
if (!included.length) return "";
|
|
309
|
+
}
|
|
310
|
+
|
|
275
311
|
const grouped: Record<string, Array<{ title: string; content: string }>> = {};
|
|
276
|
-
for (const e of entries) {
|
|
312
|
+
for (const e of included) {
|
|
277
313
|
const group = grouped[e.category] ?? (grouped[e.category] = []);
|
|
278
314
|
group.push(e);
|
|
279
315
|
}
|