opencode-lore 0.3.9 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agents-file.ts +1 -1
- package/src/config.ts +2 -0
- package/src/curator.ts +88 -3
- package/src/distillation.ts +1 -1
- package/src/gradient.ts +40 -19
- package/src/index.ts +46 -25
- package/src/ltm.ts +2 -2
- package/src/prompt.ts +82 -10
- package/src/temporal.ts +2 -2
package/package.json
CHANGED
package/src/agents-file.ts
CHANGED
@@ -18,7 +18,7 @@ import { serialize, inline, h, ul, liph, strong, t, root, unescapeMarkdown } fro
 // ---------------------------------------------------------------------------
 
 export const LORE_SECTION_START =
-  "<!-- This section is
+  "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/opencode-lore) -->";
 export const LORE_SECTION_END = "<!-- End lore-managed section -->";
 
 /** Regex matching a valid UUID (v4 or v7) — 8-4-4-4-12 hex groups. */
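For orientation, here is how those two markers frame the lore-managed block inside the agents file. Only the marker strings come from the constants above; the heading and entry between them are invented placeholders:

```ts
// Sketch: the block agents-file.ts rewrites between its two markers.
// Everything between the markers is regenerated on export; the inner
// lines here are invented for illustration.
const managedSection = [
  "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/opencode-lore) -->",
  "## Project knowledge",
  "- (gotcha) FTS quoting: escape double quotes in FTS queries.",
  "<!-- End lore-managed section -->",
].join("\n");
```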
package/src/config.ts
CHANGED
@@ -28,6 +28,8 @@ export const LoreConfig = z.object({
       enabled: z.boolean().default(true),
       onIdle: z.boolean().default(true),
       afterTurns: z.number().min(1).default(10),
+      /** Max knowledge entries per project before consolidation triggers. Default: 25. */
+      maxEntries: z.number().min(10).default(25),
     })
     .default({}),
   pruning: z
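Because the new field carries `.min(10).default(25)`, configs written for 0.3.9 keep parsing unchanged. A minimal sketch of overriding it through the exported schema; the nesting under `curator` follows the `cfg.curator.maxEntries` reads elsewhere in this diff, and it assumes the schema's other top-level sections also carry defaults:

```ts
// Sketch only: parse a user config through the LoreConfig schema shown above.
import { LoreConfig } from "./config";

const cfg = LoreConfig.parse({
  curator: {
    maxEntries: 40, // tolerate more entries before consolidation (min 10, default 25)
  },
});
// Unspecified sibling fields fall back to their zod defaults:
console.log(cfg.curator.afterTurns); // 10
```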
package/src/curator.ts
CHANGED
@@ -2,16 +2,16 @@ import type { createOpencodeClient } from "@opencode-ai/sdk";
 import { config } from "./config";
 import * as temporal from "./temporal";
 import * as ltm from "./ltm";
-import { CURATOR_SYSTEM, curatorUser } from "./prompt";
+import { CURATOR_SYSTEM, curatorUser, CONSOLIDATION_SYSTEM, consolidationUser } from "./prompt";
 import { workerSessionIDs } from "./distillation";
 
 /**
  * Maximum length (chars) for a single knowledge entry's content.
- * ~
+ * ~400 tokens at chars/3. Entries exceeding this are truncated with a notice.
  * The curator prompt also instructs the model to stay within this limit,
  * so truncation is a last-resort safety net.
  */
-const MAX_ENTRY_CONTENT_LENGTH =
+const MAX_ENTRY_CONTENT_LENGTH = 1200;
 
 type Client = ReturnType<typeof createOpencodeClient>;
 
@@ -172,3 +172,88 @@ export async function run(input: {
 export function resetCurationTracker() {
   lastCuratedAt = 0;
 }
+
+/**
+ * Consolidation pass: reviews ALL project entries and merges/trims/deletes
+ * to reduce entry count to cfg.curator.maxEntries. Only runs when the current
+ * entry count exceeds the target. Uses the same worker session as curation.
+ *
+ * Only "update" and "delete" ops are applied — consolidation never creates entries.
+ */
+export async function consolidate(input: {
+  client: Client;
+  projectPath: string;
+  sessionID: string;
+  model?: { providerID: string; modelID: string };
+}): Promise<{ updated: number; deleted: number }> {
+  const cfg = config();
+  if (!cfg.curator.enabled) return { updated: 0, deleted: 0 };
+
+  const entries = ltm.forProject(input.projectPath, cfg.crossProject);
+  if (entries.length <= cfg.curator.maxEntries) return { updated: 0, deleted: 0 };
+
+  const entriesForPrompt = entries.map((e) => ({
+    id: e.id,
+    category: e.category,
+    title: e.title,
+    content: e.content,
+  }));
+
+  const userContent = consolidationUser({
+    entries: entriesForPrompt,
+    targetMax: cfg.curator.maxEntries,
+  });
+  const workerID = await ensureWorkerSession(input.client, input.sessionID);
+  const model = input.model ?? cfg.model;
+  const parts = [
+    { type: "text" as const, text: `${CONSOLIDATION_SYSTEM}\n\n${userContent}` },
+  ];
+
+  await input.client.session.prompt({
+    path: { id: workerID },
+    body: {
+      parts,
+      agent: "lore-curator",
+      ...(model ? { model } : {}),
+    },
+  });
+
+  const msgs = await input.client.session.messages({
+    path: { id: workerID },
+    query: { limit: 2 },
+  });
+  const last = msgs.data?.at(-1);
+  if (!last || last.info.role !== "assistant") return { updated: 0, deleted: 0 };
+
+  const responsePart = last.parts.find((p) => p.type === "text");
+  if (!responsePart || responsePart.type !== "text") return { updated: 0, deleted: 0 };
+
+  const ops = parseOps(responsePart.text);
+  let updated = 0;
+  let deleted = 0;
+
+  for (const op of ops) {
+    // Consolidation only applies update and delete — never create.
+    if (op.op === "update") {
+      const entry = ltm.get(op.id);
+      if (entry) {
+        const content =
+          op.content !== undefined && op.content.length > MAX_ENTRY_CONTENT_LENGTH
+            ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) +
+              " [truncated — entry too long]"
+            : op.content;
+        ltm.update(op.id, { content, confidence: op.confidence });
+        updated++;
+      }
+    } else if (op.op === "delete") {
+      const entry = ltm.get(op.id);
+      if (entry) {
+        ltm.remove(op.id);
+        deleted++;
+      }
+    }
+    // "create" ops are silently ignored — consolidation must not add entries.
+  }
+
+  return { updated, deleted };
+}
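The ops array that `parseOps` consumes here follows the JSON contract spelled out in the prompt.ts changes below. An invented consolidation response might look like the following; note that a stray "create" op would simply be skipped by the loop above:

```ts
// Invented example of a model response to a consolidation prompt.
// IDs and content are placeholders; only update/delete ops are honored.
const sampleResponse = `[
  { "op": "update", "id": "entry-a",
    "content": "Merged insight: FTS queries must escape double quotes; ftsQuery() handles this.",
    "confidence": 0.9 },
  { "op": "delete", "id": "entry-b" },
  { "op": "delete", "id": "entry-c" }
]`;
```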
package/src/distillation.ts
CHANGED
@@ -153,7 +153,7 @@ function storeDistillation(input: {
   const pid = ensureProject(input.projectPath);
   const id = crypto.randomUUID();
   const sourceJson = JSON.stringify(input.sourceIDs);
-  const tokens = Math.ceil(input.observations.length / 4);
+  const tokens = Math.ceil(input.observations.length / 3);
   db()
     .query(
       `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at)
package/src/gradient.ts
CHANGED
@@ -6,9 +6,12 @@ import { normalize } from "./markdown";
 
 type MessageWithParts = { info: Message; parts: Part[] };
 
-//
+// Token estimate: ~3 chars per token. Validated against real API data across
+// 200+ turn-pairs: chars/3 gives ~1.68x ratio (actual/estimate), best among
+// heuristics tested. The gap is overhead (system prompt, tool definitions,
+// conversation structure) which calibratedOverhead captures via EMA.
 function estimate(text: string): number {
-  return Math.ceil(text.length / 4);
+  return Math.ceil(text.length / 3);
 }
 
 function estimateParts(parts: Part[]): number {
@@ -70,6 +73,8 @@ type SessionState = {
   lastWindowMessageIDs: Set<string>;
   /** One-shot force escalation: skip layers below this on the next transform() */
   forceMinLayer: SafetyLayer;
+  /** Token estimate from the most recent transform() output (compressed window) */
+  lastTransformEstimate: number;
   /** Distilled prefix cache (Approach C) */
   prefixCache: PrefixCache | null;
   /** Raw window pin cache (Approach B) */
@@ -85,6 +90,7 @@ function makeSessionState(): SessionState {
     lastLayer: 0,
     lastWindowMessageIDs: new Set(),
     forceMinLayer: 0,
+    lastTransformEstimate: 0,
     prefixCache: null,
     rawWindowCache: null,
   };
@@ -139,22 +145,36 @@ export function getLtmBudget(ltmFraction: number): number {
 }
 
 // Called after each assistant message completes with real token usage data.
-// actualInput = tokens.input + tokens.cache.read
-// messageEstimate = our chars/4 estimate of the messages we sent
+// actualInput = tokens.input + tokens.cache.read + tokens.cache.write
 // sessionID = session that produced this response (for exact-tracking validity)
 // messageCount = number of messages that were sent (for delta estimation)
+//
+// Overhead calibration uses lastTransformEstimate (the token estimate from the
+// compressed window that was actually sent to the model) instead of re-estimating
+// all session messages. On compressed sessions, all-message estimate >> actualInput,
+// which clamped overhead to 0 and broke budget calculations.
 export function calibrate(
   actualInput: number,
-  messageEstimate: number,
   sessionID?: string,
   messageCount?: number,
 ) {
+  // Use the transform's own estimate for the compressed window it produced.
+  // This is the correct baseline: it estimates the same messages the model saw.
+  const messageEstimate = sessionID
+    ? getSessionState(sessionID).lastTransformEstimate
+    : 0;
+
   // Update global overhead calibration (shared across sessions — model-level).
-  const overhead = Math.max(0, actualInput - messageEstimate);
-  calibratedOverhead =
-    calibratedOverhead === null
-      ? overhead
-      : Math.round(calibratedOverhead * 0.7 + overhead * 0.3);
+  // Skip when actualInput > 0 but no transform estimate exists yet (no baseline
+  // to compare against). Allow when both are 0 (test setup to zero overhead) or
+  // when we have a real transform estimate.
+  if (messageEstimate > 0 || actualInput === 0) {
+    const overhead = Math.max(0, actualInput - messageEstimate);
+    calibratedOverhead =
+      calibratedOverhead === null
+        ? overhead
+        : Math.round(calibratedOverhead * 0.7 + overhead * 0.3);
+  }
 
   // Store per-session exact counts for the proactive layer 0 decision.
   if (sessionID !== undefined) {
@@ -800,20 +820,20 @@ function transformInner(input: {
 
   // --- Approach A: Cache-preserving passthrough ---
   // Use exact token count from the previous API response when available.
-
-
+  // Only the delta (messages added since last call) uses chars/3 estimation,
+  // making the layer-0 decision highly accurate from the API's own tokenizer.
   // maxInput = absolute ceiling the API enforces: input_tokens + max_tokens <= context
   const maxInput = contextLimit - outputReserved;
 
   // True when we have real API token data from a previous turn in this session.
-  // When false (first turn / session change), chars/
-  //
-  // being used
+  // When false (first turn / session change), chars/3 estimates may still diverge
+  // from the real tokenizer — so tryFit output must be validated with a safety
+  // multiplier before being used.
   const calibrated = sessState.lastKnownInput > 0;
 
   // On uncalibrated turns, apply this multiplier to tryFit's estimated total to
-  // approximate the real token count.
-  //
+  // approximate the real token count. chars/3 undercounts by ~1.68x on real data,
+  // but overhead EMA captures most of the gap. 1.5 provides a safe margin.
   const UNCALIBRATED_SAFETY = 1.5;
 
   // Returns true if the tryFit result is safe to use: either we have calibrated
@@ -830,7 +850,7 @@ function transformInner(input: {
   // Prevents the calibration oscillation: a compressed turn stores
   // lastKnownInput=100K for a 50-message window, but the next turn's
   // input.messages has 300 raw messages. The delta estimation treats the 250
-  // evicted messages as "new" and undercounts
+  // evicted messages as "new" and undercounts their tokens, producing an
   // expectedInput that fits in layer 0 — but the actual tokens are ~190K.
   // Only applied when calibrated (same session, per-session state) to avoid
   // affecting other sessions including worker sessions.
@@ -851,7 +871,7 @@
     const ltmDelta = ltmTokens - sessState.lastKnownLtm;
     expectedInput = sessState.lastKnownInput + newMsgTokens + ltmDelta;
   } else {
-    // First turn or session change: fall back to chars/
+    // First turn or session change: fall back to chars/3 estimate + overhead.
     const messageTokens = input.messages.reduce((s, m) => s + estimateMessage(m), 0);
     expectedInput = messageTokens + overhead + ltmTokens;
   }
@@ -1009,6 +1029,7 @@ export function transform(input: {
   if (sid) {
     const state = getSessionState(sid);
     state.lastTransformedCount = result.messages.length;
+    state.lastTransformEstimate = result.totalTokens;
     state.lastLayer = result.layer;
     state.lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
   }
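Taken together, the estimation and calibration pieces reduce to a little arithmetic. A self-contained sketch with invented numbers (not the plugin's actual module, just the same formulas as in the hunks above):

```ts
// chars/3 heuristic plus the EMA overhead blend from calibrate() above.
const estimate = (text: string): number => Math.ceil(text.length / 3);

let calibratedOverhead: number | null = null;

function calibrateSketch(actualInput: number, transformEstimate: number): void {
  // Skip when there is no baseline yet (no transform estimate but real input observed).
  if (transformEstimate > 0 || actualInput === 0) {
    const overhead = Math.max(0, actualInput - transformEstimate);
    // EMA: keep 70% of the old value, mix in 30% of the new sample.
    calibratedOverhead =
      calibratedOverhead === null
        ? overhead
        : Math.round(calibratedOverhead * 0.7 + overhead * 0.3);
  }
}

// Turn 1: API reports 12_000 input tokens, window estimate was 9_500.
calibrateSketch(12_000, 9_500); // calibratedOverhead = 2500
// Turn 2: 13_000 actual vs 10_800 estimated, so the overhead sample is 2200.
calibrateSketch(13_000, 10_800); // 2500 * 0.7 + 2200 * 0.3 = 2410
```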
package/src/index.ts
CHANGED
@@ -10,7 +10,6 @@ import {
   setModelLimits,
   needsUrgentDistillation,
   calibrate,
-  estimateMessages,
   setLtmTokens,
   getLtmBudget,
   setForceMinLayer,
@@ -56,7 +55,7 @@ export const LorePlugin: Plugin = async (ctx) => {
   // Prune any corrupted/oversized knowledge entries left by the AGENTS.md
   // backslash-escaping bug or curator hallucinations. Sets confidence → 0
   // (below the 0.2 query threshold) so they stop polluting the context.
-  const pruned = ltm.pruneOversized(
+  const pruned = ltm.pruneOversized(1200);
   if (pruned > 0) {
     console.error(`[lore] pruned ${pruned} oversized knowledge entries (confidence set to 0)`);
   }
@@ -204,28 +203,15 @@ export const LorePlugin: Plugin = async (ctx) => {
         backgroundDistill(msg.sessionID);
       }
 
-      // Calibrate overhead
-      //
-      //
-      //
-      //
-      //
-      const allMsgs = await ctx.client.session.messages({
-        path: { id: msg.sessionID },
-      });
-      if (allMsgs.data) {
-        const withParts = allMsgs.data
-          .filter((m) => m.info.id !== msg.id)
-          .map((m) => ({ info: m.info, parts: m.parts }));
-        const msgEstimate = estimateMessages(withParts);
-        const actualInput =
-          msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
-        // Use the compressed message count (from the last transform output),
-        // not the total DB count. On layer 0 these are equal. On layers 1-4,
-        // the model only saw the compressed window — calibrate must track that
-        // count so the next turn's delta is computed correctly.
-        calibrate(actualInput, msgEstimate, msg.sessionID, getLastTransformedCount(msg.sessionID) || withParts.length);
-      }
+      // Calibrate overhead using real token counts from the API response.
+      // actualInput = all tokens the model processed (input + cache.read + cache.write).
+      // The message estimate comes from the transform's own output (stored in
+      // session state as lastTransformEstimate), NOT from re-estimating all session
+      // messages. On compressed sessions, all-message estimate >> actualInput, which
+      // previously clamped overhead to 0 and broke budget calculations.
+      const actualInput =
+        msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
+      calibrate(actualInput, msg.sessionID, getLastTransformedCount(msg.sessionID));
     }
   }
 } catch {
@@ -301,6 +287,29 @@ export const LorePlugin: Plugin = async (ctx) => {
         turnsSinceCuration = 0;
       }
 
+      // Consolidate entries if count exceeds cfg.curator.maxEntries.
+      // Runs after normal curation so newly created entries are counted.
+      // Only triggers when truly over the limit to avoid redundant LLM calls.
+      try {
+        const allEntries = ltm.forProject(projectPath);
+        if (allEntries.length > cfg.curator.maxEntries) {
+          console.error(
+            `[lore] entry count ${allEntries.length} exceeds maxEntries ${cfg.curator.maxEntries} — running consolidation`,
+          );
+          const { updated, deleted } = await curator.consolidate({
+            client: ctx.client,
+            projectPath,
+            sessionID,
+            model: cfg.model,
+          });
+          if (updated > 0 || deleted > 0) {
+            console.error(`[lore] consolidation: ${updated} updated, ${deleted} deleted`);
+          }
+        }
+      } catch (e) {
+        console.error("[lore] consolidation error:", e);
+      }
+
       // Prune temporal messages after distillation and curation have run.
       // Pass 1: TTL — remove distilled messages older than retention period.
       // Pass 2: Size cap — evict oldest distilled messages if over the limit.
@@ -371,12 +380,24 @@ export const LorePlugin: Plugin = async (ctx) => {
       if (formatted) {
         // Track how many tokens we actually consumed so the gradient manager
         // can deduct them from the usable budget for message injection.
-        const ltmTokenCount = Math.ceil(formatted.length / 4);
+        const ltmTokenCount = Math.ceil(formatted.length / 3);
         setLtmTokens(ltmTokenCount);
         output.system.push(formatted);
       } else {
         setLtmTokens(0);
       }
+
+      // Remind the agent to include the agents file in commits.
+      // It is always modified after the lore export runs (post-session) so it
+      // appears as unstaged when the agent goes to commit — the agent must not
+      // skip it just because it looks auto-generated.
+      if (cfg.agentsFile.enabled) {
+        output.system.push(
+          `When making git commits, always check if ${cfg.agentsFile.path} has ` +
+            `unstaged changes and include it in the commit. This file contains ` +
+            `shared project knowledge managed by lore and must be version-controlled.`,
+        );
+      }
     },
 
     // Transform message history: distilled prefix + raw recent.
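The `actualInput` sum deserves a note: cache reads and writes are added back in because they still occupy the context window the model processed, even though providers bill them differently. With invented numbers:

```ts
// Illustrative token usage as reported on an assistant message (field shape
// as used in this diff; the numbers are made up).
const tokens = { input: 1_200, cache: { read: 48_000, write: 3_500 } };

const actualInput = tokens.input + tokens.cache.read + tokens.cache.write;
// 1_200 + 48_000 + 3_500 = 52_700 tokens actually in the model's context
```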
package/src/ltm.ts
CHANGED
@@ -2,9 +2,9 @@ import { uuidv7 } from "uuidv7";
 import { db, ensureProject } from "./db";
 import { ftsQuery } from "./temporal";
 
-//
+// ~3 chars per token — validated as best heuristic against real API data.
 function estimateTokens(text: string): number {
-  return Math.ceil(text.length / 4);
+  return Math.ceil(text.length / 3);
 }
 
 export type KnowledgeEntry = {
package/src/prompt.ts
CHANGED
@@ -195,12 +195,24 @@ Do NOT extract:
 - Restatements of what the code obviously does (e.g. "the auth module handles authentication")
 
 BREVITY IS CRITICAL — each entry must be concise:
-- content MUST be under
+- content MUST be under 150 words (~600 characters). Capture ONE specific actionable
+  insight in 2-3 sentences. Prefer terse technical language.
+- Each "gotcha": one specific trap + its fix in 1-2 sentences
+- Each "architecture": one design decision and its key constraint
 - Focus on the actionable insight, not the full story behind it
-- If a pattern requires more detail, split into multiple focused entries
+- If a pattern requires more detail, split into multiple focused entries (each under 150 words)
 - Omit code examples unless a single short snippet is essential
 - Never include full file contents, large diffs, or complete command outputs
 
+PREFER UPDATES OVER CREATES:
+- Before creating a new entry, always check if an existing entry covers the same system
+  or component. Update the existing entry rather than creating a new one.
+- When updating, REPLACE the full content with a concise rewrite — do not append to
+  the existing content or repeat what was already there.
+- If multiple existing entries cover the same system from different angles (e.g. different
+  bugs in the same module), consolidate them: update one with merged insights, delete the
+  rest. Fewer, denser entries are better than many scattered ones.
+
 crossProject flag:
 - Default is true — most useful knowledge is worth sharing across projects
 - Set crossProject to false for things that are meaningless outside this specific repo (e.g. a config path, a project-local naming convention that conflicts with your usual style)
@@ -211,14 +223,14 @@ Produce a JSON array of operations:
   "op": "create",
   "category": "decision" | "pattern" | "preference" | "architecture" | "gotcha",
   "title": "Short descriptive title",
-  "content": "Concise knowledge entry — under
+  "content": "Concise knowledge entry — under 150 words",
   "scope": "project" | "global",
   "crossProject": true
 },
 {
   "op": "update",
   "id": "existing-entry-id",
-  "content": "Updated content — under
+  "content": "Updated content — under 150 words",
   "confidence": 0.0-1.0
 },
 {
@@ -241,8 +253,9 @@ export function curatorUser(input: {
     content: string;
   }>;
 }): string {
-  const
-
+  const count = input.existing.length;
+  const existing = count
+    ? `Existing knowledge entries (${count} total — you may update or delete these):\n${input.existing.map((e) => `- [${e.id}] (${e.category}) ${e.title}: ${e.content}`).join("\n")}`
     : "No existing knowledge entries.";
   return `${existing}
@@ -252,7 +265,67 @@ Recent conversation to extract knowledge from:
 ${input.messages}
 
 ---
-IMPORTANT:
+IMPORTANT:
+1. Prefer updating existing entries over creating new ones. If a new insight refines or
+   extends an existing entry on the same topic, update that entry — don't create a new one.
+2. When updating, REPLACE the content with a complete rewrite — never append.
+3. If entries cover the same system from different angles, merge them: update one, delete the rest.
+4. Only create a new entry for genuinely distinct knowledge with no existing home.
+5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.`;
+}
+
+/**
+ * System prompt for the consolidation pass.
+ * Unlike the normal curator (which extracts from conversation), consolidation
+ * reviews the FULL entry corpus and aggressively merges/trims/deletes to reduce
+ * entry count while preserving the most actionable knowledge.
+ */
+export const CONSOLIDATION_SYSTEM = `You are a long-term memory curator performing a consolidation pass. The knowledge base has grown too large and needs to be trimmed.
+
+Your goal: reduce the entry count to the target maximum while preserving the most valuable knowledge.
+
+CONSOLIDATION RULES:
+1. MERGE related entries — if multiple entries describe the same system, module, or concept
+   from different angles (e.g. several bug fixes in the same component), merge them into
+   ONE concise entry. Use an "update" op for the surviving entry and "delete" ops for the rest.
+2. TRIM verbose entries — any entry over 150 words must be trimmed to its essential insight.
+   Use an "update" op with the rewritten content.
+3. DELETE low-value entries:
+   - Stale entries about bugs that have been fixed and no longer need gotcha warnings
+   - Entries whose knowledge is fully subsumed by another entry
+   - Entries about one-off incidents with no recurring applicability
+   - General advice available in any documentation
+4. PRESERVE:
+   - Entries describing non-obvious design decisions specific to this codebase
+   - Entries about recurring traps that a developer would hit again
+   - Entries that capture a hard-won gotcha with a concrete fix
+
+OUTPUT: A JSON array of "update" and "delete" ops only. No "create" ops — you are not
+extracting new knowledge, only consolidating existing knowledge.
+
+- "update": Replace content with a concise rewrite (under 150 words). Use to merge survivors or trim verbose entries.
+- "delete": Remove entries that are merged, stale, or low-value.
+
+Output ONLY valid JSON. No markdown fences, no explanation, no preamble.`;
+
+export function consolidationUser(input: {
+  entries: Array<{
+    id: string;
+    category: string;
+    title: string;
+    content: string;
+  }>;
+  targetMax: number;
+}): string {
+  const count = input.entries.length;
+  const listed = input.entries
+    .map((e) => `- [${e.id}] (${e.category}) ${e.title}: ${e.content}`)
+    .join("\n");
+  return `Current knowledge entries (${count} total, target max: ${input.targetMax}):
+
+${listed}
+
+Produce update/delete ops to reduce entry count to at most ${input.targetMax}. Prioritize merging related entries and trimming verbose ones over outright deletion.`;
 }
 
 // Format distillations for injection into the message context.
@@ -286,10 +359,9 @@ export function formatDistillations(
   return sections.join("\n\n");
 }
 
-//
-// Consistent with gradient.ts: ~4 chars per token.
+// ~3 chars per token — validated as best heuristic against real API data.
 function estimateTokens(text: string): number {
-  return Math.ceil(text.length / 4);
+  return Math.ceil(text.length / 3);
 }
 
 export function formatKnowledge(
package/src/temporal.ts
CHANGED
@@ -1,9 +1,9 @@
 import { db, ensureProject } from "./db";
 import type { Message, Part } from "@opencode-ai/sdk";
 
-//
+// ~3 chars per token — validated as best heuristic against real API data.
 function estimate(text: string): number {
-  return Math.ceil(text.length / 4);
+  return Math.ceil(text.length / 3);
 }
 
 function partsToText(parts: Part[]): string {