opencode-lore 0.2.5 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/gradient.ts +46 -6
- package/src/index.ts +16 -13
- package/src/ltm.ts +96 -70
package/package.json
CHANGED
package/src/gradient.ts
CHANGED
|
@@ -63,6 +63,26 @@ export function getLastTransformedCount(): number {
|
|
|
63
63
|
return lastTransformedCount;
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
+
/**
 * Returns the layer recorded by the most recent transform() call. For testing.
 *
 * Reads the module-level `lastLayer` without modifying it. The value is 0
 * before any transform() call and again after resetCalibration().
 */
|
|
67
|
+
export function getLastLayer(): SafetyLayer {
|
|
68
|
+
return lastLayer;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// The layer used by the most recent transform() call.
|
|
72
|
+
// Used for the sticky-layer guard: once gradient mode activates (layer >= 1),
|
|
73
|
+
// we don't allow fallback to layer 0 until the session genuinely shrinks
|
|
74
|
+
// (e.g. after compaction). This prevents the calibration oscillation where a
|
|
75
|
+
// compressed turn records 100K + 50-msg count, and the next turn's delta
|
|
76
|
+
// estimation treats 250 evicted messages as "new", undercounts their tokens
|
|
77
|
+
// via chars/4, and incorrectly concludes layer 0 passes.
|
|
78
|
+
let lastLayer: SafetyLayer = 0;
|
|
79
|
+
|
|
80
|
+
// The set of message IDs included in the most recent transform() output.
|
|
81
|
+
// Used for accurate delta estimation: instead of counting messages by index
|
|
82
|
+
// (which breaks after compression changes the window), we identify exactly
|
|
83
|
+
// which messages are genuinely new since the last window.
|
|
84
|
+
let lastWindowMessageIDs: Set<string> = new Set();
|
|
85
|
+
|
|
66
86
|
// --- Force escalation ---
|
|
67
87
|
// Set when the API returns "prompt is too long" — forces the transform to skip
|
|
68
88
|
// layer 0 (and optionally layer 1) on the next call to ensure the context is
|
|
@@ -153,6 +173,8 @@ export function resetCalibration() {
|
|
|
153
173
|
lastKnownMessageCount = 0;
|
|
154
174
|
lastTransformedCount = 0;
|
|
155
175
|
forceMinLayer = 0;
|
|
176
|
+
lastLayer = 0;
|
|
177
|
+
lastWindowMessageIDs = new Set();
|
|
156
178
|
}
|
|
157
179
|
|
|
158
180
|
type Distillation = {
|
|
@@ -724,7 +746,7 @@ function transformInner(input: {
|
|
|
724
746
|
// When the API previously rejected with "prompt is too long", skip layers
|
|
725
747
|
// below the forced minimum to ensure enough trimming on the next attempt.
|
|
726
748
|
// One-shot: consumed here and reset to 0.
|
|
727
|
-
|
|
749
|
+
let effectiveMinLayer = forceMinLayer;
|
|
728
750
|
forceMinLayer = 0;
|
|
729
751
|
|
|
730
752
|
// --- Approach A: Cache-preserving passthrough ---
|
|
@@ -754,13 +776,29 @@ function transformInner(input: {
|
|
|
754
776
|
return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
|
|
755
777
|
}
|
|
756
778
|
|
|
779
|
+
// --- Sticky layer guard (Option C) ---
|
|
780
|
+
// After a compressed turn (layer >= 1), don't allow layer 0 re-entry until
|
|
781
|
+
// the session genuinely shrinks (e.g. after compaction deletes messages).
|
|
782
|
+
// Prevents the calibration oscillation: a compressed turn stores
|
|
783
|
+
// lastKnownInput=100K for a 50-message window, but the next turn's
|
|
784
|
+
// input.messages has 300 raw messages. The delta estimation treats the 250
|
|
785
|
+
// evicted messages as "new" and undercounts them via chars/4, producing an
|
|
786
|
+
// expectedInput that fits in layer 0 — but the actual tokens are ~190K.
|
|
787
|
+
// Only applied when calibrated (same session) to avoid affecting other sessions.
|
|
788
|
+
if (calibrated && lastLayer >= 1 && input.messages.length >= lastKnownMessageCount) {
|
|
789
|
+
effectiveMinLayer = Math.max(effectiveMinLayer, 1) as SafetyLayer;
|
|
790
|
+
}
|
|
791
|
+
|
|
757
792
|
let expectedInput: number;
|
|
758
793
|
if (calibrated) {
|
|
759
|
-
// Exact approach: prior API count + estimate of only
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
794
|
+
// Exact approach: prior API count + estimate of only genuinely new messages.
|
|
795
|
+
// Use message ID tracking (Option B) to identify new messages accurately.
|
|
796
|
+
// After compression, the "last window" is a subset of the full message array —
|
|
797
|
+
// counting by index would treat evicted messages as new (off-by-250 error).
|
|
798
|
+
const newMessages = lastWindowMessageIDs.size > 0
|
|
799
|
+
? input.messages.filter((m) => !lastWindowMessageIDs.has(m.info.id))
|
|
800
|
+
: input.messages.slice(-Math.max(0, input.messages.length - lastKnownMessageCount));
|
|
801
|
+
const newMsgTokens = newMessages.reduce((s, m) => s + estimateMessage(m), 0);
|
|
764
802
|
const ltmDelta = ltmTokens - lastKnownLtm;
|
|
765
803
|
expectedInput = lastKnownInput + newMsgTokens + ltmDelta;
|
|
766
804
|
} else {
|
|
@@ -918,6 +956,8 @@ export function transform(input: {
|
|
|
918
956
|
}): TransformResult {
|
|
919
957
|
const result = transformInner(input);
|
|
920
958
|
lastTransformedCount = result.messages.length;
|
|
959
|
+
lastLayer = result.layer;
|
|
960
|
+
lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
|
|
921
961
|
return result;
|
|
922
962
|
}
|
|
923
963
|
|
package/src/index.ts
CHANGED
|
@@ -395,28 +395,31 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
395
395
|
// so the append-only sequence stays intact for prompt caching.
|
|
396
396
|
if (result.layer > 0) {
|
|
397
397
|
// The API requires the conversation to end with a user message.
|
|
398
|
-
// Drop trailing
|
|
399
|
-
//
|
|
400
|
-
//
|
|
401
|
-
//
|
|
398
|
+
// Drop trailing pure-text assistant messages (no tool parts), which would
|
|
399
|
+
// cause an Anthropic "does not support assistant message prefill" error.
|
|
400
|
+
//
|
|
401
|
+
// Crucially, assistant messages that contain tool parts (completed OR pending)
|
|
402
|
+
// must NOT be dropped:
|
|
403
|
+
// - Completed tool parts: OpenCode's SDK converts these into tool_result blocks
|
|
404
|
+
// sent as user-role messages at the API level. The conversation already ends
|
|
405
|
+
// with a user message — dropping would strip the entire current agentic turn
|
|
406
|
+
// and cause an infinite tool-call loop (the model restarts from scratch).
|
|
407
|
+
// - Pending tool parts: the tool call hasn't returned yet; dropping would make
|
|
408
|
+
// the model re-issue the same tool call on the next turn.
|
|
402
409
|
while (
|
|
403
410
|
result.messages.length > 0 &&
|
|
404
411
|
result.messages.at(-1)!.info.role !== "user"
|
|
405
412
|
) {
|
|
406
413
|
const last = result.messages.at(-1)!;
|
|
407
|
-
const
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
console.error(
|
|
412
|
-
"[lore] WARN: cannot drop trailing assistant message with pending tool call — may cause prefill error. id:",
|
|
413
|
-
last.info.id,
|
|
414
|
-
);
|
|
414
|
+
const hasToolParts = last.parts.some((p) => p.type === "tool");
|
|
415
|
+
if (hasToolParts) {
|
|
416
|
+
// Tool parts → tool_result (user-role) at the API level → no prefill error.
|
|
417
|
+
// Stop dropping; the conversation ends correctly as-is.
|
|
415
418
|
break;
|
|
416
419
|
}
|
|
417
420
|
const dropped = result.messages.pop()!;
|
|
418
421
|
console.error(
|
|
419
|
-
"[lore] WARN: dropping trailing",
|
|
422
|
+
"[lore] WARN: dropping trailing pure-text",
|
|
420
423
|
dropped.info.role,
|
|
421
424
|
"message to prevent prefill error. id:",
|
|
422
425
|
dropped.info.id,
|
package/src/ltm.ts
CHANGED
|
@@ -135,19 +135,72 @@ export function forProject(
|
|
|
135
135
|
.all(pid) as KnowledgeEntry[];
|
|
136
136
|
}
|
|
137
137
|
|
|
138
|
+
/** A knowledge entry paired with its relevance score (higher sorts first). */
type Scored = { entry: KnowledgeEntry; score: number };

/** Max entries per pool to include on first turn when no session context exists. */
const NO_CONTEXT_FALLBACK_CAP = 10;

/**
 * Number of top-confidence project entries always included as a safety net,
 * even when they don't match any session context terms. This guards against
 * the coarse term-overlap scoring accidentally excluding important project
 * knowledge.
 */
const PROJECT_SAFETY_NET = 5;

/**
 * Matches every character that is NOT a letter, combining mark, digit,
 * underscore or whitespace. This is a Unicode-aware superset of `[^\w\s]`:
 * ASCII input is treated exactly as before, but accented and non-Latin
 * letters are no longer replaced by spaces (which used to split "café" into
 * the unusable fragment "caf").
 */
const NON_TERM_CHARS = /[^\p{L}\p{M}\p{N}_\s]/gu;

/**
 * Normalize text for term matching: punctuation becomes spaces, then the
 * result is lower-cased. Shared by term extraction and entry scoring so both
 * sides of the comparison are always normalized the same way.
 */
function normalizeForTerms(text: string): string {
  return text.replace(NON_TERM_CHARS, " ").toLowerCase();
}

/**
 * Score entries by term overlap with session context.
 *
 * @param entries  Candidate knowledge entries.
 * @param topTerms Normalized (lower-case) context terms, e.g. from extractTopTerms().
 * @returns One Scored per entry, in input order, with
 *          score = (fraction of topTerms found in title + content) * entry.confidence.
 *          Every score is 0 when topTerms is empty.
 */
function scoreEntries(
  entries: KnowledgeEntry[],
  topTerms: string[],
): Scored[] {
  // Nothing to match against: every entry has zero relevance.
  if (topTerms.length === 0) {
    return entries.map((entry) => ({ entry, score: 0 }));
  }

  return entries.map((entry) => {
    const haystack = normalizeForTerms(entry.title + " " + entry.content);
    // Substring match (not whole-word): a term also hits inside longer words.
    let hits = 0;
    for (const term of topTerms) {
      if (haystack.includes(term)) hits++;
    }
    const relevance = hits / topTerms.length;
    return { entry, score: relevance * entry.confidence };
  });
}

/**
 * Extract the most frequent meaningful terms from text.
 *
 * @param text      Raw session context.
 * @param limit     Maximum number of terms returned (default 30).
 * @param minLength Minimum term length in UTF-16 code units (default 4, i.e. ">3 chars").
 * @returns Lower-cased unique terms sorted by descending frequency; ties keep
 *          first-occurrence order because Array.prototype.sort is stable.
 */
function extractTopTerms(text: string, limit = 30, minLength = 4): string[] {
  const freq = new Map<string, number>();
  for (const word of normalizeForTerms(text).split(/\s+/)) {
    if (word.length < minLength) continue;
    freq.set(word, (freq.get(word) ?? 0) + 1);
  }

  return [...freq.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, Math.max(0, limit))
    .map(([word]) => word);
}
|
|
188
|
+
|
|
138
189
|
/**
|
|
139
190
|
* Build a relevance-ranked, budget-capped list of knowledge entries for injection
|
|
140
191
|
* into the system prompt of a live session.
|
|
141
192
|
*
|
|
142
193
|
* Strategy:
|
|
143
|
-
* 1.
|
|
144
|
-
*
|
|
145
|
-
* 2.
|
|
146
|
-
*
|
|
147
|
-
*
|
|
148
|
-
*
|
|
149
|
-
*
|
|
150
|
-
*
|
|
194
|
+
* 1. Both project-specific and cross-project entries are scored for relevance
|
|
195
|
+
* against recent session context (last distillation + recent raw messages).
|
|
196
|
+
* 2. Project entries get a safety net: the top PROJECT_SAFETY_NET entries by
|
|
197
|
+
* confidence are always included even if they have zero relevance score.
|
|
198
|
+
* This ensures the most important project knowledge is never lost to
|
|
199
|
+
* coarse term-overlap scoring.
|
|
200
|
+
* 3. All scored entries are merged into a single pool and greedily packed
|
|
201
|
+
* into the token budget by score descending.
|
|
202
|
+
* 4. If there's no session context yet (first turn), fall back to top entries
|
|
203
|
+
* by confidence only (capped at NO_CONTEXT_FALLBACK_CAP per pool).
|
|
151
204
|
*
|
|
152
205
|
* @param projectPath Current project path
|
|
153
206
|
* @param sessionID Current session ID (for context extraction)
|
|
@@ -160,7 +213,7 @@ export function forSession(
|
|
|
160
213
|
): KnowledgeEntry[] {
|
|
161
214
|
const pid = ensureProject(projectPath);
|
|
162
215
|
|
|
163
|
-
// --- 1. Load project-specific entries
|
|
216
|
+
// --- 1. Load project-specific entries ---
|
|
164
217
|
const projectEntries = db()
|
|
165
218
|
.query(
|
|
166
219
|
`SELECT * FROM knowledge
|
|
@@ -181,7 +234,6 @@ export function forSession(
|
|
|
181
234
|
if (!crossEntries.length && !projectEntries.length) return [];
|
|
182
235
|
|
|
183
236
|
// --- 3. Build session context for relevance scoring ---
|
|
184
|
-
// Combine the most recent distillation text + last ~10 raw messages for this session
|
|
185
237
|
let sessionContext = "";
|
|
186
238
|
if (sessionID) {
|
|
187
239
|
const distRow = db()
|
|
@@ -206,79 +258,53 @@ export function forSession(
|
|
|
206
258
|
}
|
|
207
259
|
}
|
|
208
260
|
|
|
209
|
-
// --- 4. Score
|
|
210
|
-
|
|
211
|
-
type Scored = { entry: KnowledgeEntry; score: number };
|
|
261
|
+
// --- 4. Score both pools by relevance ---
|
|
262
|
+
let scoredProject: Scored[];
|
|
212
263
|
let scoredCross: Scored[];
|
|
213
264
|
|
|
214
265
|
if (sessionContext.trim().length > 20) {
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
scoredCross = crossEntries.
|
|
233
|
-
const haystack =
|
|
234
|
-
(entry.title + " " + entry.content).replace(/[^\w\s]/g, " ").toLowerCase();
|
|
235
|
-
let hits = 0;
|
|
236
|
-
for (const term of topTerms) {
|
|
237
|
-
// Count how many context terms appear in this entry (simple overlap)
|
|
238
|
-
if (haystack.includes(term)) hits++;
|
|
239
|
-
}
|
|
240
|
-
// Score = fraction of top terms matched, weighted by confidence
|
|
241
|
-
const relevance = topTerms.length > 0 ? hits / topTerms.length : 0;
|
|
242
|
-
return { entry, score: relevance * entry.confidence };
|
|
243
|
-
});
|
|
244
|
-
|
|
245
|
-
// Only keep entries with at least one term match
|
|
246
|
-
scoredCross = scoredCross.filter((s) => s.score > 0);
|
|
266
|
+
const topTerms = extractTopTerms(sessionContext);
|
|
267
|
+
|
|
268
|
+
// Score project entries — include matched + safety net of top-N by confidence
|
|
269
|
+
const rawScored = scoreEntries(projectEntries, topTerms);
|
|
270
|
+
const matched = rawScored.filter((s) => s.score > 0);
|
|
271
|
+
const matchedIds = new Set(matched.map((s) => s.entry.id));
|
|
272
|
+
|
|
273
|
+
// Safety net: the first PROJECT_SAFETY_NET unmatched entries in projectEntries order (these are the top entries by confidence only if the query above sorts by confidence — confirm).
|
|
274
|
+
// Given a tiny score (0.001 * confidence) so they usually sort below genuinely matched entries (a very-low-confidence match can still score lower).
|
|
275
|
+
const safetyNet = projectEntries
|
|
276
|
+
.filter((e) => !matchedIds.has(e.id))
|
|
277
|
+
.slice(0, PROJECT_SAFETY_NET)
|
|
278
|
+
.map((e) => ({ entry: e, score: 0.001 * e.confidence }));
|
|
279
|
+
|
|
280
|
+
scoredProject = [...matched, ...safetyNet];
|
|
281
|
+
|
|
282
|
+
// Score cross-project entries — only include entries with at least one term match
|
|
283
|
+
scoredCross = scoreEntries(crossEntries, topTerms).filter((s) => s.score > 0);
|
|
247
284
|
} else {
|
|
248
|
-
// No session context
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
score: entry.confidence
|
|
252
|
-
|
|
285
|
+
// No session context — fall back to top entries by confidence, capped
|
|
286
|
+
scoredProject = projectEntries
|
|
287
|
+
.slice(0, NO_CONTEXT_FALLBACK_CAP)
|
|
288
|
+
.map((entry) => ({ entry, score: entry.confidence }));
|
|
289
|
+
scoredCross = crossEntries
|
|
290
|
+
.slice(0, NO_CONTEXT_FALLBACK_CAP)
|
|
291
|
+
.map((entry) => ({ entry, score: entry.confidence }));
|
|
253
292
|
}
|
|
254
293
|
|
|
255
|
-
//
|
|
256
|
-
|
|
294
|
+
// --- 5. Merge and pack into token budget by score descending ---
|
|
295
|
+
const allScored = [...scoredProject, ...scoredCross];
|
|
296
|
+
allScored.sort((a, b) => b.score - a.score);
|
|
257
297
|
|
|
258
|
-
|
|
259
|
-
// Project entries get first pick (fully relevant); cross entries fill remaining budget.
|
|
260
|
-
// Use a greedy fit: iterate candidates and include if they fit.
|
|
261
|
-
const HEADER_OVERHEAD_TOKENS = 15; // "## Long-term Knowledge\n"
|
|
298
|
+
const HEADER_OVERHEAD_TOKENS = 15;
|
|
262
299
|
let used = HEADER_OVERHEAD_TOKENS;
|
|
263
300
|
const result: KnowledgeEntry[] = [];
|
|
264
301
|
|
|
265
|
-
|
|
302
|
+
for (const { entry } of allScored) {
|
|
303
|
+
if (used >= maxTokens) break;
|
|
266
304
|
const cost = estimateTokens(entry.title + entry.content) + 10;
|
|
267
|
-
if (used + cost > maxTokens)
|
|
305
|
+
if (used + cost > maxTokens) continue;
|
|
268
306
|
result.push(entry);
|
|
269
307
|
used += cost;
|
|
270
|
-
return true;
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
// Project-specific first
|
|
274
|
-
for (const entry of projectEntries) {
|
|
275
|
-
tryAdd(entry);
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
// Then cross-project by relevance score
|
|
279
|
-
for (const { entry } of scoredCross) {
|
|
280
|
-
if (used >= maxTokens) break;
|
|
281
|
-
tryAdd(entry);
|
|
282
308
|
}
|
|
283
309
|
|
|
284
310
|
return result;
|