opencode-lore 0.2.4 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/package.json +1 -1
  2. package/src/gradient.ts +95 -21
  3. package/src/ltm.ts +96 -70
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-lore",
3
- "version": "0.2.4",
3
+ "version": "0.2.7",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
package/src/gradient.ts CHANGED
@@ -63,6 +63,26 @@ export function getLastTransformedCount(): number {
63
63
  return lastTransformedCount;
64
64
  }
65
65
 
66
+ /** Returns the layer used by the most recent transform() call. For testing. */
67
+ export function getLastLayer(): SafetyLayer {
68
+ return lastLayer;
69
+ }
70
+
71
+ // The layer used by the most recent transform() call.
72
+ // Used for the sticky-layer guard: once gradient mode activates (layer >= 1),
73
+ // we don't allow fallback to layer 0 until the session genuinely shrinks
74
+ // (e.g. after compaction). This prevents the calibration oscillation where a
75
+ // compressed turn records 100K + 50-msg count, and the next turn's delta
76
+ // estimation treats 250 evicted messages as "new", undercounts their tokens
77
+ // via chars/4, and incorrectly concludes layer 0 passes.
78
+ let lastLayer: SafetyLayer = 0;
79
+
80
+ // The set of message IDs included in the most recent transform() output.
81
+ // Used for accurate delta estimation: instead of counting messages by index
82
+ // (which breaks after compression changes the window), we identify exactly
83
+ // which messages are genuinely new since the last window.
84
+ let lastWindowMessageIDs: Set<string> = new Set();
85
+
66
86
  // --- Force escalation ---
67
87
  // Set when the API returns "prompt is too long" — forces the transform to skip
68
88
  // layer 0 (and optionally layer 1) on the next call to ensure the context is
@@ -153,6 +173,8 @@ export function resetCalibration() {
153
173
  lastKnownMessageCount = 0;
154
174
  lastTransformedCount = 0;
155
175
  forceMinLayer = 0;
176
+ lastLayer = 0;
177
+ lastWindowMessageIDs = new Set();
156
178
  }
157
179
 
158
180
  type Distillation = {
@@ -724,7 +746,7 @@ function transformInner(input: {
724
746
  // When the API previously rejected with "prompt is too long", skip layers
725
747
  // below the forced minimum to ensure enough trimming on the next attempt.
726
748
  // One-shot: consumed here and reset to 0.
727
- const effectiveMinLayer = forceMinLayer;
749
+ let effectiveMinLayer = forceMinLayer;
728
750
  forceMinLayer = 0;
729
751
 
730
752
  // --- Approach A: Cache-preserving passthrough ---
@@ -754,13 +776,29 @@ function transformInner(input: {
754
776
  return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
755
777
  }
756
778
 
779
+ // --- Sticky layer guard (Option C) ---
780
+ // After a compressed turn (layer >= 1), don't allow layer 0 re-entry until
781
+ // the session genuinely shrinks (e.g. after compaction deletes messages).
782
+ // Prevents the calibration oscillation: a compressed turn stores
783
+ // lastKnownInput=100K for a 50-message window, but the next turn's
784
+ // input.messages has 300 raw messages. The delta estimation treats the 250
785
+ // evicted messages as "new" and undercounts them via chars/4, producing an
786
+ // expectedInput that fits in layer 0 — but the actual tokens are ~190K.
787
+ // Only applied when calibrated (same session) to avoid affecting other sessions.
788
+ if (calibrated && lastLayer >= 1 && input.messages.length >= lastKnownMessageCount) {
789
+ effectiveMinLayer = Math.max(effectiveMinLayer, 1) as SafetyLayer;
790
+ }
791
+
757
792
  let expectedInput: number;
758
793
  if (calibrated) {
759
- // Exact approach: prior API count + estimate of only the new messages.
760
- const newMsgCount = Math.max(0, input.messages.length - lastKnownMessageCount);
761
- const newMsgTokens = newMsgCount > 0
762
- ? input.messages.slice(-newMsgCount).reduce((s, m) => s + estimateMessage(m), 0)
763
- : 0;
794
+ // Exact approach: prior API count + estimate of only genuinely new messages.
795
+ // Use message ID tracking (Option B) to identify new messages accurately.
796
+ // After compression, the "last window" is a subset of the full message array —
797
+ // counting by index would treat evicted messages as new (off-by-250 error).
798
+ const newMessages = lastWindowMessageIDs.size > 0
799
+ ? input.messages.filter((m) => !lastWindowMessageIDs.has(m.info.id))
800
+ : input.messages.slice(-Math.max(0, input.messages.length - lastKnownMessageCount));
801
+ const newMsgTokens = newMessages.reduce((s, m) => s + estimateMessage(m), 0);
764
802
  const ltmDelta = ltmTokens - lastKnownLtm;
765
803
  expectedInput = lastKnownInput + newMsgTokens + ltmDelta;
766
804
  } else {
@@ -918,6 +956,8 @@ export function transform(input: {
918
956
  }): TransformResult {
919
957
  const result = transformInner(input);
920
958
  lastTransformedCount = result.messages.length;
959
+ lastLayer = result.layer;
960
+ lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
921
961
  return result;
922
962
  }
923
963
 
@@ -926,6 +966,23 @@ export function estimateMessages(messages: MessageWithParts[]): number {
926
966
  return messages.reduce((sum, m) => sum + estimateMessage(m), 0);
927
967
  }
928
968
 
969
+ // Identify the current agentic turn: the last user message plus all subsequent
970
+ // assistant messages that share its ID as parentID. These messages form an atomic
971
+ // unit — the model must see all of them or it will lose track of its own prior
972
+ // tool calls and re-issue them in an infinite loop.
973
+ function currentTurnStart(messages: MessageWithParts[]): number {
974
+ // Find the last user message
975
+ let lastUserIdx = -1;
976
+ for (let i = messages.length - 1; i >= 0; i--) {
977
+ if (messages[i].info.role === "user") {
978
+ lastUserIdx = i;
979
+ break;
980
+ }
981
+ }
982
+ if (lastUserIdx === -1) return 0; // no user message — treat all as current turn
983
+ return lastUserIdx;
984
+ }
985
+
929
986
  function tryFit(input: {
930
987
  messages: MessageWithParts[];
931
988
  prefix: MessageWithParts[];
@@ -939,32 +996,49 @@ function tryFit(input: {
939
996
  if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
940
997
  return null;
941
998
 
942
- // Walk backwards through messages, accumulating tokens within raw budget
943
- let rawTokens = 0;
944
- let cutoff = input.messages.length;
999
+ // Identify the current turn (last user message + all following assistant messages).
1000
+ // These are always included — they must never be evicted. If they alone exceed the
1001
+ // raw budget, escalate to the next layer (which strips tool outputs to reduce size).
1002
+ const turnStart = currentTurnStart(input.messages);
1003
+ const currentTurn = input.messages.slice(turnStart);
1004
+ const currentTurnTokens = currentTurn.reduce((s, m) => s + estimateMessage(m), 0);
1005
+
1006
+ if (currentTurnTokens > input.rawBudget) {
1007
+ // Current turn alone exceeds budget — can't fit even with everything else dropped.
1008
+ // Signal failure so the caller escalates to the next layer (tool-output stripping).
1009
+ return null;
1010
+ }
1011
+
1012
+ // Walk backwards through older messages (before the current turn),
1013
+ // filling the remaining budget after reserving space for the current turn.
1014
+ const olderMessages = input.messages.slice(0, turnStart);
1015
+ const remainingBudget = input.rawBudget - currentTurnTokens;
1016
+ let olderTokens = 0;
1017
+ let cutoff = olderMessages.length; // default: include none of the older messages
945
1018
  const protectedTurns = input.protectedTurns ?? 0;
946
- let turns = 0;
947
1019
 
948
- for (let i = input.messages.length - 1; i >= 0; i--) {
949
- const msg = input.messages[i];
950
- if (msg.info.role === "user") turns++;
1020
+ for (let i = olderMessages.length - 1; i >= 0; i--) {
1021
+ const msg = olderMessages[i];
951
1022
  const tokens = estimateMessage(msg);
952
- if (rawTokens + tokens > input.rawBudget) {
1023
+ if (olderTokens + tokens > remainingBudget) {
953
1024
  cutoff = i + 1;
954
1025
  break;
955
1026
  }
956
- rawTokens += tokens;
1027
+ olderTokens += tokens;
957
1028
  if (i === 0) cutoff = 0;
958
1029
  }
959
1030
 
960
- const raw = input.messages.slice(cutoff);
961
- // Must keep at least 1 raw message — otherwise this layer fails
962
- if (!raw.length) return null;
1031
+ const rawMessages = [...olderMessages.slice(cutoff), ...currentTurn];
1032
+ const rawTokens = olderTokens + currentTurnTokens;
963
1033
 
964
- // Apply system-reminder stripping + optional tool output stripping
965
- const processed = raw.map((msg, idx) => {
966
- const fromEnd = raw.length - idx;
1034
+ // Apply system-reminder stripping + optional tool output stripping.
1035
+ // The current turn (end of rawMessages) is always "protected" — never stripped.
1036
+ const currentTurnSet = new Set(currentTurn.map((m) => m.info.id));
1037
+ const processed = rawMessages.map((msg, idx) => {
1038
+ const fromEnd = rawMessages.length - idx;
1039
+ const isCurrentTurn = currentTurnSet.has(msg.info.id);
967
1040
  const isProtected =
1041
+ isCurrentTurn ||
968
1042
  input.strip === "none" ||
969
1043
  (input.strip === "old-tools" && fromEnd <= protectedTurns * 2);
970
1044
  const parts = isProtected
package/src/ltm.ts CHANGED
@@ -135,19 +135,72 @@ export function forProject(
135
135
  .all(pid) as KnowledgeEntry[];
136
136
  }
137
137
 
138
+ type Scored = { entry: KnowledgeEntry; score: number };
139
+
140
+ /** Max entries per pool to include on first turn when no session context exists. */
141
+ const NO_CONTEXT_FALLBACK_CAP = 10;
142
+
143
+ /** Number of top-confidence project entries always included as a safety net,
144
+ * even when they don't match any session context terms. This guards against
145
+ * the coarse term-overlap scoring accidentally excluding important project
146
+ * knowledge. */
147
+ const PROJECT_SAFETY_NET = 5;
148
+
149
+ /**
150
+ * Score entries by term overlap with session context.
151
+ * Returns score = (fraction of topTerms matched) * entry.confidence.
152
+ */
153
+ function scoreEntries(
154
+ entries: KnowledgeEntry[],
155
+ topTerms: string[],
156
+ ): Scored[] {
157
+ return entries.map((entry) => {
158
+ const haystack =
159
+ (entry.title + " " + entry.content).replace(/[^\w\s]/g, " ").toLowerCase();
160
+ let hits = 0;
161
+ for (const term of topTerms) {
162
+ if (haystack.includes(term)) hits++;
163
+ }
164
+ const relevance = topTerms.length > 0 ? hits / topTerms.length : 0;
165
+ return { entry, score: relevance * entry.confidence };
166
+ });
167
+ }
168
+
169
+ /**
170
+ * Extract the top 30 meaningful terms (>3 chars) from text, sorted by frequency.
171
+ */
172
+ function extractTopTerms(text: string): string[] {
173
+ const freq = text
174
+ .replace(/[^\w\s]/g, " ")
175
+ .toLowerCase()
176
+ .split(/\s+/)
177
+ .filter((w) => w.length > 3)
178
+ .reduce<Map<string, number>>((acc, w) => {
179
+ acc.set(w, (acc.get(w) ?? 0) + 1);
180
+ return acc;
181
+ }, new Map());
182
+
183
+ return [...freq.entries()]
184
+ .sort((a, b) => b[1] - a[1])
185
+ .slice(0, 30)
186
+ .map(([w]) => w);
187
+ }
188
+
138
189
  /**
139
190
  * Build a relevance-ranked, budget-capped list of knowledge entries for injection
140
191
  * into the system prompt of a live session.
141
192
  *
142
193
  * Strategy:
143
- * 1. Project-specific entries (project_id = current project, cross_project = 0)
144
- * always get priority they were curated specifically for this codebase.
145
- * 2. Cross-project entries are scored for relevance against recent session context
146
- * (last distillation + recent raw messages). Only entries that match are included.
147
- * 3. All candidates are ranked by score * confidence, then greedily packed into
148
- * the token budget (smallest-first within same score band to maximize count).
149
- * 4. If there's no session context yet (first turn), fall back to top entries by
150
- * confidence only.
194
+ * 1. Both project-specific and cross-project entries are scored for relevance
195
+ * against recent session context (last distillation + recent raw messages).
196
+ * 2. Project entries get a safety net: the top PROJECT_SAFETY_NET entries by
197
+ * confidence are always included even if they have zero relevance score.
198
+ * This ensures the most important project knowledge is never lost to
199
+ * coarse term-overlap scoring.
200
+ * 3. All scored entries are merged into a single pool and greedily packed
201
+ * into the token budget by score descending.
202
+ * 4. If there's no session context yet (first turn), fall back to top entries
203
+ * by confidence only (capped at NO_CONTEXT_FALLBACK_CAP per pool).
151
204
  *
152
205
  * @param projectPath Current project path
153
206
  * @param sessionID Current session ID (for context extraction)
@@ -160,7 +213,7 @@ export function forSession(
160
213
  ): KnowledgeEntry[] {
161
214
  const pid = ensureProject(projectPath);
162
215
 
163
- // --- 1. Load project-specific entries (always relevant) ---
216
+ // --- 1. Load project-specific entries ---
164
217
  const projectEntries = db()
165
218
  .query(
166
219
  `SELECT * FROM knowledge
@@ -181,7 +234,6 @@ export function forSession(
181
234
  if (!crossEntries.length && !projectEntries.length) return [];
182
235
 
183
236
  // --- 3. Build session context for relevance scoring ---
184
- // Combine the most recent distillation text + last ~10 raw messages for this session
185
237
  let sessionContext = "";
186
238
  if (sessionID) {
187
239
  const distRow = db()
@@ -206,79 +258,53 @@ export function forSession(
206
258
  }
207
259
  }
208
260
 
209
- // --- 4. Score cross-project entries by relevance ---
210
- // Use FTS5 matching: extract terms from session context and score each entry
211
- type Scored = { entry: KnowledgeEntry; score: number };
261
+ // --- 4. Score both pools by relevance ---
262
+ let scoredProject: Scored[];
212
263
  let scoredCross: Scored[];
213
264
 
214
265
  if (sessionContext.trim().length > 20) {
215
- // Build a term set from session context (top 30 meaningful words)
216
- const contextTerms = sessionContext
217
- .replace(/[^\w\s]/g, " ")
218
- .toLowerCase()
219
- .split(/\s+/)
220
- .filter((w) => w.length > 3)
221
- .reduce<Map<string, number>>((acc, w) => {
222
- acc.set(w, (acc.get(w) ?? 0) + 1);
223
- return acc;
224
- }, new Map());
225
-
226
- // Sort by frequency, take top 30 terms
227
- const topTerms = [...contextTerms.entries()]
228
- .sort((a, b) => b[1] - a[1])
229
- .slice(0, 30)
230
- .map(([w]) => w);
231
-
232
- scoredCross = crossEntries.map((entry) => {
233
- const haystack =
234
- (entry.title + " " + entry.content).replace(/[^\w\s]/g, " ").toLowerCase();
235
- let hits = 0;
236
- for (const term of topTerms) {
237
- // Count how many context terms appear in this entry (simple overlap)
238
- if (haystack.includes(term)) hits++;
239
- }
240
- // Score = fraction of top terms matched, weighted by confidence
241
- const relevance = topTerms.length > 0 ? hits / topTerms.length : 0;
242
- return { entry, score: relevance * entry.confidence };
243
- });
244
-
245
- // Only keep entries with at least one term match
246
- scoredCross = scoredCross.filter((s) => s.score > 0);
266
+ const topTerms = extractTopTerms(sessionContext);
267
+
268
+ // Score project entries — include matched + safety net of top-N by confidence
269
+ const rawScored = scoreEntries(projectEntries, topTerms);
270
+ const matched = rawScored.filter((s) => s.score > 0);
271
+ const matchedIds = new Set(matched.map((s) => s.entry.id));
272
+
273
+ // Safety net: top PROJECT_SAFETY_NET entries by confidence that weren't already matched.
274
+ // Given a tiny score (0.001 * confidence) so they sort below genuinely matched entries.
275
+ const safetyNet = projectEntries
276
+ .filter((e) => !matchedIds.has(e.id))
277
+ .slice(0, PROJECT_SAFETY_NET)
278
+ .map((e) => ({ entry: e, score: 0.001 * e.confidence }));
279
+
280
+ scoredProject = [...matched, ...safetyNet];
281
+
282
+ // Score cross-project entries — only include entries with at least one term match
283
+ scoredCross = scoreEntries(crossEntries, topTerms).filter((s) => s.score > 0);
247
284
  } else {
248
- // No session context yet — take top cross-project entries by confidence
249
- scoredCross = crossEntries.slice(0, 10).map((entry) => ({
250
- entry,
251
- score: entry.confidence,
252
- }));
285
+ // No session context — fall back to top entries by confidence, capped
286
+ scoredProject = projectEntries
287
+ .slice(0, NO_CONTEXT_FALLBACK_CAP)
288
+ .map((entry) => ({ entry, score: entry.confidence }));
289
+ scoredCross = crossEntries
290
+ .slice(0, NO_CONTEXT_FALLBACK_CAP)
291
+ .map((entry) => ({ entry, score: entry.confidence }));
253
292
  }
254
293
 
255
- // Sort cross-project by score desc
256
- scoredCross.sort((a, b) => b.score - a.score);
294
+ // --- 5. Merge and pack into token budget by score descending ---
295
+ const allScored = [...scoredProject, ...scoredCross];
296
+ allScored.sort((a, b) => b.score - a.score);
257
297
 
258
- // --- 5. Pack into token budget ---
259
- // Project entries get first pick (fully relevant); cross entries fill remaining budget.
260
- // Use a greedy fit: iterate candidates and include if they fit.
261
- const HEADER_OVERHEAD_TOKENS = 15; // "## Long-term Knowledge\n"
298
+ const HEADER_OVERHEAD_TOKENS = 15;
262
299
  let used = HEADER_OVERHEAD_TOKENS;
263
300
  const result: KnowledgeEntry[] = [];
264
301
 
265
- function tryAdd(entry: KnowledgeEntry): boolean {
302
+ for (const { entry } of allScored) {
303
+ if (used >= maxTokens) break;
266
304
  const cost = estimateTokens(entry.title + entry.content) + 10;
267
- if (used + cost > maxTokens) return false;
305
+ if (used + cost > maxTokens) continue;
268
306
  result.push(entry);
269
307
  used += cost;
270
- return true;
271
- }
272
-
273
- // Project-specific first
274
- for (const entry of projectEntries) {
275
- tryAdd(entry);
276
- }
277
-
278
- // Then cross-project by relevance score
279
- for (const { entry } of scoredCross) {
280
- if (used >= maxTokens) break;
281
- tryAdd(entry);
282
308
  }
283
309
 
284
310
  return result;