opencode-lore 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-lore",
3
- "version": "0.4.0",
3
+ "version": "0.4.2",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
@@ -18,9 +18,20 @@ import { serialize, inline, h, ul, liph, strong, t, root, unescapeMarkdown } fro
18
18
  // ---------------------------------------------------------------------------
19
19
 
20
20
  export const LORE_SECTION_START =
21
- "<!-- This section is auto-maintained by lore (https://github.com/BYK/opencode-lore) -->";
21
+ "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/opencode-lore) -->";
22
22
  export const LORE_SECTION_END = "<!-- End lore-managed section -->";
23
23
 
24
+ /**
25
+ * All known start-marker variants, ordered newest-first.
26
+ * When we renamed the marker in the past, old files kept the old text.
27
+ * splitFile() matches any of these so it can strip all lore sections
28
+ * regardless of which marker version was used to write them.
29
+ */
30
+ const ALL_START_MARKERS = [
31
+ LORE_SECTION_START,
32
+ "<!-- This section is auto-maintained by lore (https://github.com/BYK/opencode-lore) -->",
33
+ ] as const;
34
+
24
35
  /** Regex matching a valid UUID (v4 or v7) — 8-4-4-4-12 hex groups. */
25
36
  const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/;
26
37
 
@@ -45,26 +56,70 @@ export type ParsedFileEntry = {
45
56
 
46
57
  /**
47
58
  * Split file content into three parts: before, lore section body, after.
48
- * Returns null for section body when markers are absent.
59
+ * Returns null for section body when no lore markers are found.
60
+ *
61
+ * Handles multiple lore sections (from duplication bugs) and all known
62
+ * start-marker variants (old + new text) by:
63
+ * - Collecting every lore section span in the file
64
+ * - Returning `before` = content before the first section
65
+ * - Returning `after` = content after the last section (all intermediate
66
+ * sections are discarded)
67
+ * - Returning `section` = body of the first section found (for import
68
+ * and shouldImport to read the canonical content)
69
+ *
70
+ * This is self-healing: a file with N duplicate sections will be collapsed
71
+ * to exactly one on the next exportToFile() call.
49
72
  */
50
73
  function splitFile(fileContent: string): {
51
74
  before: string;
52
75
  section: string | null;
53
76
  after: string;
54
77
  } {
55
- const startIdx = fileContent.indexOf(LORE_SECTION_START);
56
- const endIdx = fileContent.indexOf(LORE_SECTION_END);
78
+ // Collect every lore section span in the file, matching all known
79
+ // start-marker variants (current + historical renamed markers).
80
+ // Each span records: where the section body begins/ends and where the
81
+ // full span (including end-marker) ends.
82
+ type Span = { markerStart: number; bodyStart: number; bodyEnd: number; spanEnd: number };
83
+ const spans: Span[] = [];
84
+
85
+ let searchFrom = 0;
86
+ while (searchFrom < fileContent.length) {
87
+ // Find the earliest occurrence of any known start marker
88
+ let markerStart = -1;
89
+ let markerLen = 0;
90
+ for (const marker of ALL_START_MARKERS) {
91
+ const idx = fileContent.indexOf(marker, searchFrom);
92
+ if (idx !== -1 && (markerStart === -1 || idx < markerStart)) {
93
+ markerStart = idx;
94
+ markerLen = marker.length;
95
+ }
96
+ }
97
+ if (markerStart === -1) break; // no more start markers
98
+
99
+ const bodyStart = markerStart + markerLen;
100
+ const endIdx = fileContent.indexOf(LORE_SECTION_END, bodyStart);
101
+ if (endIdx === -1) {
102
+ // Unclosed section — consume to EOF
103
+ spans.push({ markerStart, bodyStart, bodyEnd: fileContent.length, spanEnd: fileContent.length });
104
+ break;
105
+ }
57
106
 
58
- if (startIdx === -1 || endIdx === -1 || endIdx < startIdx) {
107
+ spans.push({ markerStart, bodyStart, bodyEnd: endIdx, spanEnd: endIdx + LORE_SECTION_END.length });
108
+ searchFrom = endIdx + LORE_SECTION_END.length;
109
+ }
110
+
111
+ if (spans.length === 0) {
59
112
  return { before: fileContent, section: null, after: "" };
60
113
  }
61
114
 
62
- const before = fileContent.slice(0, startIdx);
63
- const section = fileContent.slice(
64
- startIdx + LORE_SECTION_START.length,
65
- endIdx,
66
- );
67
- const after = fileContent.slice(endIdx + LORE_SECTION_END.length);
115
+ // before = everything before the first lore section (start marker not included)
116
+ // section = body of the first section (used by shouldImport and importFromFile)
117
+ // after = everything after the LAST lore section's end marker
118
+ // Any intermediate duplicate sections are discarded.
119
+ const before = fileContent.slice(0, spans[0].markerStart);
120
+ const section = fileContent.slice(spans[0].bodyStart, spans[0].bodyEnd);
121
+ const after = fileContent.slice(spans[spans.length - 1].spanEnd);
122
+
68
123
  return { before, section, after };
69
124
  }
70
125
 
@@ -153,7 +153,7 @@ function storeDistillation(input: {
153
153
  const pid = ensureProject(input.projectPath);
154
154
  const id = crypto.randomUUID();
155
155
  const sourceJson = JSON.stringify(input.sourceIDs);
156
- const tokens = Math.ceil(input.observations.length / 4);
156
+ const tokens = Math.ceil(input.observations.length / 3);
157
157
  db()
158
158
  .query(
159
159
  `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at)
package/src/gradient.ts CHANGED
@@ -6,9 +6,12 @@ import { normalize } from "./markdown";
6
6
 
7
7
  type MessageWithParts = { info: Message; parts: Part[] };
8
8
 
9
- // Rough token estimate: ~4 chars per token
9
+ // Token estimate: ~3 chars per token. Validated against real API data across
10
+ // 200+ turn-pairs: chars/3 gives ~1.68x ratio (actual/estimate), best among
11
+ // heuristics tested. The gap is overhead (system prompt, tool definitions,
12
+ // conversation structure) which calibratedOverhead captures via EMA.
10
13
  function estimate(text: string): number {
11
- return Math.ceil(text.length / 4);
14
+ return Math.ceil(text.length / 3);
12
15
  }
13
16
 
14
17
  function estimateParts(parts: Part[]): number {
@@ -70,6 +73,8 @@ type SessionState = {
70
73
  lastWindowMessageIDs: Set<string>;
71
74
  /** One-shot force escalation: skip layers below this on the next transform() */
72
75
  forceMinLayer: SafetyLayer;
76
+ /** Token estimate from the most recent transform() output (compressed window) */
77
+ lastTransformEstimate: number;
73
78
  /** Distilled prefix cache (Approach C) */
74
79
  prefixCache: PrefixCache | null;
75
80
  /** Raw window pin cache (Approach B) */
@@ -85,6 +90,7 @@ function makeSessionState(): SessionState {
85
90
  lastLayer: 0,
86
91
  lastWindowMessageIDs: new Set(),
87
92
  forceMinLayer: 0,
93
+ lastTransformEstimate: 0,
88
94
  prefixCache: null,
89
95
  rawWindowCache: null,
90
96
  };
@@ -139,22 +145,36 @@ export function getLtmBudget(ltmFraction: number): number {
139
145
  }
140
146
 
141
147
  // Called after each assistant message completes with real token usage data.
142
- // actualInput = tokens.input + tokens.cache.read (all tokens the model saw)
143
- // messageEstimate = our chars/4 estimate of the messages we sent
148
+ // actualInput = tokens.input + tokens.cache.read + tokens.cache.write
144
149
  // sessionID = session that produced this response (for exact-tracking validity)
145
150
  // messageCount = number of messages that were sent (for delta estimation)
151
+ //
152
+ // Overhead calibration uses lastTransformEstimate (the token estimate from the
153
+ // compressed window that was actually sent to the model) instead of re-estimating
154
+ // all session messages. On compressed sessions, all-message estimate >> actualInput,
155
+ // which clamped overhead to 0 and broke budget calculations.
146
156
  export function calibrate(
147
157
  actualInput: number,
148
- messageEstimate: number,
149
158
  sessionID?: string,
150
159
  messageCount?: number,
151
160
  ) {
161
+ // Use the transform's own estimate for the compressed window it produced.
162
+ // This is the correct baseline: it estimates the same messages the model saw.
163
+ const messageEstimate = sessionID
164
+ ? getSessionState(sessionID).lastTransformEstimate
165
+ : 0;
166
+
152
167
  // Update global overhead calibration (shared across sessions — model-level).
153
- const overhead = Math.max(0, actualInput - messageEstimate);
154
- calibratedOverhead =
155
- calibratedOverhead === null
156
- ? overhead
157
- : Math.round(calibratedOverhead * 0.7 + overhead * 0.3);
168
+ // Skip when actualInput > 0 but no transform estimate exists yet (no baseline
169
+ // to compare against). Allow when both are 0 (test setup to zero overhead) or
170
+ // when we have a real transform estimate.
171
+ if (messageEstimate > 0 || actualInput === 0) {
172
+ const overhead = Math.max(0, actualInput - messageEstimate);
173
+ calibratedOverhead =
174
+ calibratedOverhead === null
175
+ ? overhead
176
+ : Math.round(calibratedOverhead * 0.7 + overhead * 0.3);
177
+ }
158
178
 
159
179
  // Store per-session exact counts for the proactive layer 0 decision.
160
180
  if (sessionID !== undefined) {
@@ -800,20 +820,20 @@ function transformInner(input: {
800
820
 
801
821
  // --- Approach A: Cache-preserving passthrough ---
802
822
  // Use exact token count from the previous API response when available.
803
- // Only the delta (messages added since last call) uses chars/4 estimation,
804
- // making the layer-0 decision 99%+ accurate from the API's own tokenizer.
823
+ // Only the delta (messages added since last call) uses chars/3 estimation,
824
+ // keeping the layer-0 decision anchored to the API's own tokenizer.
805
825
  // maxInput = absolute ceiling the API enforces: input_tokens + max_tokens <= context
806
826
  const maxInput = contextLimit - outputReserved;
807
827
 
808
828
  // True when we have real API token data from a previous turn in this session.
809
- // When false (first turn / session change), chars/4 estimates can undercount by
810
- // up to 1.8x — so tryFit output must be validated with a safety multiplier before
811
- // being used, to prevent sending an apparently-fitting window that actually overflows.
829
+ // When false (first turn / session change), chars/3 estimates may still diverge
830
+ // from the real tokenizer — so tryFit output must be validated with a safety
831
+ // multiplier before being used.
812
832
  const calibrated = sessState.lastKnownInput > 0;
813
833
 
814
834
  // On uncalibrated turns, apply this multiplier to tryFit's estimated total to
815
- // approximate the real token count. 1.5 is conservative but not so aggressive
816
- // that it forces layer 4 on modestly-sized sessions.
835
+ // approximate the real token count. chars/3 undercounts (actual ≈ 1.68× estimate),
836
+ // but overhead EMA captures most of the gap. 1.5 provides a safe margin.
817
837
  const UNCALIBRATED_SAFETY = 1.5;
818
838
 
819
839
  // Returns true if the tryFit result is safe to use: either we have calibrated
@@ -830,7 +850,7 @@ function transformInner(input: {
830
850
  // Prevents the calibration oscillation: a compressed turn stores
831
851
  // lastKnownInput=100K for a 50-message window, but the next turn's
832
852
  // input.messages has 300 raw messages. The delta estimation treats the 250
833
- // evicted messages as "new" and undercounts them via chars/4, producing an
853
+ // evicted messages as "new" and undercounts their tokens, producing an
834
854
  // expectedInput that fits in layer 0 — but the actual tokens are ~190K.
835
855
  // Only applied when calibrated (same session, per-session state) to avoid
836
856
  // affecting other sessions including worker sessions.
@@ -851,7 +871,7 @@ function transformInner(input: {
851
871
  const ltmDelta = ltmTokens - sessState.lastKnownLtm;
852
872
  expectedInput = sessState.lastKnownInput + newMsgTokens + ltmDelta;
853
873
  } else {
854
- // First turn or session change: fall back to chars/4 + overhead.
874
+ // First turn or session change: fall back to chars/3 estimate + overhead.
855
875
  const messageTokens = input.messages.reduce((s, m) => s + estimateMessage(m), 0);
856
876
  expectedInput = messageTokens + overhead + ltmTokens;
857
877
  }
@@ -1009,6 +1029,7 @@ export function transform(input: {
1009
1029
  if (sid) {
1010
1030
  const state = getSessionState(sid);
1011
1031
  state.lastTransformedCount = result.messages.length;
1032
+ state.lastTransformEstimate = result.totalTokens;
1012
1033
  state.lastLayer = result.layer;
1013
1034
  state.lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
1014
1035
  }
package/src/index.ts CHANGED
@@ -10,7 +10,6 @@ import {
10
10
  setModelLimits,
11
11
  needsUrgentDistillation,
12
12
  calibrate,
13
- estimateMessages,
14
13
  setLtmTokens,
15
14
  getLtmBudget,
16
15
  setForceMinLayer,
@@ -204,28 +203,15 @@ export const LorePlugin: Plugin = async (ctx) => {
204
203
  backgroundDistill(msg.sessionID);
205
204
  }
206
205
 
207
- // Calibrate overhead estimate using real token counts.
208
- // Also store the exact input count + message count for the proactive
209
- // layer-0 decision (avoids full chars/4 re-estimation each turn).
210
- // actualInput = all tokens the model processed as input, regardless of
211
- // whether they were new (input), read from cache (cache.read), or newly
212
- // written to cache (cache.write). All three contribute to the context window.
213
- const allMsgs = await ctx.client.session.messages({
214
- path: { id: msg.sessionID },
215
- });
216
- if (allMsgs.data) {
217
- const withParts = allMsgs.data
218
- .filter((m) => m.info.id !== msg.id)
219
- .map((m) => ({ info: m.info, parts: m.parts }));
220
- const msgEstimate = estimateMessages(withParts);
221
- const actualInput =
222
- msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
223
- // Use the compressed message count (from the last transform output),
224
- // not the total DB count. On layer 0 these are equal. On layers 1-4,
225
- // the model only saw the compressed window — calibrate must track that
226
- // count so the next turn's delta is computed correctly.
227
- calibrate(actualInput, msgEstimate, msg.sessionID, getLastTransformedCount(msg.sessionID) || withParts.length);
228
- }
206
+ // Calibrate overhead using real token counts from the API response.
207
+ // actualInput = all tokens the model processed (input + cache.read + cache.write).
208
+ // The message estimate comes from the transform's own output (stored in
209
+ // session state as lastTransformEstimate), NOT from re-estimating all session
210
+ // messages. On compressed sessions, all-message estimate >> actualInput, which
211
+ // previously clamped overhead to 0 and broke budget calculations.
212
+ const actualInput =
213
+ msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
214
+ calibrate(actualInput, msg.sessionID, getLastTransformedCount(msg.sessionID));
229
215
  }
230
216
  }
231
217
  } catch {
@@ -394,13 +380,24 @@ export const LorePlugin: Plugin = async (ctx) => {
394
380
  if (formatted) {
395
381
  // Track how many tokens we actually consumed so the gradient manager
396
382
  // can deduct them from the usable budget for message injection.
397
- // Use /3 (not /4) — consistent with ltm.ts and prompt.ts estimators.
398
383
  const ltmTokenCount = Math.ceil(formatted.length / 3);
399
384
  setLtmTokens(ltmTokenCount);
400
385
  output.system.push(formatted);
401
386
  } else {
402
387
  setLtmTokens(0);
403
388
  }
389
+
390
+ // Remind the agent to include the agents file in commits.
391
+ // It is always modified after the lore export runs (post-session) so it
392
+ // appears as unstaged when the agent goes to commit — the agent must not
393
+ // skip it just because it looks auto-generated.
394
+ if (cfg.agentsFile.enabled) {
395
+ output.system.push(
396
+ `When making git commits, always check if ${cfg.agentsFile.path} has ` +
397
+ `unstaged changes and include it in the commit. This file contains ` +
398
+ `shared project knowledge managed by lore and must be version-controlled.`,
399
+ );
400
+ }
404
401
  },
405
402
 
406
403
  // Transform message history: distilled prefix + raw recent.
package/src/ltm.ts CHANGED
@@ -2,8 +2,7 @@ import { uuidv7 } from "uuidv7";
2
2
  import { db, ensureProject } from "./db";
3
3
  import { ftsQuery } from "./temporal";
4
4
 
5
- // Rough token estimate: ~3 chars per token (conservative for markdown-heavy technical text;
6
- // real tokenization of code terms and special chars runs ~3.0-3.5 chars/token, not 4).
5
+ // ~3 chars per token validated as best heuristic against real API data.
7
6
  function estimateTokens(text: string): number {
8
7
  return Math.ceil(text.length / 3);
9
8
  }
package/src/prompt.ts CHANGED
@@ -359,8 +359,7 @@ export function formatDistillations(
359
359
  return sections.join("\n\n");
360
360
  }
361
361
 
362
- // Rough token estimate used for budget-gating knowledge entries.
363
- // Uses ~3 chars/token (conservative for markdown-heavy technical text).
362
+ // ~3 chars per token validated as best heuristic against real API data.
364
363
  function estimateTokens(text: string): number {
365
364
  return Math.ceil(text.length / 3);
366
365
  }
package/src/temporal.ts CHANGED
@@ -1,9 +1,9 @@
1
1
  import { db, ensureProject } from "./db";
2
2
  import type { Message, Part } from "@opencode-ai/sdk";
3
3
 
4
- // Estimate token count from text length (rough: 1 token 4 chars)
4
+ // ~3 chars per token validated as best heuristic against real API data.
5
5
  function estimate(text: string): number {
6
- return Math.ceil(text.length / 4);
6
+ return Math.ceil(text.length / 3);
7
7
  }
8
8
 
9
9
  function partsToText(parts: Part[]): string {