@loreai/core 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/dist/bun/config.d.ts +8 -0
  2. package/dist/bun/config.d.ts.map +1 -1
  3. package/dist/bun/db.d.ts.map +1 -1
  4. package/dist/bun/distillation.d.ts +74 -2
  5. package/dist/bun/distillation.d.ts.map +1 -1
  6. package/dist/bun/embedding.d.ts.map +1 -1
  7. package/dist/bun/gradient.d.ts +72 -0
  8. package/dist/bun/gradient.d.ts.map +1 -1
  9. package/dist/bun/index.d.ts +4 -2
  10. package/dist/bun/index.d.ts.map +1 -1
  11. package/dist/bun/index.js +530 -67
  12. package/dist/bun/index.js.map +4 -4
  13. package/dist/bun/prompt.d.ts +8 -2
  14. package/dist/bun/prompt.d.ts.map +1 -1
  15. package/dist/bun/temporal.d.ts +31 -0
  16. package/dist/bun/temporal.d.ts.map +1 -1
  17. package/dist/bun/types.d.ts +9 -0
  18. package/dist/bun/types.d.ts.map +1 -1
  19. package/dist/bun/worker-model.d.ts +90 -0
  20. package/dist/bun/worker-model.d.ts.map +1 -0
  21. package/dist/node/config.d.ts +8 -0
  22. package/dist/node/config.d.ts.map +1 -1
  23. package/dist/node/db.d.ts.map +1 -1
  24. package/dist/node/distillation.d.ts +74 -2
  25. package/dist/node/distillation.d.ts.map +1 -1
  26. package/dist/node/embedding.d.ts.map +1 -1
  27. package/dist/node/gradient.d.ts +72 -0
  28. package/dist/node/gradient.d.ts.map +1 -1
  29. package/dist/node/index.d.ts +4 -2
  30. package/dist/node/index.d.ts.map +1 -1
  31. package/dist/node/index.js +530 -67
  32. package/dist/node/index.js.map +4 -4
  33. package/dist/node/prompt.d.ts +8 -2
  34. package/dist/node/prompt.d.ts.map +1 -1
  35. package/dist/node/temporal.d.ts +31 -0
  36. package/dist/node/temporal.d.ts.map +1 -1
  37. package/dist/node/types.d.ts +9 -0
  38. package/dist/node/types.d.ts.map +1 -1
  39. package/dist/node/worker-model.d.ts +90 -0
  40. package/dist/node/worker-model.d.ts.map +1 -0
  41. package/dist/types/config.d.ts +8 -0
  42. package/dist/types/config.d.ts.map +1 -1
  43. package/dist/types/db.d.ts.map +1 -1
  44. package/dist/types/distillation.d.ts +74 -2
  45. package/dist/types/distillation.d.ts.map +1 -1
  46. package/dist/types/embedding.d.ts.map +1 -1
  47. package/dist/types/gradient.d.ts +72 -0
  48. package/dist/types/gradient.d.ts.map +1 -1
  49. package/dist/types/index.d.ts +4 -2
  50. package/dist/types/index.d.ts.map +1 -1
  51. package/dist/types/prompt.d.ts +8 -2
  52. package/dist/types/prompt.d.ts.map +1 -1
  53. package/dist/types/temporal.d.ts +31 -0
  54. package/dist/types/temporal.d.ts.map +1 -1
  55. package/dist/types/types.d.ts +9 -0
  56. package/dist/types/types.d.ts.map +1 -1
  57. package/dist/types/worker-model.d.ts +90 -0
  58. package/dist/types/worker-model.d.ts.map +1 -0
  59. package/package.json +1 -1
  60. package/src/config.ts +53 -6
  61. package/src/db.ts +57 -1
  62. package/src/distillation.ts +225 -28
  63. package/src/embedding.ts +7 -0
  64. package/src/gradient.ts +262 -8
  65. package/src/index.ts +16 -0
  66. package/src/lat-reader.ts +4 -4
  67. package/src/ltm.ts +17 -17
  68. package/src/prompt.ts +101 -0
  69. package/src/recall.ts +4 -4
  70. package/src/temporal.ts +41 -10
  71. package/src/types.ts +9 -0
  72. package/src/worker-model.ts +264 -0
package/src/gradient.ts CHANGED
@@ -4,6 +4,7 @@ import { db, ensureProject, loadForceMinLayer, saveForceMinLayer } from "./db";
4
4
  import { config } from "./config";
5
5
  import { formatDistillations } from "./prompt";
6
6
  import { normalize } from "./markdown";
7
+ import * as log from "./log";
7
8
 
8
9
  type MessageWithParts = LoreMessageWithParts;
9
10
 
@@ -36,6 +37,15 @@ function estimateMessage(msg: MessageWithParts): number {
36
37
  let contextLimit = 200_000; // sensible default
37
38
  let outputReserved = 32_000;
38
39
 
40
+ // Cost-aware layer-0 token cap. When > 0, the layer-0 passthrough gate uses
41
+ // min(maxInput, maxLayer0Tokens) instead of maxInput alone. Derived from the
42
+ // model's cache-read cost: cap = targetCostPerTurn / costPerToken. This prevents
43
+ // expensive models from sending huge contexts at layer 0, where cache-read costs
44
+ // compound linearly across turns. Set to 0 to disable (use full context).
45
+ let maxLayer0Tokens = 0;
46
+
47
+ const MIN_LAYER0_FLOOR = 40_000;
48
+
39
49
  // Conservative overhead reserve for first-turn (before calibration):
40
50
  // accounts for provider system prompt + AGENTS.md + tool definitions + env info
41
51
  const FIRST_TURN_OVERHEAD = 15_000;
@@ -83,6 +93,29 @@ type SessionState = {
83
93
  prefixCache: PrefixCache | null;
84
94
  /** Raw window pin cache (Approach B) */
85
95
  rawWindowCache: RawWindowCache | null;
96
+ /**
97
+ * Wall-clock timestamp (epoch ms) of the most recent transform() call for this
98
+ * session. Used by onIdleResume() to detect cold-cache resumption — when the
99
+ * gap between turns exceeds Anthropic's prompt cache eviction window (5 min
100
+ * default / 1 hour extended), the byte-identity caching subsystems
101
+ * (prefixCache, rawWindowCache) are providing no value because the cache is
102
+ * already cold. Refreshing them on resume lets us produce a better-fitting
103
+ * window without paying a cache cost we'd otherwise be trying to preserve.
104
+ * 0 = never set (first turn).
105
+ */
106
+ lastTurnAt: number;
107
+ /**
108
+ * Set true by onIdleResume() when an idle-resume reset just fired; consumed
109
+ * (and cleared) by the LTM degraded-recovery branch in the OpenCode hook to
110
+ * skip the conversation-vs-LTM token comparison. After idle eviction the
111
+ * cache-bust cost is effectively zero, so we should always recover LTM on
112
+ * the post-idle turn regardless of conversation size.
113
+ */
114
+ cameOutOfIdle: boolean;
115
+ /** Consecutive turns at layer >= 2. Reset to 0 on any lower-layer turn; a compaction hint is logged once, when the count reaches exactly 3. */
116
+ consecutiveHighLayer: number;
117
+ /** Fingerprint of the last transform output — the layer plus the IDs of its first five messages, stored as a plain string rather than a digest — for cache-bust diagnostics. */
118
+ lastPrefixHash: string;
86
119
  };
87
120
 
88
121
  function makeSessionState(): SessionState {
@@ -97,6 +130,10 @@ function makeSessionState(): SessionState {
97
130
  lastTransformEstimate: 0,
98
131
  prefixCache: null,
99
132
  rawWindowCache: null,
133
+ lastTurnAt: 0,
134
+ cameOutOfIdle: false,
135
+ consecutiveHighLayer: 0,
136
+ lastPrefixHash: "",
100
137
  };
101
138
  }
102
139
 
@@ -116,6 +153,65 @@ function getSessionState(sessionID: string): SessionState {
116
153
  return state;
117
154
  }
118
155
 
156
+ /**
157
+ * Detect cold-cache resumption and refresh byte-identity caches.
158
+ *
159
+ * Anthropic's prompt cache evicts entries after ~5 minutes (default tier) /
160
+ * ~1 hour (extended tier). When a session resumes after the eviction window,
161
+ * the cache is provably cold — every prefix we've been carefully keeping
162
+ * byte-stable (`prefixCache`, `rawWindowCache`, plus the host's per-session
163
+ * LTM cache) provides no benefit on this turn. Worse, the LTM block was
164
+ * scored against the conversation context as it was on the previous turn,
165
+ * which may have drifted significantly in N hours.
166
+ *
167
+ * On resume after `thresholdMs`:
168
+ * - reset the distilled prefix cache (next turn re-renders from scratch)
169
+ * - reset the raw window pin cache (next turn picks a fresh cutoff)
170
+ * - set `cameOutOfIdle` so the OpenCode host can also clear `ltmSessionCache`
171
+ * and bypass the conversation-vs-LTM cost comparison in the LTM
172
+ * degraded-recovery branch
173
+ *
174
+ * Importantly, this does NOT touch:
175
+ * - reasoning blocks (Anthropic's April 23 postmortem identifies dropping
176
+ * reasoning blocks as the root cause of forgetfulness/repetition; Lore
177
+ * preserves reasoning by policy across all gradient layers)
178
+ * - the gradient layer (cold cache doesn't change token budgets;
179
+ * calibration's actualInput = input + cache.read + cache.write already
180
+ * accounts for cache misses correctly)
181
+ * - calibration state (`lastKnownInput`, overhead EMA, message-ID set) —
182
+ * the next API response will refresh these via the normal calibrate() path
183
+ *
184
+ * Set `thresholdMs <= 0` to disable. Returns true if a reset fired so the
185
+ * caller can log/observe.
186
+ */
187
+ export function onIdleResume(
188
+ sessionID: string,
189
+ thresholdMs: number,
190
+ now: number = Date.now(),
191
+ ): { triggered: false } | { triggered: true; idleMs: number } {
192
+ if (thresholdMs <= 0) return { triggered: false };
193
+ const state = getSessionState(sessionID);
194
+ if (state.lastTurnAt === 0) return { triggered: false }; // first turn — nothing to refresh
195
+ const idleMs = now - state.lastTurnAt;
196
+ if (idleMs < thresholdMs) return { triggered: false };
197
+ state.prefixCache = null;
198
+ state.rawWindowCache = null;
199
+ state.cameOutOfIdle = true;
200
+ return { triggered: true, idleMs };
201
+ }
202
+
203
+ /**
204
+ * Read-and-clear the cameOutOfIdle flag. The OpenCode host's LTM degraded-
205
+ * recovery branch consumes this to decide whether to bypass the
206
+ * conversation-vs-LTM token comparison on a post-idle turn.
207
+ */
208
+ export function consumeCameOutOfIdle(sessionID: string): boolean {
209
+ const state = sessionStates.get(sessionID);
210
+ if (!state || !state.cameOutOfIdle) return false;
211
+ state.cameOutOfIdle = false;
212
+ return true;
213
+ }
214
+
119
215
  // LTM tokens injected via system transform hook this turn.
120
216
  // Set by setLtmTokens() after the system hook runs; consumed by transform().
121
217
  let ltmTokens = 0;
@@ -131,6 +227,27 @@ export function setModelLimits(limits: { context: number; output: number }) {
131
227
  outputReserved = Math.min(limits.output || 32_000, 32_000);
132
228
  }
133
229
 
230
+ /**
231
+ * Set the cost-aware layer-0 token cap. When the cap > 0, the layer-0
232
+ * passthrough gate uses `min(maxInput, cap)` instead of `maxInput` alone.
233
+ *
234
+ * Call from the host adapter after computing the cap from model pricing:
235
+ * `cap = max(targetCostPerTurn / model.cost.cache.read, MIN_LAYER0_FLOOR)`
236
+ */
237
+ export function setMaxLayer0Tokens(tokens: number) {
238
+ maxLayer0Tokens = Math.max(0, Math.floor(tokens));
239
+ }
240
+
241
+ /** Compute the layer-0 token cap from a per-turn cost target and cache-read price. */
242
+ export function computeLayer0Cap(
243
+ targetCostPerTurn: number,
244
+ cacheReadCostPerToken: number,
245
+ ): number {
246
+ if (targetCostPerTurn <= 0 || cacheReadCostPerToken <= 0) return 0;
247
+ const rawCap = Math.floor(targetCostPerTurn / cacheReadCostPerToken);
248
+ return Math.max(rawCap, MIN_LAYER0_FLOOR);
249
+ }
250
+
134
251
  /** Called by the system transform hook after formatting LTM knowledge. */
135
252
  export function setLtmTokens(tokens: number) {
136
253
  ltmTokens = tokens;
@@ -251,6 +368,37 @@ export function resetCalibration(sessionID?: string) {
251
368
  }
252
369
  }
253
370
 
371
+ /**
372
+ * For testing only — observe session-state cache fields without exposing the
373
+ * full type. Returns null when the session has no state. The boolean fields
374
+ * answer "does this cache hold something right now?" — sufficient for asserting
375
+ * that onIdleResume() reset them.
376
+ */
377
+ export function inspectSessionState(sessionID: string): {
378
+ hasPrefixCache: boolean;
379
+ hasRawWindowCache: boolean;
380
+ cameOutOfIdle: boolean;
381
+ lastTurnAt: number;
382
+ } | null {
383
+ const state = sessionStates.get(sessionID);
384
+ if (!state) return null;
385
+ return {
386
+ hasPrefixCache: state.prefixCache !== null,
387
+ hasRawWindowCache: state.rawWindowCache !== null,
388
+ cameOutOfIdle: state.cameOutOfIdle,
389
+ lastTurnAt: state.lastTurnAt,
390
+ };
391
+ }
392
+
393
+ /**
394
+ * For testing only — set the session's lastTurnAt field. Used to simulate
395
+ * idle gaps without sleeping. Creates the session state if not present so
396
+ * tests don't need to seed it via a transform() call.
397
+ */
398
+ export function setLastTurnAtForTest(sessionID: string, ms: number): void {
399
+ getSessionState(sessionID).lastTurnAt = ms;
400
+ }
401
+
254
402
  type Distillation = {
255
403
  id: string;
256
404
  observations: string;
@@ -320,20 +468,41 @@ function cleanParts(parts: LorePart[]): LorePart[] {
320
468
  return filtered.length > 0 ? filtered : parts;
321
469
  }
322
470
 
471
+ // Upper bound on how much of the output the path-extraction regex scans.
472
+ // Two mitigations for catastrophic backtracking in `PATH_RE`:
473
+ // 1. Skip entirely if the input contains no '/' (a path requires at least
474
+ // one separator, so without one the regex has no possible match yet
475
+ // still backtracks O(n²) on long runs of [\w.-]).
476
+ // 2. Cap the scanned slice at this limit so even crafted inputs with a
477
+ // '/' somewhere don't stall the worker. The annotation only needs a
478
+ // few representative paths — sampling the first 64KB is plenty.
479
+ const ANNOTATION_PATH_SCAN_LIMIT = 64 * 1024;
480
+ const PATH_RE = /(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/g;
481
+
323
482
  // Build a metadata annotation for a stripped tool output, preserving key signals
324
483
  // about what was lost without requiring an LLM call. Inspired by the per-token
325
484
  // scalar bias β from "Fast KV Compaction via Attention Matching" (Zweiger et al.,
326
485
  // 2025) — when tokens are removed, preserving metadata about the removed content
327
486
  // helps the model compensate for information loss and decide whether to recall.
328
487
  // Reference: https://arxiv.org/abs/2602.16284
329
- function toolStripAnnotation(toolName: string, output: string): string {
488
+ export function toolStripAnnotation(toolName: string, output: string): string {
330
489
  const lines = output.split("\n").length;
331
- const chars = output.length;
332
490
 
333
491
  // Detect key signals via lightweight heuristics — no LLM call
334
492
  const hasError = /\b(?:error|fail(?:ed|ure)?|exception|panic|traceback)\b/i.test(output);
335
- const paths = output.match(/(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/g);
336
- const uniquePaths = paths ? [...new Set(paths)].slice(0, 5) : [];
493
+
494
+ // Path extraction: skip entirely if no '/' is present (cheap O(n) check
495
+ // via indexOf) to avoid PATH_RE's O(n²) backtracking on long runs of
496
+ // [\w.-] without a separator. Otherwise sample the first N KB.
497
+ let uniquePaths: string[] = [];
498
+ if (output.indexOf("/") !== -1) {
499
+ const pathScan =
500
+ output.length > ANNOTATION_PATH_SCAN_LIMIT
501
+ ? output.slice(0, ANNOTATION_PATH_SCAN_LIMIT)
502
+ : output;
503
+ const paths = pathScan.match(PATH_RE);
504
+ if (paths) uniquePaths = [...new Set(paths)].slice(0, 5);
505
+ }
337
506
 
338
507
  let annotation = `[output omitted — ${toolName}: ${lines} lines`;
339
508
  if (hasError) annotation += ", contained errors";
@@ -1113,7 +1282,20 @@ function transformInner(input: {
1113
1282
  // estimated at 146K passes layer 0 but actually costs 214K → overflow.
1114
1283
  const layer0Input = calibrated ? expectedInput : expectedInput * UNCALIBRATED_SAFETY;
1115
1284
 
1116
- if (effectiveMinLayer === 0 && layer0Input <= maxInput) {
1285
+ // Cost-aware layer-0 cap: use the smaller of the API limit and the cost-derived
1286
+ // cap. When maxLayer0Tokens is 0 (disabled), fall back to pure maxInput.
1287
+ let layer0Ceiling = maxLayer0Tokens > 0
1288
+ ? Math.min(maxInput, maxLayer0Tokens)
1289
+ : maxInput;
1290
+
1291
+ // Cold-cache awareness: on the first turn (uncalibrated = no prior API data),
1292
+ // the entire context is a cache WRITE at 12.5× the cache-read price. Use 70%
1293
+ // of the cost-derived cap to reduce the cold-write cost (skipped when the ceiling is already maxInput, i.e. the cost cap is disabled or not binding).
1294
+ if (!calibrated && layer0Ceiling < maxInput) {
1295
+ layer0Ceiling = Math.floor(layer0Ceiling * 0.7);
1296
+ }
1297
+
1298
+ if (effectiveMinLayer === 0 && layer0Input <= layer0Ceiling) {
1117
1299
  // All messages fit — return unmodified to preserve append-only prompt-cache pattern.
1118
1300
  // Raw messages are strictly better context than lossy distilled summaries.
1119
1301
  const messageTokens = calibrated
@@ -1222,11 +1404,19 @@ function transformInner(input: {
1222
1404
  return { ...layer3!, layer: 3, usable, distilledBudget, rawBudget };
1223
1405
  }
1224
1406
 
1225
- // Layer 4: Emergency — last 2 distillations, last 3 raw messages with tool parts intact.
1407
+ // Layer 4: Emergency — last 2 distillations + token-budget raw tail.
1226
1408
  // We do NOT strip tool parts here: doing so would cause an infinite tool-call loop because
1227
1409
  // the model would lose sight of its own in-progress tool calls and re-invoke them endlessly.
1228
1410
  // Instead, we aggressively drop old messages and rely on the `recall` tool (which the model
1229
1411
  // is always instructed to use) to retrieve any older details it needs.
1412
+ //
1413
+ // Token-budget tail (F7): instead of a fixed `slice(-3)`, size the raw
1414
+ // tail using `clamp(usable * 0.25, 2_000, 8_000)` tokens — matching
1415
+ // upstream OpenCode's tail-budget formula for compaction. The current
1416
+ // agentic turn (from `currentTurnStart()`) is ALWAYS fully included even
1417
+ // if it alone exceeds the tail budget — layer 4 is the terminal layer
1418
+ // and must always return. Remaining budget is filled backward with older
1419
+ // messages.
1230
1420
  urgentDistillation = true;
1231
1421
  const nuclearDistillations = distillations.slice(-2);
1232
1422
  const nuclearPrefix = distilledPrefix(nuclearDistillations);
@@ -1234,15 +1424,40 @@ function transformInner(input: {
1234
1424
  (sum, m) => sum + estimateMessage(m),
1235
1425
  0,
1236
1426
  );
1237
- const nuclearRaw = input.messages.slice(-3).map((m) => ({
1427
+
1428
+ // Token budget for the raw tail. clamp(usable * 0.25, 2K, 8K).
1429
+ const tailBudget = Math.max(2_000, Math.min(8_000, Math.floor(usable * 0.25)));
1430
+
1431
+ // Current turn is always included (non-negotiable — dropping it causes
1432
+ // the infinite tool-call loop). Clean parts but never strip tool outputs.
1433
+ const nuclearTurnStart = currentTurnStart(input.messages);
1434
+ const currentTurn = input.messages.slice(nuclearTurnStart).map((m) => ({
1238
1435
  info: m.info,
1239
1436
  parts: cleanParts(m.parts),
1240
1437
  }));
1241
- const nuclearRawTokens = nuclearRaw.reduce(
1438
+ const currentTurnTokens = currentTurn.reduce(
1242
1439
  (sum, m) => sum + estimateMessage(m),
1243
1440
  0,
1244
1441
  );
1245
1442
 
1443
+ // Fill remaining budget walking backward from the turn boundary.
1444
+ const olderMessages: MessageWithParts[] = [];
1445
+ let olderTokens = 0;
1446
+ const remaining = Math.max(0, tailBudget - currentTurnTokens);
1447
+ for (let i = nuclearTurnStart - 1; i >= 0 && olderTokens < remaining; i--) {
1448
+ const msg = input.messages[i];
1449
+ const est = estimateMessage(msg);
1450
+ if (olderTokens + est > remaining) break;
1451
+ olderMessages.unshift({
1452
+ info: msg.info,
1453
+ parts: cleanParts(msg.parts),
1454
+ });
1455
+ olderTokens += est;
1456
+ }
1457
+
1458
+ const nuclearRaw = [...olderMessages, ...currentTurn];
1459
+ const nuclearRawTokens = olderTokens + currentTurnTokens;
1460
+
1246
1461
  return {
1247
1462
  messages: [...nuclearPrefix, ...nuclearRaw],
1248
1463
  layer: 4,
@@ -1282,6 +1497,45 @@ export function transform(input: {
1282
1497
  state.lastTransformEstimate = result.totalTokens;
1283
1498
  state.lastLayer = result.layer;
1284
1499
  state.lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
1500
+ // Mark wall-clock for onIdleResume() — must record on every transform()
1501
+ // so the next-turn idle check has an accurate baseline. Done after the
1502
+ // result fields above so a thrown transformInner doesn't update it.
1503
+ state.lastTurnAt = Date.now();
1504
+
1505
+ // --- Cache-bust diagnostics (LORE_DEBUG only) ---
1506
+ // Track byte-identity of the message prefix. When the prefix hash changes
1507
+ // between consecutive turns, it means Anthropic's prompt cache is invalidated
1508
+ // and the entire context is re-written (12.5× cache-read price). This helps
1509
+ // identify which code paths are breaking byte-identity.
1510
+ const prefixIds = result.messages.slice(0, 5).map((m) => m.info.id).join(",");
1511
+ const prefixHash = `${result.layer}:${prefixIds}`;
1512
+ if (state.lastPrefixHash && state.lastPrefixHash !== prefixHash) {
1513
+ log.info(
1514
+ `cache-bust detected: session=${sid} layer=${state.lastLayer}→${result.layer}` +
1515
+ ` msgs=${state.lastTransformedCount}→${result.messages.length}` +
1516
+ ` prefix=${state.lastPrefixHash.slice(0, 30)}→${prefixHash.slice(0, 30)}`,
1517
+ );
1518
+ }
1519
+ state.lastPrefixHash = prefixHash;
1520
+
1521
+ // --- Compaction hint ---
1522
+ if (result.layer >= 2) {
1523
+ state.consecutiveHighLayer++;
1524
+ if (state.consecutiveHighLayer === 3) {
1525
+ log.info(
1526
+ `session ${sid} has been at gradient layer ${result.layer}+ for 3 consecutive turns.` +
1527
+ ` Consider running /compact to reset the context window.`,
1528
+ );
1529
+ }
1530
+ } else {
1531
+ state.consecutiveHighLayer = 0;
1532
+ }
1533
+
1534
+ log.info(
1535
+ `gradient: session=${sid} layer=${result.layer} tokens=${result.totalTokens}` +
1536
+ ` (distilled=${result.distilledTokens} raw=${result.rawTokens})` +
1537
+ ` usable=${result.usable} cap=${maxLayer0Tokens || "off"}`,
1538
+ );
1285
1539
  }
1286
1540
  return result;
1287
1541
  }
package/src/index.ts CHANGED
@@ -60,6 +60,8 @@ export {
60
60
  export {
61
61
  transform,
62
62
  setModelLimits,
63
+ setMaxLayer0Tokens,
64
+ computeLayer0Cap,
63
65
  needsUrgentDistillation,
64
66
  calibrate,
65
67
  setLtmTokens,
@@ -68,6 +70,13 @@ export {
68
70
  setForceMinLayer,
69
71
  getLastTransformedCount,
70
72
  getLastTransformEstimate,
73
+ toolStripAnnotation,
74
+ onIdleResume,
75
+ consumeCameOutOfIdle,
76
+ // Test-only — exposed at the barrel so host-package tests can simulate idle
77
+ // gaps without sleeping. Not part of the public API.
78
+ setLastTurnAtForTest,
79
+ inspectSessionState,
71
80
  } from "./gradient";
72
81
  export {
73
82
  formatKnowledge,
@@ -81,9 +90,16 @@ export {
81
90
  CONSOLIDATION_SYSTEM,
82
91
  consolidationUser,
83
92
  QUERY_EXPANSION_SYSTEM,
93
+ COMPACT_SUMMARY_TEMPLATE,
94
+ buildCompactPrompt,
84
95
  } from "./prompt";
85
96
  export { shouldImport, importFromFile, exportToFile } from "./agents-file";
86
97
  export { workerSessionIDs, isWorkerSession } from "./worker";
98
+ export * as workerModel from "./worker-model";
99
+ export {
100
+ WORKER_JUDGE_SYSTEM,
101
+ workerJudgeUser,
102
+ } from "./worker-model";
87
103
  export {
88
104
  ftsQuery,
89
105
  ftsQueryOr,
package/src/lat-reader.ts CHANGED
@@ -290,8 +290,8 @@ export function searchScored(input: {
290
290
  const ftsSQL = `SELECT s.id, s.project_id, s.file, s.heading, s.depth, s.content,
291
291
  s.content_hash, s.first_paragraph, s.updated_at,
292
292
  bm25(lat_sections_fts, 6.0, 2.0) as rank
293
- FROM lat_sections s
294
- JOIN lat_sections_fts f ON s.rowid = f.rowid
293
+ FROM lat_sections_fts f
294
+ CROSS JOIN lat_sections s ON s.rowid = f.rowid
295
295
  WHERE lat_sections_fts MATCH ?
296
296
  AND s.project_id = ?
297
297
  ORDER BY rank LIMIT ?`;
@@ -335,8 +335,8 @@ export function scoreForSession(
335
335
  `SELECT s.id, s.project_id, s.file, s.heading, s.depth, s.content,
336
336
  s.content_hash, s.first_paragraph, s.updated_at,
337
337
  bm25(lat_sections_fts, 6.0, 2.0) as rank
338
- FROM lat_sections s
339
- JOIN lat_sections_fts f ON s.rowid = f.rowid
338
+ FROM lat_sections_fts f
339
+ CROSS JOIN lat_sections s ON s.rowid = f.rowid
340
340
  WHERE lat_sections_fts MATCH ?
341
341
  AND s.project_id = ?
342
342
  ORDER BY rank`,
package/src/ltm.ts CHANGED
@@ -215,11 +215,11 @@ function scoreEntriesFTS(sessionContext: string): Map<string, number> {
215
215
  try {
216
216
  const results = db()
217
217
  .query(
218
- `SELECT k.id, bm25(knowledge_fts, ?, ?, ?) as rank
219
- FROM knowledge k
220
- JOIN knowledge_fts f ON k.rowid = f.rowid
221
- WHERE knowledge_fts MATCH ?
222
- AND k.confidence > 0.2`,
218
+ `SELECT k.id, bm25(knowledge_fts, ?, ?, ?) as rank
219
+ FROM knowledge_fts f
220
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
221
+ WHERE knowledge_fts MATCH ?
222
+ AND k.confidence > 0.2`,
223
223
  )
224
224
  .all(title, content, category, q) as Array<{
225
225
  id: string;
@@ -460,14 +460,14 @@ export function search(input: {
460
460
  const pid = input.projectPath ? ensureProject(input.projectPath) : null;
461
461
 
462
462
  const ftsSQL = pid
463
- ? `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
464
- JOIN knowledge_fts f ON k.rowid = f.rowid
463
+ ? `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge_fts f
464
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
465
465
  WHERE knowledge_fts MATCH ?
466
466
  AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
467
467
  AND k.confidence > 0.2
468
468
  ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`
469
- : `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
470
- JOIN knowledge_fts f ON k.rowid = f.rowid
469
+ : `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge_fts f
470
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
471
471
  WHERE knowledge_fts MATCH ?
472
472
  AND k.confidence > 0.2
473
473
  ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`;
@@ -517,14 +517,14 @@ export function searchScored(input: {
517
517
  const { title, content, category } = ftsWeights();
518
518
 
519
519
  const ftsSQL = pid
520
- ? `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
521
- JOIN knowledge_fts f ON k.rowid = f.rowid
520
+ ? `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge_fts f
521
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
522
522
  WHERE knowledge_fts MATCH ?
523
523
  AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
524
524
  AND k.confidence > 0.2
525
525
  ORDER BY rank LIMIT ?`
526
- : `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
527
- JOIN knowledge_fts f ON k.rowid = f.rowid
526
+ : `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge_fts f
527
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
528
528
  WHERE knowledge_fts MATCH ?
529
529
  AND k.confidence > 0.2
530
530
  ORDER BY rank LIMIT ?`;
@@ -569,8 +569,8 @@ export function searchScoredOtherProjects(input: {
569
569
  // Find entries from other projects that are NOT cross-project (those are
570
570
  // already included in the normal search via the cross_project=1 filter).
571
571
  // Also exclude entries with no project_id (global) — already included.
572
- const ftsSQL = `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
573
- JOIN knowledge_fts f ON k.rowid = f.rowid
572
+ const ftsSQL = `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge_fts f
573
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
574
574
  WHERE knowledge_fts MATCH ?
575
575
  AND k.project_id IS NOT NULL
576
576
  AND k.project_id != ?
@@ -819,8 +819,8 @@ export function check(projectPath: string): IntegrityIssue[] {
819
819
  const { title, content, category } = config().search.ftsWeights;
820
820
  const matches = db()
821
821
  .query(
822
- `SELECT k.id, k.title FROM knowledge k
823
- JOIN knowledge_fts f ON k.rowid = f.rowid
822
+ `SELECT k.id, k.title FROM knowledge_fts f
823
+ CROSS JOIN knowledge k ON k.rowid = f.rowid
824
824
  WHERE knowledge_fts MATCH ?
825
825
  AND k.id != ?
826
826
  AND k.confidence > 0.2
package/src/prompt.ts CHANGED
@@ -184,14 +184,30 @@ EXACT NUMBERS: When two segments report different numbers for what seems like th
184
184
 
185
185
  EARLY-SESSION CONTENT: Bug fixes, code changes, and decisions from the start of a session are just as important as later work. Never drop them just because the segment is short or old. If the first segment contains a specific bug fix with file paths and root cause, it MUST survive into the reflection.
186
186
 
187
+ ANCHORED UPDATES: If the prompt includes a <previous-meta-summary> block, treat it as the current consolidated state. Update it using the NEW observation segments — preserve still-true details, remove stale details, and merge in new facts. Keep the same section headings. Do NOT re-derive unchanged sections verbatim unless the new segments contradict them.
188
+
187
189
  Output ONLY an <observations> block with the consolidated observations.`;
188
190
 
189
191
  export function recursiveUser(
190
192
  distillations: Array<{ observations: string }>,
193
+ previousMeta?: string,
191
194
  ): string {
192
195
  const entries = distillations.map(
193
196
  (d, i) => `Segment ${i + 1}:\n${d.observations}`,
194
197
  );
198
+ if (previousMeta) {
199
+ return `Update the anchored meta-summary below using the NEW observation segments. Preserve still-true details, remove stale details, and merge in new facts. Keep the same section headings.
200
+
201
+ <previous-meta-summary>
202
+ ${previousMeta}
203
+ </previous-meta-summary>
204
+
205
+ ---
206
+
207
+ New observation segments to merge (chronological order):
208
+
209
+ ${entries.join("\n\n---\n\n")}`;
210
+ }
195
211
  return `Observation segments to consolidate (chronological order):
196
212
 
197
213
  ${entries.join("\n\n---\n\n")}`;
@@ -388,6 +404,91 @@ export function formatDistillations(
388
404
  return sections.join("\n\n");
389
405
  }
390
406
 
407
+ // Strict Markdown skeleton for the /compact session summary. Task-oriented
408
+ // sections so the next agent starting from the compacted context has a clear
409
+ // "where am I, what's next, what's blocked" briefing. Derived from upstream
410
+ // OpenCode's SUMMARY_TEMPLATE (session/compaction.ts in #23870) with a "(none)"
411
+ // directive added for explicit empty sections and a closing "I'm ready to
412
+ // continue." sentinel to preserve Lore's post-compact UX.
413
+ export const COMPACT_SUMMARY_TEMPLATE = `Output exactly this Markdown structure. Keep every section in this order, even when empty (use "(none)").
414
+
415
+ ---
416
+ ## Goal
417
+ - [single-sentence task summary]
418
+
419
+ ## Constraints & Preferences
420
+ - [user constraints, preferences, specs, or "(none)"]
421
+
422
+ ## Progress
423
+ ### Done
424
+ - [completed work or "(none)"]
425
+
426
+ ### In Progress
427
+ - [current work or "(none)"]
428
+
429
+ ### Blocked
430
+ - [blockers or "(none)"]
431
+
432
+ ## Key Decisions
433
+ - [decision and why, or "(none)"]
434
+
435
+ ## Next Steps
436
+ - [ordered next actions or "(none)"]
437
+
438
+ ## Critical Context
439
+ - [important technical facts, errors, open questions, or "(none)"]
440
+
441
+ ## Relevant Files
442
+ - [file or directory path: why it matters, or "(none)"]
443
+ ---
444
+
445
+ Rules:
446
+ - Keep every section, even when empty.
447
+ - Use terse bullets, not prose paragraphs.
448
+ - Preserve exact file paths, commands, error strings, and identifiers when known.
449
+ - Do not mention the summary process or that context was compacted.
450
+ - End with "I'm ready to continue." on its own line after the closing "---".`;
451
+
452
+ // Build the user-facing prompt passed to the compaction agent during /compact.
453
+ // Lore injects pre-computed distillations as context separately; this prompt
454
+ // just tells the model how to render its summary.
455
+ //
456
+ // `hasDistillations` is a boolean rather than the full array because this
457
+ // function only cares about presence — the distillation bodies are pushed into
458
+ // `output.context` separately by the caller. Passing the array shape would be
459
+ // misleading dead weight.
460
+ //
461
+ // `previousSummary` is the prior `/compact` output text (typically from the
462
+ // most recent assistant message with `info.summary === true`). When present,
463
+ // the prompt asks the model to UPDATE the anchored summary in place rather
464
+ // than re-derive from scratch — matching upstream OpenCode's behavior at
465
+ // `compaction.ts:121-132` (`buildPrompt`). When absent, the prompt is
466
+ // byte-identical to today's non-anchored output.
467
+ //
468
+ // F1b (this parameter) is OpenCode-specific: the retrieval path uses
469
+ // `client.session.messages` to find the prior summary by `info.summary === true`.
470
+ // See `findPreviousCompactSummary` in `packages/opencode/src/index.ts`.
471
+ export function buildCompactPrompt(input: {
472
+ hasDistillations: boolean;
473
+ knowledge?: string;
474
+ previousSummary?: string;
475
+ }): string {
476
+ const distillSection = input.hasDistillations
477
+ ? "Lore has pre-computed chunked summaries of the session history (injected above as context). Use them as the authoritative source — do NOT re-read raw conversation messages that conflict with them.\n\n"
478
+ : "";
479
+
480
+ const anchorBlock = input.previousSummary
481
+ ? `A prior compacted summary exists for this session. Update it using the conversation history above: preserve still-true details, remove stale details, and merge in new facts. Keep every section in place.\n\n<previous-summary>\n${input.previousSummary}\n</previous-summary>\n\n`
482
+ : "";
483
+
484
+ const knowledgeBlock = input.knowledge ? `\n${input.knowledge}\n` : "";
485
+
486
+ return `You are producing a compacted session summary for an AI coding agent. This summary will be the ONLY context available in the next part of the conversation.
487
+
488
+ ${distillSection}${anchorBlock}${COMPACT_SUMMARY_TEMPLATE}
489
+ ${knowledgeBlock}`;
490
+ }
491
+
391
492
  // ~3 chars per token — validated as best heuristic against real API data.
392
493
  function estimateTokens(text: string): number {
393
494
  return Math.ceil(text.length / 3);
package/src/recall.ts CHANGED
@@ -116,14 +116,14 @@ function searchDistillationsScored(input: {
116
116
 
117
117
  const ftsSQL = input.sessionID
118
118
  ? `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, rank
119
- FROM distillations d
120
- JOIN distillation_fts f ON d.rowid = f.rowid
119
+ FROM distillation_fts f
120
+ CROSS JOIN distillations d ON d.rowid = f.rowid
121
121
  WHERE distillation_fts MATCH ?
122
122
  AND d.project_id = ? AND d.session_id = ?
123
123
  ORDER BY rank LIMIT ?`
124
124
  : `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, rank
125
- FROM distillations d
126
- JOIN distillation_fts f ON d.rowid = f.rowid
125
+ FROM distillation_fts f
126
+ CROSS JOIN distillations d ON d.rowid = f.rowid
127
127
  WHERE distillation_fts MATCH ?
128
128
  AND d.project_id = ?
129
129
  ORDER BY rank LIMIT ?`;