@loreai/core 0.10.2 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/config.d.ts +8 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +74 -2
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +72 -0
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/index.d.ts +4 -2
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +554 -76
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/prompt.d.ts +8 -2
- package/dist/bun/prompt.d.ts.map +1 -1
- package/dist/bun/temporal.d.ts +31 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +9 -0
- package/dist/bun/types.d.ts.map +1 -1
- package/dist/bun/worker-model.d.ts +90 -0
- package/dist/bun/worker-model.d.ts.map +1 -0
- package/dist/node/config.d.ts +8 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +74 -2
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +72 -0
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/index.d.ts +4 -2
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +554 -76
- package/dist/node/index.js.map +4 -4
- package/dist/node/prompt.d.ts +8 -2
- package/dist/node/prompt.d.ts.map +1 -1
- package/dist/node/temporal.d.ts +31 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +9 -0
- package/dist/node/types.d.ts.map +1 -1
- package/dist/node/worker-model.d.ts +90 -0
- package/dist/node/worker-model.d.ts.map +1 -0
- package/dist/types/config.d.ts +8 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +74 -2
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +72 -0
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/index.d.ts +4 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/prompt.d.ts +8 -2
- package/dist/types/prompt.d.ts.map +1 -1
- package/dist/types/temporal.d.ts +31 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +9 -0
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/worker-model.d.ts +90 -0
- package/dist/types/worker-model.d.ts.map +1 -0
- package/package.json +1 -1
- package/src/config.ts +53 -6
- package/src/db.ts +68 -6
- package/src/distillation.ts +225 -28
- package/src/embedding.ts +7 -0
- package/src/gradient.ts +305 -17
- package/src/index.ts +16 -0
- package/src/lat-reader.ts +4 -4
- package/src/ltm.ts +17 -17
- package/src/prompt.ts +101 -0
- package/src/recall.ts +4 -4
- package/src/temporal.ts +41 -10
- package/src/types.ts +9 -0
- package/src/worker-model.ts +264 -0
package/src/embedding.ts
CHANGED
|
@@ -12,6 +12,11 @@ import { db } from "./db";
|
|
|
12
12
|
import { config } from "./config";
|
|
13
13
|
import * as log from "./log";
|
|
14
14
|
|
|
15
|
+
/** Timeout for embedding API fetch calls (ms). Prevents a hanging API from
|
|
16
|
+
* blocking the recall tool indefinitely. 10s is generous for typical 100-500ms
|
|
17
|
+
* embedding calls but bounded enough to avoid minutes-long hangs. */
|
|
18
|
+
const EMBED_TIMEOUT_MS = 10_000;
|
|
19
|
+
|
|
15
20
|
// ---------------------------------------------------------------------------
|
|
16
21
|
// Provider interface
|
|
17
22
|
// ---------------------------------------------------------------------------
|
|
@@ -58,6 +63,7 @@ class VoyageProvider implements EmbeddingProvider {
|
|
|
58
63
|
input_type: inputType,
|
|
59
64
|
output_dimension: this.dimensions,
|
|
60
65
|
}),
|
|
66
|
+
signal: AbortSignal.timeout(EMBED_TIMEOUT_MS),
|
|
61
67
|
});
|
|
62
68
|
|
|
63
69
|
if (!res.ok) {
|
|
@@ -112,6 +118,7 @@ class OpenAIProvider implements EmbeddingProvider {
|
|
|
112
118
|
Authorization: `Bearer ${this.apiKey}`,
|
|
113
119
|
},
|
|
114
120
|
body: JSON.stringify(body),
|
|
121
|
+
signal: AbortSignal.timeout(EMBED_TIMEOUT_MS),
|
|
115
122
|
});
|
|
116
123
|
|
|
117
124
|
if (!res.ok) {
|
package/src/gradient.ts
CHANGED
|
@@ -4,6 +4,7 @@ import { db, ensureProject, loadForceMinLayer, saveForceMinLayer } from "./db";
|
|
|
4
4
|
import { config } from "./config";
|
|
5
5
|
import { formatDistillations } from "./prompt";
|
|
6
6
|
import { normalize } from "./markdown";
|
|
7
|
+
import * as log from "./log";
|
|
7
8
|
|
|
8
9
|
type MessageWithParts = LoreMessageWithParts;
|
|
9
10
|
|
|
@@ -36,6 +37,15 @@ function estimateMessage(msg: MessageWithParts): number {
|
|
|
36
37
|
let contextLimit = 200_000; // sensible default
|
|
37
38
|
let outputReserved = 32_000;
|
|
38
39
|
|
|
40
|
+
// Cost-aware layer-0 token cap. When > 0, the layer-0 passthrough gate uses
|
|
41
|
+
// min(maxInput, maxLayer0Tokens) instead of maxInput alone. Derived from the
|
|
42
|
+
// model's cache-read cost: cap = targetCostPerTurn / costPerToken. This prevents
|
|
43
|
+
// expensive models from sending huge contexts at layer 0, where cache-read costs
|
|
44
|
+
// compound linearly across turns. Set to 0 to disable (use full context).
|
|
45
|
+
let maxLayer0Tokens = 0;
|
|
46
|
+
|
|
47
|
+
const MIN_LAYER0_FLOOR = 40_000;
|
|
48
|
+
|
|
39
49
|
// Conservative overhead reserve for first-turn (before calibration):
|
|
40
50
|
// accounts for provider system prompt + AGENTS.md + tool definitions + env info
|
|
41
51
|
const FIRST_TURN_OVERHEAD = 15_000;
|
|
@@ -83,6 +93,29 @@ type SessionState = {
|
|
|
83
93
|
prefixCache: PrefixCache | null;
|
|
84
94
|
/** Raw window pin cache (Approach B) */
|
|
85
95
|
rawWindowCache: RawWindowCache | null;
|
|
96
|
+
/**
|
|
97
|
+
* Wall-clock timestamp (epoch ms) of the most recent transform() call for this
|
|
98
|
+
* session. Used by onIdleResume() to detect cold-cache resumption — when the
|
|
99
|
+
* gap between turns exceeds Anthropic's prompt cache eviction window (5 min
|
|
100
|
+
* default / 1 hour extended), the byte-identity caching subsystems
|
|
101
|
+
* (prefixCache, rawWindowCache) are providing no value because the cache is
|
|
102
|
+
* already cold. Refreshing them on resume lets us produce a better-fitting
|
|
103
|
+
* window without paying a cache cost we'd otherwise be trying to preserve.
|
|
104
|
+
* 0 = never set (first turn).
|
|
105
|
+
*/
|
|
106
|
+
lastTurnAt: number;
|
|
107
|
+
/**
|
|
108
|
+
* Set true by onIdleResume() when an idle-resume reset just fired; consumed
|
|
109
|
+
* (and cleared) by the LTM degraded-recovery branch in the OpenCode hook to
|
|
110
|
+
* skip the conversation-vs-LTM token comparison. After idle eviction the
|
|
111
|
+
* cache-bust cost is effectively zero, so we should always recover LTM on
|
|
112
|
+
* the post-idle turn regardless of conversation size.
|
|
113
|
+
*/
|
|
114
|
+
cameOutOfIdle: boolean;
|
|
115
|
+
/** Consecutive turns at layer >= 2. A compaction hint is logged once, when this reaches 3. */
|
|
116
|
+
consecutiveHighLayer: number;
|
|
117
|
+
/** Layer plus the first five message IDs of the last transform output, joined as a string (not a true hash) — for cache-bust diagnostics. */
|
|
118
|
+
lastPrefixHash: string;
|
|
86
119
|
};
|
|
87
120
|
|
|
88
121
|
function makeSessionState(): SessionState {
|
|
@@ -97,6 +130,10 @@ function makeSessionState(): SessionState {
|
|
|
97
130
|
lastTransformEstimate: 0,
|
|
98
131
|
prefixCache: null,
|
|
99
132
|
rawWindowCache: null,
|
|
133
|
+
lastTurnAt: 0,
|
|
134
|
+
cameOutOfIdle: false,
|
|
135
|
+
consecutiveHighLayer: 0,
|
|
136
|
+
lastPrefixHash: "",
|
|
100
137
|
};
|
|
101
138
|
}
|
|
102
139
|
|
|
@@ -116,6 +153,65 @@ function getSessionState(sessionID: string): SessionState {
|
|
|
116
153
|
return state;
|
|
117
154
|
}
|
|
118
155
|
|
|
156
|
+
/**
|
|
157
|
+
* Detect cold-cache resumption and refresh byte-identity caches.
|
|
158
|
+
*
|
|
159
|
+
* Anthropic's prompt cache evicts entries after ~5 minutes (default tier) /
|
|
160
|
+
* ~1 hour (extended tier). When a session resumes after the eviction window,
|
|
161
|
+
* the cache is provably cold — every prefix we've been carefully keeping
|
|
162
|
+
* byte-stable (`prefixCache`, `rawWindowCache`, plus the host's per-session
|
|
163
|
+
* LTM cache) provides no benefit on this turn. Worse, the LTM block was
|
|
164
|
+
* scored against the conversation context as it was on the previous turn,
|
|
165
|
+
* which may have drifted significantly in N hours.
|
|
166
|
+
*
|
|
167
|
+
* On resume after `thresholdMs`:
|
|
168
|
+
* - reset the distilled prefix cache (next turn re-renders from scratch)
|
|
169
|
+
* - reset the raw window pin cache (next turn picks a fresh cutoff)
|
|
170
|
+
* - set `cameOutOfIdle` so the OpenCode host can also clear `ltmSessionCache`
|
|
171
|
+
* and bypass the conversation-vs-LTM cost comparison in the LTM
|
|
172
|
+
* degraded-recovery branch
|
|
173
|
+
*
|
|
174
|
+
* Importantly, this does NOT touch:
|
|
175
|
+
* - reasoning blocks (Anthropic's April 23 postmortem identifies dropping
|
|
176
|
+
* reasoning blocks as the root cause of forgetfulness/repetition; Lore
|
|
177
|
+
* preserves reasoning by policy across all gradient layers)
|
|
178
|
+
* - the gradient layer (cold cache doesn't change token budgets;
|
|
179
|
+
* calibration's actualInput = input + cache.read + cache.write already
|
|
180
|
+
* accounts for cache misses correctly)
|
|
181
|
+
* - calibration state (`lastKnownInput`, overhead EMA, message-ID set) —
|
|
182
|
+
* the next API response will refresh these via the normal calibrate() path
|
|
183
|
+
*
|
|
184
|
+
* Set `thresholdMs <= 0` to disable. The result's `triggered` is true if a reset fired so the
|
|
185
|
+
* caller can log/observe.
|
|
186
|
+
*/
|
|
187
|
+
export function onIdleResume(
|
|
188
|
+
sessionID: string,
|
|
189
|
+
thresholdMs: number,
|
|
190
|
+
now: number = Date.now(),
|
|
191
|
+
): { triggered: false } | { triggered: true; idleMs: number } {
|
|
192
|
+
if (thresholdMs <= 0) return { triggered: false };
|
|
193
|
+
const state = getSessionState(sessionID);
|
|
194
|
+
if (state.lastTurnAt === 0) return { triggered: false }; // first turn — nothing to refresh
|
|
195
|
+
const idleMs = now - state.lastTurnAt;
|
|
196
|
+
if (idleMs < thresholdMs) return { triggered: false };
|
|
197
|
+
state.prefixCache = null;
|
|
198
|
+
state.rawWindowCache = null;
|
|
199
|
+
state.cameOutOfIdle = true;
|
|
200
|
+
return { triggered: true, idleMs };
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Read-and-clear the cameOutOfIdle flag. The OpenCode host's LTM degraded-
|
|
205
|
+
* recovery branch consumes this to decide whether to bypass the
|
|
206
|
+
* conversation-vs-LTM token comparison on a post-idle turn.
|
|
207
|
+
*/
|
|
208
|
+
export function consumeCameOutOfIdle(sessionID: string): boolean {
|
|
209
|
+
const state = sessionStates.get(sessionID);
|
|
210
|
+
if (!state || !state.cameOutOfIdle) return false;
|
|
211
|
+
state.cameOutOfIdle = false;
|
|
212
|
+
return true;
|
|
213
|
+
}
|
|
214
|
+
|
|
119
215
|
// LTM tokens injected via system transform hook this turn.
|
|
120
216
|
// Set by setLtmTokens() after the system hook runs; consumed by transform().
|
|
121
217
|
let ltmTokens = 0;
|
|
@@ -131,6 +227,27 @@ export function setModelLimits(limits: { context: number; output: number }) {
|
|
|
131
227
|
outputReserved = Math.min(limits.output || 32_000, 32_000);
|
|
132
228
|
}
|
|
133
229
|
|
|
230
|
+
/**
|
|
231
|
+
* Set the cost-aware layer-0 token cap. When the cap > 0, the layer-0
|
|
232
|
+
* passthrough gate uses `min(maxInput, cap)` instead of `maxInput` alone.
|
|
233
|
+
*
|
|
234
|
+
* Call from the host adapter after computing the cap from model pricing:
|
|
235
|
+
* `cap = max(targetCostPerTurn / model.cost.cache.read, MIN_LAYER0_FLOOR)`
|
|
236
|
+
*/
|
|
237
|
+
export function setMaxLayer0Tokens(tokens: number) {
|
|
238
|
+
maxLayer0Tokens = Math.max(0, Math.floor(tokens));
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
/** Compute the layer-0 token cap from a per-turn cost target and cache-read price. */
|
|
242
|
+
export function computeLayer0Cap(
|
|
243
|
+
targetCostPerTurn: number,
|
|
244
|
+
cacheReadCostPerToken: number,
|
|
245
|
+
): number {
|
|
246
|
+
if (targetCostPerTurn <= 0 || cacheReadCostPerToken <= 0) return 0;
|
|
247
|
+
const rawCap = Math.floor(targetCostPerTurn / cacheReadCostPerToken);
|
|
248
|
+
return Math.max(rawCap, MIN_LAYER0_FLOOR);
|
|
249
|
+
}
|
|
250
|
+
|
|
134
251
|
/** Called by the system transform hook after formatting LTM knowledge. */
|
|
135
252
|
export function setLtmTokens(tokens: number) {
|
|
136
253
|
ltmTokens = tokens;
|
|
@@ -251,6 +368,37 @@ export function resetCalibration(sessionID?: string) {
|
|
|
251
368
|
}
|
|
252
369
|
}
|
|
253
370
|
|
|
371
|
+
/**
|
|
372
|
+
* For testing only — observe session-state cache fields without exposing the
|
|
373
|
+
* full type. Returns null when the session has no state. The boolean fields
|
|
374
|
+
* answer "does this cache hold something right now?" — sufficient for asserting
|
|
375
|
+
* that onIdleResume() reset them.
|
|
376
|
+
*/
|
|
377
|
+
export function inspectSessionState(sessionID: string): {
|
|
378
|
+
hasPrefixCache: boolean;
|
|
379
|
+
hasRawWindowCache: boolean;
|
|
380
|
+
cameOutOfIdle: boolean;
|
|
381
|
+
lastTurnAt: number;
|
|
382
|
+
} | null {
|
|
383
|
+
const state = sessionStates.get(sessionID);
|
|
384
|
+
if (!state) return null;
|
|
385
|
+
return {
|
|
386
|
+
hasPrefixCache: state.prefixCache !== null,
|
|
387
|
+
hasRawWindowCache: state.rawWindowCache !== null,
|
|
388
|
+
cameOutOfIdle: state.cameOutOfIdle,
|
|
389
|
+
lastTurnAt: state.lastTurnAt,
|
|
390
|
+
};
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
/**
|
|
394
|
+
* For testing only — set the session's lastTurnAt field. Used to simulate
|
|
395
|
+
* idle gaps without sleeping. Creates the session state if not present so
|
|
396
|
+
* tests don't need to seed it via a transform() call.
|
|
397
|
+
*/
|
|
398
|
+
export function setLastTurnAtForTest(sessionID: string, ms: number): void {
|
|
399
|
+
getSessionState(sessionID).lastTurnAt = ms;
|
|
400
|
+
}
|
|
401
|
+
|
|
254
402
|
type Distillation = {
|
|
255
403
|
id: string;
|
|
256
404
|
observations: string;
|
|
@@ -320,20 +468,41 @@ function cleanParts(parts: LorePart[]): LorePart[] {
|
|
|
320
468
|
return filtered.length > 0 ? filtered : parts;
|
|
321
469
|
}
|
|
322
470
|
|
|
471
|
+
// Upper bound on how much of the output the path-extraction regex scans.
|
|
472
|
+
// Two mitigations for catastrophic backtracking in `PATH_RE`:
|
|
473
|
+
// 1. Skip entirely if the input contains no '/' (a path requires at least
|
|
474
|
+
// one separator, so without one the regex has no possible match yet
|
|
475
|
+
// still backtracks O(n²) on long runs of [\w.-]).
|
|
476
|
+
// 2. Cap the scanned slice at this limit so even crafted inputs with a
|
|
477
|
+
// '/' somewhere don't stall the worker. The annotation only needs a
|
|
478
|
+
// few representative paths — sampling the first 64KB is plenty.
|
|
479
|
+
const ANNOTATION_PATH_SCAN_LIMIT = 64 * 1024;
|
|
480
|
+
const PATH_RE = /(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/g;
|
|
481
|
+
|
|
323
482
|
// Build a metadata annotation for a stripped tool output, preserving key signals
|
|
324
483
|
// about what was lost without requiring an LLM call. Inspired by the per-token
|
|
325
484
|
// scalar bias β from "Fast KV Compaction via Attention Matching" (Zweiger et al.,
|
|
326
485
|
// 2025) — when tokens are removed, preserving metadata about the removed content
|
|
327
486
|
// helps the model compensate for information loss and decide whether to recall.
|
|
328
487
|
// Reference: https://arxiv.org/abs/2602.16284
|
|
329
|
-
function toolStripAnnotation(toolName: string, output: string): string {
|
|
488
|
+
export function toolStripAnnotation(toolName: string, output: string): string {
|
|
330
489
|
const lines = output.split("\n").length;
|
|
331
|
-
const chars = output.length;
|
|
332
490
|
|
|
333
491
|
// Detect key signals via lightweight heuristics — no LLM call
|
|
334
492
|
const hasError = /\b(?:error|fail(?:ed|ure)?|exception|panic|traceback)\b/i.test(output);
|
|
335
|
-
|
|
336
|
-
|
|
493
|
+
|
|
494
|
+
// Path extraction: skip entirely if no '/' is present (cheap O(n) check
|
|
495
|
+
// via indexOf) to avoid PATH_RE's O(n²) backtracking on long runs of
|
|
496
|
+
// [\w.-] without a separator. Otherwise sample the first N KB.
|
|
497
|
+
let uniquePaths: string[] = [];
|
|
498
|
+
if (output.indexOf("/") !== -1) {
|
|
499
|
+
const pathScan =
|
|
500
|
+
output.length > ANNOTATION_PATH_SCAN_LIMIT
|
|
501
|
+
? output.slice(0, ANNOTATION_PATH_SCAN_LIMIT)
|
|
502
|
+
: output;
|
|
503
|
+
const paths = pathScan.match(PATH_RE);
|
|
504
|
+
if (paths) uniquePaths = [...new Set(paths)].slice(0, 5);
|
|
505
|
+
}
|
|
337
506
|
|
|
338
507
|
let annotation = `[output omitted — ${toolName}: ${lines} lines`;
|
|
339
508
|
if (hasError) annotation += ", contained errors";
|
|
@@ -1113,7 +1282,20 @@ function transformInner(input: {
|
|
|
1113
1282
|
// estimated at 146K passes layer 0 but actually costs 214K → overflow.
|
|
1114
1283
|
const layer0Input = calibrated ? expectedInput : expectedInput * UNCALIBRATED_SAFETY;
|
|
1115
1284
|
|
|
1116
|
-
|
|
1285
|
+
// Cost-aware layer-0 cap: use the smaller of the API limit and the cost-derived
|
|
1286
|
+
// cap. When maxLayer0Tokens is 0 (disabled), fall back to pure maxInput.
|
|
1287
|
+
let layer0Ceiling = maxLayer0Tokens > 0
|
|
1288
|
+
? Math.min(maxInput, maxLayer0Tokens)
|
|
1289
|
+
: maxInput;
|
|
1290
|
+
|
|
1291
|
+
// Cold-cache awareness: on the first turn (uncalibrated = no prior API data),
|
|
1292
|
+
// the entire context is a cache WRITE at 12.5× the cache-read price. Use 70%
|
|
1293
|
+
// of the normal cap to reduce the cold-write cost.
|
|
1294
|
+
if (!calibrated && layer0Ceiling < maxInput) {
|
|
1295
|
+
layer0Ceiling = Math.floor(layer0Ceiling * 0.7);
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
if (effectiveMinLayer === 0 && layer0Input <= layer0Ceiling) {
|
|
1117
1299
|
// All messages fit — return unmodified to preserve append-only prompt-cache pattern.
|
|
1118
1300
|
// Raw messages are strictly better context than lossy distilled summaries.
|
|
1119
1301
|
const messageTokens = calibrated
|
|
@@ -1140,6 +1322,7 @@ function transformInner(input: {
|
|
|
1140
1322
|
const turnStart = currentTurnStart(input.messages);
|
|
1141
1323
|
const dedupMessages = deduplicateToolOutputs(input.messages, turnStart);
|
|
1142
1324
|
|
|
1325
|
+
|
|
1143
1326
|
const distillations = sid ? loadDistillations(input.projectPath, sid) : [];
|
|
1144
1327
|
|
|
1145
1328
|
// Layer 1 uses the append-only cached prefix (Approach C) to keep the
|
|
@@ -1222,11 +1405,19 @@ function transformInner(input: {
|
|
|
1222
1405
|
return { ...layer3!, layer: 3, usable, distilledBudget, rawBudget };
|
|
1223
1406
|
}
|
|
1224
1407
|
|
|
1225
|
-
// Layer 4: Emergency — last 2 distillations
|
|
1408
|
+
// Layer 4: Emergency — last 2 distillations + token-budget raw tail.
|
|
1226
1409
|
// We do NOT strip tool parts here: doing so would cause an infinite tool-call loop because
|
|
1227
1410
|
// the model would lose sight of its own in-progress tool calls and re-invoke them endlessly.
|
|
1228
1411
|
// Instead, we aggressively drop old messages and rely on the `recall` tool (which the model
|
|
1229
1412
|
// is always instructed to use) to retrieve any older details it needs.
|
|
1413
|
+
//
|
|
1414
|
+
// Token-budget tail (F7): instead of a fixed `slice(-3)`, size the raw
|
|
1415
|
+
// tail using `clamp(usable * 0.25, 2_000, 8_000)` tokens — matching
|
|
1416
|
+
// upstream OpenCode's tail-budget formula for compaction. The current
|
|
1417
|
+
// agentic turn (from `currentTurnStart()`) is ALWAYS fully included even
|
|
1418
|
+
// if it alone exceeds the tail budget — layer 4 is the terminal layer
|
|
1419
|
+
// and must always return. Remaining budget is filled backward with older
|
|
1420
|
+
// messages.
|
|
1230
1421
|
urgentDistillation = true;
|
|
1231
1422
|
const nuclearDistillations = distillations.slice(-2);
|
|
1232
1423
|
const nuclearPrefix = distilledPrefix(nuclearDistillations);
|
|
@@ -1234,15 +1425,40 @@ function transformInner(input: {
|
|
|
1234
1425
|
(sum, m) => sum + estimateMessage(m),
|
|
1235
1426
|
0,
|
|
1236
1427
|
);
|
|
1237
|
-
|
|
1428
|
+
|
|
1429
|
+
// Token budget for the raw tail. clamp(usable * 0.25, 2K, 8K).
|
|
1430
|
+
const tailBudget = Math.max(2_000, Math.min(8_000, Math.floor(usable * 0.25)));
|
|
1431
|
+
|
|
1432
|
+
// Current turn is always included (non-negotiable — dropping it causes
|
|
1433
|
+
// the infinite tool-call loop). Clean parts but never strip tool outputs.
|
|
1434
|
+
const nuclearTurnStart = currentTurnStart(input.messages);
|
|
1435
|
+
const currentTurn = input.messages.slice(nuclearTurnStart).map((m) => ({
|
|
1238
1436
|
info: m.info,
|
|
1239
1437
|
parts: cleanParts(m.parts),
|
|
1240
1438
|
}));
|
|
1241
|
-
const
|
|
1439
|
+
const currentTurnTokens = currentTurn.reduce(
|
|
1242
1440
|
(sum, m) => sum + estimateMessage(m),
|
|
1243
1441
|
0,
|
|
1244
1442
|
);
|
|
1245
1443
|
|
|
1444
|
+
// Fill remaining budget walking backward from the turn boundary.
|
|
1445
|
+
const olderMessages: MessageWithParts[] = [];
|
|
1446
|
+
let olderTokens = 0;
|
|
1447
|
+
const remaining = Math.max(0, tailBudget - currentTurnTokens);
|
|
1448
|
+
for (let i = nuclearTurnStart - 1; i >= 0 && olderTokens < remaining; i--) {
|
|
1449
|
+
const msg = input.messages[i];
|
|
1450
|
+
const est = estimateMessage(msg);
|
|
1451
|
+
if (olderTokens + est > remaining) break;
|
|
1452
|
+
olderMessages.unshift({
|
|
1453
|
+
info: msg.info,
|
|
1454
|
+
parts: cleanParts(msg.parts),
|
|
1455
|
+
});
|
|
1456
|
+
olderTokens += est;
|
|
1457
|
+
}
|
|
1458
|
+
|
|
1459
|
+
const nuclearRaw = [...olderMessages, ...currentTurn];
|
|
1460
|
+
const nuclearRawTokens = olderTokens + currentTurnTokens;
|
|
1461
|
+
|
|
1246
1462
|
return {
|
|
1247
1463
|
messages: [...nuclearPrefix, ...nuclearRaw],
|
|
1248
1464
|
layer: 4,
|
|
@@ -1282,6 +1498,45 @@ export function transform(input: {
|
|
|
1282
1498
|
state.lastTransformEstimate = result.totalTokens;
|
|
1283
1499
|
state.lastLayer = result.layer;
|
|
1284
1500
|
state.lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
|
|
1501
|
+
// Mark wall-clock for onIdleResume() — must record on every transform()
|
|
1502
|
+
// so the next-turn idle check has an accurate baseline. Done after the
|
|
1503
|
+
// result fields above so a thrown transformInner doesn't update it.
|
|
1504
|
+
state.lastTurnAt = Date.now();
|
|
1505
|
+
|
|
1506
|
+
// --- Cache-bust diagnostics (LORE_DEBUG only) ---
|
|
1507
|
+
// Track byte-identity of the message prefix. When the prefix hash changes
|
|
1508
|
+
// between consecutive turns, it means Anthropic's prompt cache is invalidated
|
|
1509
|
+
// and the entire context is re-written (12.5× cache-read price). This helps
|
|
1510
|
+
// identify which code paths are breaking byte-identity.
|
|
1511
|
+
const prefixIds = result.messages.slice(0, 5).map((m) => m.info.id).join(",");
|
|
1512
|
+
const prefixHash = `${result.layer}:${prefixIds}`;
|
|
1513
|
+
if (state.lastPrefixHash && state.lastPrefixHash !== prefixHash) {
|
|
1514
|
+
log.info(
|
|
1515
|
+
`cache-bust detected: session=${sid} layer=${state.lastLayer}→${result.layer}` +
|
|
1516
|
+
` msgs=${state.lastTransformedCount}→${result.messages.length}` +
|
|
1517
|
+
` prefix=${state.lastPrefixHash.slice(0, 30)}→${prefixHash.slice(0, 30)}`,
|
|
1518
|
+
);
|
|
1519
|
+
}
|
|
1520
|
+
state.lastPrefixHash = prefixHash;
|
|
1521
|
+
|
|
1522
|
+
// --- Compaction hint ---
|
|
1523
|
+
if (result.layer >= 2) {
|
|
1524
|
+
state.consecutiveHighLayer++;
|
|
1525
|
+
if (state.consecutiveHighLayer === 3) {
|
|
1526
|
+
log.info(
|
|
1527
|
+
`session ${sid} has been at gradient layer ${result.layer}+ for 3 consecutive turns.` +
|
|
1528
|
+
` Consider running /compact to reset the context window.`,
|
|
1529
|
+
);
|
|
1530
|
+
}
|
|
1531
|
+
} else {
|
|
1532
|
+
state.consecutiveHighLayer = 0;
|
|
1533
|
+
}
|
|
1534
|
+
|
|
1535
|
+
log.info(
|
|
1536
|
+
`gradient: session=${sid} layer=${result.layer} tokens=${result.totalTokens}` +
|
|
1537
|
+
` (distilled=${result.distilledTokens} raw=${result.rawTokens})` +
|
|
1538
|
+
` usable=${result.usable} cap=${maxLayer0Tokens || "off"}`,
|
|
1539
|
+
);
|
|
1285
1540
|
}
|
|
1286
1541
|
return result;
|
|
1287
1542
|
}
|
|
@@ -1291,21 +1546,54 @@ export function estimateMessages(messages: MessageWithParts[]): number {
|
|
|
1291
1546
|
return messages.reduce((sum, m) => sum + estimateMessage(m), 0);
|
|
1292
1547
|
}
|
|
1293
1548
|
|
|
1294
|
-
// Identify the current agentic turn:
|
|
1295
|
-
//
|
|
1296
|
-
//
|
|
1297
|
-
//
|
|
1549
|
+
// Identify the current agentic turn: walk backwards from the end to find the
|
|
1550
|
+
// boundary where it's safe to strip tool outputs. The "current turn" includes:
|
|
1551
|
+
// 1. All messages from the last user message onwards (the explicit turn boundary)
|
|
1552
|
+
// 2. All messages that are part of an unfinished tool-call chain BEFORE that user
|
|
1553
|
+
// message — because subagent/child user messages can appear mid-chain, and the
|
|
1554
|
+
// parent's tool-call chain must be kept intact or the model re-issues tool calls.
|
|
1555
|
+
//
|
|
1556
|
+
// The heuristic: walk backwards from the last user message, and if we see assistant
|
|
1557
|
+
// messages with tool parts (tool-call chains), keep extending the boundary back.
|
|
1558
|
+
// Stop at the first assistant message that has no tool parts (a clean
|
|
1559
|
+
// conversational boundary, not a mid-chain subagent injection).
|
|
1298
1560
|
function currentTurnStart(messages: MessageWithParts[]): number {
|
|
1299
|
-
|
|
1300
|
-
|
|
1561
|
+
if (messages.length === 0) return 0;
|
|
1562
|
+
|
|
1563
|
+
// Start from the last user message
|
|
1564
|
+
let boundary = messages.length;
|
|
1301
1565
|
for (let i = messages.length - 1; i >= 0; i--) {
|
|
1302
1566
|
if (messages[i].info.role === "user") {
|
|
1303
|
-
|
|
1567
|
+
boundary = i;
|
|
1304
1568
|
break;
|
|
1305
1569
|
}
|
|
1306
1570
|
}
|
|
1307
|
-
if (
|
|
1308
|
-
|
|
1571
|
+
if (boundary === messages.length) return 0; // no user message — protect all
|
|
1572
|
+
|
|
1573
|
+
// Now walk backwards past any tool-call chains that precede this user message.
|
|
1574
|
+
// A tool-call chain looks like: ...assistant(tool-calls) → user(subagent) → ...
|
|
1575
|
+
// We keep extending boundary back while we see tool-bearing assistant messages.
|
|
1576
|
+
for (let i = boundary - 1; i >= 0; i--) {
|
|
1577
|
+
const msg = messages[i];
|
|
1578
|
+
const hasToolParts = msg.parts.some(isToolPart);
|
|
1579
|
+
if (hasToolParts) {
|
|
1580
|
+
// This assistant message has tools — it's part of an active chain.
|
|
1581
|
+
// Extend the boundary to include it.
|
|
1582
|
+
boundary = i;
|
|
1583
|
+
continue;
|
|
1584
|
+
}
|
|
1585
|
+
if (msg.info.role === "user") {
|
|
1586
|
+
// A user message that carries no tool parts itself — this might be
|
|
1587
|
+
// another subagent injection. Keep walking back.
|
|
1588
|
+
boundary = i;
|
|
1589
|
+
continue;
|
|
1590
|
+
}
|
|
1591
|
+
// Non-tool assistant message (pure text response) — this is a clean boundary.
|
|
1592
|
+
// The chain above this point is a completed conversation turn.
|
|
1593
|
+
break;
|
|
1594
|
+
}
|
|
1595
|
+
|
|
1596
|
+
return boundary;
|
|
1309
1597
|
}
|
|
1310
1598
|
|
|
1311
1599
|
function tryFit(input: {
|
package/src/index.ts
CHANGED
|
@@ -60,6 +60,8 @@ export {
|
|
|
60
60
|
export {
|
|
61
61
|
transform,
|
|
62
62
|
setModelLimits,
|
|
63
|
+
setMaxLayer0Tokens,
|
|
64
|
+
computeLayer0Cap,
|
|
63
65
|
needsUrgentDistillation,
|
|
64
66
|
calibrate,
|
|
65
67
|
setLtmTokens,
|
|
@@ -68,6 +70,13 @@ export {
|
|
|
68
70
|
setForceMinLayer,
|
|
69
71
|
getLastTransformedCount,
|
|
70
72
|
getLastTransformEstimate,
|
|
73
|
+
toolStripAnnotation,
|
|
74
|
+
onIdleResume,
|
|
75
|
+
consumeCameOutOfIdle,
|
|
76
|
+
// Test-only — exposed at the barrel so host-package tests can simulate idle
|
|
77
|
+
// gaps without sleeping. Not part of the public API.
|
|
78
|
+
setLastTurnAtForTest,
|
|
79
|
+
inspectSessionState,
|
|
71
80
|
} from "./gradient";
|
|
72
81
|
export {
|
|
73
82
|
formatKnowledge,
|
|
@@ -81,9 +90,16 @@ export {
|
|
|
81
90
|
CONSOLIDATION_SYSTEM,
|
|
82
91
|
consolidationUser,
|
|
83
92
|
QUERY_EXPANSION_SYSTEM,
|
|
93
|
+
COMPACT_SUMMARY_TEMPLATE,
|
|
94
|
+
buildCompactPrompt,
|
|
84
95
|
} from "./prompt";
|
|
85
96
|
export { shouldImport, importFromFile, exportToFile } from "./agents-file";
|
|
86
97
|
export { workerSessionIDs, isWorkerSession } from "./worker";
|
|
98
|
+
export * as workerModel from "./worker-model";
|
|
99
|
+
export {
|
|
100
|
+
WORKER_JUDGE_SYSTEM,
|
|
101
|
+
workerJudgeUser,
|
|
102
|
+
} from "./worker-model";
|
|
87
103
|
export {
|
|
88
104
|
ftsQuery,
|
|
89
105
|
ftsQueryOr,
|
package/src/lat-reader.ts
CHANGED
|
@@ -290,8 +290,8 @@ export function searchScored(input: {
|
|
|
290
290
|
const ftsSQL = `SELECT s.id, s.project_id, s.file, s.heading, s.depth, s.content,
|
|
291
291
|
s.content_hash, s.first_paragraph, s.updated_at,
|
|
292
292
|
bm25(lat_sections_fts, 6.0, 2.0) as rank
|
|
293
|
-
FROM
|
|
294
|
-
JOIN
|
|
293
|
+
FROM lat_sections_fts f
|
|
294
|
+
CROSS JOIN lat_sections s ON s.rowid = f.rowid
|
|
295
295
|
WHERE lat_sections_fts MATCH ?
|
|
296
296
|
AND s.project_id = ?
|
|
297
297
|
ORDER BY rank LIMIT ?`;
|
|
@@ -335,8 +335,8 @@ export function scoreForSession(
|
|
|
335
335
|
`SELECT s.id, s.project_id, s.file, s.heading, s.depth, s.content,
|
|
336
336
|
s.content_hash, s.first_paragraph, s.updated_at,
|
|
337
337
|
bm25(lat_sections_fts, 6.0, 2.0) as rank
|
|
338
|
-
FROM
|
|
339
|
-
JOIN
|
|
338
|
+
FROM lat_sections_fts f
|
|
339
|
+
CROSS JOIN lat_sections s ON s.rowid = f.rowid
|
|
340
340
|
WHERE lat_sections_fts MATCH ?
|
|
341
341
|
AND s.project_id = ?
|
|
342
342
|
ORDER BY rank`,
|
package/src/ltm.ts
CHANGED
|
@@ -215,11 +215,11 @@ function scoreEntriesFTS(sessionContext: string): Map<string, number> {
|
|
|
215
215
|
try {
|
|
216
216
|
const results = db()
|
|
217
217
|
.query(
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
218
|
+
`SELECT k.id, bm25(knowledge_fts, ?, ?, ?) as rank
|
|
219
|
+
FROM knowledge_fts f
|
|
220
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
221
|
+
WHERE knowledge_fts MATCH ?
|
|
222
|
+
AND k.confidence > 0.2`,
|
|
223
223
|
)
|
|
224
224
|
.all(title, content, category, q) as Array<{
|
|
225
225
|
id: string;
|
|
@@ -460,14 +460,14 @@ export function search(input: {
|
|
|
460
460
|
const pid = input.projectPath ? ensureProject(input.projectPath) : null;
|
|
461
461
|
|
|
462
462
|
const ftsSQL = pid
|
|
463
|
-
? `SELECT ${KNOWLEDGE_COLS_K} FROM
|
|
464
|
-
JOIN
|
|
463
|
+
? `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge_fts f
|
|
464
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
465
465
|
WHERE knowledge_fts MATCH ?
|
|
466
466
|
AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
|
|
467
467
|
AND k.confidence > 0.2
|
|
468
468
|
ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`
|
|
469
|
-
: `SELECT ${KNOWLEDGE_COLS_K} FROM
|
|
470
|
-
JOIN
|
|
469
|
+
: `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge_fts f
|
|
470
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
471
471
|
WHERE knowledge_fts MATCH ?
|
|
472
472
|
AND k.confidence > 0.2
|
|
473
473
|
ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`;
|
|
@@ -517,14 +517,14 @@ export function searchScored(input: {
|
|
|
517
517
|
const { title, content, category } = ftsWeights();
|
|
518
518
|
|
|
519
519
|
const ftsSQL = pid
|
|
520
|
-
? `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM
|
|
521
|
-
JOIN
|
|
520
|
+
? `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge_fts f
|
|
521
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
522
522
|
WHERE knowledge_fts MATCH ?
|
|
523
523
|
AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
|
|
524
524
|
AND k.confidence > 0.2
|
|
525
525
|
ORDER BY rank LIMIT ?`
|
|
526
|
-
: `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM
|
|
527
|
-
JOIN
|
|
526
|
+
: `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge_fts f
|
|
527
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
528
528
|
WHERE knowledge_fts MATCH ?
|
|
529
529
|
AND k.confidence > 0.2
|
|
530
530
|
ORDER BY rank LIMIT ?`;
|
|
@@ -569,8 +569,8 @@ export function searchScoredOtherProjects(input: {
|
|
|
569
569
|
// Find entries from other projects that are NOT cross-project (those are
|
|
570
570
|
// already included in the normal search via the cross_project=1 filter).
|
|
571
571
|
// Also exclude entries with no project_id (global) — already included.
|
|
572
|
-
const ftsSQL = `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM
|
|
573
|
-
JOIN
|
|
572
|
+
const ftsSQL = `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge_fts f
|
|
573
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
574
574
|
WHERE knowledge_fts MATCH ?
|
|
575
575
|
AND k.project_id IS NOT NULL
|
|
576
576
|
AND k.project_id != ?
|
|
@@ -819,8 +819,8 @@ export function check(projectPath: string): IntegrityIssue[] {
|
|
|
819
819
|
const { title, content, category } = config().search.ftsWeights;
|
|
820
820
|
const matches = db()
|
|
821
821
|
.query(
|
|
822
|
-
`SELECT k.id, k.title FROM
|
|
823
|
-
JOIN
|
|
822
|
+
`SELECT k.id, k.title FROM knowledge_fts f
|
|
823
|
+
CROSS JOIN knowledge k ON k.rowid = f.rowid
|
|
824
824
|
WHERE knowledge_fts MATCH ?
|
|
825
825
|
AND k.id != ?
|
|
826
826
|
AND k.confidence > 0.2
|