@loreai/core 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/curator.d.ts.map +1 -1
- package/dist/bun/db.d.ts +86 -1
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +2 -13
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding.d.ts +5 -1
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/git.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +13 -1
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/hosted.d.ts +36 -0
- package/dist/bun/hosted.d.ts.map +1 -0
- package/dist/bun/index.d.ts +3 -2
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +1049 -247
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/lat-reader.d.ts.map +1 -1
- package/dist/bun/ltm.d.ts +99 -5
- package/dist/bun/ltm.d.ts.map +1 -1
- package/dist/bun/session-limiter.d.ts +26 -0
- package/dist/bun/session-limiter.d.ts.map +1 -0
- package/dist/bun/temporal.d.ts +2 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/curator.d.ts.map +1 -1
- package/dist/node/db.d.ts +86 -1
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +2 -13
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding.d.ts +5 -1
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/git.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +13 -1
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/hosted.d.ts +36 -0
- package/dist/node/hosted.d.ts.map +1 -0
- package/dist/node/index.d.ts +3 -2
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +1049 -247
- package/dist/node/index.js.map +4 -4
- package/dist/node/lat-reader.d.ts.map +1 -1
- package/dist/node/ltm.d.ts +99 -5
- package/dist/node/ltm.d.ts.map +1 -1
- package/dist/node/session-limiter.d.ts +26 -0
- package/dist/node/session-limiter.d.ts.map +1 -0
- package/dist/node/temporal.d.ts +2 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/curator.d.ts.map +1 -1
- package/dist/types/db.d.ts +86 -1
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +2 -13
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +5 -1
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/git.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +13 -1
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/hosted.d.ts +36 -0
- package/dist/types/hosted.d.ts.map +1 -0
- package/dist/types/index.d.ts +3 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/lat-reader.d.ts.map +1 -1
- package/dist/types/ltm.d.ts +99 -5
- package/dist/types/ltm.d.ts.map +1 -1
- package/dist/types/session-limiter.d.ts +26 -0
- package/dist/types/session-limiter.d.ts.map +1 -0
- package/dist/types/temporal.d.ts +2 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/package.json +3 -1
- package/src/agents-file.ts +12 -0
- package/src/config.ts +10 -5
- package/src/curator.ts +54 -2
- package/src/db.ts +386 -6
- package/src/distillation.ts +55 -14
- package/src/embedding.ts +71 -8
- package/src/git.ts +4 -0
- package/src/gradient.ts +227 -74
- package/src/hosted.ts +46 -0
- package/src/index.ts +12 -0
- package/src/lat-reader.ts +4 -0
- package/src/ltm.ts +480 -45
- package/src/session-limiter.ts +47 -0
- package/src/temporal.ts +10 -0
package/src/embedding.ts
CHANGED
|
@@ -28,6 +28,27 @@ import type {
|
|
|
28
28
|
* embedding calls but bounded enough to avoid minutes-long hangs. */
|
|
29
29
|
const EMBED_TIMEOUT_MS = 10_000;
|
|
30
30
|
|
|
31
|
+
/**
 * Per-text character budget applied before local ONNX inference.
 *
 * The Nomic v1.5 model accepts up to 8192 tokens, but the ONNX runtime has
 * been seen to run out of memory on inputs close to that ceiling (error codes
 * 284432024, 287180544, 144786472). Capping each text at roughly 4096 tokens'
 * worth of characters keeps the tensor allocation comfortably small. The
 * worker's own `truncation: true` setting still applies afterwards as a
 * second line of defence.
 */
const LOCAL_MAX_CHARS = 4096 * 4; // ~4096 tokens × ~4 chars/token

/**
 * Clamp `text` to at most LOCAL_MAX_CHARS UTF-16 code units.
 *
 * Strings already within the budget are returned untouched. When the cut
 * would leave a lone high surrogate (0xD800-0xDBFF) as the final code unit,
 * that unit is dropped as well so the result never ends in half of a
 * surrogate pair.
 *
 * @param text Input string of any length.
 * @returns `text` itself, or a prefix of it no longer than LOCAL_MAX_CHARS.
 */
function safeLocalTruncate(text: string): string {
  if (text.length <= LOCAL_MAX_CHARS) {
    return text;
  }

  const lastKeptUnit = text.charCodeAt(LOCAL_MAX_CHARS - 1);
  const endsOnHighSurrogate = lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff;

  // Back up one code unit rather than emit an unpaired high surrogate.
  const cut = endsOnHighSurrogate ? LOCAL_MAX_CHARS - 1 : LOCAL_MAX_CHARS;
  return text.substring(0, cut);
}
|
|
51
|
+
|
|
31
52
|
// ---------------------------------------------------------------------------
|
|
32
53
|
// Provider interface
|
|
33
54
|
// ---------------------------------------------------------------------------
|
|
@@ -272,7 +293,21 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
272
293
|
workerUrl = vendorWorkerUrl;
|
|
273
294
|
}
|
|
274
295
|
} else {
|
|
275
|
-
|
|
296
|
+
// In CJS bundles (gateway npm package), esbuild shims import.meta as
|
|
297
|
+
// an empty object {}, so import.meta.url is undefined. Fall back to
|
|
298
|
+
// __filename which esbuild defines in CJS output.
|
|
299
|
+
const selfUrl = typeof import.meta.url === "string" ? import.meta.url : undefined;
|
|
300
|
+
if (selfUrl) {
|
|
301
|
+
workerUrl = new URL(
|
|
302
|
+
`./embedding-worker${selfUrl.endsWith(".ts") ? ".ts" : ".js"}`,
|
|
303
|
+
selfUrl,
|
|
304
|
+
);
|
|
305
|
+
} else {
|
|
306
|
+
// CJS fallback: __filename is defined by esbuild's CJS output.
|
|
307
|
+
// The embedding-worker.cjs is built alongside the main bundle.
|
|
308
|
+
const { pathToFileURL } = await import("node:url");
|
|
309
|
+
workerUrl = new URL("./embedding-worker.cjs", pathToFileURL(__filename));
|
|
310
|
+
}
|
|
276
311
|
}
|
|
277
312
|
|
|
278
313
|
const vendor = vendorModelInfo();
|
|
@@ -318,9 +353,10 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
318
353
|
localProviderKnownBroken = true;
|
|
319
354
|
if (!localProviderErrorLogged) {
|
|
320
355
|
localProviderErrorLogged = true;
|
|
321
|
-
log.
|
|
356
|
+
log.error(
|
|
322
357
|
`local embedding provider failed to init: ${msg.error}. ` +
|
|
323
358
|
`Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`,
|
|
359
|
+
new Error(`embedding worker init failed: ${msg.error}`),
|
|
324
360
|
);
|
|
325
361
|
}
|
|
326
362
|
for (const [, p] of this.pendingRequests) {
|
|
@@ -337,6 +373,7 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
337
373
|
this.worker.on("error", (err: Error) => {
|
|
338
374
|
this.workerInitError = err.message;
|
|
339
375
|
this.workerReady = false;
|
|
376
|
+
log.error("embedding worker crashed:", err);
|
|
340
377
|
for (const [, p] of this.pendingRequests) {
|
|
341
378
|
p.reject(new LocalProviderUnavailableError(err));
|
|
342
379
|
}
|
|
@@ -347,6 +384,10 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
347
384
|
this.worker.on("exit", (code) => {
|
|
348
385
|
if (code !== 0 && !this.workerInitError) {
|
|
349
386
|
this.workerInitError = `embedding worker exited with code ${code}`;
|
|
387
|
+
log.error(
|
|
388
|
+
this.workerInitError,
|
|
389
|
+
new Error(this.workerInitError),
|
|
390
|
+
);
|
|
350
391
|
}
|
|
351
392
|
this.workerReady = false;
|
|
352
393
|
for (const [, p] of this.pendingRequests) {
|
|
@@ -382,9 +423,13 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
382
423
|
async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
|
|
383
424
|
await this.ensureWorker();
|
|
384
425
|
|
|
426
|
+
// Pre-truncate texts that exceed the safe ONNX inference limit.
|
|
427
|
+
// This prevents OOM on single inputs near the model's 8192-token max.
|
|
428
|
+
const truncated = texts.map(safeLocalTruncate);
|
|
429
|
+
|
|
385
430
|
// Prepend Nomic task instruction prefix.
|
|
386
431
|
const prefix = inputType === "document" ? "search_document: " : "search_query: ";
|
|
387
|
-
const prefixed =
|
|
432
|
+
const prefixed = truncated.map((t) => prefix + t);
|
|
388
433
|
|
|
389
434
|
const id = this.nextRequestId++;
|
|
390
435
|
// Recall queries (single query-type texts) get high priority so they
|
|
@@ -706,14 +751,25 @@ type VectorHit = { id: string; similarity: number };
|
|
|
706
751
|
* Search all knowledge entries with embeddings by cosine similarity.
|
|
707
752
|
* Returns top-k entries sorted by similarity descending.
|
|
708
753
|
* Pure brute-force — fine for <100 entries (microseconds).
|
|
754
|
+
*
|
|
755
|
+
* @param excludeCategories Optional category names to exclude from results.
|
|
756
|
+
* Useful when preferences are injected in a separate system block and
|
|
757
|
+
* shouldn't compete for vector search slots with context-bound entries.
|
|
709
758
|
*/
|
|
710
759
|
export function vectorSearch(
|
|
711
760
|
queryEmbedding: Float32Array,
|
|
712
761
|
limit = 10,
|
|
762
|
+
excludeCategories?: string[],
|
|
713
763
|
): VectorHit[] {
|
|
764
|
+
let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
|
|
765
|
+
const params: string[] = [];
|
|
766
|
+
if (excludeCategories?.length) {
|
|
767
|
+
sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
|
|
768
|
+
params.push(...excludeCategories);
|
|
769
|
+
}
|
|
714
770
|
const rows = db()
|
|
715
|
-
.query(
|
|
716
|
-
.all() as Array<{ id: string; embedding: Buffer }>;
|
|
771
|
+
.query(sql)
|
|
772
|
+
.all(...params) as Array<{ id: string; embedding: Buffer }>;
|
|
717
773
|
|
|
718
774
|
const scored: VectorHit[] = [];
|
|
719
775
|
for (const row of rows) {
|
|
@@ -817,6 +873,7 @@ export function embedKnowledgeEntry(
|
|
|
817
873
|
title: string,
|
|
818
874
|
content: string,
|
|
819
875
|
): void {
|
|
876
|
+
if (!isAvailable()) return;
|
|
820
877
|
const text = `${title}\n${content}`;
|
|
821
878
|
embed([text], "document")
|
|
822
879
|
.then(([vec]) => {
|
|
@@ -825,7 +882,7 @@ export function embedKnowledgeEntry(
|
|
|
825
882
|
.run(toBlob(vec), id);
|
|
826
883
|
})
|
|
827
884
|
.catch((err) => {
|
|
828
|
-
log.
|
|
885
|
+
log.error("embedding failed for knowledge entry", id, ":", err);
|
|
829
886
|
});
|
|
830
887
|
}
|
|
831
888
|
|
|
@@ -838,6 +895,7 @@ export function embedDistillation(
|
|
|
838
895
|
id: string,
|
|
839
896
|
observations: string,
|
|
840
897
|
): void {
|
|
898
|
+
if (!isAvailable()) return;
|
|
841
899
|
embed([observations], "document")
|
|
842
900
|
.then(([vec]) => {
|
|
843
901
|
db()
|
|
@@ -845,7 +903,7 @@ export function embedDistillation(
|
|
|
845
903
|
.run(toBlob(vec), id);
|
|
846
904
|
})
|
|
847
905
|
.catch((err) => {
|
|
848
|
-
log.
|
|
906
|
+
log.error("embedding failed for distillation", id, ":", err);
|
|
849
907
|
});
|
|
850
908
|
}
|
|
851
909
|
|
|
@@ -859,6 +917,7 @@ export function embedTemporalMessage(
|
|
|
859
917
|
id: string,
|
|
860
918
|
content: string,
|
|
861
919
|
): void {
|
|
920
|
+
if (!isAvailable()) return;
|
|
862
921
|
// Skip very short messages — they don't carry enough semantic signal
|
|
863
922
|
// to be useful in vector search and would waste embedding capacity.
|
|
864
923
|
if (content.length < 50) return;
|
|
@@ -870,7 +929,7 @@ export function embedTemporalMessage(
|
|
|
870
929
|
.run(toBlob(vec), id);
|
|
871
930
|
})
|
|
872
931
|
.catch((err) => {
|
|
873
|
-
log.
|
|
932
|
+
log.error("embedding failed for temporal message", id, ":", err);
|
|
874
933
|
});
|
|
875
934
|
}
|
|
876
935
|
|
|
@@ -1174,6 +1233,8 @@ export async function backfillEmbeddings(): Promise<number> {
|
|
|
1174
1233
|
} catch (err) {
|
|
1175
1234
|
// log.error sends to Sentry via captureException
|
|
1176
1235
|
log.error(`embedding backfill batch failed (${batch.length} items):`, err);
|
|
1236
|
+
// Provider is dead — no point retrying remaining batches.
|
|
1237
|
+
if (err instanceof LocalProviderUnavailableError) break;
|
|
1177
1238
|
}
|
|
1178
1239
|
// No yieldToEventLoop() needed — embed() is truly async (worker thread).
|
|
1179
1240
|
}
|
|
@@ -1234,6 +1295,8 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
|
|
|
1234
1295
|
} catch (err) {
|
|
1235
1296
|
// log.error sends to Sentry via captureException
|
|
1236
1297
|
log.error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
|
|
1298
|
+
// Provider is dead — no point retrying remaining batches.
|
|
1299
|
+
if (err instanceof LocalProviderUnavailableError) break;
|
|
1237
1300
|
}
|
|
1238
1301
|
|
|
1239
1302
|
if (embedded >= nextProgressAt) {
|
package/src/git.ts
CHANGED
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
15
|
import { execSync } from "child_process";
|
|
16
|
+
import { isHostedMode } from "./hosted";
|
|
16
17
|
|
|
17
18
|
// ---------------------------------------------------------------------------
|
|
18
19
|
// URL normalization
|
|
@@ -95,6 +96,9 @@ export function clearGitRemoteCache(): void {
|
|
|
95
96
|
* subprocess calls — `git remote -v` only runs once per unique path.
|
|
96
97
|
*/
|
|
97
98
|
export function getGitRemote(path: string): string | null {
|
|
99
|
+
// In hosted mode, never run git subprocesses with client-controlled cwd.
|
|
100
|
+
if (isHostedMode()) return null;
|
|
101
|
+
|
|
98
102
|
const cached = gitRemoteCache.get(path);
|
|
99
103
|
if (cached !== undefined) return cached;
|
|
100
104
|
|
package/src/gradient.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { LoreMessage, LorePart, LoreMessageWithParts, LoreToolPart, LoreTextPart, LoreToolState, LoreToolStateCompleted } from "./types";
|
|
2
2
|
import { isTextPart, isReasoningPart, isToolPart } from "./types";
|
|
3
|
-
import { db, ensureProject, loadForceMinLayer, saveForceMinLayer } from "./db";
|
|
3
|
+
import { db, ensureProject, loadForceMinLayer, saveForceMinLayer, saveSessionTracking, loadSessionTracking } from "./db";
|
|
4
4
|
import { config } from "./config";
|
|
5
5
|
import { formatDistillations } from "./prompt";
|
|
6
6
|
import { normalize } from "./markdown";
|
|
@@ -98,9 +98,42 @@ export function updateBustRate(
|
|
|
98
98
|
cacheWrite: number,
|
|
99
99
|
cacheRead: number,
|
|
100
100
|
sessionID?: string,
|
|
101
|
+
lastLayer?: number,
|
|
101
102
|
): void {
|
|
102
103
|
if (!sessionID) return;
|
|
103
104
|
const state = getSessionState(sessionID);
|
|
105
|
+
|
|
106
|
+
// Layer 4 (emergency) is structurally a full cache write — feeding its
|
|
107
|
+
// bust stats into the EMA and cap adaptation creates a death spiral where
|
|
108
|
+
// the cap ratchets down to MIN_CONTEXT_FLOOR and prevents the session from
|
|
109
|
+
// ever fitting in layers 1-3 again. Skip EMA updates entirely.
|
|
110
|
+
// This check is BEFORE the total===0 guard so that the consecutiveLayer4
|
|
111
|
+
// counter is always updated regardless of whether usage was reported.
|
|
112
|
+
if (lastLayer === 4) {
|
|
113
|
+
state.consecutiveLayer4++;
|
|
114
|
+
|
|
115
|
+
// Recovery hatch: after 5+ consecutive Layer 4 turns, the shrunken cap
|
|
116
|
+
// may be what's trapping us. Relax it by 10% per turn to give layers
|
|
117
|
+
// 1-3 a chance to fit. From 130K floor: turns 5-9 → 143K→157K→173K→190K→209K.
|
|
118
|
+
if (
|
|
119
|
+
state.consecutiveLayer4 >= 5 &&
|
|
120
|
+
state.dynamicContextCap > 0 &&
|
|
121
|
+
maxContextTokensCeiling > 0
|
|
122
|
+
) {
|
|
123
|
+
state.dynamicContextCap = Math.min(
|
|
124
|
+
maxContextTokensCeiling,
|
|
125
|
+
Math.floor(state.dynamicContextCap * 1.10),
|
|
126
|
+
);
|
|
127
|
+
}
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// Non-Layer-4 turn: reset the consecutive counter (also before total===0
|
|
132
|
+
// guard — a zero-usage non-L4 turn must not leave a stale count).
|
|
133
|
+
if (lastLayer !== undefined) {
|
|
134
|
+
state.consecutiveLayer4 = 0;
|
|
135
|
+
}
|
|
136
|
+
|
|
104
137
|
const total = cacheWrite + cacheRead;
|
|
105
138
|
if (total === 0) return;
|
|
106
139
|
|
|
@@ -253,6 +286,10 @@ type SessionState = {
|
|
|
253
286
|
postIdleCompact: boolean;
|
|
254
287
|
/** Consecutive turns at layer >= 2. When >= 3, log a compaction hint. */
|
|
255
288
|
consecutiveHighLayer: number;
|
|
289
|
+
/** Consecutive Layer 4 turns — used to skip bust-rate EMA updates
|
|
290
|
+
* (Layer 4 busts are structural, not a caching signal) and to trigger
|
|
291
|
+
* a recovery hatch that relaxes dynamicContextCap after prolonged trapping. */
|
|
292
|
+
consecutiveLayer4: number;
|
|
256
293
|
|
|
257
294
|
// --- Cost-aware context cap dynamic state ---
|
|
258
295
|
|
|
@@ -298,6 +335,7 @@ function makeSessionState(): SessionState {
|
|
|
298
335
|
cameOutOfIdle: false,
|
|
299
336
|
postIdleCompact: false,
|
|
300
337
|
consecutiveHighLayer: 0,
|
|
338
|
+
consecutiveLayer4: 0,
|
|
301
339
|
|
|
302
340
|
bustRateEMA: -1,
|
|
303
341
|
interBustIntervalEMA: -1,
|
|
@@ -319,6 +357,27 @@ function getSessionState(sessionID: string): SessionState {
|
|
|
319
357
|
// forceMinLayer=2, but if OpenCode restarts before the next turn,
|
|
320
358
|
// the in-memory escalation would be lost without this.
|
|
321
359
|
state.forceMinLayer = loadForceMinLayer(sessionID) as SafetyLayer;
|
|
360
|
+
|
|
361
|
+
// Restore gradient calibration state from DB (v24) — avoids uncalibrated
|
|
362
|
+
// first turns after restart. Without this, dynamicContextCap reverts to
|
|
363
|
+
// the static ceiling, bustRateEMA is uninitialized, and lastTurnAt=0
|
|
364
|
+
// prevents onIdleResume() from detecting idle gaps.
|
|
365
|
+
//
|
|
366
|
+
// Atomic restore: lastTurnAt > 0 is the proxy for "gradient state was
|
|
367
|
+
// ever flushed to DB". Restore all fields together or none — avoids
|
|
368
|
+
// per-field sentinel fragility where a valid value (e.g. lastLayer=0)
|
|
369
|
+
// could be mistaken for "never persisted".
|
|
370
|
+
const persisted = loadSessionTracking(sessionID);
|
|
371
|
+
if (persisted && persisted.lastTurnAt > 0) {
|
|
372
|
+
state.dynamicContextCap = persisted.dynamicContextCap;
|
|
373
|
+
state.bustRateEMA = persisted.bustRateEMA;
|
|
374
|
+
state.interBustIntervalEMA = persisted.interBustIntervalEMA;
|
|
375
|
+
state.lastLayer = persisted.lastLayer as SafetyLayer;
|
|
376
|
+
state.lastKnownInput = persisted.lastKnownInput;
|
|
377
|
+
state.lastTurnAt = persisted.lastTurnAt;
|
|
378
|
+
state.lastBustAt = persisted.lastBustAt;
|
|
379
|
+
}
|
|
380
|
+
|
|
322
381
|
sessionStates.set(sessionID, state);
|
|
323
382
|
}
|
|
324
383
|
return state;
|
|
@@ -584,6 +643,9 @@ export function inspectSessionState(sessionID: string): {
|
|
|
584
643
|
postIdleCompact: boolean;
|
|
585
644
|
lastTurnAt: number;
|
|
586
645
|
distillationSnapshot: DistillationSnapshot | null;
|
|
646
|
+
bustRateEMA: number;
|
|
647
|
+
dynamicContextCap: number;
|
|
648
|
+
consecutiveLayer4: number;
|
|
587
649
|
} | null {
|
|
588
650
|
const state = sessionStates.get(sessionID);
|
|
589
651
|
if (!state) return null;
|
|
@@ -594,6 +656,9 @@ export function inspectSessionState(sessionID: string): {
|
|
|
594
656
|
postIdleCompact: state.postIdleCompact,
|
|
595
657
|
lastTurnAt: state.lastTurnAt,
|
|
596
658
|
distillationSnapshot: state.distillationSnapshot,
|
|
659
|
+
bustRateEMA: state.bustRateEMA,
|
|
660
|
+
dynamicContextCap: state.dynamicContextCap,
|
|
661
|
+
consecutiveLayer4: state.consecutiveLayer4,
|
|
597
662
|
};
|
|
598
663
|
}
|
|
599
664
|
|
|
@@ -606,6 +671,28 @@ export function setLastTurnAtForTest(sessionID: string, ms: number): void {
|
|
|
606
671
|
getSessionState(sessionID).lastTurnAt = ms;
|
|
607
672
|
}
|
|
608
673
|
|
|
674
|
+
/**
 * Write the in-memory gradient calibration fields for one session to
 * persistent storage via `saveSessionTracking`.
 *
 * Does nothing when no in-memory state exists for `sessionID`.
 *
 * Intended to be invoked on a timer (e.g. the ~30s idle scheduler tick)
 * instead of after every mutation, which keeps the hot path free of DB
 * writes; the trade-off is that a crash can lose up to one tick interval
 * of calibration updates.
 *
 * @param sessionID Session whose calibration state should be persisted.
 */
export function saveGradientState(sessionID: string): void {
  const tracked = sessionStates.get(sessionID);
  if (!tracked) {
    return;
  }

  const {
    dynamicContextCap,
    bustRateEMA,
    interBustIntervalEMA,
    lastLayer,
    lastKnownInput,
    lastTurnAt,
    lastBustAt,
  } = tracked;

  saveSessionTracking(sessionID, {
    dynamicContextCap,
    bustRateEMA,
    interBustIntervalEMA,
    lastLayer,
    lastKnownInput,
    lastTurnAt,
    lastBustAt,
  });
}
|
|
695
|
+
|
|
609
696
|
type Distillation = {
|
|
610
697
|
id: string;
|
|
611
698
|
observations: string;
|
|
@@ -1132,8 +1219,54 @@ function buildPrefixMessages(formatted: string): MessageWithParts[] {
|
|
|
1132
1219
|
];
|
|
1133
1220
|
}
|
|
1134
1221
|
|
|
1222
|
+
// --- Importance-aware distillation selection ---
|
|
1223
|
+
//
|
|
1224
|
+
// When a compression stage limits distillation count (distLimit < Infinity),
|
|
1225
|
+
// selects the most valuable distillations rather than blindly taking the last N.
|
|
1226
|
+
// Scoring: 70% recency (position in chronological order) + 30% content signal.
|
|
1227
|
+
// Results are re-sorted chronologically after selection so the prefix cache
|
|
1228
|
+
// (Approach C) remains byte-stable when the same distillations are selected.
|
|
1229
|
+
//
|
|
1230
|
+
// Content signals (lightweight keyword detection, no LLM call):
|
|
1231
|
+
//   - Decisions: "decision"/"decided"/"chose"/"chosen"/"agreed" → +0.3
|
|
1232
|
+
//   - Gotchas/bugs: "gotcha"/"critical|known|subtle bug"/"broken"/"crash[ed|es]"/"regression" → +0.2
|
|
1233
|
+
//   - Architecture: "architecture"/"design decision"/"design pattern"/"system design" → +0.1
|
|
1234
|
+
// - Meta-distilled (gen >= 1): +0.2 (consolidation = higher value density)
|
|
1235
|
+
|
|
1236
|
+
// Keyword detectors used by importanceBonus(). All are case-insensitive and
// word-bounded; none carries the `g` flag, so `.test()` keeps no state
// between calls.
//   DECISION_RE — decision / decided / chose / chosen / agreed
//   GOTCHA_RE   — gotcha, "critical|known|subtle bug", broken, crash(ed|es), regression
//   ARCH_RE     — architecture, "design decision", "design pattern", "system design"
const DECISION_RE = /\b(?:decision|decided|chose|chosen|agreed)\b/i;
const GOTCHA_RE = /\b(?:gotcha|(?:critical|known|subtle)\s+bug|broken|crash(?:ed|es)?|regression)\b/i;
const ARCH_RE = /\b(?:architecture|design.(?:decision|pattern)|system.design)\b/i;

/**
 * Content-signal bonus for one distillation, in the range [0, 1].
 *
 * Adds 0.3 for decision wording, 0.2 for gotcha/bug wording, 0.1 for
 * architecture wording, and 0.2 when the entry is meta-distilled
 * (`generation >= 1`). The current weights sum to at most 0.8; the final
 * clamp only matters if weights are raised later.
 *
 * @param d Distillation whose `observations` text and `generation` are inspected.
 * @returns The accumulated bonus, clamped to 1.0.
 */
function importanceBonus(d: Distillation): number {
  let bonus = 0;
  if (DECISION_RE.test(d.observations)) bonus += 0.3;
  if (GOTCHA_RE.test(d.observations)) bonus += 0.2;
  if (ARCH_RE.test(d.observations)) bonus += 0.1;
  if (d.generation >= 1) bonus += 0.2;
  return Math.min(bonus, 1.0);
}

/**
 * Pick the `limit` most valuable distillations and return them in
 * chronological order.
 *
 * Score = 0.7 × recency + 0.3 × importanceBonus, where recency is the entry's
 * position in `all` normalised to [0, 1] (first = 0, last = 1). `all` is
 * therefore expected to already be in chronological order.
 *
 * The selected entries are re-ordered by `created_at`, with the original
 * index as tie-breaker. The tie-breaker matters: the re-sort runs on a
 * score-ordered array, so without it entries sharing a timestamp would come
 * out in score order (i.e. reversed), and that order could change as `all`
 * grows — producing a different rendered prefix for the same selection.
 *
 * @param all   Candidate distillations, oldest first. Not mutated.
 * @param limit Maximum number of entries to keep.
 * @returns `all` itself when it already fits within `limit`; an empty array
 *          when `limit` is not a positive number; otherwise a new array of
 *          at most `limit` entries, oldest first.
 */
function selectDistillations(all: Distillation[], limit: number): Distillation[] {
  if (all.length <= limit) return all;
  // Zero, negative or NaN limits select nothing (a negative value would
  // otherwise be interpreted by slice() as "drop from the end").
  if (!(limit > 0)) return [];

  // Use (length - 1) as divisor so the last entry gets full recency weight.
  const maxIdx = all.length - 1;
  const scored = all.map((d, i) => ({
    d,
    i,
    score: (maxIdx > 0 ? i / maxIdx : 1) * 0.7 + importanceBonus(d) * 0.3,
  }));

  // Keep top N by score, then restore chronological order deterministically.
  return scored
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
    .sort((a, b) => a.d.created_at - b.d.created_at || a.i - b.i)
    .map((s) => s.d);
}
|
|
1267
|
+
|
|
1135
1268
|
// Build a synthetic message pair containing the distilled history.
|
|
1136
|
-
// Non-cached path — used by layers 2
|
|
1269
|
+
// Non-cached path — used by layers 2+ which already cause full cache invalidation.
|
|
1137
1270
|
function distilledPrefix(distillations: Distillation[]): MessageWithParts[] {
|
|
1138
1271
|
if (!distillations.length) return [];
|
|
1139
1272
|
const formatted = formatDistillations(distillations);
|
|
@@ -1324,7 +1457,7 @@ function tryFitStable(input: {
|
|
|
1324
1457
|
rawBudget: number;
|
|
1325
1458
|
sessionID: string;
|
|
1326
1459
|
sessState: SessionState;
|
|
1327
|
-
}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
|
|
1460
|
+
}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null {
|
|
1328
1461
|
// If the prefix already overflows its budget there's no point trying.
|
|
1329
1462
|
if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
|
|
1330
1463
|
return null;
|
|
@@ -1425,6 +1558,25 @@ function tryFitStable(input: {
|
|
|
1425
1558
|
|
|
1426
1559
|
export type SafetyLayer = 0 | 1 | 2 | 3 | 4;
|
|
1427
1560
|
|
|
1561
|
+
// --- Compression stage table ---
// Escalation path for layers 1-3, one entry per layer in order. Each entry
// is more aggressive than the one before it: more tool stripping, tighter
// budgets, fewer distillations. A new intermediate stage is a new entry.
type CompressionStage = {
  /** Which tool outputs get stripped at this stage. */
  strip: "none" | "old-tools" | "all-tools";
  /** Raw-message budget as a fraction of usable; null = use default rawBudget. */
  rawFrac: number | null;
  /** Distilled-prefix budget as a fraction of usable; null = use default distilledBudget. */
  distFrac: number | null;
  /** Maximum number of distillations kept; Infinity = keep all. */
  distLimit: number;
  /** Number of turns exempt from tool stripping. */
  protectedTurns: number;
  /** Whether to fit via tryFitStable (Approach B pin cache). */
  useStableWindow: boolean;
};

const COMPRESSION_STAGES: CompressionStage[] = [
  // Stage 0 → layer 1
  {
    strip: "none",
    rawFrac: null,
    distFrac: null,
    distLimit: Infinity,
    protectedTurns: 0,
    useStableWindow: true,
  },
  // Stage 1 → layer 2
  {
    strip: "old-tools",
    rawFrac: 0.50,
    distFrac: null,
    distLimit: Infinity,
    protectedTurns: 2,
    useStableWindow: false,
  },
  // Stage 2 → layer 3
  {
    strip: "all-tools",
    rawFrac: 0.55,
    distFrac: 0.15,
    distLimit: 5,
    protectedTurns: 0,
    useStableWindow: false,
  },
];
|
|
1579
|
+
|
|
1428
1580
|
export type TransformResult = {
|
|
1429
1581
|
messages: MessageWithParts[];
|
|
1430
1582
|
layer: SafetyLayer;
|
|
@@ -1435,6 +1587,10 @@ export type TransformResult = {
|
|
|
1435
1587
|
usable: number;
|
|
1436
1588
|
distilledBudget: number;
|
|
1437
1589
|
rawBudget: number;
|
|
1590
|
+
// Signals that the pipeline should re-run forSession() to refresh LTM
|
|
1591
|
+
// relevance scoring. Set on Layer 4 (emergency) where the context is
|
|
1592
|
+
// fully reset and mid-session knowledge may have changed relevance.
|
|
1593
|
+
refreshLtm: boolean;
|
|
1438
1594
|
};
|
|
1439
1595
|
|
|
1440
1596
|
// Per-session urgent distillation tracking.
|
|
@@ -1530,7 +1686,10 @@ function transformInner(input: {
|
|
|
1530
1686
|
// Pinning to the *actual* last layer prevents all downward oscillation.
|
|
1531
1687
|
// Only applied when calibrated (same session, per-session state) to avoid
|
|
1532
1688
|
// affecting other sessions including worker sessions.
|
|
1533
|
-
|
|
1689
|
+
// Layer 4 (emergency) already blows the cache — stickiness there just traps
|
|
1690
|
+
// the session at emergency permanently. Only apply stickiness for layers 1-3
|
|
1691
|
+
// where dropping back would bust a warm cache.
|
|
1692
|
+
if (calibrated && sessState.lastLayer >= 1 && sessState.lastLayer <= 3 && input.messages.length >= sessState.lastKnownMessageCount) {
|
|
1534
1693
|
effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer) as SafetyLayer;
|
|
1535
1694
|
}
|
|
1536
1695
|
|
|
@@ -1608,6 +1767,7 @@ function transformInner(input: {
|
|
|
1608
1767
|
usable,
|
|
1609
1768
|
distilledBudget,
|
|
1610
1769
|
rawBudget,
|
|
1770
|
+
refreshLtm: false,
|
|
1611
1771
|
};
|
|
1612
1772
|
}
|
|
1613
1773
|
|
|
@@ -1627,7 +1787,7 @@ function transformInner(input: {
|
|
|
1627
1787
|
|
|
1628
1788
|
// Layer 1 uses the append-only cached prefix (Approach C) to keep the
|
|
1629
1789
|
// distilled content byte-identical between distillation runs, preserving
|
|
1630
|
-
// the prompt cache. Layers 2
|
|
1790
|
+
// the prompt cache. Layers 2+ already cause full cache invalidation via
|
|
1631
1791
|
// tool stripping / message restructuring, so they use the non-cached path.
|
|
1632
1792
|
const cached = sid
|
|
1633
1793
|
? distilledPrefixCached(distillations, sid, sessState)
|
|
@@ -1636,79 +1796,71 @@ function transformInner(input: {
|
|
|
1636
1796
|
return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
|
|
1637
1797
|
})();
|
|
1638
1798
|
|
|
1639
|
-
//
|
|
1640
|
-
//
|
|
1641
|
-
//
|
|
1642
|
-
//
|
|
1643
|
-
//
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
strip: "none",
|
|
1662
|
-
});
|
|
1663
|
-
if (fitsWithSafetyMargin(layer1)) {
|
|
1664
|
-
if (cached.tokens === 0 && sid) {
|
|
1665
|
-
urgentDistillationMap.set(sid, true);
|
|
1666
|
-
}
|
|
1667
|
-
return { ...layer1!, layer: 1, usable, distilledBudget, rawBudget };
|
|
1799
|
+
// --- Compression stages (layers 1-3) ---
|
|
1800
|
+
// Data-driven table replaces three hardcoded layer blocks. Each stage
|
|
1801
|
+
// escalates tool stripping and/or tightens distillation budgets.
|
|
1802
|
+
// Stage 0 (layer 1): stable window (Approach B), no stripping
|
|
1803
|
+
// Stage 1 (layer 2): strip old tool outputs, protect last 2 turns
|
|
1804
|
+
// Stage 2 (layer 3): strip ALL tool outputs, keep only 5 distillations
|
|
1805
|
+
for (let s = 0; s < COMPRESSION_STAGES.length; s++) {
|
|
1806
|
+
const stageLayer = (s + 1) as SafetyLayer;
|
|
1807
|
+
if (effectiveMinLayer > stageLayer) continue;
|
|
1808
|
+
|
|
1809
|
+
const stage = COMPRESSION_STAGES[s];
|
|
1810
|
+
const stageRawBudget = stage.rawFrac !== null ? Math.floor(usable * stage.rawFrac) : rawBudget;
|
|
1811
|
+
const stageDistBudget = stage.distFrac !== null ? Math.floor(usable * stage.distFrac) : distilledBudget;
|
|
1812
|
+
|
|
1813
|
+
// Determine prefix: if distLimit is finite, re-render with trimmed distillations.
|
|
1814
|
+
// Otherwise use the cached prefix (Approach C, byte-identical for cache).
|
|
1815
|
+
let stagePrefix = cached.messages;
|
|
1816
|
+
let stagePrefixTokens = cached.tokens;
|
|
1817
|
+
if (stage.distLimit !== Infinity && distillations.length > stage.distLimit) {
|
|
1818
|
+
const trimmed = selectDistillations(distillations, stage.distLimit);
|
|
1819
|
+
stagePrefix = distilledPrefix(trimmed);
|
|
1820
|
+
stagePrefixTokens = stagePrefix.reduce((sum, m) => sum + estimateMessage(m), 0);
|
|
1668
1821
|
}
|
|
1669
|
-
}
|
|
1670
1822
|
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1823
|
+
// Stage 0 (layer 1) uses tryFitStable for Approach B pin cache.
|
|
1824
|
+
// Higher stages reset the raw window cache and use plain tryFit.
|
|
1825
|
+
let result: Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null;
|
|
1826
|
+
if (stage.useStableWindow && sid) {
|
|
1827
|
+
result = tryFitStable({
|
|
1828
|
+
messages: dedupMessages,
|
|
1829
|
+
prefix: stagePrefix,
|
|
1830
|
+
prefixTokens: stagePrefixTokens,
|
|
1831
|
+
distilledBudget: stageDistBudget,
|
|
1832
|
+
rawBudget: stageRawBudget,
|
|
1833
|
+
sessionID: sid,
|
|
1834
|
+
sessState,
|
|
1835
|
+
});
|
|
1836
|
+
} else {
|
|
1837
|
+
// Reset raw window cache when leaving stage 0 — higher stages use full
|
|
1838
|
+
// scans and already break the prompt cache. Must fire even when stage 1
|
|
1839
|
+
// is skipped via effectiveMinLayer (e.g. forceMinLayer = 3).
|
|
1840
|
+
sessState.rawWindowCache = null;
|
|
1841
|
+
result = tryFit({
|
|
1842
|
+
messages: dedupMessages,
|
|
1843
|
+
prefix: stagePrefix,
|
|
1844
|
+
prefixTokens: stagePrefixTokens,
|
|
1845
|
+
distilledBudget: stageDistBudget,
|
|
1846
|
+
rawBudget: stageRawBudget,
|
|
1847
|
+
strip: stage.strip,
|
|
1848
|
+
protectedTurns: stage.protectedTurns,
|
|
1849
|
+
});
|
|
1850
|
+
}
|
|
1674
1851
|
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
distilledBudget,
|
|
1683
|
-
rawBudget: Math.floor(usable * 0.5), // give raw more room
|
|
1684
|
-
strip: "old-tools",
|
|
1685
|
-
protectedTurns: 2,
|
|
1686
|
-
});
|
|
1687
|
-
if (fitsWithSafetyMargin(layer2)) {
|
|
1688
|
-
if (sid) urgentDistillationMap.set(sid, true);
|
|
1689
|
-
return { ...layer2!, layer: 2, usable, distilledBudget, rawBudget };
|
|
1852
|
+
if (fitsWithSafetyMargin(result)) {
|
|
1853
|
+
// Trigger urgent distillation when: (a) a higher stage (s > 0) fit, since those always need it, or
|
|
1854
|
+
// (b) stage 0 fit with no distillations yet, i.e. the first time in gradient mode.
|
|
1855
|
+
if (sid && (s > 0 || cached.tokens === 0)) {
|
|
1856
|
+
urgentDistillationMap.set(sid, true);
|
|
1857
|
+
}
|
|
1858
|
+
return { ...result!, layer: stageLayer, usable, distilledBudget, rawBudget, refreshLtm: false };
|
|
1690
1859
|
}
|
|
1691
1860
|
}
|
|
1692
1861
|
|
|
1693
|
-
//
|
|
1694
|
-
|
|
1695
|
-
const trimmedPrefix = distilledPrefix(trimmedDistillations);
|
|
1696
|
-
const trimmedPrefixTokens = trimmedPrefix.reduce(
|
|
1697
|
-
(sum, m) => sum + estimateMessage(m),
|
|
1698
|
-
0,
|
|
1699
|
-
);
|
|
1700
|
-
const layer3 = tryFit({
|
|
1701
|
-
messages: dedupMessages,
|
|
1702
|
-
prefix: trimmedPrefix,
|
|
1703
|
-
prefixTokens: trimmedPrefixTokens,
|
|
1704
|
-
distilledBudget: Math.floor(usable * 0.15),
|
|
1705
|
-
rawBudget: Math.floor(usable * 0.55),
|
|
1706
|
-
strip: "all-tools",
|
|
1707
|
-
});
|
|
1708
|
-
if (fitsWithSafetyMargin(layer3)) {
|
|
1709
|
-
if (sid) urgentDistillationMap.set(sid, true);
|
|
1710
|
-
return { ...layer3!, layer: 3, usable, distilledBudget, rawBudget };
|
|
1711
|
-
}
|
|
1862
|
+
// All compression stages exhausted — reset raw window cache before emergency.
|
|
1863
|
+
sessState.rawWindowCache = null;
|
|
1712
1864
|
|
|
1713
1865
|
// Layer 4: Emergency — last 2 distillations + token-budget raw tail.
|
|
1714
1866
|
// We do NOT strip tool parts here: doing so would cause an infinite tool-call loop because
|
|
@@ -1724,7 +1876,7 @@ function transformInner(input: {
|
|
|
1724
1876
|
// and must always return. Remaining budget is filled backward with older
|
|
1725
1877
|
// messages.
|
|
1726
1878
|
if (sid) urgentDistillationMap.set(sid, true);
|
|
1727
|
-
const nuclearDistillations = distillations
|
|
1879
|
+
const nuclearDistillations = selectDistillations(distillations, 2);
|
|
1728
1880
|
const nuclearPrefix = distilledPrefix(nuclearDistillations);
|
|
1729
1881
|
const nuclearPrefixTokens = nuclearPrefix.reduce(
|
|
1730
1882
|
(sum, m) => sum + estimateMessage(m),
|
|
@@ -1773,6 +1925,7 @@ function transformInner(input: {
|
|
|
1773
1925
|
usable,
|
|
1774
1926
|
distilledBudget,
|
|
1775
1927
|
rawBudget,
|
|
1928
|
+
refreshLtm: true,
|
|
1776
1929
|
};
|
|
1777
1930
|
}
|
|
1778
1931
|
|
|
@@ -1893,7 +2046,7 @@ function tryFit(input: {
|
|
|
1893
2046
|
rawBudget: number;
|
|
1894
2047
|
strip: "none" | "old-tools" | "all-tools";
|
|
1895
2048
|
protectedTurns?: number;
|
|
1896
|
-
}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
|
|
2049
|
+
}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null {
|
|
1897
2050
|
// If distilled prefix exceeds its budget, fail this layer
|
|
1898
2051
|
if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
|
|
1899
2052
|
return null;
|