@loreai/core 0.18.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/curator.d.ts.map +1 -1
- package/dist/bun/db.d.ts +73 -0
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +2 -13
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding.d.ts +5 -1
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +9 -0
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/index.d.ts +2 -2
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +817 -99
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/ltm.d.ts +99 -5
- package/dist/bun/ltm.d.ts.map +1 -1
- package/dist/bun/session-limiter.d.ts +26 -0
- package/dist/bun/session-limiter.d.ts.map +1 -0
- package/dist/bun/temporal.d.ts +2 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/node/curator.d.ts.map +1 -1
- package/dist/node/db.d.ts +73 -0
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +2 -13
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding.d.ts +5 -1
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +9 -0
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/index.d.ts +2 -2
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +817 -99
- package/dist/node/index.js.map +4 -4
- package/dist/node/ltm.d.ts +99 -5
- package/dist/node/ltm.d.ts.map +1 -1
- package/dist/node/session-limiter.d.ts +26 -0
- package/dist/node/session-limiter.d.ts.map +1 -0
- package/dist/node/temporal.d.ts +2 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/types/curator.d.ts.map +1 -1
- package/dist/types/db.d.ts +73 -0
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +2 -13
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +5 -1
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +9 -0
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/index.d.ts +2 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/ltm.d.ts +99 -5
- package/dist/types/ltm.d.ts.map +1 -1
- package/dist/types/session-limiter.d.ts +26 -0
- package/dist/types/session-limiter.d.ts.map +1 -0
- package/dist/types/temporal.d.ts +2 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/package.json +2 -1
- package/src/curator.ts +54 -2
- package/src/db.ts +347 -0
- package/src/distillation.ts +55 -14
- package/src/embedding.ts +28 -3
- package/src/gradient.ts +183 -74
- package/src/index.ts +8 -0
- package/src/ltm.ts +480 -45
- package/src/session-limiter.ts +47 -0
- package/src/temporal.ts +10 -0
package/src/embedding.ts
CHANGED
|
@@ -272,7 +272,21 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
272
272
|
workerUrl = vendorWorkerUrl;
|
|
273
273
|
}
|
|
274
274
|
} else {
|
|
275
|
-
|
|
275
|
+
// In CJS bundles (gateway npm package), esbuild shims import.meta as
|
|
276
|
+
// an empty object {}, so import.meta.url is undefined. Fall back to
|
|
277
|
+
// __filename, which Node.js provides in CommonJS module scope.
|
|
278
|
+
const selfUrl = typeof import.meta.url === "string" ? import.meta.url : undefined;
|
|
279
|
+
if (selfUrl) {
|
|
280
|
+
workerUrl = new URL(
|
|
281
|
+
`./embedding-worker${selfUrl.endsWith(".ts") ? ".ts" : ".js"}`,
|
|
282
|
+
selfUrl,
|
|
283
|
+
);
|
|
284
|
+
} else {
|
|
285
|
+
// CJS fallback: __filename is provided by Node.js in CommonJS module scope.
|
|
286
|
+
// The embedding-worker.cjs is built alongside the main bundle.
|
|
287
|
+
const { pathToFileURL } = await import("node:url");
|
|
288
|
+
workerUrl = new URL("./embedding-worker.cjs", pathToFileURL(__filename));
|
|
289
|
+
}
|
|
276
290
|
}
|
|
277
291
|
|
|
278
292
|
const vendor = vendorModelInfo();
|
|
@@ -706,14 +720,25 @@ type VectorHit = { id: string; similarity: number };
|
|
|
706
720
|
* Search all knowledge entries with embeddings by cosine similarity.
|
|
707
721
|
* Returns top-k entries sorted by similarity descending.
|
|
708
722
|
* Pure brute-force — fine for <100 entries (microseconds).
|
|
723
|
+
*
|
|
724
|
+
* @param excludeCategories Optional category names to exclude from results.
|
|
725
|
+
* Useful when preferences are injected in a separate system block and
|
|
726
|
+
* shouldn't compete for vector search slots with context-bound entries.
|
|
709
727
|
*/
|
|
710
728
|
export function vectorSearch(
|
|
711
729
|
queryEmbedding: Float32Array,
|
|
712
730
|
limit = 10,
|
|
731
|
+
excludeCategories?: string[],
|
|
713
732
|
): VectorHit[] {
|
|
733
|
+
let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
|
|
734
|
+
const params: string[] = [];
|
|
735
|
+
if (excludeCategories?.length) {
|
|
736
|
+
sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
|
|
737
|
+
params.push(...excludeCategories);
|
|
738
|
+
}
|
|
714
739
|
const rows = db()
|
|
715
|
-
.query(
|
|
716
|
-
.all() as Array<{ id: string; embedding: Buffer }>;
|
|
740
|
+
.query(sql)
|
|
741
|
+
.all(...params) as Array<{ id: string; embedding: Buffer }>;
|
|
717
742
|
|
|
718
743
|
const scored: VectorHit[] = [];
|
|
719
744
|
for (const row of rows) {
|
package/src/gradient.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { LoreMessage, LorePart, LoreMessageWithParts, LoreToolPart, LoreTextPart, LoreToolState, LoreToolStateCompleted } from "./types";
|
|
2
2
|
import { isTextPart, isReasoningPart, isToolPart } from "./types";
|
|
3
|
-
import { db, ensureProject, loadForceMinLayer, saveForceMinLayer } from "./db";
|
|
3
|
+
import { db, ensureProject, loadForceMinLayer, saveForceMinLayer, saveSessionTracking, loadSessionTracking } from "./db";
|
|
4
4
|
import { config } from "./config";
|
|
5
5
|
import { formatDistillations } from "./prompt";
|
|
6
6
|
import { normalize } from "./markdown";
|
|
@@ -319,6 +319,27 @@ function getSessionState(sessionID: string): SessionState {
|
|
|
319
319
|
// forceMinLayer=2, but if OpenCode restarts before the next turn,
|
|
320
320
|
// the in-memory escalation would be lost without this.
|
|
321
321
|
state.forceMinLayer = loadForceMinLayer(sessionID) as SafetyLayer;
|
|
322
|
+
|
|
323
|
+
// Restore gradient calibration state from DB (v24) — avoids uncalibrated
|
|
324
|
+
// first turns after restart. Without this, dynamicContextCap reverts to
|
|
325
|
+
// the static ceiling, bustRateEMA is uninitialized, and lastTurnAt=0
|
|
326
|
+
// prevents onIdleResume() from detecting idle gaps.
|
|
327
|
+
//
|
|
328
|
+
// Atomic restore: lastTurnAt > 0 is the proxy for "gradient state was
|
|
329
|
+
// ever flushed to DB". Restore all fields together or none — avoids
|
|
330
|
+
// per-field sentinel fragility where a valid value (e.g. lastLayer=0)
|
|
331
|
+
// could be mistaken for "never persisted".
|
|
332
|
+
const persisted = loadSessionTracking(sessionID);
|
|
333
|
+
if (persisted && persisted.lastTurnAt > 0) {
|
|
334
|
+
state.dynamicContextCap = persisted.dynamicContextCap;
|
|
335
|
+
state.bustRateEMA = persisted.bustRateEMA;
|
|
336
|
+
state.interBustIntervalEMA = persisted.interBustIntervalEMA;
|
|
337
|
+
state.lastLayer = persisted.lastLayer as SafetyLayer;
|
|
338
|
+
state.lastKnownInput = persisted.lastKnownInput;
|
|
339
|
+
state.lastTurnAt = persisted.lastTurnAt;
|
|
340
|
+
state.lastBustAt = persisted.lastBustAt;
|
|
341
|
+
}
|
|
342
|
+
|
|
322
343
|
sessionStates.set(sessionID, state);
|
|
323
344
|
}
|
|
324
345
|
return state;
|
|
@@ -606,6 +627,28 @@ export function setLastTurnAtForTest(sessionID: string, ms: number): void {
|
|
|
606
627
|
getSessionState(sessionID).lastTurnAt = ms;
|
|
607
628
|
}
|
|
608
629
|
|
|
630
|
+
/**
|
|
631
|
+
* Persist gradient calibration state to the session_state table.
|
|
632
|
+
*
|
|
633
|
+
* Designed to be called periodically (e.g. every 30s from the idle scheduler
|
|
634
|
+
* tick) rather than on every mutation, to avoid write amplification on the
|
|
635
|
+
* hot path. Max data loss on crash is one tick interval (~30s).
|
|
636
|
+
*/
|
|
637
|
+
export function saveGradientState(sessionID: string): void {
|
|
638
|
+
const state = sessionStates.get(sessionID);
|
|
639
|
+
if (!state) return;
|
|
640
|
+
|
|
641
|
+
saveSessionTracking(sessionID, {
|
|
642
|
+
dynamicContextCap: state.dynamicContextCap,
|
|
643
|
+
bustRateEMA: state.bustRateEMA,
|
|
644
|
+
interBustIntervalEMA: state.interBustIntervalEMA,
|
|
645
|
+
lastLayer: state.lastLayer,
|
|
646
|
+
lastKnownInput: state.lastKnownInput,
|
|
647
|
+
lastTurnAt: state.lastTurnAt,
|
|
648
|
+
lastBustAt: state.lastBustAt,
|
|
649
|
+
});
|
|
650
|
+
}
|
|
651
|
+
|
|
609
652
|
type Distillation = {
|
|
610
653
|
id: string;
|
|
611
654
|
observations: string;
|
|
@@ -1132,8 +1175,54 @@ function buildPrefixMessages(formatted: string): MessageWithParts[] {
|
|
|
1132
1175
|
];
|
|
1133
1176
|
}
|
|
1134
1177
|
|
|
1178
|
+
// --- Importance-aware distillation selection ---
|
|
1179
|
+
//
|
|
1180
|
+
// When a compression stage limits distillation count (distLimit < Infinity),
|
|
1181
|
+
// selects the most valuable distillations rather than blindly taking the last N.
|
|
1182
|
+
// Scoring: 70% recency (position in chronological order) + 30% content signal.
|
|
1183
|
+
// Results are re-sorted chronologically after selection so the prefix cache
|
|
1184
|
+
// (Approach C) remains byte-stable when the same distillations are selected.
|
|
1185
|
+
//
|
|
1186
|
+
// Content signals (lightweight keyword detection, no LLM call; raw bonuses, scaled by 0.3 in the final score):
|
|
1187
|
+
// - Decisions: "decision"/"decided"/"chose"/"chosen"/"agreed" → +0.3
|
|
1188
|
+
// - Gotchas/bugs: "gotcha"/"critical|known|subtle bug"/"broken"/"crash(ed|es)"/"regression" → +0.2
|
|
1189
|
+
// - Architecture: "architecture"/"design decision"/"design pattern"/"system design" → +0.1
|
|
1190
|
+
// - Meta-distilled (gen >= 1): +0.2 (consolidation = higher value density)
|
|
1191
|
+
|
|
1192
|
+
const DECISION_RE = /\b(?:decision|decided|chose|chosen|agreed)\b/i;
|
|
1193
|
+
const GOTCHA_RE = /\b(?:gotcha|(?:critical|known|subtle)\s+bug|broken|crash(?:ed|es)?|regression)\b/i;
|
|
1194
|
+
const ARCH_RE = /\b(?:architecture|design.(?:decision|pattern)|system.design)\b/i;
|
|
1195
|
+
|
|
1196
|
+
function importanceBonus(d: Distillation): number {
|
|
1197
|
+
let bonus = 0;
|
|
1198
|
+
if (DECISION_RE.test(d.observations)) bonus += 0.3;
|
|
1199
|
+
if (GOTCHA_RE.test(d.observations)) bonus += 0.2;
|
|
1200
|
+
if (ARCH_RE.test(d.observations)) bonus += 0.1;
|
|
1201
|
+
if (d.generation >= 1) bonus += 0.2;
|
|
1202
|
+
return Math.min(bonus, 1.0);
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
function selectDistillations(all: Distillation[], limit: number): Distillation[] {
|
|
1206
|
+
if (all.length <= limit) return all;
|
|
1207
|
+
|
|
1208
|
+
// Recency: normalize to [0, 0.7] where oldest = 0.0, newest = 0.7.
|
|
1209
|
+
// Use (length - 1) as divisor so the last entry gets full recency weight.
|
|
1210
|
+
const maxIdx = all.length - 1;
|
|
1211
|
+
const scored = all.map((d, i) => ({
|
|
1212
|
+
d,
|
|
1213
|
+
score: (maxIdx > 0 ? (i / maxIdx) : 1) * 0.7 + importanceBonus(d) * 0.3,
|
|
1214
|
+
}));
|
|
1215
|
+
|
|
1216
|
+
// Keep top N by score, then re-sort chronologically (cache-safe).
|
|
1217
|
+
return scored
|
|
1218
|
+
.sort((a, b) => b.score - a.score)
|
|
1219
|
+
.slice(0, limit)
|
|
1220
|
+
.map((s) => s.d)
|
|
1221
|
+
.sort((a, b) => a.created_at - b.created_at);
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1135
1224
|
// Build a synthetic message pair containing the distilled history.
|
|
1136
|
-
// Non-cached path — used by layers 2
|
|
1225
|
+
// Non-cached path — used by layers 2+ which already cause full cache invalidation.
|
|
1137
1226
|
function distilledPrefix(distillations: Distillation[]): MessageWithParts[] {
|
|
1138
1227
|
if (!distillations.length) return [];
|
|
1139
1228
|
const formatted = formatDistillations(distillations);
|
|
@@ -1324,7 +1413,7 @@ function tryFitStable(input: {
|
|
|
1324
1413
|
rawBudget: number;
|
|
1325
1414
|
sessionID: string;
|
|
1326
1415
|
sessState: SessionState;
|
|
1327
|
-
}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
|
|
1416
|
+
}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null {
|
|
1328
1417
|
// If the prefix already overflows its budget there's no point trying.
|
|
1329
1418
|
if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
|
|
1330
1419
|
return null;
|
|
@@ -1425,6 +1514,25 @@ function tryFitStable(input: {
|
|
|
1425
1514
|
|
|
1426
1515
|
export type SafetyLayer = 0 | 1 | 2 | 3 | 4;
|
|
1427
1516
|
|
|
1517
|
+
// --- Compression stage table ---
|
|
1518
|
+
// Defines the escalation path for layers 1-3. Each stage tries increasingly
|
|
1519
|
+
// aggressive compression: tool stripping, tighter budgets, distillation trimming.
|
|
1520
|
+
// Adding a new intermediate stage = one table entry.
|
|
1521
|
+
type CompressionStage = {
|
|
1522
|
+
strip: "none" | "old-tools" | "all-tools";
|
|
1523
|
+
rawFrac: number | null; // fraction of usable; null = use default rawBudget
|
|
1524
|
+
distFrac: number | null; // fraction of usable; null = use default distilledBudget
|
|
1525
|
+
distLimit: number; // Infinity = all; N = keep the N highest-scoring (see selectDistillations)
|
|
1526
|
+
protectedTurns: number; // turns exempt from tool stripping
|
|
1527
|
+
useStableWindow: boolean; // use tryFitStable (Approach B pin cache)
|
|
1528
|
+
};
|
|
1529
|
+
|
|
1530
|
+
const COMPRESSION_STAGES: CompressionStage[] = [
|
|
1531
|
+
{ strip: "none", rawFrac: null, distFrac: null, distLimit: Infinity, protectedTurns: 0, useStableWindow: true },
|
|
1532
|
+
{ strip: "old-tools", rawFrac: 0.50, distFrac: null, distLimit: Infinity, protectedTurns: 2, useStableWindow: false },
|
|
1533
|
+
{ strip: "all-tools", rawFrac: 0.55, distFrac: 0.15, distLimit: 5, protectedTurns: 0, useStableWindow: false },
|
|
1534
|
+
];
|
|
1535
|
+
|
|
1428
1536
|
export type TransformResult = {
|
|
1429
1537
|
messages: MessageWithParts[];
|
|
1430
1538
|
layer: SafetyLayer;
|
|
@@ -1435,6 +1543,10 @@ export type TransformResult = {
|
|
|
1435
1543
|
usable: number;
|
|
1436
1544
|
distilledBudget: number;
|
|
1437
1545
|
rawBudget: number;
|
|
1546
|
+
// Signals that the pipeline should re-run forSession() to refresh LTM
|
|
1547
|
+
// relevance scoring. Set on Layer 4 (emergency) where the context is
|
|
1548
|
+
// fully reset and mid-session knowledge may have changed relevance.
|
|
1549
|
+
refreshLtm: boolean;
|
|
1438
1550
|
};
|
|
1439
1551
|
|
|
1440
1552
|
// Per-session urgent distillation tracking.
|
|
@@ -1530,7 +1642,10 @@ function transformInner(input: {
|
|
|
1530
1642
|
// Pinning to the *actual* last layer prevents all downward oscillation.
|
|
1531
1643
|
// Only applied when calibrated (same session, per-session state) to avoid
|
|
1532
1644
|
// affecting other sessions including worker sessions.
|
|
1533
|
-
|
|
1645
|
+
// Layer 4 (emergency) already blows the cache — stickiness there just traps
|
|
1646
|
+
// the session at emergency permanently. Only apply stickiness for layers 1-3
|
|
1647
|
+
// where dropping back would bust a warm cache.
|
|
1648
|
+
if (calibrated && sessState.lastLayer >= 1 && sessState.lastLayer <= 3 && input.messages.length >= sessState.lastKnownMessageCount) {
|
|
1534
1649
|
effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer) as SafetyLayer;
|
|
1535
1650
|
}
|
|
1536
1651
|
|
|
@@ -1608,6 +1723,7 @@ function transformInner(input: {
|
|
|
1608
1723
|
usable,
|
|
1609
1724
|
distilledBudget,
|
|
1610
1725
|
rawBudget,
|
|
1726
|
+
refreshLtm: false,
|
|
1611
1727
|
};
|
|
1612
1728
|
}
|
|
1613
1729
|
|
|
@@ -1627,7 +1743,7 @@ function transformInner(input: {
|
|
|
1627
1743
|
|
|
1628
1744
|
// Layer 1 uses the append-only cached prefix (Approach C) to keep the
|
|
1629
1745
|
// distilled content byte-identical between distillation runs, preserving
|
|
1630
|
-
// the prompt cache. Layers 2
|
|
1746
|
+
// the prompt cache. Layers 2+ already cause full cache invalidation via
|
|
1631
1747
|
// tool stripping / message restructuring, so they use the non-cached path.
|
|
1632
1748
|
const cached = sid
|
|
1633
1749
|
? distilledPrefixCached(distillations, sid, sessState)
|
|
@@ -1636,79 +1752,71 @@ function transformInner(input: {
|
|
|
1636
1752
|
return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
|
|
1637
1753
|
})();
|
|
1638
1754
|
|
|
1639
|
-
//
|
|
1640
|
-
//
|
|
1641
|
-
//
|
|
1642
|
-
//
|
|
1643
|
-
//
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
strip: "none",
|
|
1662
|
-
});
|
|
1663
|
-
if (fitsWithSafetyMargin(layer1)) {
|
|
1664
|
-
if (cached.tokens === 0 && sid) {
|
|
1665
|
-
urgentDistillationMap.set(sid, true);
|
|
1666
|
-
}
|
|
1667
|
-
return { ...layer1!, layer: 1, usable, distilledBudget, rawBudget };
|
|
1755
|
+
// --- Compression stages (layers 1-3) ---
|
|
1756
|
+
// Data-driven table replaces three hardcoded layer blocks. Each stage
|
|
1757
|
+
// escalates tool stripping and/or tightens distillation budgets.
|
|
1758
|
+
// Stage 0 (layer 1): stable window (Approach B), no stripping
|
|
1759
|
+
// Stage 1 (layer 2): strip old tool outputs, protect last 2 turns
|
|
1760
|
+
// Stage 2 (layer 3): strip ALL tool outputs, keep only 5 distillations
|
|
1761
|
+
for (let s = 0; s < COMPRESSION_STAGES.length; s++) {
|
|
1762
|
+
const stageLayer = (s + 1) as SafetyLayer;
|
|
1763
|
+
if (effectiveMinLayer > stageLayer) continue;
|
|
1764
|
+
|
|
1765
|
+
const stage = COMPRESSION_STAGES[s];
|
|
1766
|
+
const stageRawBudget = stage.rawFrac !== null ? Math.floor(usable * stage.rawFrac) : rawBudget;
|
|
1767
|
+
const stageDistBudget = stage.distFrac !== null ? Math.floor(usable * stage.distFrac) : distilledBudget;
|
|
1768
|
+
|
|
1769
|
+
// Determine prefix: if distLimit is finite, re-render with trimmed distillations.
|
|
1770
|
+
// Otherwise use the cached prefix (Approach C, byte-identical for cache).
|
|
1771
|
+
let stagePrefix = cached.messages;
|
|
1772
|
+
let stagePrefixTokens = cached.tokens;
|
|
1773
|
+
if (stage.distLimit !== Infinity && distillations.length > stage.distLimit) {
|
|
1774
|
+
const trimmed = selectDistillations(distillations, stage.distLimit);
|
|
1775
|
+
stagePrefix = distilledPrefix(trimmed);
|
|
1776
|
+
stagePrefixTokens = stagePrefix.reduce((sum, m) => sum + estimateMessage(m), 0);
|
|
1668
1777
|
}
|
|
1669
|
-
}
|
|
1670
1778
|
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1779
|
+
// Stage 0 (layer 1) uses tryFitStable for Approach B pin cache.
|
|
1780
|
+
// Higher stages reset the raw window cache and use plain tryFit.
|
|
1781
|
+
let result: Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null;
|
|
1782
|
+
if (stage.useStableWindow && sid) {
|
|
1783
|
+
result = tryFitStable({
|
|
1784
|
+
messages: dedupMessages,
|
|
1785
|
+
prefix: stagePrefix,
|
|
1786
|
+
prefixTokens: stagePrefixTokens,
|
|
1787
|
+
distilledBudget: stageDistBudget,
|
|
1788
|
+
rawBudget: stageRawBudget,
|
|
1789
|
+
sessionID: sid,
|
|
1790
|
+
sessState,
|
|
1791
|
+
});
|
|
1792
|
+
} else {
|
|
1793
|
+
// Reset raw window cache when leaving stage 0 — higher stages use full
|
|
1794
|
+
// scans and already break the prompt cache. Must fire even when stage 1
|
|
1795
|
+
// is skipped via effectiveMinLayer (e.g. forceMinLayer = 3).
|
|
1796
|
+
sessState.rawWindowCache = null;
|
|
1797
|
+
result = tryFit({
|
|
1798
|
+
messages: dedupMessages,
|
|
1799
|
+
prefix: stagePrefix,
|
|
1800
|
+
prefixTokens: stagePrefixTokens,
|
|
1801
|
+
distilledBudget: stageDistBudget,
|
|
1802
|
+
rawBudget: stageRawBudget,
|
|
1803
|
+
strip: stage.strip,
|
|
1804
|
+
protectedTurns: stage.protectedTurns,
|
|
1805
|
+
});
|
|
1806
|
+
}
|
|
1674
1807
|
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
distilledBudget,
|
|
1683
|
-
rawBudget: Math.floor(usable * 0.5), // give raw more room
|
|
1684
|
-
strip: "old-tools",
|
|
1685
|
-
protectedTurns: 2,
|
|
1686
|
-
});
|
|
1687
|
-
if (fitsWithSafetyMargin(layer2)) {
|
|
1688
|
-
if (sid) urgentDistillationMap.set(sid, true);
|
|
1689
|
-
return { ...layer2!, layer: 2, usable, distilledBudget, rawBudget };
|
|
1808
|
+
if (fitsWithSafetyMargin(result)) {
|
|
1809
|
+
// Trigger urgent distillation when: (a) higher stages always need it, or
|
|
1810
|
+
// (b) stage 0 with no distillations = first time in gradient mode.
|
|
1811
|
+
if (sid && (s > 0 || cached.tokens === 0)) {
|
|
1812
|
+
urgentDistillationMap.set(sid, true);
|
|
1813
|
+
}
|
|
1814
|
+
return { ...result!, layer: stageLayer, usable, distilledBudget, rawBudget, refreshLtm: false };
|
|
1690
1815
|
}
|
|
1691
1816
|
}
|
|
1692
1817
|
|
|
1693
|
-
//
|
|
1694
|
-
|
|
1695
|
-
const trimmedPrefix = distilledPrefix(trimmedDistillations);
|
|
1696
|
-
const trimmedPrefixTokens = trimmedPrefix.reduce(
|
|
1697
|
-
(sum, m) => sum + estimateMessage(m),
|
|
1698
|
-
0,
|
|
1699
|
-
);
|
|
1700
|
-
const layer3 = tryFit({
|
|
1701
|
-
messages: dedupMessages,
|
|
1702
|
-
prefix: trimmedPrefix,
|
|
1703
|
-
prefixTokens: trimmedPrefixTokens,
|
|
1704
|
-
distilledBudget: Math.floor(usable * 0.15),
|
|
1705
|
-
rawBudget: Math.floor(usable * 0.55),
|
|
1706
|
-
strip: "all-tools",
|
|
1707
|
-
});
|
|
1708
|
-
if (fitsWithSafetyMargin(layer3)) {
|
|
1709
|
-
if (sid) urgentDistillationMap.set(sid, true);
|
|
1710
|
-
return { ...layer3!, layer: 3, usable, distilledBudget, rawBudget };
|
|
1711
|
-
}
|
|
1818
|
+
// All compression stages exhausted — reset raw window cache before emergency.
|
|
1819
|
+
sessState.rawWindowCache = null;
|
|
1712
1820
|
|
|
1713
1821
|
// Layer 4: Emergency — top 2 distillations (via selectDistillations) + token-budget raw tail.
|
|
1714
1822
|
// We do NOT strip tool parts here: doing so would cause an infinite tool-call loop because
|
|
@@ -1724,7 +1832,7 @@ function transformInner(input: {
|
|
|
1724
1832
|
// and must always return. Remaining budget is filled backward with older
|
|
1725
1833
|
// messages.
|
|
1726
1834
|
if (sid) urgentDistillationMap.set(sid, true);
|
|
1727
|
-
const nuclearDistillations = distillations
|
|
1835
|
+
const nuclearDistillations = selectDistillations(distillations, 2);
|
|
1728
1836
|
const nuclearPrefix = distilledPrefix(nuclearDistillations);
|
|
1729
1837
|
const nuclearPrefixTokens = nuclearPrefix.reduce(
|
|
1730
1838
|
(sum, m) => sum + estimateMessage(m),
|
|
@@ -1773,6 +1881,7 @@ function transformInner(input: {
|
|
|
1773
1881
|
usable,
|
|
1774
1882
|
distilledBudget,
|
|
1775
1883
|
rawBudget,
|
|
1884
|
+
refreshLtm: true,
|
|
1776
1885
|
};
|
|
1777
1886
|
}
|
|
1778
1887
|
|
|
@@ -1893,7 +2002,7 @@ function tryFit(input: {
|
|
|
1893
2002
|
rawBudget: number;
|
|
1894
2003
|
strip: "none" | "old-tools" | "all-tools";
|
|
1895
2004
|
protectedTurns?: number;
|
|
1896
|
-
}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
|
|
2005
|
+
}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null {
|
|
1897
2006
|
// If distilled prefix exceeds its budget, fail this layer
|
|
1898
2007
|
if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
|
|
1899
2008
|
return null;
|
package/src/index.ts
CHANGED
|
@@ -73,6 +73,13 @@ export {
|
|
|
73
73
|
loadSessionCosts,
|
|
74
74
|
loadAllSessionCosts,
|
|
75
75
|
type SessionCostSnapshot,
|
|
76
|
+
saveSessionTracking,
|
|
77
|
+
loadSessionTracking,
|
|
78
|
+
loadHeaderSessionIndex,
|
|
79
|
+
type SessionTrackingState,
|
|
80
|
+
type LoadedSessionTracking,
|
|
81
|
+
getKV,
|
|
82
|
+
setKV,
|
|
76
83
|
getMeta,
|
|
77
84
|
setMeta,
|
|
78
85
|
getInstanceId,
|
|
@@ -100,6 +107,7 @@ export {
|
|
|
100
107
|
onIdleResume,
|
|
101
108
|
getLastTurnAt,
|
|
102
109
|
consumeCameOutOfIdle,
|
|
110
|
+
saveGradientState,
|
|
103
111
|
// Test-only — exposed at the barrel so host-package tests can simulate idle
|
|
104
112
|
// gaps without sleeping. Not part of the public API.
|
|
105
113
|
setLastTurnAtForTest,
|