@loreai/core 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts +29 -8
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +1 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +29 -0
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding.d.ts +15 -1
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +53 -5
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/index.d.ts +4 -4
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +696 -243
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/pattern-extract.d.ts +36 -0
- package/dist/bun/pattern-extract.d.ts.map +1 -0
- package/dist/bun/recall.d.ts +1 -0
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +13 -1
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/types.d.ts +41 -1
- package/dist/bun/types.d.ts.map +1 -1
- package/dist/bun/worker-model.d.ts +22 -0
- package/dist/bun/worker-model.d.ts.map +1 -1
- package/dist/node/agents-file.d.ts +29 -8
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +1 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +29 -0
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding.d.ts +15 -1
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +53 -5
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/index.d.ts +4 -4
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +696 -243
- package/dist/node/index.js.map +4 -4
- package/dist/node/pattern-extract.d.ts +36 -0
- package/dist/node/pattern-extract.d.ts.map +1 -0
- package/dist/node/recall.d.ts +1 -0
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +13 -1
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/types.d.ts +41 -1
- package/dist/node/types.d.ts.map +1 -1
- package/dist/node/worker-model.d.ts +22 -0
- package/dist/node/worker-model.d.ts.map +1 -1
- package/dist/types/agents-file.d.ts +29 -8
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +1 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +29 -0
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +15 -1
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +53 -5
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/index.d.ts +4 -4
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/pattern-extract.d.ts +36 -0
- package/dist/types/pattern-extract.d.ts.map +1 -0
- package/dist/types/recall.d.ts +1 -0
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +13 -1
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/types.d.ts +41 -1
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/worker-model.d.ts +22 -0
- package/dist/types/worker-model.d.ts.map +1 -1
- package/package.json +3 -2
- package/src/agents-file.ts +111 -28
- package/src/config.ts +25 -18
- package/src/curator.ts +2 -2
- package/src/db.ts +19 -2
- package/src/distillation.ts +152 -15
- package/src/embedding.ts +158 -14
- package/src/gradient.ts +398 -227
- package/src/index.ts +13 -5
- package/src/pattern-extract.ts +108 -0
- package/src/recall.ts +124 -6
- package/src/search.ts +37 -1
- package/src/types.ts +41 -1
- package/src/worker-model.ts +142 -5
package/src/gradient.ts
CHANGED
|
@@ -72,6 +72,13 @@ let calibratedOverhead: number | null = null;
|
|
|
72
72
|
// response via UNCALIBRATED_SAFETY.
|
|
73
73
|
// ---------------------------------------------------------------------------
|
|
74
74
|
|
|
75
|
+
type DistillationSnapshot = {
|
|
76
|
+
/** Cached distillation rows from the most recent DB read */
|
|
77
|
+
rows: Distillation[];
|
|
78
|
+
/** ID of the last user message when this snapshot was taken */
|
|
79
|
+
lastUserMsgId: string | null;
|
|
80
|
+
};
|
|
81
|
+
|
|
75
82
|
type SessionState = {
|
|
76
83
|
/** Exact input token count from the last successful API response */
|
|
77
84
|
lastKnownInput: number;
|
|
@@ -89,6 +96,8 @@ type SessionState = {
|
|
|
89
96
|
forceMinLayer: SafetyLayer;
|
|
90
97
|
/** Token estimate from the most recent transform() output (compressed window) */
|
|
91
98
|
lastTransformEstimate: number;
|
|
99
|
+
/** LTM tokens injected for this session's current turn (per-session isolation) */
|
|
100
|
+
ltmTokens: number;
|
|
92
101
|
/** Distilled prefix cache (Approach C) */
|
|
93
102
|
prefixCache: PrefixCache | null;
|
|
94
103
|
/** Raw window pin cache (Approach B) */
|
|
@@ -112,10 +121,36 @@ type SessionState = {
|
|
|
112
121
|
* the post-idle turn regardless of conversation size.
|
|
113
122
|
*/
|
|
114
123
|
cameOutOfIdle: boolean;
|
|
124
|
+
/**
|
|
125
|
+
* Set true by onIdleResume() alongside cameOutOfIdle; consumed (and cleared)
|
|
126
|
+
* by transformInner() to activate the post-idle compact layer. When true AND
|
|
127
|
+
* distillations exist, transform skips layer 0 (full-raw passthrough) and
|
|
128
|
+
* uses a tighter raw budget for layer 1. Rationale: on a cold cache the
|
|
129
|
+
* entire context is a cache WRITE — a smaller total means lower write cost,
|
|
130
|
+
* and aggressive idle distillation already captured the older history.
|
|
131
|
+
*/
|
|
132
|
+
postIdleCompact: boolean;
|
|
115
133
|
/** Consecutive turns at layer >= 2. When >= 3, log a compaction hint. */
|
|
116
134
|
consecutiveHighLayer: number;
|
|
117
135
|
/** Hash of the first message IDs in the last transform output — for cache-bust diagnostics. */
|
|
118
136
|
lastPrefixHash: string;
|
|
137
|
+
/** Cumulative cache-bust count for this session (prefix hash changed between turns). */
|
|
138
|
+
bustCount: number;
|
|
139
|
+
/** Total transform() calls for this session — used with bustCount for rate calculation. */
|
|
140
|
+
transformCount: number;
|
|
141
|
+
/**
|
|
142
|
+
* Distillation row snapshot — cached to avoid hitting the DB on every
|
|
143
|
+
* transform() call. Refreshed only at turn boundaries (when a new user
|
|
144
|
+
* message appears) or on first call / idle resume. During autonomous
|
|
145
|
+
* tool-call chains this stays frozen, keeping the distilled prefix
|
|
146
|
+
* byte-identical across consecutive API calls and preserving the prompt
|
|
147
|
+
* cache.
|
|
148
|
+
*
|
|
149
|
+
* Cost context: each prefix refresh costs context_size × cache_write_price
|
|
150
|
+
* (~$1.88 per bust at 500K Sonnet). New distillations have near-zero
|
|
151
|
+
* marginal value mid-chain since the model already has raw messages.
|
|
152
|
+
*/
|
|
153
|
+
distillationSnapshot: DistillationSnapshot | null;
|
|
119
154
|
};
|
|
120
155
|
|
|
121
156
|
function makeSessionState(): SessionState {
|
|
@@ -128,12 +163,17 @@ function makeSessionState(): SessionState {
|
|
|
128
163
|
lastWindowMessageIDs: new Set(),
|
|
129
164
|
forceMinLayer: 0,
|
|
130
165
|
lastTransformEstimate: 0,
|
|
166
|
+
ltmTokens: 0,
|
|
131
167
|
prefixCache: null,
|
|
132
168
|
rawWindowCache: null,
|
|
133
169
|
lastTurnAt: 0,
|
|
134
170
|
cameOutOfIdle: false,
|
|
171
|
+
postIdleCompact: false,
|
|
135
172
|
consecutiveHighLayer: 0,
|
|
136
173
|
lastPrefixHash: "",
|
|
174
|
+
bustCount: 0,
|
|
175
|
+
transformCount: 0,
|
|
176
|
+
distillationSnapshot: null,
|
|
137
177
|
};
|
|
138
178
|
}
|
|
139
179
|
|
|
@@ -196,10 +236,22 @@ export function onIdleResume(
|
|
|
196
236
|
if (idleMs < thresholdMs) return { triggered: false };
|
|
197
237
|
state.prefixCache = null;
|
|
198
238
|
state.rawWindowCache = null;
|
|
239
|
+
state.distillationSnapshot = null;
|
|
199
240
|
state.cameOutOfIdle = true;
|
|
241
|
+
state.postIdleCompact = true;
|
|
200
242
|
return { triggered: true, idleMs };
|
|
201
243
|
}
|
|
202
244
|
|
|
245
|
+
/**
 * Look up the wall-clock timestamp (epoch ms) of the most recent transform()
 * call recorded for a session.
 *
 * Callers such as meta-distillation gating use this to judge whether the
 * upstream prompt cache is likely still warm.
 *
 * @param sessionID - Session whose state should be inspected.
 * @returns The session's `lastTurnAt`, or 0 when the session has no state yet.
 */
export function getLastTurnAt(sessionID: string): number {
  const state = sessionStates.get(sessionID);
  if (state === undefined || state === null) return 0;
  return state.lastTurnAt ?? 0;
}
|
|
254
|
+
|
|
203
255
|
/**
|
|
204
256
|
* Read-and-clear the cameOutOfIdle flag. The OpenCode host's LTM degraded-
|
|
205
257
|
* recovery branch consumes this to decide whether to bypass the
|
|
@@ -213,8 +265,9 @@ export function consumeCameOutOfIdle(sessionID: string): boolean {
|
|
|
213
265
|
}
|
|
214
266
|
|
|
215
267
|
// LTM tokens injected via system transform hook this turn.
|
|
216
|
-
//
|
|
217
|
-
|
|
268
|
+
// Per-session when a sessionID is provided (preferred), with a module-level
|
|
269
|
+
// fallback for callers that don't have a session ID.
|
|
270
|
+
let ltmTokensFallback = 0;
|
|
218
271
|
|
|
219
272
|
export function setModelLimits(limits: { context: number; output: number }) {
|
|
220
273
|
contextLimit = limits.context || 200_000;
|
|
@@ -248,14 +301,25 @@ export function computeLayer0Cap(
|
|
|
248
301
|
return Math.max(rawCap, MIN_LAYER0_FLOOR);
|
|
249
302
|
}
|
|
250
303
|
|
|
251
|
-
/** Called by the system transform hook after formatting LTM knowledge.
|
|
252
|
-
|
|
253
|
-
|
|
304
|
+
/**
 * Record the LTM token count produced by the system transform hook after it
 * has formatted LTM knowledge.
 *
 * With a `sessionID`, the count is stored on that session's state so that
 * concurrent sessions do not contaminate each other's budgets. The
 * module-level fallback is refreshed on every call (with or without a
 * session ID), so callers that read without a session ID see the latest value.
 *
 * @param tokens - LTM tokens injected for the current turn.
 * @param sessionID - Optional session to attribute the tokens to.
 */
export function setLtmTokens(tokens: number, sessionID?: string) {
  if (sessionID) {
    const sessionState = getSessionState(sessionID);
    sessionState.ltmTokens = tokens;
  }
  // Always keep the global in sync for callers that have no session ID.
  ltmTokensFallback = tokens;
}
|
|
255
314
|
|
|
256
|
-
/** Returns the
|
|
257
|
-
|
|
258
|
-
|
|
315
|
+
/**
 * Read the LTM token count for a session.
 *
 * @param sessionID - Optional session to read from.
 * @returns The session's own `ltmTokens` when the session has state; otherwise
 *   (no session ID, or no state for it yet) the module-level fallback value.
 */
export function getLtmTokens(sessionID?: string): number {
  const state = sessionID ? sessionStates.get(sessionID) : undefined;
  return state ? state.ltmTokens : ltmTokensFallback;
}
|
|
260
324
|
|
|
261
325
|
/**
|
|
@@ -306,7 +370,7 @@ export function calibrate(
|
|
|
306
370
|
if (sessionID !== undefined) {
|
|
307
371
|
const state = getSessionState(sessionID);
|
|
308
372
|
state.lastKnownInput = actualInput;
|
|
309
|
-
state.lastKnownLtm = ltmTokens;
|
|
373
|
+
state.lastKnownLtm = state.ltmTokens;
|
|
310
374
|
if (messageCount !== undefined) state.lastKnownMessageCount = messageCount;
|
|
311
375
|
}
|
|
312
376
|
}
|
|
@@ -378,7 +442,9 @@ export function inspectSessionState(sessionID: string): {
|
|
|
378
442
|
hasPrefixCache: boolean;
|
|
379
443
|
hasRawWindowCache: boolean;
|
|
380
444
|
cameOutOfIdle: boolean;
|
|
445
|
+
postIdleCompact: boolean;
|
|
381
446
|
lastTurnAt: number;
|
|
447
|
+
distillationSnapshot: DistillationSnapshot | null;
|
|
382
448
|
} | null {
|
|
383
449
|
const state = sessionStates.get(sessionID);
|
|
384
450
|
if (!state) return null;
|
|
@@ -386,7 +452,9 @@ export function inspectSessionState(sessionID: string): {
|
|
|
386
452
|
hasPrefixCache: state.prefixCache !== null,
|
|
387
453
|
hasRawWindowCache: state.rawWindowCache !== null,
|
|
388
454
|
cameOutOfIdle: state.cameOutOfIdle,
|
|
455
|
+
postIdleCompact: state.postIdleCompact,
|
|
389
456
|
lastTurnAt: state.lastTurnAt,
|
|
457
|
+
distillationSnapshot: state.distillationSnapshot,
|
|
390
458
|
};
|
|
391
459
|
}
|
|
392
460
|
|
|
@@ -425,6 +493,46 @@ function loadDistillations(
|
|
|
425
493
|
.all(...params) as Distillation[];
|
|
426
494
|
}
|
|
427
495
|
|
|
496
|
+
/**
 * Cached distillation loader: avoids a DB read on every transform() call.
 *
 * The snapshot on `sessState` is keyed by the ID of the last user message in
 * `messages`. While that ID is unchanged (an autonomous tool-call chain within
 * the same turn) the cached rows are returned as-is, so the distilled prefix
 * stays byte-identical and the prompt cache is preserved. The rows are
 * reloaded when the snapshot is missing (first call, or cleared elsewhere such
 * as on idle resume) or when a new user message marks a turn boundary.
 *
 * @param projectPath - Project whose distillations are loaded.
 * @param sessionID - Session whose distillations are loaded.
 * @param messages - Current conversation; only the last user message ID is read.
 * @param sessState - Session state holding the snapshot (updated on a miss).
 * @returns The cached or freshly loaded distillation rows.
 */
function loadDistillationsCached(
  projectPath: string,
  sessionID: string,
  messages: MessageWithParts[],
  sessState: SessionState,
): Distillation[] {
  // Walk backwards to the most recent user message; null when there is none.
  let lastUserMsgId: string | null = null;
  let idx = messages.length;
  while (idx-- > 0) {
    const { info } = messages[idx];
    if (info.role !== "user") continue;
    lastUserMsgId = info.id;
    break;
  }

  // Hit: still inside the turn the snapshot was taken in.
  const cached = sessState.distillationSnapshot;
  if (cached && cached.lastUserMsgId === lastUserMsgId) {
    return cached.rows;
  }

  // Miss: reload from the DB and remember which turn the rows belong to.
  const rows = loadDistillations(projectPath, sessionID);
  sessState.distillationSnapshot = { rows, lastUserMsgId };

  log.info(
    `distillation refresh: ${rows.length} rows` +
      ` (user msg ${lastUserMsgId?.substring(0, 16) ?? "none"})`,
  );

  return rows;
}
|
|
535
|
+
|
|
428
536
|
// Strip all <system-reminder>...</system-reminder> blocks from message text.
|
|
429
537
|
// For the user-message wrapper pattern, extracts the actual user text.
|
|
430
538
|
// For all other reminders (build-switch, plan reminders, etc.), drops them entirely.
|
|
@@ -534,6 +642,15 @@ function simpleHash(str: string): number {
|
|
|
534
642
|
return hash;
|
|
535
643
|
}
|
|
536
644
|
|
|
645
|
+
/** Parsed read-tool input: file path plus optional line range. */
|
|
646
|
+
type ReadRange = {
|
|
647
|
+
path: string;
|
|
648
|
+
/** 1-based start line. undefined = from beginning. */
|
|
649
|
+
offset: number | undefined;
|
|
650
|
+
/** Number of lines to read. undefined = to end. */
|
|
651
|
+
limit: number | undefined;
|
|
652
|
+
};
|
|
653
|
+
|
|
537
654
|
/** Extract file path from a tool's input JSON.
|
|
538
655
|
* Handles common formats: {"path": "/foo.ts"}, {"filePath": "/foo.ts"},
|
|
539
656
|
* and plain text fallback. */
|
|
@@ -548,10 +665,72 @@ function extractFilePath(input: string): string | undefined {
|
|
|
548
665
|
}
|
|
549
666
|
}
|
|
550
667
|
|
|
668
|
+
/**
 * Extract the file path and optional line range from a read tool's input.
 *
 * The input is first parsed as JSON. For a JSON object, the path is the first
 * non-empty string among `path`, `filePath` and `file`; `offset` and `limit`
 * are kept only when they are numbers. Non-string path values are ignored
 * rather than returned, so the result always honours the `ReadRange` type.
 *
 * Anything that is not a JSON object (malformed JSON, or a bare JSON
 * string/number/array/null) falls back to a regex that picks the first
 * path-like token out of the raw text, with no line range.
 *
 * @param input - Raw tool input, normally a JSON-encoded object.
 * @returns The parsed range, or `undefined` when no file path can be found.
 */
function extractReadRange(input: string): ReadRange | undefined {
  let parsed: unknown;
  try {
    parsed = JSON.parse(input);
  } catch {
    // Not JSON at all: handled by the plain-text fallback below.
    parsed = undefined;
  }

  if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
    const fields = parsed as Record<string, unknown>;
    const path = [fields.path, fields.filePath, fields.file].find(
      (candidate): candidate is string =>
        typeof candidate === "string" && candidate.length > 0,
    );
    if (path === undefined) return undefined;
    return {
      path,
      offset: typeof fields.offset === "number" ? fields.offset : undefined,
      limit: typeof fields.limit === "number" ? fields.limit : undefined,
    };
  }

  // Plain-text fallback: first token that looks like dir/.../name.ext.
  const match = input.match(/(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/);
  if (!match) return undefined;
  return { path: match[0], offset: undefined, limit: undefined };
}
|
|
683
|
+
|
|
684
|
+
/**
 * Does `later` cover the line range of `earlier`?
 *
 * Coverage rules:
 * - Reads of different paths never cover each other.
 * - A whole-file read covers everything for the same path. A read is
 *   whole-file when it has no limit and either no offset or an offset at or
 *   before the first line (offsets are 1-based, so `offset <= 1`).
 * - A ranged read does NOT cover a whole-file read.
 * - An open-ended read (offset, no limit) covers any read that starts at or
 *   after its own start.
 * - A bounded read never covers an open-ended one.
 * - Otherwise both are bounded, and `later` covers `earlier` when its
 *   [offset, offset+limit) interval is a superset of (or equal to) the
 *   other's interval. A missing offset counts as line 1.
 *
 * @param later - The read that happened later in the conversation.
 * @param earlier - The read that may be redundant.
 * @returns true when every line requested by `earlier` is also requested by `later`.
 */
export function laterReadCovers(later: ReadRange, earlier: ReadRange): boolean {
  if (later.path !== earlier.path) return false;

  // "From the first line to the end" is the whole file, however it was spelled.
  const readsWholeFile = (r: ReadRange): boolean =>
    r.limit === undefined && (r.offset === undefined || r.offset <= 1);

  if (readsWholeFile(later)) return true;
  // Later is a proper sub-range from here on, so it cannot cover a whole file.
  if (readsWholeFile(earlier)) return false;

  const laterStart = later.offset ?? 1;
  const earlierStart = earlier.offset ?? 1;

  // Open-ended later read: covers whatever starts at or after it.
  if (later.limit === undefined) return laterStart <= earlierStart;

  // Earlier runs to end of file but later stops somewhere: cannot cover.
  if (earlier.limit === undefined) return false;

  // Both bounded: half-open interval superset check.
  const laterEnd = laterStart + later.limit;
  const earlierEnd = earlierStart + earlier.limit;
  return laterStart <= earlierStart && laterEnd >= earlierEnd;
}
|
|
717
|
+
|
|
718
|
+
/**
 * Format a human-readable range label for dedup annotations.
 *
 * - offset and limit: ` lines A-B` (inclusive end, i.e. offset + limit - 1)
 * - limit only:       ` lines 1-N` (a missing offset means "from the first
 *   line", matching how range coverage treats it)
 * - offset only:      ` from line A`
 * - neither:          empty string (whole-file read needs no label)
 *
 * @param range - Parsed read range.
 * @returns The label with a leading space, or "" for a whole-file read.
 */
function rangeLabel(range: ReadRange): string {
  const { offset, limit } = range;
  if (limit !== undefined) {
    const firstLine = offset ?? 1;
    return ` lines ${firstLine}-${firstLine + limit - 1}`;
  }
  return offset !== undefined ? ` from line ${offset}` : "";
}
|
|
728
|
+
|
|
551
729
|
/** Annotation for deduplicated tool output — follows the toolStripAnnotation() pattern. */
|
|
552
|
-
function dedupAnnotation(toolName: string, filePath?: string): string {
|
|
730
|
+
function dedupAnnotation(toolName: string, filePath?: string, range?: ReadRange): string {
|
|
553
731
|
if (filePath) {
|
|
554
|
-
|
|
732
|
+
const rl = range ? rangeLabel(range) : "";
|
|
733
|
+
return `[earlier read of ${filePath}${rl} — see latest read below for current content]`;
|
|
555
734
|
}
|
|
556
735
|
return `[duplicate output — same content as later ${toolName} in this session — use recall for details]`;
|
|
557
736
|
}
|
|
@@ -563,7 +742,9 @@ function dedupAnnotation(toolName: string, filePath?: string): string {
|
|
|
563
742
|
*
|
|
564
743
|
* Deduplicates by:
|
|
565
744
|
* 1. Exact content hash: identical tool outputs (same file read twice, same command output)
|
|
566
|
-
* 2.
|
|
745
|
+
* 2. Range-aware file reads: read_file/read outputs for the same path where a later
|
|
746
|
+
* read covers the same or wider line range (full-file covers everything; a ranged
|
|
747
|
+
* read only covers another ranged read when its interval is a superset).
|
|
567
748
|
*
|
|
568
749
|
* The current turn (from currentTurnIdx onward) is never touched — the model
|
|
569
750
|
* needs full context for its active work. Tool parts are never removed entirely;
|
|
@@ -577,11 +758,13 @@ export function deduplicateToolOutputs(
|
|
|
577
758
|
): MessageWithParts[] {
|
|
578
759
|
// Track latest occurrence: contentKey → latest message index
|
|
579
760
|
const contentLatest = new Map<string, number>();
|
|
580
|
-
// Track latest read by file path: "read:path" → latest message index
|
|
581
|
-
const fileLatest = new Map<string, number>();
|
|
582
761
|
|
|
583
|
-
//
|
|
584
|
-
//
|
|
762
|
+
// Track all read ranges per file path, ordered by message index (ascending).
|
|
763
|
+
// Each entry records the range and the message index so the second pass can
|
|
764
|
+
// check whether any later read covers the current read's range.
|
|
765
|
+
const fileReads = new Map<string, Array<{ range: ReadRange; msgIdx: number }>>();
|
|
766
|
+
|
|
767
|
+
// First pass: scan all messages (including current turn) to build tracking maps.
|
|
585
768
|
for (let i = 0; i < messages.length; i++) {
|
|
586
769
|
for (const part of messages[i].parts) {
|
|
587
770
|
if (!isToolPart(part) || part.state.status !== "completed") continue;
|
|
@@ -591,13 +774,20 @@ export function deduplicateToolOutputs(
|
|
|
591
774
|
const key = `${part.tool}:${simpleHash(output)}`;
|
|
592
775
|
contentLatest.set(key, i);
|
|
593
776
|
|
|
594
|
-
// For read-type tools,
|
|
777
|
+
// For read-type tools, record the full range info
|
|
595
778
|
if (part.tool === "read_file" || part.tool === "read") {
|
|
596
779
|
const inputStr = typeof part.state.input === "string"
|
|
597
780
|
? part.state.input
|
|
598
781
|
: JSON.stringify(part.state.input);
|
|
599
|
-
const
|
|
600
|
-
if (
|
|
782
|
+
const range = extractReadRange(inputStr);
|
|
783
|
+
if (range) {
|
|
784
|
+
let entries = fileReads.get(range.path);
|
|
785
|
+
if (!entries) {
|
|
786
|
+
entries = [];
|
|
787
|
+
fileReads.set(range.path, entries);
|
|
788
|
+
}
|
|
789
|
+
entries.push({ range, msgIdx: i });
|
|
790
|
+
}
|
|
601
791
|
}
|
|
602
792
|
}
|
|
603
793
|
}
|
|
@@ -617,19 +807,31 @@ export function deduplicateToolOutputs(
|
|
|
617
807
|
const contentKey = `${part.tool}:${simpleHash(output)}`;
|
|
618
808
|
const isLatestContent = contentLatest.get(contentKey) === msgIdx;
|
|
619
809
|
|
|
620
|
-
// Check
|
|
621
|
-
|
|
622
|
-
let
|
|
810
|
+
// Check range-aware file dedup for read tools: does any later read
|
|
811
|
+
// of the same file cover this read's range?
|
|
812
|
+
let readRange: ReadRange | undefined;
|
|
813
|
+
let coveredByLater = false;
|
|
623
814
|
if (part.tool === "read_file" || part.tool === "read") {
|
|
624
815
|
const inputStr = typeof part.state.input === "string"
|
|
625
816
|
? part.state.input
|
|
626
817
|
: JSON.stringify(part.state.input);
|
|
627
|
-
|
|
628
|
-
if (
|
|
818
|
+
readRange = extractReadRange(inputStr);
|
|
819
|
+
if (readRange) {
|
|
820
|
+
const entries = fileReads.get(readRange.path);
|
|
821
|
+
if (entries) {
|
|
822
|
+
// Check if any entry with a higher message index covers this range
|
|
823
|
+
for (const entry of entries) {
|
|
824
|
+
if (entry.msgIdx > msgIdx && laterReadCovers(entry.range, readRange)) {
|
|
825
|
+
coveredByLater = true;
|
|
826
|
+
break;
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
}
|
|
629
831
|
}
|
|
630
832
|
|
|
631
|
-
// Keep if this is both the latest content AND
|
|
632
|
-
if (isLatestContent &&
|
|
833
|
+
// Keep if this is both the latest content AND not covered by a later read
|
|
834
|
+
if (isLatestContent && !coveredByLater) return part;
|
|
633
835
|
|
|
634
836
|
// This is a duplicate — replace with compact annotation
|
|
635
837
|
partsChanged = true;
|
|
@@ -637,7 +839,7 @@ export function deduplicateToolOutputs(
|
|
|
637
839
|
...part,
|
|
638
840
|
state: {
|
|
639
841
|
...part.state,
|
|
640
|
-
output: dedupAnnotation(part.tool,
|
|
842
|
+
output: dedupAnnotation(part.tool, readRange?.path, readRange),
|
|
641
843
|
},
|
|
642
844
|
} as LorePart;
|
|
643
845
|
});
|
|
@@ -671,8 +873,14 @@ function sanitizeToolParts(
|
|
|
671
873
|
if (status === "completed" || status === "error") return part;
|
|
672
874
|
|
|
673
875
|
// pending or running → convert to error so SDK emits tool_result
|
|
876
|
+
// Use a deterministic timestamp (0) instead of Date.now() so that
|
|
877
|
+
// repeated transform() calls on the same stale pending part produce
|
|
878
|
+
// identical bytes. OpenCode's prompt-loop cache fix (e148f00aa)
|
|
879
|
+
// preserves old pending parts across iterations; Date.now() here
|
|
880
|
+
// would re-stamp them each call → different bytes → cache bust.
|
|
674
881
|
partsChanged = true;
|
|
675
|
-
const
|
|
882
|
+
const existingStart =
|
|
883
|
+
"time" in part.state ? part.state.time.start : 0;
|
|
676
884
|
return {
|
|
677
885
|
...part,
|
|
678
886
|
state: {
|
|
@@ -682,8 +890,8 @@ function sanitizeToolParts(
|
|
|
682
890
|
metadata:
|
|
683
891
|
"metadata" in part.state ? part.state.metadata : undefined,
|
|
684
892
|
time: {
|
|
685
|
-
start:
|
|
686
|
-
end:
|
|
893
|
+
start: existingStart,
|
|
894
|
+
end: existingStart,
|
|
687
895
|
},
|
|
688
896
|
},
|
|
689
897
|
} as LorePart;
|
|
@@ -728,134 +936,6 @@ function stripToTextOnly(parts: LorePart[]): LorePart[] {
|
|
|
728
936
|
return stripped;
|
|
729
937
|
}
|
|
730
938
|
|
|
731
|
-
// --- Phase 2: Temporal anchoring at read time ---
|
|
732
|
-
|
|
733
|
-
function formatRelativeTime(date: Date, now: Date): string {
|
|
734
|
-
const diffMs = now.getTime() - date.getTime();
|
|
735
|
-
const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));
|
|
736
|
-
if (diffDays === 0) return "today";
|
|
737
|
-
if (diffDays === 1) return "yesterday";
|
|
738
|
-
if (diffDays < 7) return `${diffDays} days ago`;
|
|
739
|
-
if (diffDays < 14) return "1 week ago";
|
|
740
|
-
if (diffDays < 30) return `${Math.floor(diffDays / 7)} weeks ago`;
|
|
741
|
-
if (diffDays < 60) return "1 month ago";
|
|
742
|
-
if (diffDays < 365) return `${Math.floor(diffDays / 30)} months ago`;
|
|
743
|
-
return `${Math.floor(diffDays / 365)} year${Math.floor(diffDays / 365) > 1 ? "s" : ""} ago`;
|
|
744
|
-
}
|
|
745
|
-
|
|
746
|
-
function parseDateFromContent(s: string): Date | null {
|
|
747
|
-
// "Month Day, Year" e.g. "January 15, 2026"
|
|
748
|
-
const simple = s.match(/([A-Z][a-z]+)\s+(\d{1,2}),?\s+(\d{4})/);
|
|
749
|
-
if (simple) {
|
|
750
|
-
const d = new Date(`${simple[1]} ${simple[2]}, ${simple[3]}`);
|
|
751
|
-
if (!isNaN(d.getTime())) return d;
|
|
752
|
-
}
|
|
753
|
-
// "Month D-D, Year" range — use start
|
|
754
|
-
const range = s.match(/([A-Z][a-z]+)\s+(\d{1,2})-\d{1,2},?\s+(\d{4})/);
|
|
755
|
-
if (range) {
|
|
756
|
-
const d = new Date(`${range[1]} ${range[2]}, ${range[3]}`);
|
|
757
|
-
if (!isNaN(d.getTime())) return d;
|
|
758
|
-
}
|
|
759
|
-
// "late/early/mid Month Year"
|
|
760
|
-
const vague = s.match(/(late|early|mid)[- ]?([A-Z][a-z]+)\s+(\d{4})/i);
|
|
761
|
-
if (vague) {
|
|
762
|
-
const day =
|
|
763
|
-
vague[1].toLowerCase() === "early"
|
|
764
|
-
? 7
|
|
765
|
-
: vague[1].toLowerCase() === "late"
|
|
766
|
-
? 23
|
|
767
|
-
: 15;
|
|
768
|
-
const d = new Date(`${vague[2]} ${day}, ${vague[3]}`);
|
|
769
|
-
if (!isNaN(d.getTime())) return d;
|
|
770
|
-
}
|
|
771
|
-
return null;
|
|
772
|
-
}
|
|
773
|
-
|
|
774
|
-
// Expand "(meaning DATE)" and "(estimated DATE)" annotations with a relative offset.
|
|
775
|
-
// Past future-intent lines get "(likely already happened)" appended.
|
|
776
|
-
function expandInlineEstimatedDates(text: string, now: Date): string {
|
|
777
|
-
return text.replace(
|
|
778
|
-
/\(((?:meaning|estimated)\s+)([^)]+\d{4})\)/gi,
|
|
779
|
-
(match, prefix: string, dateContent: string) => {
|
|
780
|
-
const d = parseDateFromContent(dateContent);
|
|
781
|
-
if (!d) return match;
|
|
782
|
-
const rel = formatRelativeTime(d, now);
|
|
783
|
-
// Detect future-intent by looking backwards on the same line
|
|
784
|
-
const matchIdx = text.indexOf(match);
|
|
785
|
-
const lineStart = text.lastIndexOf("\n", matchIdx) + 1;
|
|
786
|
-
const linePrefix = text.slice(lineStart, matchIdx);
|
|
787
|
-
const isFutureIntent =
|
|
788
|
-
/\b(?:will|plans?\s+to|planning\s+to|going\s+to|intends?\s+to)\b/i.test(
|
|
789
|
-
linePrefix,
|
|
790
|
-
);
|
|
791
|
-
if (d < now && isFutureIntent)
|
|
792
|
-
return `(${prefix}${dateContent} — ${rel}, likely already happened)`;
|
|
793
|
-
return `(${prefix}${dateContent} — ${rel})`;
|
|
794
|
-
},
|
|
795
|
-
);
|
|
796
|
-
}
|
|
797
|
-
|
|
798
|
-
// Add relative time annotations to "Date: Month D, Year" section headers
|
|
799
|
-
// and gap markers between non-consecutive dates.
|
|
800
|
-
function addRelativeTimeToObservations(text: string, now: Date): string {
|
|
801
|
-
// First pass: expand inline "(meaning DATE)" annotations
|
|
802
|
-
const withInline = expandInlineEstimatedDates(text, now);
|
|
803
|
-
|
|
804
|
-
// Second pass: annotate date headers and add gap markers
|
|
805
|
-
const dateHeaderRe = /^(Date:\s*)([A-Z][a-z]+ \d{1,2}, \d{4})$/gm;
|
|
806
|
-
const found: Array<{
|
|
807
|
-
index: number;
|
|
808
|
-
date: Date;
|
|
809
|
-
full: string;
|
|
810
|
-
prefix: string;
|
|
811
|
-
ds: string;
|
|
812
|
-
}> = [];
|
|
813
|
-
let m: RegExpExecArray | null;
|
|
814
|
-
while ((m = dateHeaderRe.exec(withInline)) !== null) {
|
|
815
|
-
const d = new Date(m[2]);
|
|
816
|
-
if (!isNaN(d.getTime()))
|
|
817
|
-
found.push({
|
|
818
|
-
index: m.index,
|
|
819
|
-
date: d,
|
|
820
|
-
full: m[0],
|
|
821
|
-
prefix: m[1],
|
|
822
|
-
ds: m[2],
|
|
823
|
-
});
|
|
824
|
-
}
|
|
825
|
-
if (!found.length) return withInline;
|
|
826
|
-
|
|
827
|
-
let result = "";
|
|
828
|
-
let last = 0;
|
|
829
|
-
for (let i = 0; i < found.length; i++) {
|
|
830
|
-
const curr = found[i];
|
|
831
|
-
const prev = found[i - 1];
|
|
832
|
-
result += withInline.slice(last, curr.index);
|
|
833
|
-
// Gap marker between non-consecutive dates
|
|
834
|
-
if (prev) {
|
|
835
|
-
const gapDays = Math.floor(
|
|
836
|
-
(curr.date.getTime() - prev.date.getTime()) / 86400000,
|
|
837
|
-
);
|
|
838
|
-
if (gapDays > 1) {
|
|
839
|
-
const gap =
|
|
840
|
-
gapDays < 7
|
|
841
|
-
? `[${gapDays} days later]`
|
|
842
|
-
: gapDays < 14
|
|
843
|
-
? "[1 week later]"
|
|
844
|
-
: gapDays < 30
|
|
845
|
-
? `[${Math.floor(gapDays / 7)} weeks later]`
|
|
846
|
-
: gapDays < 60
|
|
847
|
-
? "[1 month later]"
|
|
848
|
-
: `[${Math.floor(gapDays / 30)} months later]`;
|
|
849
|
-
result += `\n${gap}\n\n`;
|
|
850
|
-
}
|
|
851
|
-
}
|
|
852
|
-
result += `${curr.prefix}${curr.ds} (${formatRelativeTime(curr.date, now)})`;
|
|
853
|
-
last = curr.index + curr.full.length;
|
|
854
|
-
}
|
|
855
|
-
result += withInline.slice(last);
|
|
856
|
-
return result;
|
|
857
|
-
}
|
|
858
|
-
|
|
859
939
|
// Build synthetic user/assistant message pair wrapping formatted distillation text.
|
|
860
940
|
// Shared by the cached and non-cached prefix paths.
|
|
861
941
|
function buildPrefixMessages(formatted: string): MessageWithParts[] {
|
|
@@ -917,12 +997,7 @@ function buildPrefixMessages(formatted: string): MessageWithParts[] {
|
|
|
917
997
|
// Non-cached path — used by layers 2-4 which already cause full cache invalidation.
|
|
918
998
|
function distilledPrefix(distillations: Distillation[]): MessageWithParts[] {
|
|
919
999
|
if (!distillations.length) return [];
|
|
920
|
-
const
|
|
921
|
-
const annotated = distillations.map((d) => ({
|
|
922
|
-
...d,
|
|
923
|
-
observations: addRelativeTimeToObservations(d.observations, now),
|
|
924
|
-
}));
|
|
925
|
-
const formatted = formatDistillations(annotated);
|
|
1000
|
+
const formatted = formatDistillations(distillations);
|
|
926
1001
|
if (!formatted) return [];
|
|
927
1002
|
return buildPrefixMessages(formatted);
|
|
928
1003
|
}
|
|
@@ -995,12 +1070,7 @@ function distilledPrefixCached(
|
|
|
995
1070
|
|
|
996
1071
|
// New rows appended — render only the delta and append to cached text
|
|
997
1072
|
const newRows = distillations.slice(prefixCache!.rowCount);
|
|
998
|
-
const
|
|
999
|
-
const annotated = newRows.map((d) => ({
|
|
1000
|
-
...d,
|
|
1001
|
-
observations: addRelativeTimeToObservations(d.observations, now),
|
|
1002
|
-
}));
|
|
1003
|
-
const deltaText = formatDistillations(annotated);
|
|
1073
|
+
const deltaText = formatDistillations(newRows);
|
|
1004
1074
|
|
|
1005
1075
|
if (deltaText) {
|
|
1006
1076
|
const fullText = prefixCache!.cachedText + "\n\n" + deltaText;
|
|
@@ -1019,12 +1089,7 @@ function distilledPrefixCached(
|
|
|
1019
1089
|
}
|
|
1020
1090
|
|
|
1021
1091
|
// Full re-render: first call or meta-distillation rewrote rows
|
|
1022
|
-
const
|
|
1023
|
-
const annotated = distillations.map((d) => ({
|
|
1024
|
-
...d,
|
|
1025
|
-
observations: addRelativeTimeToObservations(d.observations, now),
|
|
1026
|
-
}));
|
|
1027
|
-
const fullText = formatDistillations(annotated);
|
|
1092
|
+
const fullText = formatDistillations(distillations);
|
|
1028
1093
|
if (!fullText) {
|
|
1029
1094
|
sessState.prefixCache = null;
|
|
1030
1095
|
return { messages: [], tokens: 0 };
|
|
@@ -1053,6 +1118,16 @@ export function resetPrefixCache(sessionID?: string) {
|
|
|
1053
1118
|
}
|
|
1054
1119
|
}
|
|
1055
1120
|
|
|
1121
|
+
/**
 * For testing only — clears the stored distillation snapshot.
 *
 * @param sessionID - When provided (truthy), only that session's snapshot is
 *   cleared; an ID with no tracked state is ignored. When omitted, the
 *   snapshot of every tracked session is cleared.
 */
export function resetDistillationSnapshot(sessionID?: string) {
  if (!sessionID) {
    // No specific session requested: wipe the snapshot on all tracked sessions.
    for (const entry of sessionStates.values()) {
      entry.distillationSnapshot = null;
    }
    return;
  }

  const target = sessionStates.get(sessionID);
  if (target) {
    target.distillationSnapshot = null;
  }
}
|
|
1130
|
+
|
|
1056
1131
|
// --- Approach B: Lazy raw window eviction ---
|
|
1057
1132
|
//
|
|
1058
1133
|
// Tracks the ID of the first (oldest) message in the previous raw window.
|
|
@@ -1072,8 +1147,14 @@ export function resetPrefixCache(sessionID?: string) {
|
|
|
1072
1147
|
|
|
1073
1148
|
type RawWindowCache = {
  // Session that owns this pin. The pin is only reused when this equals the
  // session ID of the current call.
  sessionID: string;

  // How many raw messages (prefix excluded) the pinned window held when the
  // pin was created.
  pinnedRawCount: number;

  // Length of the full input message array when the pin was created. The
  // difference to the current length gives the number of messages appended
  // since, which is added to pinnedRawCount to size the window.
  pinnedTotalCount: number;

  // The rawBudget in effect when the pin was created. The pin-validity check
  // compares against this value so that global budget fluctuations don't
  // evict the pin.
  pinnedBudget: number;
};
|
|
1078
1159
|
|
|
1079
1160
|
// For testing only — reset raw window cache state for a specific session (or all)
|
|
@@ -1114,36 +1195,63 @@ function tryFitStable(input: {
|
|
|
1114
1195
|
rawWindowCache !== null && rawWindowCache.sessionID === input.sessionID;
|
|
1115
1196
|
|
|
1116
1197
|
if (cacheValid) {
|
|
1117
|
-
|
|
1118
|
-
|
|
1198
|
+
// Compute the pinned index from the stored raw count + new message growth.
|
|
1199
|
+
// newMessages = messages appended since pin creation (typically 2 per turn).
|
|
1200
|
+
// The pinned window grows to include them: pinnedRawCount + newMessages.
|
|
1201
|
+
// This is resilient to front-trimming by the host (e.g. OpenCode evicting
|
|
1202
|
+
// old messages) because the offset is relative to the tail.
|
|
1203
|
+
const newMessages = Math.max(0, input.messages.length - rawWindowCache!.pinnedTotalCount);
|
|
1204
|
+
const windowSize = rawWindowCache!.pinnedRawCount + newMessages;
|
|
1205
|
+
const pinnedIdx = Math.max(0, input.messages.length - windowSize);
|
|
1206
|
+
|
|
1207
|
+
// Measure the token cost of the pinned window.
|
|
1208
|
+
const pinnedWindow = input.messages.slice(pinnedIdx);
|
|
1209
|
+
const pinnedTokens = pinnedWindow.reduce(
|
|
1210
|
+
(sum, m) => sum + estimateMessage(m),
|
|
1211
|
+
0,
|
|
1119
1212
|
);
|
|
1120
1213
|
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
distilledTokens: input.prefixTokens,
|
|
1141
|
-
rawTokens: pinnedTokens,
|
|
1142
|
-
totalTokens: total,
|
|
1214
|
+
// Use the budget that was in effect when the pin was created with a 15%
|
|
1215
|
+
// hysteresis margin so that small budget fluctuations from overhead drift
|
|
1216
|
+
// and deduplicateToolOutputs() token-estimate changes don't evict the pin.
|
|
1217
|
+
// The high-water mark (max of pinned and current budgets) prevents overhead
|
|
1218
|
+
// EMA drift from shrinking the effective budget below what was valid when
|
|
1219
|
+
// the pin was created — the budget shrank due to overhead drift, not because
|
|
1220
|
+
// the context limit changed.
|
|
1221
|
+
const highWaterBudget = Math.max(rawWindowCache!.pinnedBudget, input.rawBudget);
|
|
1222
|
+
const effectiveBudget = highWaterBudget * 1.15;
|
|
1223
|
+
if (pinnedTokens <= effectiveBudget) {
|
|
1224
|
+
// Pinned window still fits within the hysteresis margin of the high-water
|
|
1225
|
+
// budget. Re-pin at the current budget when the old hysteresis is exceeded
|
|
1226
|
+
// so that next turn's check uses a fresh baseline.
|
|
1227
|
+
if (pinnedTokens > rawWindowCache!.pinnedBudget * 1.15) {
|
|
1228
|
+
input.sessState.rawWindowCache = {
|
|
1229
|
+
...rawWindowCache!,
|
|
1230
|
+
pinnedRawCount: pinnedWindow.length,
|
|
1231
|
+
pinnedTotalCount: input.messages.length,
|
|
1232
|
+
pinnedBudget: input.rawBudget,
|
|
1143
1233
|
};
|
|
1144
1234
|
}
|
|
1145
|
-
//
|
|
1235
|
+
// Apply system-reminder cleanup only (strip:"none" is the layer-1 mode),
|
|
1236
|
+
// returning the same message object references wherever nothing changed.
|
|
1237
|
+
const processed = pinnedWindow.map((msg) => {
|
|
1238
|
+
const parts = cleanParts(msg.parts);
|
|
1239
|
+
return parts !== msg.parts ? { info: msg.info, parts } : msg;
|
|
1240
|
+
});
|
|
1241
|
+
const total = input.prefixTokens + pinnedTokens;
|
|
1242
|
+
return {
|
|
1243
|
+
messages: [...input.prefix, ...processed],
|
|
1244
|
+
distilledTokens: input.prefixTokens,
|
|
1245
|
+
rawTokens: pinnedTokens,
|
|
1246
|
+
totalTokens: total,
|
|
1247
|
+
};
|
|
1146
1248
|
}
|
|
1249
|
+
// Pinned window is too large for both budgets — fall through to rescan.
|
|
1250
|
+
log.info(
|
|
1251
|
+
`pin-overflow: session=${input.sessionID} pinnedTokens=${pinnedTokens} ` +
|
|
1252
|
+
`pinnedBudget=${rawWindowCache!.pinnedBudget} effectiveBudget=${Math.round(effectiveBudget)} ` +
|
|
1253
|
+
`currentRawBudget=${input.rawBudget} windowSize=${pinnedWindow.length}`,
|
|
1254
|
+
);
|
|
1147
1255
|
}
|
|
1148
1256
|
|
|
1149
1257
|
// Normal backward scan to find the tightest fitting cutoff.
|
|
@@ -1157,13 +1265,18 @@ function tryFitStable(input: {
|
|
|
1157
1265
|
});
|
|
1158
1266
|
|
|
1159
1267
|
if (result) {
|
|
1160
|
-
// Update the raw window cache: the
|
|
1161
|
-
//
|
|
1162
|
-
|
|
1163
|
-
|
|
1268
|
+
// Update the raw window cache: store the raw message count and total message
|
|
1269
|
+
// count so we can reconstruct the window position on the next turn even after
|
|
1270
|
+
// front-trimming by the host (e.g. OpenCode evicting old messages).
|
|
1271
|
+
// Snapshot the current rawBudget so future pin checks use the budget that
|
|
1272
|
+
// was in effect when this window was chosen (Option 1: snapshot isolation).
|
|
1273
|
+
const rawMessageCount = result.messages.length - input.prefix.length;
|
|
1274
|
+
if (rawMessageCount > 0) {
|
|
1164
1275
|
input.sessState.rawWindowCache = {
|
|
1165
1276
|
sessionID: input.sessionID,
|
|
1166
|
-
|
|
1277
|
+
pinnedRawCount: rawMessageCount,
|
|
1278
|
+
pinnedTotalCount: input.messages.length,
|
|
1279
|
+
pinnedBudget: input.rawBudget,
|
|
1167
1280
|
};
|
|
1168
1281
|
}
|
|
1169
1282
|
}
|
|
@@ -1200,21 +1313,27 @@ function transformInner(input: {
|
|
|
1200
1313
|
}): TransformResult {
|
|
1201
1314
|
const cfg = config();
|
|
1202
1315
|
const overhead = getOverhead();
|
|
1316
|
+
|
|
1317
|
+
// --- Session state (must precede budget computation) ---
|
|
1318
|
+
const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
|
|
1319
|
+
const sessState = sid ? getSessionState(sid) : makeSessionState();
|
|
1320
|
+
|
|
1203
1321
|
// Usable = full context minus output reservation minus fixed overhead (system + tools)
|
|
1204
1322
|
// minus LTM tokens already injected into the system prompt this turn.
|
|
1323
|
+
// Read LTM tokens from per-session state to avoid cross-session contamination.
|
|
1324
|
+
const sessLtmTokens = sid ? sessState.ltmTokens : ltmTokensFallback;
|
|
1205
1325
|
const usable = Math.max(
|
|
1206
1326
|
0,
|
|
1207
|
-
contextLimit - outputReserved - overhead -
|
|
1327
|
+
contextLimit - outputReserved - overhead - sessLtmTokens,
|
|
1208
1328
|
);
|
|
1209
1329
|
const distilledBudget = Math.floor(usable * cfg.budget.distilled);
|
|
1210
|
-
|
|
1330
|
+
// Base raw budget. May be overridden below for post-idle compact mode.
|
|
1331
|
+
let rawBudget = Math.floor(usable * cfg.budget.raw);
|
|
1211
1332
|
|
|
1212
1333
|
// --- Force escalation (reactive error recovery) ---
|
|
1213
1334
|
// When the API previously rejected with "prompt is too long", skip layers
|
|
1214
1335
|
// below the forced minimum to ensure enough trimming on the next attempt.
|
|
1215
1336
|
// One-shot: consumed here and reset to 0 (both in-memory and on disk).
|
|
1216
|
-
const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
|
|
1217
|
-
const sessState = sid ? getSessionState(sid) : makeSessionState();
|
|
1218
1337
|
let effectiveMinLayer = sessState.forceMinLayer;
|
|
1219
1338
|
sessState.forceMinLayer = 0;
|
|
1220
1339
|
if (sid && effectiveMinLayer > 0) saveForceMinLayer(sid, 0);
|
|
@@ -1246,17 +1365,43 @@ function transformInner(input: {
|
|
|
1246
1365
|
}
|
|
1247
1366
|
|
|
1248
1367
|
// --- Sticky layer guard (Option C) ---
|
|
1249
|
-
// After a compressed turn (layer >=
|
|
1368
|
+
// After a compressed turn (layer >= N), don't allow re-entry below N until
|
|
1250
1369
|
// the session genuinely shrinks (e.g. after compaction deletes messages).
|
|
1251
|
-
// Prevents
|
|
1252
|
-
// lastKnownInput=100K for a 50-message
|
|
1253
|
-
//
|
|
1254
|
-
//
|
|
1255
|
-
//
|
|
1370
|
+
// Prevents calibration oscillation AND layer-transition cache busts:
|
|
1371
|
+
// - 0→1→0: compressed turn stores lastKnownInput=100K for a 50-message
|
|
1372
|
+
// window, next turn's 300 raw messages produce an undercounted
|
|
1373
|
+
// expectedInput that "fits" in layer 0 but actually overflows.
|
|
1374
|
+
// - 1→2→1: layer 2 strips tool outputs (different bytes), bouncing back
|
|
1375
|
+
// to layer 1 restores them (different bytes again) → two busts.
|
|
1376
|
+
// Pinning to the *actual* last layer prevents all downward oscillation.
|
|
1256
1377
|
// Only applied when calibrated (same session, per-session state) to avoid
|
|
1257
1378
|
// affecting other sessions including worker sessions.
|
|
1258
1379
|
if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
|
|
1380
|
+
effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer) as SafetyLayer;
|
|
1381
|
+
}
|
|
1382
|
+
|
|
1383
|
+
// --- Post-idle compact layer ---
|
|
1384
|
+
// When the cache just went cold (onIdleResume fired), skip layer 0 full-raw
|
|
1385
|
+
// passthrough and use a tighter raw budget. Rationale: the entire context is
|
|
1386
|
+
// a cache WRITE regardless — a smaller total costs less to write, and
|
|
1387
|
+
// aggressive idle distillation already captured older history in the prefix.
|
|
1388
|
+
// The flag is one-shot: consumed here and reset so subsequent turns use
|
|
1389
|
+
// normal budgets once the cache is warm.
|
|
1390
|
+
const postIdleCompact = sessState.postIdleCompact;
|
|
1391
|
+
if (postIdleCompact) {
|
|
1392
|
+
sessState.postIdleCompact = false;
|
|
1393
|
+
// Skip layer 0 — don't pass through all raw messages on a cold cache.
|
|
1259
1394
|
effectiveMinLayer = Math.max(effectiveMinLayer, 1) as SafetyLayer;
|
|
1395
|
+
// Use a tighter raw budget: 20% of usable instead of the normal 40%.
|
|
1396
|
+
// The distilled prefix covers the older history; the raw window only
|
|
1397
|
+
// needs the current turn + minimal recent context. This reduces the
|
|
1398
|
+
// total cold-cache write cost by up to 20% of usable (~29K tokens on
|
|
1399
|
+
// a 200K context model).
|
|
1400
|
+
rawBudget = Math.floor(usable * 0.20);
|
|
1401
|
+
log.info(
|
|
1402
|
+
`post-idle compact: session=${sid} rawBudget=${rawBudget}` +
|
|
1403
|
+
` (${Math.floor(usable * cfg.budget.raw)}→${rawBudget})`,
|
|
1404
|
+
);
|
|
1260
1405
|
}
|
|
1261
1406
|
|
|
1262
1407
|
let expectedInput: number;
|
|
@@ -1269,12 +1414,12 @@ function transformInner(input: {
|
|
|
1269
1414
|
? input.messages.filter((m) => !sessState.lastWindowMessageIDs.has(m.info.id))
|
|
1270
1415
|
: input.messages.slice(-Math.max(0, input.messages.length - sessState.lastKnownMessageCount));
|
|
1271
1416
|
const newMsgTokens = newMessages.reduce((s, m) => s + estimateMessage(m), 0);
|
|
1272
|
-
const ltmDelta =
|
|
1417
|
+
const ltmDelta = sessLtmTokens - sessState.lastKnownLtm;
|
|
1273
1418
|
expectedInput = sessState.lastKnownInput + newMsgTokens + ltmDelta;
|
|
1274
1419
|
} else {
|
|
1275
1420
|
// First turn or session change: fall back to chars/3 estimate + overhead.
|
|
1276
1421
|
const messageTokens = input.messages.reduce((s, m) => s + estimateMessage(m), 0);
|
|
1277
|
-
expectedInput = messageTokens + overhead +
|
|
1422
|
+
expectedInput = messageTokens + overhead + sessLtmTokens;
|
|
1278
1423
|
}
|
|
1279
1424
|
|
|
1280
1425
|
// When uncalibrated, apply safety multiplier to the layer-0 decision too.
|
|
@@ -1299,8 +1444,8 @@ function transformInner(input: {
|
|
|
1299
1444
|
// All messages fit — return unmodified to preserve append-only prompt-cache pattern.
|
|
1300
1445
|
// Raw messages are strictly better context than lossy distilled summaries.
|
|
1301
1446
|
const messageTokens = calibrated
|
|
1302
|
-
? expectedInput - (
|
|
1303
|
-
: expectedInput - overhead -
|
|
1447
|
+
? expectedInput - (sessLtmTokens - sessState.lastKnownLtm) // approximate raw portion
|
|
1448
|
+
: expectedInput - overhead - sessLtmTokens;
|
|
1304
1449
|
return {
|
|
1305
1450
|
messages: input.messages,
|
|
1306
1451
|
layer: 0,
|
|
@@ -1323,7 +1468,9 @@ function transformInner(input: {
|
|
|
1323
1468
|
const dedupMessages = deduplicateToolOutputs(input.messages, turnStart);
|
|
1324
1469
|
|
|
1325
1470
|
|
|
1326
|
-
const distillations = sid
|
|
1471
|
+
const distillations = sid
|
|
1472
|
+
? loadDistillationsCached(input.projectPath, sid, input.messages, sessState)
|
|
1473
|
+
: [];
|
|
1327
1474
|
|
|
1328
1475
|
// Layer 1 uses the append-only cached prefix (Approach C) to keep the
|
|
1329
1476
|
// distilled content byte-identical between distillation runs, preserving
|
|
@@ -1503,19 +1650,43 @@ export function transform(input: {
|
|
|
1503
1650
|
// result fields above so a thrown transformInner doesn't update it.
|
|
1504
1651
|
state.lastTurnAt = Date.now();
|
|
1505
1652
|
|
|
1506
|
-
// --- Cache-bust diagnostics
|
|
1653
|
+
// --- Cache-bust diagnostics ---
|
|
1507
1654
|
// Track byte-identity of the message prefix. When the prefix hash changes
|
|
1508
1655
|
// between consecutive turns, it means Anthropic's prompt cache is invalidated
|
|
1509
1656
|
// and the entire context is re-written (12.5× cache-read price). This helps
|
|
1510
1657
|
// identify which code paths are breaking byte-identity.
|
|
1511
|
-
|
|
1512
|
-
|
|
1658
|
+
//
|
|
1659
|
+
// Use a content-based fingerprint (role + text snippet) rather than message
|
|
1660
|
+
// IDs, since IDs can be unstable (gateway generates fresh UUIDs, OpenCode
|
|
1661
|
+
// may regenerate messages in-place). Content hashes are a better proxy for
|
|
1662
|
+
// Anthropic's actual byte-identity cache.
|
|
1663
|
+
const prefixFingerprint = result.messages.slice(0, 5).map((m) => {
|
|
1664
|
+
const text = m.parts
|
|
1665
|
+
.map((p) => {
|
|
1666
|
+
if (isTextPart(p)) return p.text?.slice(0, 40) ?? "";
|
|
1667
|
+
if (isReasoningPart(p)) return p.text?.slice(0, 40) ?? "";
|
|
1668
|
+
return p.type;
|
|
1669
|
+
})
|
|
1670
|
+
.join("|");
|
|
1671
|
+
return `${m.info.role}:${text.slice(0, 60)}`;
|
|
1672
|
+
}).join(",");
|
|
1673
|
+
const prefixHash = `${result.layer}:${prefixFingerprint}`;
|
|
1674
|
+
state.transformCount++;
|
|
1513
1675
|
if (state.lastPrefixHash && state.lastPrefixHash !== prefixHash) {
|
|
1676
|
+
state.bustCount++;
|
|
1677
|
+
const rate = state.bustCount / state.transformCount;
|
|
1514
1678
|
log.info(
|
|
1515
|
-
`cache-bust
|
|
1679
|
+
`cache-bust #${state.bustCount} (${(rate * 100).toFixed(0)}%): session=${sid}` +
|
|
1680
|
+
` layer=${state.lastLayer}→${result.layer}` +
|
|
1516
1681
|
` msgs=${state.lastTransformedCount}→${result.messages.length}` +
|
|
1517
1682
|
` prefix=${state.lastPrefixHash.slice(0, 30)}→${prefixHash.slice(0, 30)}`,
|
|
1518
1683
|
);
|
|
1684
|
+
if (state.transformCount >= 20 && rate > 0.5) {
|
|
1685
|
+
log.warn(
|
|
1686
|
+
`HIGH BUST RATE: session ${sid} has ${(rate * 100).toFixed(0)}% bust rate` +
|
|
1687
|
+
` (${state.bustCount}/${state.transformCount} transforms)`,
|
|
1688
|
+
);
|
|
1689
|
+
}
|
|
1519
1690
|
}
|
|
1520
1691
|
state.lastPrefixHash = prefixHash;
|
|
1521
1692
|
|