@loreai/gateway 0.13.4 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +49694 -3155
- package/package.json +14 -6
- package/src/batch-queue.ts +21 -1
- package/src/cache-analytics.ts +344 -0
- package/src/cli/agents.ts +107 -0
- package/src/cli/bin.ts +11 -0
- package/src/cli/help.ts +55 -0
- package/src/cli/lib/binary.ts +353 -0
- package/src/cli/lib/bspatch.ts +306 -0
- package/src/cli/lib/delta-upgrade.ts +790 -0
- package/src/cli/lib/errors.ts +48 -0
- package/src/cli/lib/ghcr.ts +389 -0
- package/src/cli/lib/patch-cache.ts +342 -0
- package/src/cli/lib/upgrade.ts +454 -0
- package/src/cli/lib/version-check.ts +385 -0
- package/src/cli/main.ts +152 -0
- package/src/cli/run.ts +181 -0
- package/src/cli/start.ts +82 -0
- package/src/cli/upgrade.ts +311 -0
- package/src/cli/version.ts +22 -0
- package/src/idle.ts +0 -6
- package/src/index.ts +27 -27
- package/src/llm-adapter.ts +100 -28
- package/src/pipeline.ts +254 -177
- package/src/recall.ts +223 -91
- package/src/temporal-adapter.ts +3 -0
- package/src/translate/anthropic.ts +50 -6
- package/src/translate/types.ts +54 -9
- package/dist/index.js.map +0 -7
package/src/pipeline.ts
CHANGED
|
@@ -16,7 +16,6 @@ import {
|
|
|
16
16
|
load,
|
|
17
17
|
config as loreConfig,
|
|
18
18
|
ensureProject,
|
|
19
|
-
isFirstRun,
|
|
20
19
|
temporal,
|
|
21
20
|
ltm,
|
|
22
21
|
distillation,
|
|
@@ -92,6 +91,7 @@ import {
|
|
|
92
91
|
import type { UpstreamInterceptor } from "./recorder";
|
|
93
92
|
import { startIdleScheduler, buildIdleWorkHandler } from "./idle";
|
|
94
93
|
import { getWorkerModel, resetWorkerModelState } from "./worker-model";
|
|
94
|
+
import { analyzeCacheTurn } from "./cache-analytics";
|
|
95
95
|
import {
|
|
96
96
|
RECALL_GATEWAY_TOOL,
|
|
97
97
|
RECALL_TOOL_NAME,
|
|
@@ -100,10 +100,12 @@ import {
|
|
|
100
100
|
hasRecallToolUse,
|
|
101
101
|
hasOtherToolUse,
|
|
102
102
|
clientHasRecallTool,
|
|
103
|
-
isPendingRecallValid,
|
|
104
|
-
injectPendingRecall,
|
|
105
103
|
buildRecallFollowUp,
|
|
106
|
-
|
|
104
|
+
buildRecallMarker,
|
|
105
|
+
recallStoreKey,
|
|
106
|
+
expandRecallMarkers,
|
|
107
|
+
cleanupRecallStore,
|
|
108
|
+
replaceRecallWithMarker,
|
|
107
109
|
} from "./recall";
|
|
108
110
|
|
|
109
111
|
// ---------------------------------------------------------------------------
|
|
@@ -143,6 +145,7 @@ export async function resetPipelineState(): Promise<void> {
|
|
|
143
145
|
cachedProjectPath = null;
|
|
144
146
|
sessions.clear();
|
|
145
147
|
ltmSessionCache.clear();
|
|
148
|
+
ltmPinnedText.clear();
|
|
146
149
|
// Shut down batch queue gracefully before clearing the client
|
|
147
150
|
if (llmClient && "shutdown" in llmClient) {
|
|
148
151
|
await (llmClient as LLMClient & { shutdown: () => Promise<void> }).shutdown();
|
|
@@ -175,6 +178,46 @@ const ltmSessionCache = new Map<
|
|
|
175
178
|
{ formatted: string; tokenCount: number }
|
|
176
179
|
>();
|
|
177
180
|
|
|
181
|
+
/**
|
|
182
|
+
* Pinned LTM text per session — the text currently being injected into the
|
|
183
|
+
* system prompt. When ltmSessionCache is invalidated and recomputed, we
|
|
184
|
+
* compare the new text against the pin. Only update if >=5% character
|
|
185
|
+
* difference to avoid cache busts from minor BM25 re-ranking changes.
|
|
186
|
+
*/
|
|
187
|
+
const ltmPinnedText = new Map<
|
|
188
|
+
string,
|
|
189
|
+
{ formatted: string; tokenCount: number }
|
|
190
|
+
>();
|
|
191
|
+
|
|
192
|
+
/**
 * Estimate how different two strings are, as a ratio in the range 0..1.
 *
 * This is a cheap heuristic rather than a real diff: it counts the code units
 * that match at the start of both strings, then the code units that match at
 * the end (never re-counting anything already covered by the prefix), and
 * treats everything in between as changed.
 *
 * @param a - first string to compare
 * @param b - second string to compare
 * @returns 0 when the strings are identical, 1 when exactly one of them is
 *   empty or nothing matches, otherwise `1 - (prefix + suffix) / longerLength`.
 */
function textDiffRatio(a: string, b: string): number {
  if (a === b) return 0;
  if (!a || !b) return 1;

  const shorter = Math.min(a.length, b.length);
  const longer = Math.max(a.length, b.length);

  // Walk forward while both strings agree.
  let head = 0;
  while (head < shorter && a[head] === b[head]) {
    head++;
  }

  // Walk backward from both ends; the cap keeps the suffix from overlapping
  // the prefix that was already counted in the shorter string.
  const tailCap = shorter - head;
  let tail = 0;
  while (
    tail < tailCap &&
    a[a.length - 1 - tail] === b[b.length - 1 - tail]
  ) {
    tail++;
  }

  const matched = head + tail;
  return 1 - matched / longer;
}
|
|
220
|
+
|
|
178
221
|
/** Cached LLM client for background workers. */
|
|
179
222
|
let llmClient: LLMClient | null = null;
|
|
180
223
|
|
|
@@ -242,8 +285,6 @@ async function initIfNeeded(projectPath: string, config?: GatewayConfig): Promis
|
|
|
242
285
|
config.upstreamAnthropic,
|
|
243
286
|
() => resolveAuth(),
|
|
244
287
|
sessionModelID,
|
|
245
|
-
// onLtmInvalidated: clear the LTM session cache
|
|
246
|
-
() => ltmSessionCache.clear(),
|
|
247
288
|
);
|
|
248
289
|
stopIdleScheduler = startIdleScheduler(config, sessions, idleHandler);
|
|
249
290
|
}
|
|
@@ -298,17 +339,23 @@ function getOrCreateSession(
|
|
|
298
339
|
lastRequestTime: Date.now(),
|
|
299
340
|
messageCount: 0,
|
|
300
341
|
turnsSinceCuration: 0,
|
|
342
|
+
recallStore: new Map(),
|
|
343
|
+
cacheAnalytics: {
|
|
344
|
+
lastRequestBody: null,
|
|
345
|
+
lastRequestBodyLength: 0,
|
|
346
|
+
lastCacheRead: 0,
|
|
347
|
+
lastCacheCreation: 0,
|
|
348
|
+
turnCount: 0,
|
|
349
|
+
bustCount: 0,
|
|
350
|
+
},
|
|
301
351
|
};
|
|
302
352
|
sessions.set(sessionID, state);
|
|
303
353
|
}
|
|
304
354
|
state.lastRequestTime = Date.now();
|
|
305
355
|
|
|
306
|
-
//
|
|
307
|
-
if (
|
|
308
|
-
|
|
309
|
-
`lazy cleanup: discarding expired pending recall for session ${sessionID.slice(0, 16)}`,
|
|
310
|
-
);
|
|
311
|
-
state.pendingRecall = undefined;
|
|
356
|
+
// Ensure recallStore exists (upgrade from older session state)
|
|
357
|
+
if (!state.recallStore) {
|
|
358
|
+
state.recallStore = new Map();
|
|
312
359
|
}
|
|
313
360
|
|
|
314
361
|
return state;
|
|
@@ -369,6 +416,13 @@ async function identifySession(
|
|
|
369
416
|
// Upstream forwarding
|
|
370
417
|
// ---------------------------------------------------------------------------
|
|
371
418
|
|
|
419
|
+
/** Result from forwardToUpstream — includes the serialized body for cache analytics. */
|
|
420
|
+
type UpstreamResult = {
|
|
421
|
+
response: Response;
|
|
422
|
+
/** The serialized JSON body sent to the upstream provider. */
|
|
423
|
+
serializedBody: string;
|
|
424
|
+
};
|
|
425
|
+
|
|
372
426
|
/**
|
|
373
427
|
* Forward a request to the upstream provider (Anthropic or OpenAI).
|
|
374
428
|
*
|
|
@@ -376,14 +430,15 @@ async function identifySession(
|
|
|
376
430
|
* interceptor is called instead of `fetch` directly. This enables recording
|
|
377
431
|
* and replay without modifying individual call sites.
|
|
378
432
|
*
|
|
379
|
-
* Returns the raw fetch Response
|
|
433
|
+
* Returns the raw fetch Response alongside the serialized request body
|
|
434
|
+
* (for cache analytics prefix comparison).
|
|
380
435
|
*/
|
|
381
436
|
async function forwardToUpstream(
|
|
382
437
|
req: GatewayRequest,
|
|
383
438
|
config: GatewayConfig,
|
|
384
439
|
interceptor?: UpstreamInterceptor,
|
|
385
440
|
cache?: AnthropicCacheOptions,
|
|
386
|
-
): Promise<
|
|
441
|
+
): Promise<UpstreamResult> {
|
|
387
442
|
let url: string;
|
|
388
443
|
let headers: Record<string, string>;
|
|
389
444
|
let body: unknown;
|
|
@@ -405,10 +460,11 @@ async function forwardToUpstream(
|
|
|
405
460
|
body = result.body;
|
|
406
461
|
}
|
|
407
462
|
|
|
463
|
+
const serializedBody = JSON.stringify(body);
|
|
408
464
|
const effectiveInterceptor = interceptor ?? activeInterceptor;
|
|
409
465
|
|
|
410
466
|
if (effectiveInterceptor) {
|
|
411
|
-
|
|
467
|
+
const response = await effectiveInterceptor(
|
|
412
468
|
body,
|
|
413
469
|
req.model,
|
|
414
470
|
req.stream,
|
|
@@ -416,16 +472,18 @@ async function forwardToUpstream(
|
|
|
416
472
|
fetch(url, {
|
|
417
473
|
method: "POST",
|
|
418
474
|
headers,
|
|
419
|
-
body:
|
|
475
|
+
body: serializedBody,
|
|
420
476
|
}),
|
|
421
477
|
);
|
|
478
|
+
return { response, serializedBody };
|
|
422
479
|
}
|
|
423
480
|
|
|
424
|
-
|
|
481
|
+
const response = await fetch(url, {
|
|
425
482
|
method: "POST",
|
|
426
483
|
headers,
|
|
427
|
-
body:
|
|
484
|
+
body: serializedBody,
|
|
428
485
|
});
|
|
486
|
+
return { response, serializedBody };
|
|
429
487
|
}
|
|
430
488
|
|
|
431
489
|
// ---------------------------------------------------------------------------
|
|
@@ -483,44 +541,46 @@ function buildStreamingResponse(
|
|
|
483
541
|
recallContext.sessionState.sessionID,
|
|
484
542
|
);
|
|
485
543
|
|
|
544
|
+
const scope = input.scope ?? "all";
|
|
545
|
+
|
|
546
|
+
// Store recall result for marker round-trip expansion
|
|
547
|
+
const storeKey = recallStoreKey(input.query, scope);
|
|
548
|
+
const position = resp.content.indexOf(recallBlock);
|
|
549
|
+
recallContext.sessionState.recallStore.set(storeKey, {
|
|
550
|
+
toolUseId: recallBlock.id,
|
|
551
|
+
input,
|
|
552
|
+
position,
|
|
553
|
+
result,
|
|
554
|
+
});
|
|
555
|
+
|
|
556
|
+
// Emit marker text block in place of the suppressed recall block
|
|
557
|
+
const markerText = buildRecallMarker(input.query, scope);
|
|
558
|
+
const markerIdx = recallAccum.clientBlockCount();
|
|
559
|
+
const syntheticMarker = [
|
|
560
|
+
formatSSEEvent("content_block_start", JSON.stringify({
|
|
561
|
+
type: "content_block_start",
|
|
562
|
+
index: markerIdx,
|
|
563
|
+
content_block: { type: "text", text: "" },
|
|
564
|
+
})),
|
|
565
|
+
formatSSEEvent("content_block_delta", JSON.stringify({
|
|
566
|
+
type: "content_block_delta",
|
|
567
|
+
index: markerIdx,
|
|
568
|
+
delta: { type: "text_delta", text: markerText },
|
|
569
|
+
})),
|
|
570
|
+
formatSSEEvent("content_block_stop", JSON.stringify({
|
|
571
|
+
type: "content_block_stop",
|
|
572
|
+
index: markerIdx,
|
|
573
|
+
})),
|
|
574
|
+
].join("");
|
|
575
|
+
controller.enqueue(encoder.encode(syntheticMarker));
|
|
576
|
+
|
|
486
577
|
if (recallAccum.hasOtherTools()) {
|
|
487
|
-
//
|
|
488
|
-
const position = resp.content.indexOf(recallBlock);
|
|
489
|
-
recallContext.sessionState.pendingRecall = {
|
|
490
|
-
toolUseId: recallBlock.id,
|
|
491
|
-
input,
|
|
492
|
-
position,
|
|
493
|
-
result,
|
|
494
|
-
timestamp: Date.now(),
|
|
495
|
-
};
|
|
578
|
+
// Forward held-back events, close stream
|
|
496
579
|
log.info(
|
|
497
|
-
`recall (stream, mixed): stored
|
|
580
|
+
`recall (stream, mixed): stored result for session ` +
|
|
498
581
|
`${recallContext.sessionState.sessionID.slice(0, 16)}`,
|
|
499
582
|
);
|
|
500
583
|
|
|
501
|
-
// Emit a synthetic "[Searching memory...]" text block after all
|
|
502
|
-
// other tool blocks. The accumulator already re-indexed other
|
|
503
|
-
// tools to fill the gap, so this goes at clientBlockCount.
|
|
504
|
-
const searchingIdx = recallAccum.clientBlockCount();
|
|
505
|
-
const syntheticCase2 = [
|
|
506
|
-
formatSSEEvent("content_block_start", JSON.stringify({
|
|
507
|
-
type: "content_block_start",
|
|
508
|
-
index: searchingIdx,
|
|
509
|
-
content_block: { type: "text", text: "" },
|
|
510
|
-
})),
|
|
511
|
-
formatSSEEvent("content_block_delta", JSON.stringify({
|
|
512
|
-
type: "content_block_delta",
|
|
513
|
-
index: searchingIdx,
|
|
514
|
-
delta: { type: "text_delta", text: "\n\n[Searching memory...]" },
|
|
515
|
-
})),
|
|
516
|
-
formatSSEEvent("content_block_stop", JSON.stringify({
|
|
517
|
-
type: "content_block_stop",
|
|
518
|
-
index: searchingIdx,
|
|
519
|
-
})),
|
|
520
|
-
].join("");
|
|
521
|
-
controller.enqueue(encoder.encode(syntheticCase2));
|
|
522
|
-
|
|
523
|
-
// Forward the held-back message_delta + message_stop
|
|
524
584
|
const heldBack = recallAccum.heldBackEvents();
|
|
525
585
|
if (heldBack) {
|
|
526
586
|
controller.enqueue(encoder.encode(heldBack));
|
|
@@ -528,51 +588,50 @@ function buildStreamingResponse(
|
|
|
528
588
|
|
|
529
589
|
controller.close();
|
|
530
590
|
|
|
531
|
-
// Post-stream:
|
|
532
|
-
const
|
|
533
|
-
onComplete(
|
|
591
|
+
// Post-stream: store response with marker text (not raw tool_use)
|
|
592
|
+
const markerResp = replaceRecallWithMarker(resp);
|
|
593
|
+
onComplete(markerResp);
|
|
534
594
|
return;
|
|
535
595
|
}
|
|
536
596
|
|
|
537
|
-
//
|
|
597
|
+
// Recall-only — send follow-up, pipe continuation
|
|
538
598
|
log.info(
|
|
539
599
|
`recall (stream, only): executing follow-up for session ` +
|
|
540
600
|
`${recallContext.sessionState.sessionID.slice(0, 16)}`,
|
|
541
601
|
);
|
|
542
602
|
|
|
543
|
-
// Emit a synthetic "[Searching memory...]" text block at the
|
|
544
|
-
// suppressed recall index so the client sees a natural indicator
|
|
545
|
-
// during the pause while the recall executes.
|
|
546
|
-
const searchingIndex = recallAccum.clientBlockCount();
|
|
547
|
-
const syntheticBlock = [
|
|
548
|
-
formatSSEEvent("content_block_start", JSON.stringify({
|
|
549
|
-
type: "content_block_start",
|
|
550
|
-
index: searchingIndex,
|
|
551
|
-
content_block: { type: "text", text: "" },
|
|
552
|
-
})),
|
|
553
|
-
formatSSEEvent("content_block_delta", JSON.stringify({
|
|
554
|
-
type: "content_block_delta",
|
|
555
|
-
index: searchingIndex,
|
|
556
|
-
delta: { type: "text_delta", text: "\n\n[Searching memory...]" },
|
|
557
|
-
})),
|
|
558
|
-
formatSSEEvent("content_block_stop", JSON.stringify({
|
|
559
|
-
type: "content_block_stop",
|
|
560
|
-
index: searchingIndex,
|
|
561
|
-
})),
|
|
562
|
-
].join("");
|
|
563
|
-
controller.enqueue(encoder.encode(syntheticBlock));
|
|
564
|
-
|
|
565
603
|
const followUp = buildRecallFollowUp(
|
|
566
604
|
recallContext.modifiedReq,
|
|
567
605
|
resp,
|
|
568
606
|
result,
|
|
569
607
|
recallBlock,
|
|
570
608
|
);
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
609
|
+
let followUpResponse: Response;
|
|
610
|
+
try {
|
|
611
|
+
({ response: followUpResponse } = await forwardToUpstream(
|
|
612
|
+
followUp,
|
|
613
|
+
recallContext.config,
|
|
614
|
+
undefined,
|
|
615
|
+
recallContext.cacheOptions,
|
|
616
|
+
));
|
|
617
|
+
} catch (fetchErr) {
|
|
618
|
+
log.error(
|
|
619
|
+
`recall follow-up fetch error for session ${recallContext.sessionState.sessionID.slice(0, 16)}:`,
|
|
620
|
+
fetchErr,
|
|
621
|
+
);
|
|
622
|
+
const heldBack = recallAccum.heldBackEvents();
|
|
623
|
+
if (heldBack) {
|
|
624
|
+
controller.enqueue(encoder.encode(heldBack));
|
|
625
|
+
}
|
|
626
|
+
controller.close();
|
|
627
|
+
const markerResp = replaceRecallWithMarker(resp);
|
|
628
|
+
onComplete(markerResp);
|
|
629
|
+
return;
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
log.info(
|
|
633
|
+
`recall follow-up response: status=${followUpResponse.status} ` +
|
|
634
|
+
`hasBody=${!!followUpResponse.body} session=${recallContext.sessionState.sessionID.slice(0, 16)}`,
|
|
576
635
|
);
|
|
577
636
|
|
|
578
637
|
if (!followUpResponse.ok) {
|
|
@@ -586,22 +645,21 @@ function buildStreamingResponse(
|
|
|
586
645
|
controller.enqueue(encoder.encode(heldBack));
|
|
587
646
|
}
|
|
588
647
|
controller.close();
|
|
589
|
-
const
|
|
590
|
-
onComplete(
|
|
648
|
+
const markerResp = replaceRecallWithMarker(resp);
|
|
649
|
+
onComplete(markerResp);
|
|
591
650
|
return;
|
|
592
651
|
}
|
|
593
652
|
|
|
594
653
|
// Pipe the continuation stream into the same HTTP response.
|
|
595
654
|
// Suppress message_start (client already has one) and re-index
|
|
596
655
|
// content blocks to continue from where the client left off.
|
|
597
|
-
// +1 accounts for the synthetic
|
|
598
|
-
// Use clientBlockCount (not recallBlockIndex) — this is the number
|
|
599
|
-
// of blocks the client has already seen, so continuation blocks
|
|
600
|
-
// start at clientBlockCount + 1 (for the synthetic block).
|
|
656
|
+
// +1 accounts for the synthetic marker block.
|
|
601
657
|
const blockOffset = recallAccum.clientBlockCount() + 1;
|
|
602
658
|
const contReader = followUpResponse.body!.getReader();
|
|
659
|
+
let contEventCount = 0;
|
|
603
660
|
|
|
604
661
|
for await (const { event: contEvent, data: contData } of parseSSEStream(contReader)) {
|
|
662
|
+
contEventCount++;
|
|
605
663
|
if (contEvent === "message_start") {
|
|
606
664
|
// Suppress — client already received one
|
|
607
665
|
continue;
|
|
@@ -634,19 +692,18 @@ function buildStreamingResponse(
|
|
|
634
692
|
controller.enqueue(encoder.encode(forwarded));
|
|
635
693
|
}
|
|
636
694
|
|
|
695
|
+
log.info(
|
|
696
|
+
`recall follow-up stream complete: ${contEventCount} events piped, ` +
|
|
697
|
+
`session=${recallContext.sessionState.sessionID.slice(0, 16)}`,
|
|
698
|
+
);
|
|
699
|
+
|
|
637
700
|
controller.close();
|
|
638
701
|
|
|
639
|
-
// Post-stream:
|
|
640
|
-
//
|
|
641
|
-
//
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
// response's pre-recall content + continuation's content.
|
|
645
|
-
// For now, call onComplete with the original response so at least
|
|
646
|
-
// the pre-recall content is stored. The continuation's text is
|
|
647
|
-
// visible to the client but not separately stored — acceptable
|
|
648
|
-
// since temporal storage captures the full conversation on next turn.
|
|
649
|
-
onComplete(resp);
|
|
702
|
+
// Post-stream: store response with marker text for temporal storage.
|
|
703
|
+
// The marker replaces the raw tool_use, so future turns can
|
|
704
|
+
// round-trip the marker ↔ tool_use/tool_result correctly.
|
|
705
|
+
const markerResp = replaceRecallWithMarker(resp);
|
|
706
|
+
onComplete(markerResp);
|
|
650
707
|
return;
|
|
651
708
|
}
|
|
652
709
|
}
|
|
@@ -795,6 +852,8 @@ function postResponse(
|
|
|
795
852
|
resp: GatewayResponse,
|
|
796
853
|
sessionState: SessionState,
|
|
797
854
|
config: GatewayConfig,
|
|
855
|
+
/** Serialized JSON body sent upstream — for cache prefix comparison. */
|
|
856
|
+
requestBody?: string,
|
|
798
857
|
): void {
|
|
799
858
|
const { sessionID, projectPath } = sessionState;
|
|
800
859
|
|
|
@@ -810,6 +869,11 @@ function postResponse(
|
|
|
810
869
|
getLastTransformedCount(sessionID),
|
|
811
870
|
);
|
|
812
871
|
|
|
872
|
+
// --- Cache analytics ---
|
|
873
|
+
if (requestBody) {
|
|
874
|
+
analyzeCacheTurn(sessionState.cacheAnalytics, requestBody, resp.usage, sessionID);
|
|
875
|
+
}
|
|
876
|
+
|
|
813
877
|
// --- Temporal storage ---
|
|
814
878
|
// Store all messages (user + assistant) from this turn.
|
|
815
879
|
// Convert gateway messages to Lore format.
|
|
@@ -1010,7 +1074,7 @@ async function handlePassthrough(
|
|
|
1010
1074
|
req: GatewayRequest,
|
|
1011
1075
|
config: GatewayConfig,
|
|
1012
1076
|
): Promise<Response> {
|
|
1013
|
-
const upstreamResponse = await forwardToUpstream(req, config);
|
|
1077
|
+
const { response: upstreamResponse } = await forwardToUpstream(req, config);
|
|
1014
1078
|
|
|
1015
1079
|
// For streaming, pipe through unchanged
|
|
1016
1080
|
if (req.stream && upstreamResponse.body) {
|
|
@@ -1079,25 +1143,18 @@ async function handleConversationTurn(
|
|
|
1079
1143
|
// Track session model for worker model discovery
|
|
1080
1144
|
lastSeenSessionModel = req.model;
|
|
1081
1145
|
|
|
1082
|
-
// ---
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
)
|
|
1090
|
-
} else {
|
|
1091
|
-
log.warn(
|
|
1092
|
-
`failed to inject pending recall — conversation structure mismatch`,
|
|
1093
|
-
);
|
|
1094
|
-
}
|
|
1095
|
-
} else {
|
|
1096
|
-
log.warn(
|
|
1097
|
-
`discarding expired pending recall for session ${sessionID.slice(0, 16)}`,
|
|
1146
|
+
// --- Expand recall markers from previous turns ---
|
|
1147
|
+
// Scan all assistant messages for marker text blocks and restore them
|
|
1148
|
+
// to tool_use + tool_result pairs before forwarding upstream.
|
|
1149
|
+
if (sessionState.recallStore.size > 0) {
|
|
1150
|
+
const expanded = expandRecallMarkers(req, sessionState.recallStore);
|
|
1151
|
+
if (expanded) {
|
|
1152
|
+
log.info(
|
|
1153
|
+
`expanded recall markers for session ${sessionID.slice(0, 16)}`,
|
|
1098
1154
|
);
|
|
1099
1155
|
}
|
|
1100
|
-
|
|
1156
|
+
// Clean up orphaned store entries (markers evicted by gradient)
|
|
1157
|
+
cleanupRecallStore(req, sessionState.recallStore);
|
|
1101
1158
|
}
|
|
1102
1159
|
|
|
1103
1160
|
log.info(
|
|
@@ -1130,8 +1187,8 @@ async function handleConversationTurn(
|
|
|
1130
1187
|
);
|
|
1131
1188
|
}
|
|
1132
1189
|
|
|
1133
|
-
// --- 6. LTM injection
|
|
1134
|
-
let
|
|
1190
|
+
// --- 6. LTM injection (kept separate from host system prompt for caching) ---
|
|
1191
|
+
let ltmText: string | undefined;
|
|
1135
1192
|
if (cfg.knowledge.enabled) {
|
|
1136
1193
|
try {
|
|
1137
1194
|
let cached = ltmSessionCache.get(sessionID);
|
|
@@ -1159,8 +1216,21 @@ async function handleConversationTurn(
|
|
|
1159
1216
|
}
|
|
1160
1217
|
|
|
1161
1218
|
if (cached) {
|
|
1162
|
-
|
|
1163
|
-
|
|
1219
|
+
// Content-diff pinning: only update the injected LTM text if the
|
|
1220
|
+
// new content differs by >5% from what's currently pinned. This
|
|
1221
|
+
// prevents cache busts from minor BM25 re-ranking after background
|
|
1222
|
+
// curation/consolidation invalidates the LTM cache.
|
|
1223
|
+
const pinned = ltmPinnedText.get(sessionID);
|
|
1224
|
+
if (pinned && textDiffRatio(pinned.formatted, cached.formatted) < 0.05) {
|
|
1225
|
+
// Near-identical — keep the pinned text to preserve cache prefix
|
|
1226
|
+
ltmText = pinned.formatted;
|
|
1227
|
+
setLtmTokens(pinned.tokenCount, sessionID);
|
|
1228
|
+
} else {
|
|
1229
|
+
// Substantially different or first injection — pin the new text
|
|
1230
|
+
ltmPinnedText.set(sessionID, cached);
|
|
1231
|
+
ltmText = cached.formatted;
|
|
1232
|
+
setLtmTokens(cached.tokenCount, sessionID);
|
|
1233
|
+
}
|
|
1164
1234
|
} else {
|
|
1165
1235
|
setLtmTokens(0, sessionID);
|
|
1166
1236
|
}
|
|
@@ -1175,25 +1245,6 @@ async function handleConversationTurn(
|
|
|
1175
1245
|
consumeCameOutOfIdle(sessionID);
|
|
1176
1246
|
}
|
|
1177
1247
|
|
|
1178
|
-
// First-run greeting
|
|
1179
|
-
if (isFirstRun()) {
|
|
1180
|
-
modifiedSystem +=
|
|
1181
|
-
"\n\n[Lore plugin] This is the first time Lore has been activated. " +
|
|
1182
|
-
"Briefly let the user know that Lore is now active and their " +
|
|
1183
|
-
"coding agent will get progressively smarter on this codebase " +
|
|
1184
|
-
"over time as knowledge accumulates across sessions.";
|
|
1185
|
-
}
|
|
1186
|
-
|
|
1187
|
-
// Lore knowledge file commit reminder
|
|
1188
|
-
if (cfg.knowledge.enabled) {
|
|
1189
|
-
const filesToTrack = [".lore.md"];
|
|
1190
|
-
if (cfg.agentsFile.enabled) filesToTrack.push(cfg.agentsFile.path);
|
|
1191
|
-
modifiedSystem +=
|
|
1192
|
-
`\n\nWhen making git commits, always check if ${filesToTrack.join(" and ")} ` +
|
|
1193
|
-
`have unstaged changes and include them in the commit. These files contain ` +
|
|
1194
|
-
`shared project knowledge managed by lore and must be version-controlled.`;
|
|
1195
|
-
}
|
|
1196
|
-
|
|
1197
1248
|
// --- 7. Gradient transform on messages ---
|
|
1198
1249
|
const loreMessages = gatewayMessagesToLore(req.messages, sessionID);
|
|
1199
1250
|
resolveToolResults(loreMessages);
|
|
@@ -1225,34 +1276,54 @@ async function handleConversationTurn(
|
|
|
1225
1276
|
|
|
1226
1277
|
const modifiedReq: GatewayRequest = {
|
|
1227
1278
|
...req,
|
|
1228
|
-
system
|
|
1279
|
+
// Host system prompt is passed through unmodified — LTM is injected
|
|
1280
|
+
// as a separate system block via cache options for prefix stability.
|
|
1229
1281
|
messages: transformedMessages,
|
|
1230
1282
|
};
|
|
1231
1283
|
|
|
1232
|
-
// --- 8b. Inject recall tool ---
|
|
1284
|
+
// --- 8b. Inject recall tool (with git reminder appended to description) ---
|
|
1233
1285
|
// Only inject if the client doesn't already have a recall tool (e.g. from
|
|
1234
1286
|
// a host plugin like OpenCode) and the request has other tools (so it's a
|
|
1235
1287
|
// coding agent, not a bare chat).
|
|
1236
1288
|
if (modifiedReq.tools.length > 0 && !clientHasRecallTool(modifiedReq.tools)) {
|
|
1237
|
-
|
|
1289
|
+
// Build the recall tool with git reminder baked into its description.
|
|
1290
|
+
// This keeps the reminder in the stable tools prefix (1h cache) rather
|
|
1291
|
+
// than the volatile system prompt.
|
|
1292
|
+
const recallTool = cfg.knowledge.enabled
|
|
1293
|
+
? {
|
|
1294
|
+
...RECALL_GATEWAY_TOOL,
|
|
1295
|
+
description:
|
|
1296
|
+
RECALL_GATEWAY_TOOL.description +
|
|
1297
|
+
"\n\nWhen making git commits, always check if .lore.md " +
|
|
1298
|
+
"has unstaged changes and include it in the commit. " +
|
|
1299
|
+
"This file contains shared project knowledge managed " +
|
|
1300
|
+
"by lore and must be version-controlled.",
|
|
1301
|
+
}
|
|
1302
|
+
: RECALL_GATEWAY_TOOL;
|
|
1303
|
+
modifiedReq.tools = [...modifiedReq.tools, recallTool];
|
|
1238
1304
|
}
|
|
1239
1305
|
|
|
1240
1306
|
// --- 9. Forward to upstream ---
|
|
1241
|
-
// Enable prompt caching for conversation turns:
|
|
1242
|
-
// - System prompt:
|
|
1243
|
-
// -
|
|
1307
|
+
// Enable prompt caching for conversation turns with layered breakpoints:
|
|
1308
|
+
// - System prompt: 1h TTL (host prompt is very stable within a session)
|
|
1309
|
+
// - LTM: separate system block (no breakpoint, benefits from prefix)
|
|
1310
|
+
// - Tools: 1h TTL on last tool (recall + git reminder are static)
|
|
1311
|
+
// - Conversation: 5m TTL on last message block
|
|
1244
1312
|
// Title/summary passthrough (handlePassthrough) never reaches here — it
|
|
1245
1313
|
// forwards the raw request without buildAnthropicRequest, so no caching.
|
|
1246
1314
|
const cacheOptions: AnthropicCacheOptions = {
|
|
1247
|
-
systemTTL: "
|
|
1315
|
+
systemTTL: "1h",
|
|
1316
|
+
ltmSystem: ltmText,
|
|
1317
|
+
cacheTools: true,
|
|
1248
1318
|
cacheConversation: true,
|
|
1249
1319
|
};
|
|
1250
|
-
const upstreamResponse
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1320
|
+
const { response: upstreamResponse, serializedBody: requestBody } =
|
|
1321
|
+
await forwardToUpstream(
|
|
1322
|
+
modifiedReq,
|
|
1323
|
+
config,
|
|
1324
|
+
undefined,
|
|
1325
|
+
cacheOptions,
|
|
1326
|
+
);
|
|
1256
1327
|
|
|
1257
1328
|
if (!upstreamResponse.ok) {
|
|
1258
1329
|
const errorBody = await upstreamResponse.text();
|
|
@@ -1273,7 +1344,7 @@ async function handleConversationTurn(
|
|
|
1273
1344
|
);
|
|
1274
1345
|
return buildStreamingResponse(
|
|
1275
1346
|
upstreamResponse,
|
|
1276
|
-
(resp) => postResponse(req, resp, sessionState, config),
|
|
1347
|
+
(resp) => postResponse(req, resp, sessionState, config, requestBody),
|
|
1277
1348
|
hasRecallTool
|
|
1278
1349
|
? { modifiedReq, config, sessionState, cacheOptions }
|
|
1279
1350
|
: undefined,
|
|
@@ -1292,46 +1363,49 @@ async function handleConversationTurn(
|
|
|
1292
1363
|
sessionState.sessionID,
|
|
1293
1364
|
);
|
|
1294
1365
|
|
|
1366
|
+
// Store recall result for marker round-trip expansion
|
|
1367
|
+
const storeKey = recallStoreKey(input.query, input.scope ?? "all");
|
|
1368
|
+
const position = resp.content.indexOf(recallBlock);
|
|
1369
|
+
sessionState.recallStore.set(storeKey, {
|
|
1370
|
+
toolUseId: recallBlock.id,
|
|
1371
|
+
input,
|
|
1372
|
+
position,
|
|
1373
|
+
result,
|
|
1374
|
+
});
|
|
1375
|
+
|
|
1376
|
+
// Replace recall tool_use with marker text in the response
|
|
1377
|
+
const markerResp = replaceRecallWithMarker(resp);
|
|
1378
|
+
|
|
1295
1379
|
if (hasOtherToolUse(resp)) {
|
|
1296
|
-
//
|
|
1297
|
-
const position = resp.content.indexOf(recallBlock);
|
|
1298
|
-
sessionState.pendingRecall = {
|
|
1299
|
-
toolUseId: recallBlock.id,
|
|
1300
|
-
input,
|
|
1301
|
-
position,
|
|
1302
|
-
result,
|
|
1303
|
-
timestamp: Date.now(),
|
|
1304
|
-
};
|
|
1380
|
+
// Mixed tools — return response with marker replacing recall tool_use
|
|
1305
1381
|
log.info(
|
|
1306
|
-
`recall (non-stream, mixed): stored
|
|
1382
|
+
`recall (non-stream, mixed): stored result for session ${sessionState.sessionID.slice(0, 16)}`,
|
|
1307
1383
|
);
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
return nonStreamHttpResponse(cleanResp);
|
|
1384
|
+
postResponse(req, markerResp, sessionState, config, requestBody);
|
|
1385
|
+
return nonStreamHttpResponse(markerResp);
|
|
1311
1386
|
}
|
|
1312
1387
|
|
|
1313
|
-
//
|
|
1388
|
+
// Recall-only — send follow-up request for seamless UX
|
|
1314
1389
|
log.info(
|
|
1315
1390
|
`recall (non-stream, only): executing follow-up for session ${sessionState.sessionID.slice(0, 16)}`,
|
|
1316
1391
|
);
|
|
1317
1392
|
const followUp = buildRecallFollowUp(modifiedReq, resp, result, recallBlock);
|
|
1318
|
-
|
|
1319
|
-
|
|
1393
|
+
let followUpResponse: Response;
|
|
1394
|
+
({ response: followUpResponse } = await forwardToUpstream(
|
|
1320
1395
|
followUp,
|
|
1321
1396
|
config,
|
|
1322
1397
|
undefined,
|
|
1323
1398
|
cacheOptions,
|
|
1324
|
-
);
|
|
1399
|
+
));
|
|
1325
1400
|
|
|
1326
1401
|
if (!followUpResponse.ok) {
|
|
1327
1402
|
const errorBody = await followUpResponse.text();
|
|
1328
1403
|
log.error(
|
|
1329
1404
|
`recall follow-up upstream error: ${followUpResponse.status} ${errorBody.slice(0, 500)}`,
|
|
1330
1405
|
);
|
|
1331
|
-
// Fall back to
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
return nonStreamHttpResponse(cleanResp);
|
|
1406
|
+
// Fall back to response with marker (no continuation)
|
|
1407
|
+
postResponse(req, markerResp, sessionState, config, requestBody);
|
|
1408
|
+
return nonStreamHttpResponse(markerResp);
|
|
1335
1409
|
}
|
|
1336
1410
|
|
|
1337
1411
|
const continuationResp = await accumulateNonStreamResponse(followUpResponse);
|
|
@@ -1350,11 +1424,11 @@ async function handleConversationTurn(
|
|
|
1350
1424
|
resp.usage.cacheCreationInputTokens;
|
|
1351
1425
|
}
|
|
1352
1426
|
|
|
1353
|
-
postResponse(req, continuationResp, sessionState, config);
|
|
1427
|
+
postResponse(req, continuationResp, sessionState, config, requestBody);
|
|
1354
1428
|
return nonStreamHttpResponse(continuationResp);
|
|
1355
1429
|
}
|
|
1356
1430
|
|
|
1357
|
-
postResponse(req, resp, sessionState, config);
|
|
1431
|
+
postResponse(req, resp, sessionState, config, requestBody);
|
|
1358
1432
|
return nonStreamHttpResponse(resp);
|
|
1359
1433
|
}
|
|
1360
1434
|
|
|
@@ -1417,6 +1491,9 @@ export function loreMessagesToGateway(
|
|
|
1417
1491
|
content.push({
|
|
1418
1492
|
type: "thinking",
|
|
1419
1493
|
thinking: (part as { text: string }).text ?? "",
|
|
1494
|
+
...((part as { signature?: string }).signature != null
|
|
1495
|
+
? { signature: (part as { signature?: string }).signature }
|
|
1496
|
+
: undefined),
|
|
1420
1497
|
});
|
|
1421
1498
|
break;
|
|
1422
1499
|
case "tool": {
|