@loreai/gateway 0.13.3 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +49694 -3155
- package/package.json +14 -6
- package/src/batch-queue.ts +21 -1
- package/src/cache-analytics.ts +344 -0
- package/src/cli/agents.ts +107 -0
- package/src/cli/bin.ts +11 -0
- package/src/cli/help.ts +55 -0
- package/src/cli/lib/binary.ts +353 -0
- package/src/cli/lib/bspatch.ts +306 -0
- package/src/cli/lib/delta-upgrade.ts +790 -0
- package/src/cli/lib/errors.ts +48 -0
- package/src/cli/lib/ghcr.ts +389 -0
- package/src/cli/lib/patch-cache.ts +342 -0
- package/src/cli/lib/upgrade.ts +454 -0
- package/src/cli/lib/version-check.ts +385 -0
- package/src/cli/main.ts +152 -0
- package/src/cli/run.ts +181 -0
- package/src/cli/start.ts +82 -0
- package/src/cli/upgrade.ts +311 -0
- package/src/cli/version.ts +22 -0
- package/src/idle.ts +0 -6
- package/src/index.ts +27 -27
- package/src/llm-adapter.ts +100 -28
- package/src/pipeline.ts +254 -177
- package/src/recall.ts +223 -91
- package/src/temporal-adapter.ts +3 -0
- package/src/translate/anthropic.ts +50 -6
- package/src/translate/types.ts +54 -9
- package/dist/index.js.map +0 -7
package/src/recall.ts
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Gateway recall interception — transparent memory search for any client.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* transparently. Two strategies based on whether recall is the only tool:
|
|
4
|
+
* Uses a unified "Marker and Expand" strategy:
|
|
6
5
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
6
|
+
* 1. **On response (to client):** The recall `tool_use` block is replaced
|
|
7
|
+
* with a human-readable marker text block
|
|
8
|
+
* (`📚 Searching <scope> for "<query>"…`). The recall is executed
|
|
9
|
+
* internally and the result is stored in session state.
|
|
10
|
+
*
|
|
11
|
+
* 2. **On request (from client):** Marker text blocks in the conversation
|
|
12
|
+
* are expanded back into the original `tool_use` + `tool_result` pairs
|
|
13
|
+
* before forwarding upstream.
|
|
14
|
+
*
|
|
15
|
+
* For recall-only responses, a follow-up call is still made internally
|
|
16
|
+
* so the model can continue in the same HTTP response (seamless UX).
|
|
13
17
|
*
|
|
14
18
|
* All recall execution delegates to `runRecall()` from `@loreai/core`.
|
|
15
19
|
*/
|
|
@@ -28,7 +32,7 @@ import type {
|
|
|
28
32
|
GatewayResponse,
|
|
29
33
|
GatewayToolUseBlock,
|
|
30
34
|
GatewayMessage,
|
|
31
|
-
|
|
35
|
+
RecallStore,
|
|
32
36
|
} from "./translate/types";
|
|
33
37
|
|
|
34
38
|
// ---------------------------------------------------------------------------
|
|
@@ -59,15 +63,205 @@ export const RECALL_GATEWAY_TOOL: GatewayTool = {
|
|
|
59
63
|
export const RECALL_TOOL_NAME = "recall";
|
|
60
64
|
|
|
61
65
|
// ---------------------------------------------------------------------------
|
|
62
|
-
//
|
|
66
|
+
// Marker utilities — human-readable text ↔ recall tool round-trip
|
|
63
67
|
// ---------------------------------------------------------------------------
|
|
64
68
|
|
|
65
|
-
/**
|
|
66
|
-
const
|
|
69
|
+
/**
 * Scope → human-readable label used in marker text.
 *
 * Built on a null-prototype object: the table is indexed with arbitrary
 * strings (the scope comes from tool input), and on a plain object literal
 * a key such as "constructor" or "toString" would resolve to an inherited
 * `Object.prototype` member instead of `undefined`, defeating the `??`
 * fallback used by the lookup helpers.
 */
const SCOPE_LABELS: Record<string, string> = Object.assign(
  Object.create(null),
  {
    all: "all archives",
    session: "session history",
    project: "project archives",
    knowledge: "knowledge base",
  },
);

/**
 * Reverse table: human-readable label → scope enum value.
 *
 * Derived from SCOPE_LABELS so the two can never drift apart. Also a
 * null-prototype object, because it is indexed with text parsed out of
 * conversation messages.
 */
const LABEL_TO_SCOPE: Record<string, RecallScope> = Object.assign(
  Object.create(null),
  Object.fromEntries(
    Object.entries(SCOPE_LABELS).map(([k, v]) => [v, k as RecallScope]),
  ),
);
|
|
81
|
+
|
|
82
|
+
/**
 * Translate a recall scope into the label shown in marker text.
 *
 * @param scope Recall scope name; defaults to "all".
 * @returns The label for `scope`, or the "all" label when the lookup
 *   yields nothing.
 */
export function scopeToLabel(scope: string = "all"): string {
  const label = SCOPE_LABELS[scope];
  return label ?? SCOPE_LABELS.all;
}
|
|
67
86
|
|
|
68
|
-
/**
|
|
69
|
-
export function
|
|
70
|
-
return
|
|
87
|
+
/**
 * Inverse of the scope → label mapping.
 *
 * @param label Human-readable label as it appears in a marker.
 * @returns The matching scope, or "all" when the lookup yields nothing.
 */
export function labelToScope(label: string): RecallScope {
  const scope = LABEL_TO_SCOPE[label];
  return scope ?? "all";
}
|
|
91
|
+
|
|
92
|
+
/**
 * Render the marker text that stands in for a recall tool call.
 *
 * Format: `📚 Searching <scope-label> for "<query>"…`
 *
 * @param query Recall query, embedded verbatim between the quotes.
 * @param scope Recall scope; defaults to "all" and is rendered through
 *   `scopeToLabel`.
 */
export function buildRecallMarker(query: string, scope: string = "all"): string {
  const label = scopeToLabel(scope);
  return `📚 Searching ${label} for "${query}"…`;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Regex to parse a recall marker back into scope label (group 1) and
 * query (group 2).
 *
 * The query group is `[\s\S]*?` rather than `.+?` so that every marker of
 * the form `📚 Searching <label> for "<query>"…` parses back, including
 * queries that span several lines or are empty. With `.+?` those markers
 * never matched, so they could not be recognised again later.
 *
 * Limitation: the query is matched lazily up to the first `"…`, so a query
 * that itself contains that two-character sequence is cut short.
 */
const MARKER_REGEX = /📚 Searching (.+?) for "([\s\S]*?)"…/;

/**
 * Parse a recall marker text block, returning query and scope if valid.
 *
 * The marker may appear anywhere inside `text`; only the first occurrence
 * is considered.
 *
 * @param text Text content to inspect.
 * @returns The query and the scope derived from the label via
 *   `labelToScope`, or null if the text doesn't contain a marker.
 */
export function parseRecallMarker(
  text: string,
): { query: string; scope: RecallScope } | null {
  const match = MARKER_REGEX.exec(text);
  if (!match) return null;
  return {
    query: match[2],
    scope: labelToScope(match[1]),
  };
}
|
|
118
|
+
|
|
119
|
+
/**
 * Compose the recall-store key for a query/scope pair.
 *
 * @param query Recall query text.
 * @param scope Recall scope; defaults to "all".
 * @returns The key in `<scope>:<query>` form.
 */
export function recallStoreKey(query: string, scope: string = "all"): string {
  const separator = ":";
  return scope + separator + query;
}
|
|
123
|
+
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
// Marker expansion — restore tool_use + tool_result from markers on inbound
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
/**
 * Find recall marker text blocks in the conversation and expand them
 * back into tool_use + tool_result pairs for the upstream API.
 *
 * Scans ALL assistant messages (not just the last one) since markers
 * can persist across turns.
 *
 * For each assistant message, the first text block that parses as a marker
 * is looked up in `store`. If there is no stored result the marker is left
 * untouched. Otherwise the marker block is replaced by the stored tool_use
 * and the matching tool_result is placed in the user message that follows,
 * creating that message when necessary.
 *
 * Mutates the request in-place.
 *
 * @param req Request whose `messages` are rewritten.
 * @param store Stored recall results keyed by `recallStoreKey(query, scope)`.
 * @returns true if any expansion was performed.
 */
export function expandRecallMarkers(
  req: GatewayRequest,
  store: RecallStore,
): boolean {
  let expanded = false;

  // Iterate forward; when we splice messages the index is adjusted.
  for (let i = 0; i < req.messages.length; i++) {
    const msg = req.messages[i];
    if (msg.role !== "assistant") continue;

    // Find the first recall marker in this message. Only one marker is
    // processed per assistant message; anything after it is either kept in
    // place (tool_use blocks) or moved into a continuation message that the
    // outer loop visits next.
    let markerIdx = -1;
    let parsed: { query: string; scope: RecallScope } | null = null;
    for (let j = 0; j < msg.content.length; j++) {
      const block = msg.content[j];
      if (block.type !== "text") continue;
      parsed = parseRecallMarker(block.text);
      if (parsed) {
        markerIdx = j;
        break;
      }
    }

    if (markerIdx < 0 || !parsed) continue;

    const key = recallStoreKey(parsed.query, parsed.scope);
    const stored = store.get(key);
    if (!stored) continue; // No stored result — leave marker as-is

    // Check if there's non-tool content AFTER the marker in this message.
    // Such content is treated as continuation written after the recall
    // result and must come after the tool_result. When only tool_use blocks
    // follow, they stay together with the recall tool_use.
    const afterMarker = msg.content.slice(markerIdx + 1);
    const hasContinuationAfter = afterMarker.length > 0 &&
      afterMarker.some((b) => b.type !== "tool_use");

    // Replace marker with tool_use
    msg.content[markerIdx] = {
      type: "tool_use",
      id: stored.toolUseId,
      name: RECALL_TOOL_NAME,
      input: stored.input,
    };

    // Truncate assistant message at the tool_use (remove continuation)
    if (hasContinuationAfter) {
      msg.content.length = markerIdx + 1;
    }

    // Build synthetic tool_result user message
    const toolResultMsg: GatewayMessage = {
      role: "user",
      content: [
        {
          type: "tool_result",
          toolUseId: stored.toolUseId,
          content: stored.result,
        },
      ],
    };

    if (hasContinuationAfter) {
      // Split: insert tool_result user message + continuation assistant
      // message after the current assistant message.
      const continuationMsg: GatewayMessage = {
        role: "assistant",
        content: afterMarker,
      };
      req.messages.splice(i + 1, 0, toolResultMsg, continuationMsg);
      // Skip only the synthetic tool_result message. The loop's own i++
      // then lands on the continuation message, so a further marker inside
      // it is expanded too. (Skipping both inserted messages would leave
      // such a marker unexpanded.)
      i += 1;
    } else {
      // No split needed — insert tool_result into the following user message.
      // Prepend (unshift) so the recall result appears before existing
      // tool_results — matching the tool_use order in the assistant message.
      const nextMsg = req.messages[i + 1];
      if (nextMsg?.role === "user") {
        nextMsg.content.unshift({
          type: "tool_result",
          toolUseId: stored.toolUseId,
          content: stored.result,
        });
      } else {
        // No following user message — insert a synthetic one
        req.messages.splice(i + 1, 0, toolResultMsg);
        i += 1;
      }
    }

    expanded = true;
  }

  return expanded;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Drop recall store entries that no marker in the conversation refers to
 * any more.
 *
 * An entry is kept only if some text block of an assistant message parses
 * as a marker whose `recallStoreKey` equals the entry's key.
 *
 * @param req Request whose assistant messages are scanned for markers.
 * @param store Recall store to prune in place.
 */
export function cleanupRecallStore(
  req: GatewayRequest,
  store: RecallStore,
): void {
  // Nothing to prune.
  if (store.size === 0) return;

  // Keys of every marker that is still visible in an assistant message.
  const liveKeys = new Set<string>();
  for (const message of req.messages) {
    if (message.role !== "assistant") continue;
    for (const block of message.content) {
      const marker = block.type === "text" ? parseRecallMarker(block.text) : null;
      if (marker) {
        liveKeys.add(recallStoreKey(marker.query, marker.scope));
      }
    }
  }

  // Walk a snapshot of the keys and delete the unreferenced ones.
  for (const key of Array.from(store.keys())) {
    if (liveKeys.has(key)) continue;
    store.delete(key);
  }
}
|
|
72
266
|
|
|
73
267
|
// ---------------------------------------------------------------------------
|
|
@@ -212,90 +406,28 @@ export function buildRecallFollowUp(
|
|
|
212
406
|
}
|
|
213
407
|
|
|
214
408
|
// ---------------------------------------------------------------------------
|
|
215
|
-
//
|
|
216
|
-
// ---------------------------------------------------------------------------
|
|
217
|
-
|
|
218
|
-
/**
|
|
219
|
-
* Inject a pending recall result into the current request.
|
|
220
|
-
*
|
|
221
|
-
* Finds the last assistant message in `req.messages`, inserts the recall
|
|
222
|
-
* tool_use block at the recorded position, and inserts a tool_result block
|
|
223
|
-
* into the following user message.
|
|
224
|
-
*
|
|
225
|
-
* Mutates the request in-place for efficiency. Returns true if injection
|
|
226
|
-
* was performed, false if the conversation structure didn't match
|
|
227
|
-
* (e.g., no trailing assistant→user pair).
|
|
228
|
-
*/
|
|
229
|
-
export function injectPendingRecall(
|
|
230
|
-
req: GatewayRequest,
|
|
231
|
-
pending: PendingRecall,
|
|
232
|
-
): boolean {
|
|
233
|
-
const messages = req.messages;
|
|
234
|
-
if (messages.length < 2) return false;
|
|
235
|
-
|
|
236
|
-
// Find the last assistant message followed by a user message.
|
|
237
|
-
// The pending recall was from the previous turn's assistant response.
|
|
238
|
-
let assistantIdx = -1;
|
|
239
|
-
for (let i = messages.length - 2; i >= 0; i--) {
|
|
240
|
-
if (
|
|
241
|
-
messages[i].role === "assistant" &&
|
|
242
|
-
messages[i + 1]?.role === "user"
|
|
243
|
-
) {
|
|
244
|
-
assistantIdx = i;
|
|
245
|
-
break;
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
if (assistantIdx < 0) {
|
|
250
|
-
log.warn("injectPendingRecall: no assistant→user pair found");
|
|
251
|
-
return false;
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
const assistantMsg = messages[assistantIdx];
|
|
255
|
-
const userMsg = messages[assistantIdx + 1];
|
|
256
|
-
|
|
257
|
-
// Insert recall tool_use into assistant message at the recorded position.
|
|
258
|
-
// Clamp to content length in case the message was modified by gradient.
|
|
259
|
-
const insertPos = Math.min(pending.position, assistantMsg.content.length);
|
|
260
|
-
const recallToolUse: GatewayToolUseBlock = {
|
|
261
|
-
type: "tool_use",
|
|
262
|
-
id: pending.toolUseId,
|
|
263
|
-
name: RECALL_TOOL_NAME,
|
|
264
|
-
input: pending.input,
|
|
265
|
-
};
|
|
266
|
-
assistantMsg.content.splice(insertPos, 0, recallToolUse);
|
|
267
|
-
|
|
268
|
-
// Insert recall tool_result into the user message.
|
|
269
|
-
// Add it at the beginning alongside any other tool_results.
|
|
270
|
-
userMsg.content.unshift({
|
|
271
|
-
type: "tool_result",
|
|
272
|
-
toolUseId: pending.toolUseId,
|
|
273
|
-
content: pending.result,
|
|
274
|
-
});
|
|
275
|
-
|
|
276
|
-
// Strip recall from tools list for this request
|
|
277
|
-
req.tools = req.tools.filter((t) => t.name !== RECALL_TOOL_NAME);
|
|
278
|
-
|
|
279
|
-
return true;
|
|
280
|
-
}
|
|
281
|
-
|
|
282
|
-
// ---------------------------------------------------------------------------
|
|
283
|
-
// Response content stripping (Case 2: remove recall from response)
|
|
409
|
+
// Response content rewriting — replace recall tool_use with marker text
|
|
284
410
|
// ---------------------------------------------------------------------------
|
|
285
411
|
|
|
286
412
|
/**
|
|
287
|
-
* Build a GatewayResponse with recall tool_use blocks
|
|
413
|
+
* Build a GatewayResponse with recall tool_use blocks replaced by marker text.
|
|
288
414
|
*
|
|
289
|
-
* Used for
|
|
290
|
-
*
|
|
415
|
+
* Used for both recall-only and mixed-tools cases to produce a response
|
|
416
|
+
* where the client sees human-readable markers instead of tool call mechanics.
|
|
291
417
|
*/
|
|
292
|
-
export function
|
|
418
|
+
export function replaceRecallWithMarker(
  resp: GatewayResponse,
): GatewayResponse {
  // Every recall tool_use block becomes a marker text block; all other
  // blocks are passed through untouched. The input response is not mutated.
  const content = resp.content.map((block) => {
    if (block.type !== "tool_use" || block.name !== RECALL_TOOL_NAME) {
      return block;
    }

    const input = block.input as Record<string, unknown>;
    // A non-string query is rendered as an empty query.
    const query = typeof input.query === "string" ? input.query : "";
    // A missing scope falls back to "all".
    const scope = (input.scope as string) ?? "all";
    return { type: "text" as const, text: buildRecallMarker(query, scope) };
  });

  return { ...resp, content };
}
|
package/src/temporal-adapter.ts
CHANGED
|
@@ -265,6 +265,24 @@ export type AnthropicCacheOptions = {
|
|
|
265
265
|
*/
|
|
266
266
|
systemTTL?: "5m" | "1h" | false;
|
|
267
267
|
|
|
268
|
+
/**
|
|
269
|
+
* LTM knowledge text to inject as a separate system block after the host
|
|
270
|
+
* prompt. Keeping it in a separate block means the host prompt gets its
|
|
271
|
+
* own cache breakpoint (1h) and LTM changes don't bust the host prefix.
|
|
272
|
+
*
|
|
273
|
+
* When provided AND systemTTL is set, the system becomes a 2-block array:
|
|
274
|
+
* system[0]: host prompt — cache_control with systemTTL
|
|
275
|
+
* system[1]: LTM content — no cache_control (benefits from prefix)
|
|
276
|
+
*/
|
|
277
|
+
ltmSystem?: string;
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* Cache the last tool definition with an explicit 1h breakpoint.
|
|
281
|
+
* Tool definitions (including our injected recall tool) are stable
|
|
282
|
+
* across turns — caching them avoids re-processing on every request.
|
|
283
|
+
*/
|
|
284
|
+
cacheTools?: boolean;
|
|
285
|
+
|
|
268
286
|
/**
|
|
269
287
|
* Place an explicit `cache_control` breakpoint on the last block of the
|
|
270
288
|
* last message, enabling Anthropic to cache the conversation prefix.
|
|
@@ -329,19 +347,33 @@ export function buildAnthropicRequest(
|
|
|
329
347
|
// System — only include if non-empty
|
|
330
348
|
if (req.system) {
|
|
331
349
|
const systemTTL = cache?.systemTTL;
|
|
350
|
+
const ltmText = cache?.ltmSystem;
|
|
351
|
+
|
|
332
352
|
if (systemTTL) {
|
|
333
|
-
// Send as block array with explicit cache_control breakpoint
|
|
334
|
-
//
|
|
335
|
-
//
|
|
353
|
+
// Send as block array with explicit cache_control breakpoint on the
|
|
354
|
+
// host prompt. The host prompt is the most stable part (changes only
|
|
355
|
+
// when the host mutates AGENTS.md, memory, etc.), so it gets the configured systemTTL ("1h", or the default ephemeral TTL otherwise).
|
|
336
356
|
const cacheControl: Record<string, string> =
|
|
337
357
|
systemTTL === "1h"
|
|
338
358
|
? { type: "ephemeral", ttl: "1h" }
|
|
339
359
|
: { type: "ephemeral" };
|
|
340
|
-
|
|
360
|
+
|
|
361
|
+
const blocks: Record<string, unknown>[] = [
|
|
341
362
|
{ type: "text", text: req.system, cache_control: cacheControl },
|
|
342
363
|
];
|
|
364
|
+
|
|
365
|
+
// LTM knowledge as a separate block — no cache_control of its own,
|
|
366
|
+
// but benefits from the host prompt prefix cache. When LTM changes,
|
|
367
|
+
// only this block and everything after it is re-processed; the host
|
|
368
|
+
// prompt prefix is still a cache read.
|
|
369
|
+
if (ltmText) {
|
|
370
|
+
blocks.push({ type: "text", text: ltmText });
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
body.system = blocks;
|
|
343
374
|
} else {
|
|
344
|
-
|
|
375
|
+
// No caching — concatenate LTM into a single string.
|
|
376
|
+
body.system = ltmText ? `${req.system}\n\n${ltmText}` : req.system;
|
|
345
377
|
}
|
|
346
378
|
}
|
|
347
379
|
|
|
@@ -368,11 +400,23 @@ export function buildAnthropicRequest(
|
|
|
368
400
|
|
|
369
401
|
// Tools — only include if present
|
|
370
402
|
if (req.tools.length > 0) {
|
|
371
|
-
|
|
403
|
+
const tools = req.tools.map((t) => ({
|
|
372
404
|
name: t.name,
|
|
373
405
|
description: t.description,
|
|
374
406
|
input_schema: t.inputSchema,
|
|
375
407
|
}));
|
|
408
|
+
|
|
409
|
+
// Tool caching: place a 1h breakpoint on the last tool definition.
|
|
410
|
+
// Tool definitions (including our recall tool) are stable across turns.
|
|
411
|
+
if (cache?.cacheTools && tools.length > 0) {
|
|
412
|
+
const lastTool = tools[tools.length - 1]!;
|
|
413
|
+
(lastTool as Record<string, unknown>).cache_control = {
|
|
414
|
+
type: "ephemeral",
|
|
415
|
+
ttl: "1h",
|
|
416
|
+
};
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
body.tools = tools;
|
|
376
420
|
}
|
|
377
421
|
|
|
378
422
|
// Restore all metadata params (temperature, top_p, stop_sequences, etc.)
|
package/src/translate/types.ts
CHANGED
|
@@ -139,27 +139,70 @@ export type GatewayResponse = {
|
|
|
139
139
|
};
|
|
140
140
|
|
|
141
141
|
// ---------------------------------------------------------------------------
|
|
142
|
-
//
|
|
142
|
+
// Recall store (cross-request, gateway recall interception)
|
|
143
143
|
// ---------------------------------------------------------------------------
|
|
144
144
|
|
|
145
|
-
/**
|
|
146
|
-
export type
|
|
147
|
-
/** tool_use ID
|
|
145
|
+
/** Stored recall result for marker-based round-trip expansion. */
|
|
146
|
+
export type StoredRecall = {
|
|
147
|
+
/** The tool_use ID to reconstruct in the upstream request. */
|
|
148
148
|
toolUseId: string;
|
|
149
|
-
/**
|
|
149
|
+
/** Original recall input (query + scope). */
|
|
150
150
|
input: { query: string; scope?: string };
|
|
151
151
|
/** Position (content block index) in the original assistant message. */
|
|
152
152
|
position: number;
|
|
153
153
|
/** Executed recall result (formatted markdown). */
|
|
154
154
|
result: string;
|
|
155
|
-
/** Timestamp for TTL-based cleanup. */
|
|
156
|
-
timestamp: number;
|
|
157
155
|
};
|
|
158
156
|
|
|
157
|
+
/** Map from marker key (`${scope}:${query}`) → stored recall data. */
|
|
158
|
+
export type RecallStore = Map<string, StoredRecall>;
|
|
159
|
+
|
|
159
160
|
// ---------------------------------------------------------------------------
|
|
160
161
|
// Session state — per-session tracking for Lore pipeline integration
|
|
161
162
|
// ---------------------------------------------------------------------------
|
|
162
163
|
|
|
164
|
+
/**
 * Cache analysis for a single turn, emitted as structured log data.
 *
 * Combines the cache counters reported by the API response with a
 * comparison of this turn's serialized request body against the previous
 * turn's.
 */
export type CacheTurnAnalysis = {
  /** Turn number within this session. */
  turn: number;

  // --- Ground truth from API response ---

  /** Tokens served from the prompt cache (hit). */
  cacheRead: number;
  /** Tokens written to the prompt cache (miss / new). */
  cacheCreation: number;
  /** Input tokens that were not cached. */
  inputTokens: number;
  /** Ratio of cacheRead to total input, in the range 0..1. */
  cacheHitRate: number;

  // --- Request body prefix comparison ---

  /** Number of bytes, counted from the start of the serialized request body, that match the previous turn. */
  prefixMatchBytes: number;
  /** prefixMatchBytes divided by min(previous, current) body length, in the range 0..1. */
  prefixMatchPercent: number;
  /** Semantic location of the first divergence (e.g. "messages[3].content[1]"). */
  divergencePoint: string;
  /** Human-readable reason (e.g. "system prompt changed", "new message appended"). */
  divergenceReason: string;
};
|
|
189
|
+
|
|
190
|
+
/**
 * Cache analytics state kept per session.
 *
 * Holds what is needed to compare the next request against the previous
 * one, plus running counters.
 */
export type CacheAnalytics = {
  /** Serialized request body of the last turn, deflate-compressed; null when none is held. */
  lastRequestBody: Uint8Array | null;
  /** Byte length of lastRequestBody before compression (used for the prefix match percentage). */
  lastRequestBodyLength: number;
  /** cache_read_input_tokens reported by the last API response. */
  lastCacheRead: number;
  /** cache_creation_input_tokens reported by the last API response. */
  lastCacheCreation: number;
  /** Total number of turns observed. */
  turnCount: number;
  /** Confirmed cache busts (API returned cacheRead=0 with cacheCreation>0). */
  bustCount: number;
};
|
|
205
|
+
|
|
163
206
|
/** Per-session state tracked by the gateway for Lore pipeline decisions. */
|
|
164
207
|
export type SessionState = {
|
|
165
208
|
sessionID: string;
|
|
@@ -172,6 +215,8 @@ export type SessionState = {
|
|
|
172
215
|
messageCount: number;
|
|
173
216
|
/** Turns since last curation run — triggers background curation. */
|
|
174
217
|
turnsSinceCuration: number;
|
|
175
|
-
/**
|
|
176
|
-
|
|
218
|
+
/** Stored recall results for marker-based round-trip expansion. */
|
|
219
|
+
recallStore: RecallStore;
|
|
220
|
+
/** Cache analytics — request body prefix comparison + API cache fields. */
|
|
221
|
+
cacheAnalytics: CacheAnalytics;
|
|
177
222
|
};
|