@loreai/gateway 0.14.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.cjs +27 -0
- package/dist/index.cjs +1042 -0
- package/dist/index.d.cts +21 -0
- package/package.json +10 -10
- package/dist/index.js +0 -50087
- package/src/auth.ts +0 -133
- package/src/batch-queue.ts +0 -575
- package/src/cache-analytics.ts +0 -344
- package/src/cli/agents.ts +0 -107
- package/src/cli/bin.ts +0 -11
- package/src/cli/help.ts +0 -55
- package/src/cli/lib/binary.ts +0 -353
- package/src/cli/lib/bspatch.ts +0 -306
- package/src/cli/lib/delta-upgrade.ts +0 -790
- package/src/cli/lib/errors.ts +0 -48
- package/src/cli/lib/ghcr.ts +0 -389
- package/src/cli/lib/patch-cache.ts +0 -342
- package/src/cli/lib/upgrade.ts +0 -454
- package/src/cli/lib/version-check.ts +0 -385
- package/src/cli/main.ts +0 -152
- package/src/cli/run.ts +0 -181
- package/src/cli/start.ts +0 -82
- package/src/cli/upgrade.ts +0 -311
- package/src/cli/version.ts +0 -22
- package/src/compaction.ts +0 -195
- package/src/config.ts +0 -199
- package/src/idle.ts +0 -240
- package/src/index.ts +0 -41
- package/src/llm-adapter.ts +0 -182
- package/src/pipeline.ts +0 -1681
- package/src/recall.ts +0 -433
- package/src/recorder.ts +0 -192
- package/src/server.ts +0 -250
- package/src/session.ts +0 -207
- package/src/stream/anthropic.ts +0 -708
- package/src/temporal-adapter.ts +0 -310
- package/src/translate/anthropic.ts +0 -469
- package/src/translate/openai.ts +0 -536
- package/src/translate/types.ts +0 -222
- package/src/worker-model.ts +0 -408
package/src/stream/anthropic.ts
DELETED
|
@@ -1,708 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Anthropic SSE stream handling.
|
|
3
|
-
*
|
|
4
|
-
* Parses upstream Anthropic streaming responses (named SSE events), accumulates
|
|
5
|
-
* the full response into a `GatewayResponse`, and provides helpers for
|
|
6
|
-
* generating synthetic SSE event sequences (e.g. for compaction interception).
|
|
7
|
-
*
|
|
8
|
-
* Anthropic uses named SSE events with a lifecycle:
|
|
9
|
-
* message_start -> content_block_start/delta/stop (repeated) -> message_delta -> message_stop
|
|
10
|
-
*
|
|
11
|
-
* All functions are pure (no side effects) except `parseSSEStream` which is
|
|
12
|
-
* an async generator consuming a byte stream.
|
|
13
|
-
*/
|
|
14
|
-
import type {
|
|
15
|
-
GatewayContentBlock,
|
|
16
|
-
GatewayResponse,
|
|
17
|
-
GatewayUsage,
|
|
18
|
-
} from "../translate/types";
|
|
19
|
-
|
|
20
|
-
// ---------------------------------------------------------------------------
|
|
21
|
-
// SSE formatting
|
|
22
|
-
// ---------------------------------------------------------------------------
|
|
23
|
-
|
|
24
|
-
/**
 * Format a single named SSE event for sending to the client.
 *
 * The payload is written as one `data:` line per line of `data`. An SSE
 * parser joins consecutive `data:` lines with `\n`, so a multi-line payload
 * survives a round trip instead of losing every line after the first.
 * A single-line payload yields exactly `event: <type>\ndata: <data>\n\n`.
 *
 * @param eventType - Value written to the `event:` field.
 * @param data - Payload text; may contain line breaks (`\n`, `\r\n` or `\r`).
 * @returns The serialized event, terminated by a blank line.
 */
export function formatSSEEvent(eventType: string, data: string): string {
  const dataLines = data
    .split(/\r\n|\r|\n/)
    .map((line) => `data: ${line}`)
    .join("\n");
  return `event: ${eventType}\n${dataLines}\n\n`;
}
|
|
28
|
-
|
|
29
|
-
// ---------------------------------------------------------------------------
|
|
30
|
-
// SSE parsing
|
|
31
|
-
// ---------------------------------------------------------------------------
|
|
32
|
-
|
|
33
|
-
/**
 * Parse the lines of one SSE event block (the text between two blank-line
 * delimiters, already normalized to `\n` line endings).
 *
 * @returns The event, or `undefined` when the block has no `data:` line
 *   (empty block, comments only, unknown fields only).
 */
function parseSSEBlock(
  block: string,
): { event: string; data: string } | undefined {
  let event = "message";
  const dataLines: string[] = [];

  for (const line of block.split("\n")) {
    if (line.startsWith("event:")) {
      event = line.slice(6).trim();
    } else if (line.startsWith("data:")) {
      dataLines.push(line.slice(5).trimStart());
    }
    // Lines starting with ':' are comments, and lines without a known
    // prefix are ignored.
  }

  return dataLines.length > 0
    ? { event, data: dataLines.join("\n") }
    : undefined;
}

/**
 * Parse an SSE byte stream into typed events.
 *
 * Handles:
 * - `event: <type>` followed by `data: <json>`
 * - Multiple `data:` lines (joined with `\n`)
 * - Blank lines as event delimiters, with `\n`, `\r\n` or `\r` line endings
 * - Default event type `"message"` when no `event:` line precedes data
 * - A trailing block that is not terminated by a blank line (emitted once
 *   the reader reports `done`)
 *
 * @param reader - Reader over the raw response bytes; read until `done`.
 * @returns An async generator yielding one `{ event, data }` per SSE event
 *   that carries at least one `data:` line.
 */
export async function* parseSSEStream(
  reader: ReadableStreamDefaultReader<Uint8Array>,
): AsyncGenerator<{ event: string; data: string }> {
  const decoder = new TextDecoder();
  let buffer = "";

  for (;;) {
    const { done, value } = await reader.read();
    if (value) {
      buffer += decoder.decode(value, { stream: true });
    }
    if (done) {
      // Flush bytes the streaming decoder may still be holding.
      buffer += decoder.decode();
    }

    // Normalize CRLF / CR to LF so the "\n\n" boundary search also works for
    // streams that use other line endings. While more input may follow, a
    // trailing "\r" is left in place: it could be the first half of a CRLF
    // pair split across two chunks.
    buffer = done
      ? buffer.replace(/\r\n?/g, "\n")
      : buffer.replace(/\r\n|\r(?!\n|$)/g, "\n");

    // Emit every complete event (delimited by a blank line).
    let boundary = buffer.indexOf("\n\n");
    while (boundary !== -1) {
      const parsedEvent = parseSSEBlock(buffer.slice(0, boundary));
      buffer = buffer.slice(boundary + 2);
      if (parsedEvent) yield parsedEvent;
      boundary = buffer.indexOf("\n\n");
    }

    if (done) {
      // Flush any remaining partial block (shouldn't happen with
      // well-formed SSE).
      const trailing = parseSSEBlock(buffer);
      if (trailing) yield trailing;
      return;
    }
  }
}
|
|
101
|
-
|
|
102
|
-
// ---------------------------------------------------------------------------
|
|
103
|
-
// Stream accumulator
|
|
104
|
-
// ---------------------------------------------------------------------------
|
|
105
|
-
|
|
106
|
-
/** Text block under construction; `text` grows with each `text_delta`. */
interface AccumulatingTextBlock {
  type: "text";
  text: string;
}

/**
 * Thinking block under construction; `thinking` grows with each
 * `thinking_delta` and `signature` with each `signature_delta`.
 */
interface AccumulatingThinkingBlock {
  type: "thinking";
  thinking: string;
  signature: string;
}

/**
 * Tool-use block under construction; `partialJson` collects the raw
 * `input_json_delta` fragments until the block is finalized.
 */
interface AccumulatingToolUseBlock {
  type: "tool_use";
  id: string;
  name: string;
  partialJson: string;
}

/** In-progress state of a content block while its stream is still open. */
type AccumulatingBlock =
  | AccumulatingTextBlock
  | AccumulatingThinkingBlock
  | AccumulatingToolUseBlock;
|
|
111
|
-
|
|
112
|
-
/**
 * Consumes Anthropic SSE events one at a time and assembles them into a
 * `GatewayResponse`.
 */
export interface StreamAccumulator {
  /**
   * Feed one SSE event into the accumulator.
   *
   * @param eventType - The SSE event name.
   * @param data - The event's data payload.
   * @returns The SSE text to forward to the client for this event.
   */
  processEvent(eventType: string, data: string): string;

  /** True once a `message_stop` event has been received. */
  isDone(): boolean;

  /** The response accumulated so far; call after the stream has ended. */
  getResponse(): GatewayResponse;
}
|
|
121
|
-
|
|
122
|
-
/**
 * Create a `StreamAccumulator` that tracks an Anthropic streaming response.
 *
 * Every event passed to `processEvent` is returned re-serialized for
 * forwarding, whether or not it could be interpreted. Events with a JSON
 * object payload additionally update the accumulated state:
 * - `message_start` sets id, model and initial usage
 * - `content_block_start` / `content_block_delta` build up a block per index
 * - `content_block_stop` finalizes the block into the response content
 * - `message_delta` sets the stop reason and output token count
 * - `message_stop` marks the stream as done
 *
 * @returns A fresh accumulator with empty state.
 */
export function createStreamAccumulator(): StreamAccumulator {
  let id = "";
  let model = "";
  let stopReason = "";
  let done = false;

  const usage: GatewayUsage = {
    inputTokens: 0,
    outputTokens: 0,
  };

  /** Blocks indexed by their stream index. */
  const blocks = new Map<number, AccumulatingBlock>();
  /** Finalized content blocks, in the order they were finalized. */
  const content: GatewayContentBlock[] = [];
  /** Track which indices have been finalized. */
  const finalized = new Set<number>();

  function processEvent(eventType: string, data: string): string {
    // Forward the event as-is regardless of processing outcome.
    const forwarded = formatSSEEvent(eventType, data);

    let parsed: unknown;
    try {
      parsed = JSON.parse(data);
    } catch {
      // Not valid JSON: nothing to accumulate, just forward.
      return forwarded;
    }

    // JSON `null` and primitives parse successfully but carry no fields;
    // dereferencing `null` in the handlers would throw, so stop here.
    if (typeof parsed !== "object" || parsed === null) return forwarded;
    const payload = parsed as Record<string, unknown>;

    switch (eventType) {
      case "message_start":
        handleMessageStart(payload);
        break;
      case "content_block_start":
        handleContentBlockStart(payload);
        break;
      case "content_block_delta":
        handleContentBlockDelta(payload);
        break;
      case "content_block_stop":
        handleContentBlockStop(payload);
        break;
      case "message_delta":
        handleMessageDelta(payload);
        break;
      case "message_stop":
        done = true;
        break;
      // "ping" and unknown events: just forward.
    }

    return forwarded;
  }

  /** Record id, model and the initial usage counters from `message_start`. */
  function handleMessageStart(payload: Record<string, unknown>): void {
    const message = payload.message as Record<string, unknown> | undefined;
    if (!message) return;

    if (typeof message.id === "string") id = message.id;
    if (typeof message.model === "string") model = message.model;

    const msgUsage = message.usage as Record<string, number> | undefined;
    if (!msgUsage) return;

    if (typeof msgUsage.input_tokens === "number") {
      usage.inputTokens = msgUsage.input_tokens;
    }
    if (typeof msgUsage.output_tokens === "number") {
      usage.outputTokens = msgUsage.output_tokens;
    }
    if (typeof msgUsage.cache_read_input_tokens === "number") {
      usage.cacheReadInputTokens = msgUsage.cache_read_input_tokens;
    }
    if (typeof msgUsage.cache_creation_input_tokens === "number") {
      usage.cacheCreationInputTokens = msgUsage.cache_creation_input_tokens;
    }
  }

  /** Open a new text / thinking / tool_use block at the given index. */
  function handleContentBlockStart(payload: Record<string, unknown>): void {
    const index = payload.index;
    if (typeof index !== "number") return;

    const block = payload.content_block as Record<string, unknown> | undefined;
    if (!block || typeof block.type !== "string") return;

    switch (block.type) {
      case "text":
        blocks.set(index, {
          type: "text",
          text: typeof block.text === "string" ? block.text : "",
        });
        break;
      case "thinking":
        blocks.set(index, {
          type: "thinking",
          thinking: typeof block.thinking === "string" ? block.thinking : "",
          signature: "",
        });
        break;
      case "tool_use":
        blocks.set(index, {
          type: "tool_use",
          id: typeof block.id === "string" ? block.id : "",
          name: typeof block.name === "string" ? block.name : "",
          partialJson: "",
        });
        break;
      // Other block types are not accumulated.
    }
  }

  /** Append a delta to the open block it targets, if the types match. */
  function handleContentBlockDelta(payload: Record<string, unknown>): void {
    const index = payload.index;
    if (typeof index !== "number") return;

    const delta = payload.delta as Record<string, unknown> | undefined;
    if (!delta || typeof delta.type !== "string") return;

    const block = blocks.get(index);
    if (!block) return;

    switch (delta.type) {
      case "text_delta":
        if (block.type === "text" && typeof delta.text === "string") {
          block.text += delta.text;
        }
        break;
      case "thinking_delta":
        if (block.type === "thinking" && typeof delta.thinking === "string") {
          block.thinking += delta.thinking;
        }
        break;
      case "signature_delta":
        if (block.type === "thinking" && typeof delta.signature === "string") {
          block.signature += delta.signature;
        }
        break;
      case "input_json_delta":
        if (
          block.type === "tool_use" &&
          typeof delta.partial_json === "string"
        ) {
          block.partialJson += delta.partial_json;
        }
        break;
    }
  }

  /**
   * Convert an accumulating block into its final content block and append it
   * to `content`. Does nothing if the index was already finalized.
   */
  function finalizeBlock(index: number, block: AccumulatingBlock): void {
    if (finalized.has(index)) return;
    finalized.add(index);

    switch (block.type) {
      case "text":
        content.push({ type: "text", text: block.text });
        break;
      case "thinking":
        content.push({
          type: "thinking",
          thinking: block.thinking,
          // Only attach a signature when one was actually streamed.
          ...(block.signature ? { signature: block.signature } : {}),
        });
        break;
      case "tool_use": {
        let input: unknown = {};
        if (block.partialJson) {
          try {
            input = JSON.parse(block.partialJson);
          } catch {
            // Malformed JSON: store the raw string instead.
            input = block.partialJson;
          }
        }
        content.push({
          type: "tool_use",
          id: block.id,
          name: block.name,
          input,
        });
        break;
      }
    }
  }

  /** Finalize the block named by a `content_block_stop` event. */
  function handleContentBlockStop(payload: Record<string, unknown>): void {
    const index = payload.index;
    if (typeof index !== "number") return;

    const block = blocks.get(index);
    if (block) finalizeBlock(index, block);
  }

  /** Record the stop reason and output token count from `message_delta`. */
  function handleMessageDelta(payload: Record<string, unknown>): void {
    const delta = payload.delta as Record<string, unknown> | undefined;
    if (delta && typeof delta.stop_reason === "string") {
      stopReason = delta.stop_reason;
    }

    // The reported output token count replaces (not adds to) the stored one.
    const deltaUsage = payload.usage as Record<string, number> | undefined;
    if (deltaUsage && typeof deltaUsage.output_tokens === "number") {
      usage.outputTokens = deltaUsage.output_tokens;
    }
  }

  function getResponse(): GatewayResponse {
    // Finalize any blocks that weren't explicitly stopped (shouldn't happen
    // with well-formed streams, but be defensive).
    for (const [index, block] of blocks) {
      finalizeBlock(index, block);
    }

    return {
      id,
      model,
      content,
      stopReason,
      usage: { ...usage },
    };
  }

  return {
    processEvent,
    getResponse,
    isDone: () => done,
  };
}
|
|
391
|
-
|
|
392
|
-
// ---------------------------------------------------------------------------
|
|
393
|
-
// Synthetic SSE builders
|
|
394
|
-
// ---------------------------------------------------------------------------
|
|
395
|
-
|
|
396
|
-
/**
 * Serialize a synthetic `message_start` SSE event for a GatewayResponse.
 *
 * The event carries the response's id and model, an empty content list,
 * null stop fields, and a usage object holding the response's input token
 * count plus a fixed `output_tokens` of 1. Cache token counts are included
 * only when present on the response.
 *
 * Intended for responses the gateway generates itself (e.g. compaction
 * interception) that must still look like a well-formed Anthropic stream.
 *
 * @param response - Source of id, model and usage figures.
 * @returns The formatted `message_start` SSE event text.
 */
export function buildSSEMessageStart(response: GatewayResponse): string {
  const { inputTokens, cacheReadInputTokens, cacheCreationInputTokens } =
    response.usage;

  // Keys are inserted in the order they must appear in the serialized JSON.
  const usagePayload: Record<string, number> = {
    input_tokens: inputTokens,
    output_tokens: 1,
  };
  if (cacheReadInputTokens != null) {
    usagePayload.cache_read_input_tokens = cacheReadInputTokens;
  }
  if (cacheCreationInputTokens != null) {
    usagePayload.cache_creation_input_tokens = cacheCreationInputTokens;
  }

  const payload = JSON.stringify({
    type: "message_start",
    message: {
      id: response.id,
      type: "message",
      role: "assistant",
      content: [],
      model: response.model,
      stop_reason: null,
      stop_sequence: null,
      usage: usagePayload,
    },
  });

  return formatSSEEvent("message_start", payload);
}
|
|
431
|
-
|
|
432
|
-
/**
 * Produce the full SSE text for a response consisting of one text block.
 *
 * The returned string contains, in order:
 *   message_start -> content_block_start -> content_block_delta ->
 *   content_block_stop -> message_delta -> message_stop
 *
 * The whole text is delivered in a single `text_delta`, the stop reason is
 * always `end_turn`, and `message_start` reports `output_tokens: 1` while
 * `message_delta` reports `usage.outputTokens`.
 *
 * Used for compaction interception, where a synthetic response is generated
 * instead of forwarding to upstream.
 *
 * @param id - Message id placed in `message_start`.
 * @param model - Model name placed in `message_start`.
 * @param text - The complete response text.
 * @param usage - Token counts to report.
 * @returns All six events concatenated.
 */
export function buildSSETextResponse(
  id: string,
  model: string,
  text: string,
  usage: { inputTokens: number; outputTokens: number },
): string {
  // Each entry is [event name, JSON payload], listed in emission order.
  const lifecycle: Array<[string, Record<string, unknown>]> = [
    [
      "message_start",
      {
        type: "message_start",
        message: {
          id,
          type: "message",
          role: "assistant",
          content: [],
          model,
          stop_reason: null,
          stop_sequence: null,
          usage: {
            input_tokens: usage.inputTokens,
            output_tokens: 1,
          },
        },
      },
    ],
    [
      "content_block_start",
      {
        type: "content_block_start",
        index: 0,
        content_block: { type: "text", text: "" },
      },
    ],
    [
      "content_block_delta",
      {
        type: "content_block_delta",
        index: 0,
        delta: { type: "text_delta", text },
      },
    ],
    [
      "content_block_stop",
      {
        type: "content_block_stop",
        index: 0,
      },
    ],
    [
      "message_delta",
      {
        type: "message_delta",
        delta: { stop_reason: "end_turn", stop_sequence: null },
        usage: { output_tokens: usage.outputTokens },
      },
    ],
    ["message_stop", { type: "message_stop" }],
  ];

  let stream = "";
  for (const [eventName, payload] of lifecycle) {
    stream += formatSSEEvent(eventName, JSON.stringify(payload));
  }
  return stream;
}
|
|
530
|
-
|
|
531
|
-
// ---------------------------------------------------------------------------
|
|
532
|
-
// Recall-aware stream accumulator
|
|
533
|
-
// ---------------------------------------------------------------------------
|
|
534
|
-
|
|
535
|
-
/**
 * A `StreamAccumulator` that additionally filters recall tool calls out of
 * the client-facing stream.
 *
 * On top of the base contract it provides:
 * - Suppression of recall tool_use blocks: `processEvent` returns an empty
 *   string for every event that targets such a block.
 * - Re-indexing of the remaining blocks so the indices the client sees stay
 *   contiguous.
 * - Hold-back of `message_delta` and `message_stop` once recall has been
 *   detected. They are not forwarded; the caller retrieves them through
 *   `heldBackEvents()` and decides whether to send them or to replace them
 *   with a continuation stream.
 * - Queries describing what was seen (recall only vs. recall mixed with
 *   other tools).
 */
export interface RecallAwareAccumulator extends StreamAccumulator {
  /** True if the stream contained a recall tool_use block. */
  hasRecall(): boolean;

  /** True if the stream contained any tool_use block other than recall. */
  hasOtherTools(): boolean;

  /** Upstream index of the first recall block detected. */
  recallBlockIndex(): number;

  /** How many content blocks were forwarded (i.e. not suppressed). */
  clientBlockCount(): number;

  /** SSE text of the `message_delta` / `message_stop` events held back. */
  heldBackEvents(): string;
}
|
|
564
|
-
|
|
565
|
-
/**
 * Create a recall-aware stream accumulator.
 *
 * Every event is first fed to a standard stream accumulator, which keeps the
 * full upstream state (including recall blocks). The value returned from
 * `processEvent` is what should be forwarded to the client:
 * - events for a recall tool_use block: empty string
 * - events for other blocks: forwarded, with `index` lowered by the number
 *   of suppressed blocks that sit before it
 * - `message_delta` / `message_stop` after recall was detected: empty
 *   string, with the event text appended to `heldBackEvents()`
 * - everything else: forwarded unchanged
 *
 * @param recallToolName - The name of the recall tool to intercept (default: "recall")
 * @returns A fresh accumulator; `recallBlockIndex()` is -1 until a recall
 *   block has been seen.
 */
export function createRecallAwareAccumulator(
  recallToolName = "recall",
): RecallAwareAccumulator {
  // Delegate to the standard accumulator for actual accumulation.
  const inner = createStreamAccumulator();

  /** Set of upstream block indices that are suppressed (recall). */
  const suppressedIndices = new Set<number>();
  /** Tracks other tool_use block indices (non-recall). */
  const otherToolIndices = new Set<number>();
  /** First suppressed block index, or -1 when none was seen yet. */
  let firstSuppressedIndex = -1;
  /** Total client-visible blocks forwarded. */
  let clientBlocks = 0;
  /** Held-back message_delta + message_stop SSE text. */
  let heldBack = "";
  /** Whether we've detected recall in this stream. */
  let recallDetected = false;

  /**
   * Number of suppressed blocks positioned before `upstreamIndex`. Counting
   * per index (rather than keeping one running total) keeps a block's client
   * index stable even if a recall block is announced after it, and makes a
   * repeated start event for the same recall index harmless.
   */
  function shiftFor(upstreamIndex: number): number {
    let shift = 0;
    for (const suppressed of suppressedIndices) {
      if (suppressed < upstreamIndex) shift++;
    }
    return shift;
  }

  /** Forward a block event, rewriting its index when blocks were removed. */
  function forwardBlockEvent(
    eventType: string,
    data: string,
    payload: Record<string, unknown>,
    index: number,
  ): string {
    const shift = shiftFor(index);
    // Nothing suppressed before this block: forward the original bytes.
    if (shift === 0) return formatSSEEvent(eventType, data);
    return formatSSEEvent(
      eventType,
      JSON.stringify({ ...payload, index: index - shift }),
    );
  }

  function processEvent(eventType: string, data: string): string {
    // Always feed the inner accumulator (it tracks full state).
    inner.processEvent(eventType, data);

    let parsed: unknown;
    try {
      parsed = JSON.parse(data);
    } catch {
      // Non-JSON events (pings, etc.): forward as-is.
      return formatSSEEvent(eventType, data);
    }
    // JSON `null` / primitives have no fields to inspect: forward as-is.
    if (typeof parsed !== "object" || parsed === null) {
      return formatSSEEvent(eventType, data);
    }
    const payload = parsed as Record<string, unknown>;

    switch (eventType) {
      case "content_block_start": {
        const index = payload.index;
        if (typeof index !== "number") break;

        const block = payload.content_block as
          | Record<string, unknown>
          | undefined;
        const isToolUse = block?.type === "tool_use";

        if (isToolUse && block?.name === recallToolName) {
          // Suppress this block.
          suppressedIndices.add(index);
          recallDetected = true;
          if (firstSuppressedIndex < 0) firstSuppressedIndex = index;
          return ""; // Don't forward
        }

        if (isToolUse) {
          otherToolIndices.add(index);
        }

        clientBlocks++;
        return forwardBlockEvent(eventType, data, payload, index);
      }

      case "content_block_delta":
      case "content_block_stop": {
        const index = payload.index;
        if (typeof index !== "number") break;
        if (suppressedIndices.has(index)) {
          return ""; // Don't forward recall block events
        }
        return forwardBlockEvent(eventType, data, payload, index);
      }

      case "message_delta":
      case "message_stop": {
        if (recallDetected) {
          // Hold back: the caller decides whether to forward or replace.
          heldBack += formatSSEEvent(eventType, data);
          return "";
        }
        break;
      }

      // message_start, ping, etc.: forward unchanged.
    }

    return formatSSEEvent(eventType, data);
  }

  return {
    processEvent,
    getResponse: () => inner.getResponse(),
    isDone: () => inner.isDone(),
    hasRecall: () => recallDetected,
    hasOtherTools: () => otherToolIndices.size > 0,
    recallBlockIndex: () => firstSuppressedIndex,
    clientBlockCount: () => clientBlocks,
    heldBackEvents: () => heldBack,
  };
}
|
|
679
|
-
|
|
680
|
-
/**
 * Consume an Anthropic SSE streaming Response and return the accumulated
 * GatewayResponse. Useful when the response needs to be translated to another
 * protocol format (e.g. OpenAI) after the pipeline produces Anthropic SSE.
 *
 * The whole body is read into memory first, then split into events on blank
 * lines. `\r\n` and `\r` line endings are normalized to `\n` beforehand so a
 * CRLF-delimited body is split into its individual events as well. Blocks
 * without a `data:` line are skipped; events without an `event:` line use
 * the type `"message"`.
 *
 * @param response - The streaming response whose body is SSE text.
 * @returns The response assembled from all events in the body.
 */
export async function accumulateSSEResponse(
  response: Response,
): Promise<GatewayResponse> {
  const accumulator = createStreamAccumulator();
  const text = (await response.text()).replace(/\r\n?/g, "\n");

  for (const block of text.split("\n\n")) {
    if (!block.trim()) continue;

    let eventType = "message";
    const dataLines: string[] = [];
    for (const line of block.split("\n")) {
      if (line.startsWith("event:")) {
        eventType = line.slice(6).trim();
      } else if (line.startsWith("data:")) {
        dataLines.push(line.slice(5).trimStart());
      }
      // Comments and unknown fields are ignored.
    }

    if (dataLines.length > 0) {
      accumulator.processEvent(eventType, dataLines.join("\n"));
    }
  }

  return accumulator.getResponse();
}
|