@loreai/gateway 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,708 +0,0 @@
1
- /**
2
- * Anthropic SSE stream handling.
3
- *
4
- * Parses upstream Anthropic streaming responses (named SSE events), accumulates
5
- * the full response into a `GatewayResponse`, and provides helpers for
6
- * generating synthetic SSE event sequences (e.g. for compaction interception).
7
- *
8
- * Anthropic uses named SSE events with a lifecycle:
9
- * message_start -> content_block_start/delta/stop (repeated) -> message_delta -> message_stop
10
- *
11
- * All functions are pure (no side effects) except `parseSSEStream` which is
12
- * an async generator consuming a byte stream.
13
- */
14
- import type {
15
- GatewayContentBlock,
16
- GatewayResponse,
17
- GatewayUsage,
18
- } from "../translate/types";
19
-
20
- // ---------------------------------------------------------------------------
21
- // SSE formatting
22
- // ---------------------------------------------------------------------------
23
-
/**
 * Format a single named SSE event for sending to the client.
 *
 * A payload that contains line breaks is written as one `data:` line per
 * payload line. A continuation line without the `data:` prefix has no field
 * name, so an SSE consumer would drop it and the payload would be truncated.
 * Single-line payloads serialize to exactly `event: <type>\ndata: <data>\n\n`.
 *
 * @param eventType - Value written to the `event:` field.
 * @param data - Payload written to the `data:` field(s); may span lines.
 * @returns The serialized event, terminated by a blank line.
 */
export function formatSSEEvent(eventType: string, data: string): string {
  const dataFields = data
    .split(/\r\n|\r|\n/)
    .map((line) => `data: ${line}`)
    .join("\n");
  return `event: ${eventType}\n${dataFields}\n\n`;
}
28
-
29
- // ---------------------------------------------------------------------------
30
- // SSE parsing
31
- // ---------------------------------------------------------------------------
32
-
/**
 * Parse one SSE event block (the lines between two blank lines).
 *
 * Expects line endings to be normalized to `\n` already. Returns `undefined`
 * when the block carries no `data:` line (comments, blank blocks, unknown
 * fields only).
 */
function parseSSEBlock(
  block: string,
): { event: string; data: string } | undefined {
  let event = "message";
  const dataLines: string[] = [];

  for (const line of block.split("\n")) {
    if (line.startsWith("event:")) {
      event = line.slice(6).trim();
    } else if (line.startsWith("data:")) {
      dataLines.push(line.slice(5).trimStart());
    }
    // Comment lines (leading ':') and unknown fields are ignored.
  }

  return dataLines.length > 0
    ? { event, data: dataLines.join("\n") }
    : undefined;
}

/**
 * Convert CRLF and lone CR line endings to LF.
 *
 * While the stream is still open, a trailing `\r` is left untouched because
 * the matching `\n` of a CRLF pair may arrive in the next chunk.
 */
function normalizeSSELineEndings(text: string, final: boolean): string {
  const holdTrailingCR = !final && text.endsWith("\r");
  const body = holdTrailingCR ? text.slice(0, -1) : text;
  const normalized = body.replace(/\r\n?/g, "\n");
  return holdTrailingCR ? `${normalized}\r` : normalized;
}

/**
 * Parse an SSE byte stream into typed events.
 *
 * Handles:
 * - `event: <type>` followed by `data: <json>`
 * - Multiple `data:` lines (joined with `\n`)
 * - Blank lines as event delimiters, with LF, CRLF or CR line endings,
 *   including a CRLF pair split across two chunks
 * - Default event type `"message"` when no `event:` line precedes data
 * - A trailing event that is not terminated by a blank line (emitted once
 *   the stream ends)
 *
 * @param reader - Reader over the raw response bytes; read until `done`.
 * @returns An async generator yielding one `{ event, data }` per SSE event
 *   that has at least one `data:` line.
 */
export async function* parseSSEStream(
  reader: ReadableStreamDefaultReader<Uint8Array>,
): AsyncGenerator<{ event: string; data: string }> {
  const decoder = new TextDecoder();
  let buffer = "";

  for (;;) {
    const { done, value } = await reader.read();
    if (value) {
      buffer += decoder.decode(value, { stream: true });
    }
    if (done) {
      // Flush bytes the decoder is still holding from a truncated sequence.
      buffer += decoder.decode();
    }
    buffer = normalizeSSELineEndings(buffer, Boolean(done));

    // Emit every complete event (terminated by a blank line).
    let boundary: number;
    while ((boundary = buffer.indexOf("\n\n")) !== -1) {
      const parsed = parseSSEBlock(buffer.slice(0, boundary));
      buffer = buffer.slice(boundary + 2);
      if (parsed) yield parsed;
    }

    if (done) {
      // A final block without a terminating blank line is still emitted.
      const trailing = parseSSEBlock(buffer);
      if (trailing) yield trailing;
      return;
    }
  }
}
101
-
102
- // ---------------------------------------------------------------------------
103
- // Stream accumulator
104
- // ---------------------------------------------------------------------------
105
-
/** Intermediate block state during streaming. */
type AccumulatingBlock =
  | { type: "text"; text: string }
  | { type: "thinking"; thinking: string; signature: string }
  | { type: "tool_use"; id: string; name: string; partialJson: string };

/** State machine that processes Anthropic SSE events and builds a GatewayResponse. */
export interface StreamAccumulator {
  /** Process a single SSE event. Returns the event line(s) to forward to client. */
  processEvent(eventType: string, data: string): string;
  /** Get the accumulated response after stream ends. */
  getResponse(): GatewayResponse;
  /** Whether the stream has completed (message_stop received). */
  isDone(): boolean;
}

/**
 * Create an accumulator that folds Anthropic SSE events into a
 * `GatewayResponse`.
 *
 * `processEvent` always returns the event re-serialized for forwarding,
 * whether or not the payload could be interpreted. Payloads that are not
 * valid JSON, or that parse to something other than an object (for example
 * `null`), are forwarded without updating any state.
 *
 * Only `text`, `thinking` and `tool_use` content blocks are tracked; events
 * for any other block type are forwarded but not accumulated.
 */
export function createStreamAccumulator(): StreamAccumulator {
  let id = "";
  let model = "";
  let stopReason = "";
  let done = false;

  const usage: GatewayUsage = {
    inputTokens: 0,
    outputTokens: 0,
  };

  /** Blocks indexed by their stream index. */
  const blocks = new Map<number, AccumulatingBlock>();
  /** Finalized content blocks, in the order they were finalized. */
  const content: GatewayContentBlock[] = [];
  /** Track which indices have been finalized. */
  const finalized = new Set<number>();

  /** Narrow an arbitrary parsed JSON value to a non-null object. */
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  function processEvent(eventType: string, data: string): string {
    // Forward the event as-is regardless of processing outcome
    const forwarded = formatSSEEvent(eventType, data);

    // Parse the data payload — if it's not valid JSON, just forward
    let parsed: unknown;
    try {
      parsed = JSON.parse(data);
    } catch {
      return forwarded;
    }
    // Valid JSON that is not an object (null, number, string) has no fields
    // to read; treating it as a record would throw on property access.
    if (!isRecord(parsed)) return forwarded;

    switch (eventType) {
      case "message_start":
        handleMessageStart(parsed);
        break;
      case "content_block_start":
        handleContentBlockStart(parsed);
        break;
      case "content_block_delta":
        handleContentBlockDelta(parsed);
        break;
      case "content_block_stop":
        handleContentBlockStop(parsed);
        break;
      case "message_delta":
        handleMessageDelta(parsed);
        break;
      case "message_stop":
        done = true;
        break;
      // "ping" and unknown events — just forward
    }

    return forwarded;
  }

  /** Record message id, model and the initial usage counters. */
  function handleMessageStart(parsed: Record<string, unknown>): void {
    const message = parsed.message;
    if (!isRecord(message)) return;

    if (typeof message.id === "string") id = message.id;
    if (typeof message.model === "string") model = message.model;

    const msgUsage = message.usage;
    if (!isRecord(msgUsage)) return;

    if (typeof msgUsage.input_tokens === "number") {
      usage.inputTokens = msgUsage.input_tokens;
    }
    if (typeof msgUsage.output_tokens === "number") {
      usage.outputTokens = msgUsage.output_tokens;
    }
    if (typeof msgUsage.cache_read_input_tokens === "number") {
      usage.cacheReadInputTokens = msgUsage.cache_read_input_tokens;
    }
    if (typeof msgUsage.cache_creation_input_tokens === "number") {
      usage.cacheCreationInputTokens = msgUsage.cache_creation_input_tokens;
    }
  }

  /** Open a new accumulating block for a supported content block type. */
  function handleContentBlockStart(parsed: Record<string, unknown>): void {
    const index = parsed.index;
    if (typeof index !== "number") return;

    const block = parsed.content_block;
    if (!isRecord(block) || typeof block.type !== "string") return;

    switch (block.type) {
      case "text":
        blocks.set(index, {
          type: "text",
          text: typeof block.text === "string" ? block.text : "",
        });
        break;
      case "thinking":
        blocks.set(index, {
          type: "thinking",
          thinking: typeof block.thinking === "string" ? block.thinking : "",
          signature: "",
        });
        break;
      case "tool_use":
        blocks.set(index, {
          type: "tool_use",
          id: typeof block.id === "string" ? block.id : "",
          name: typeof block.name === "string" ? block.name : "",
          partialJson: "",
        });
        break;
    }
  }

  /** Append a delta to the block it targets; mismatched deltas are ignored. */
  function handleContentBlockDelta(parsed: Record<string, unknown>): void {
    const index = parsed.index;
    if (typeof index !== "number") return;

    const delta = parsed.delta;
    if (!isRecord(delta) || typeof delta.type !== "string") return;

    const block = blocks.get(index);
    if (!block) return;

    switch (delta.type) {
      case "text_delta":
        if (block.type === "text" && typeof delta.text === "string") {
          block.text += delta.text;
        }
        break;
      case "thinking_delta":
        if (block.type === "thinking" && typeof delta.thinking === "string") {
          block.thinking += delta.thinking;
        }
        break;
      case "signature_delta":
        if (block.type === "thinking" && typeof delta.signature === "string") {
          block.signature += delta.signature;
        }
        break;
      case "input_json_delta":
        if (
          block.type === "tool_use" &&
          typeof delta.partial_json === "string"
        ) {
          block.partialJson += delta.partial_json;
        }
        break;
    }
  }

  /** Convert an accumulating block into its final content-block form. */
  function toContentBlock(block: AccumulatingBlock): GatewayContentBlock {
    switch (block.type) {
      case "text":
        return { type: "text", text: block.text };
      case "thinking":
        return {
          type: "thinking",
          thinking: block.thinking,
          // The signature is attached only when one was actually streamed.
          ...(block.signature ? { signature: block.signature } : {}),
        };
      case "tool_use": {
        let input: unknown = {};
        if (block.partialJson) {
          try {
            input = JSON.parse(block.partialJson);
          } catch {
            // Malformed JSON — store as raw string
            input = block.partialJson;
          }
        }
        return { type: "tool_use", id: block.id, name: block.name, input };
      }
    }
  }

  /** Finalize a block exactly once, appending it to `content`. */
  function finalizeBlock(index: number, block: AccumulatingBlock): void {
    if (finalized.has(index)) return;
    finalized.add(index);
    content.push(toContentBlock(block));
  }

  function handleContentBlockStop(parsed: Record<string, unknown>): void {
    const index = parsed.index;
    if (typeof index !== "number") return;

    const block = blocks.get(index);
    if (block) finalizeBlock(index, block);
  }

  /** Record the stop reason and the latest output token count. */
  function handleMessageDelta(parsed: Record<string, unknown>): void {
    const delta = parsed.delta;
    if (isRecord(delta) && typeof delta.stop_reason === "string") {
      stopReason = delta.stop_reason;
    }

    // message_delta usage is cumulative output tokens
    const deltaUsage = parsed.usage;
    if (isRecord(deltaUsage) && typeof deltaUsage.output_tokens === "number") {
      usage.outputTokens = deltaUsage.output_tokens;
    }
  }

  function getResponse(): GatewayResponse {
    // Finalize any blocks that weren't explicitly stopped (shouldn't happen
    // with well-formed streams, but be defensive)
    for (const [index, block] of blocks) {
      finalizeBlock(index, block);
    }

    // Return snapshots so callers cannot mutate the accumulator's own state.
    return {
      id,
      model,
      content: [...content],
      stopReason,
      usage: { ...usage },
    };
  }

  return {
    processEvent,
    getResponse,
    isDone: () => done,
  };
}
391
-
392
- // ---------------------------------------------------------------------------
393
- // Synthetic SSE builders
394
- // ---------------------------------------------------------------------------
395
-
/**
 * Serialize a synthetic `message_start` SSE event for a GatewayResponse.
 *
 * Intended for responses the gateway produces itself (e.g. compaction
 * interception), where a well-formed Anthropic stream has to be emitted
 * without an upstream call. The message is emitted with empty `content`,
 * null stop fields and `output_tokens: 1`; cache token counters are included
 * only when the response carries them.
 *
 * @param response - Source of the message id, model and input usage.
 * @returns The formatted `message_start` SSE event.
 */
export function buildSSEMessageStart(response: GatewayResponse): string {
  const { cacheReadInputTokens, cacheCreationInputTokens } = response.usage;

  const usagePayload: {
    input_tokens: number;
    output_tokens: number;
    cache_read_input_tokens?: number;
    cache_creation_input_tokens?: number;
  } = {
    input_tokens: response.usage.inputTokens,
    output_tokens: 1,
  };
  // Assignment order fixes the key order of the serialized JSON.
  if (cacheReadInputTokens != null) {
    usagePayload.cache_read_input_tokens = cacheReadInputTokens;
  }
  if (cacheCreationInputTokens != null) {
    usagePayload.cache_creation_input_tokens = cacheCreationInputTokens;
  }

  const payload = JSON.stringify({
    type: "message_start",
    message: {
      id: response.id,
      type: "message",
      role: "assistant",
      content: [],
      model: response.model,
      stop_reason: null,
      stop_sequence: null,
      usage: usagePayload,
    },
  });

  return formatSSEEvent("message_start", payload);
}
431
-
/**
 * Produce the full SSE event sequence for a text-only response.
 *
 * The events follow the Anthropic streaming lifecycle, in order:
 *   message_start, content_block_start, content_block_delta,
 *   content_block_stop, message_delta, message_stop
 *
 * The whole text is delivered in a single `text_delta` on block index 0 and
 * the stop reason is always `end_turn`. Used for compaction interception,
 * where Lore answers with a synthetic response instead of calling upstream.
 *
 * @param id - Message id placed in `message_start`.
 * @param model - Model name placed in `message_start`.
 * @param text - Complete response text.
 * @param usage - Token counts; `outputTokens` is reported in `message_delta`.
 * @returns All six events concatenated into one SSE string.
 */
export function buildSSETextResponse(
  id: string,
  model: string,
  text: string,
  usage: { inputTokens: number; outputTokens: number },
): string {
  // Each payload's `type` doubles as its SSE event name.
  const lifecycle: Array<{ type: string } & Record<string, unknown>> = [
    {
      type: "message_start",
      message: {
        id,
        type: "message",
        role: "assistant",
        content: [],
        model,
        stop_reason: null,
        stop_sequence: null,
        usage: {
          input_tokens: usage.inputTokens,
          output_tokens: 1,
        },
      },
    },
    {
      type: "content_block_start",
      index: 0,
      content_block: { type: "text", text: "" },
    },
    {
      // Full text in one delta
      type: "content_block_delta",
      index: 0,
      delta: { type: "text_delta", text },
    },
    {
      type: "content_block_stop",
      index: 0,
    },
    {
      type: "message_delta",
      delta: { stop_reason: "end_turn", stop_sequence: null },
      usage: { output_tokens: usage.outputTokens },
    },
    { type: "message_stop" },
  ];

  let stream = "";
  for (const payload of lifecycle) {
    stream += formatSSEEvent(payload.type, JSON.stringify(payload));
  }
  return stream;
}
530
-
531
- // ---------------------------------------------------------------------------
532
- // Recall-aware stream accumulator
533
- // ---------------------------------------------------------------------------
534
-
/**
 * Extended accumulator interface with recall-aware filtering.
 *
 * Wraps the standard `StreamAccumulator` and adds:
 * - Suppression of recall tool_use blocks (not forwarded to client)
 * - Re-indexing of subsequent blocks to maintain contiguity
 * - Detection of which recall case (only vs mixed) applies
 * - Access to the suppressed recall block data
 *
 * For events targeting a suppressed (recall) block, `processEvent` returns
 * an empty string (nothing to forward). For all other events, it returns
 * the SSE text to forward — with adjusted block indices if needed.
 *
 * Also holds back `message_delta` and `message_stop` events when recall is
 * detected, so the caller can decide whether to forward them (Case 2) or
 * replace them with the continuation stream (Case 1).
 */
export interface RecallAwareAccumulator extends StreamAccumulator {
  /** Whether a recall tool_use block was detected in the stream. */
  hasRecall(): boolean;
  /** Whether non-recall tool_use blocks exist in the stream. */
  hasOtherTools(): boolean;
  /** The upstream block index at which recall was first detected (-1 if none). */
  recallBlockIndex(): number;
  /** Number of non-suppressed content blocks forwarded to the client. */
  clientBlockCount(): number;
  /** The held-back message_delta + message_stop events (SSE text). */
  heldBackEvents(): string;
}

/**
 * Create a recall-aware stream accumulator.
 *
 * Every event is fed to an inner standard accumulator, so `getResponse()`
 * still reflects the full upstream stream including recall blocks. Only the
 * text returned from `processEvent` is filtered and re-indexed.
 *
 * The client-facing index of a block is its upstream index minus the number
 * of suppressed blocks with a lower upstream index. It is recorded when the
 * block starts, so later deltas and stops for that block always carry the
 * same index, even if a recall block starts in between.
 *
 * @param recallToolName - The name of the recall tool to intercept (default: "recall")
 */
export function createRecallAwareAccumulator(
  recallToolName = "recall",
): RecallAwareAccumulator {
  // Delegate to the standard accumulator for actual accumulation
  const inner = createStreamAccumulator();

  /** Set of upstream block indices that are suppressed (recall). */
  const suppressedIndices = new Set<number>();
  /** Tracks other tool_use block indices (non-recall). */
  const otherToolIndices = new Set<number>();
  /** Upstream index -> client index, for every block forwarded to the client. */
  const clientIndexByUpstream = new Map<number, number>();
  /** First suppressed block index (for continuation re-indexing). */
  let firstSuppressedIndex = -1;
  /** Held-back message_delta + message_stop SSE text. */
  let heldBack = "";

  /** Narrow an arbitrary parsed JSON value to a non-null object. */
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  /** Count suppressed blocks whose upstream index precedes `index`. */
  function suppressedBelow(index: number): number {
    let count = 0;
    for (const suppressed of suppressedIndices) {
      if (suppressed < index) count++;
    }
    return count;
  }

  /**
   * Forward a block event, rewriting `index` only when it actually changes so
   * that untouched events keep their original payload text.
   */
  function forwardBlockEvent(
    eventType: string,
    data: string,
    payload: Record<string, unknown>,
    upstreamIndex: number,
    clientIndex: number,
  ): string {
    if (clientIndex === upstreamIndex) return formatSSEEvent(eventType, data);
    return formatSSEEvent(
      eventType,
      JSON.stringify({ ...payload, index: clientIndex }),
    );
  }

  function processEvent(eventType: string, data: string): string {
    // Always feed the inner accumulator (it tracks full state)
    inner.processEvent(eventType, data);

    // Parse the data payload
    let parsed: unknown;
    try {
      parsed = JSON.parse(data);
    } catch {
      // Non-JSON events (pings, etc.) — forward as-is
      return formatSSEEvent(eventType, data);
    }
    // JSON that is not an object carries no block index or content block.
    const payload = isRecord(parsed) ? parsed : undefined;

    switch (eventType) {
      case "content_block_start": {
        if (!payload) break;
        const index = payload.index;
        if (typeof index !== "number") break;

        const block = isRecord(payload.content_block)
          ? payload.content_block
          : undefined;

        if (block?.type === "tool_use" && block.name === recallToolName) {
          // Suppress this block
          suppressedIndices.add(index);
          if (firstSuppressedIndex < 0) firstSuppressedIndex = index;
          return ""; // Don't forward
        }

        if (block?.type === "tool_use") {
          otherToolIndices.add(index);
        }

        // Fix the client index at block start so later events agree with it.
        let clientIndex = clientIndexByUpstream.get(index);
        if (clientIndex === undefined) {
          clientIndex = index - suppressedBelow(index);
          clientIndexByUpstream.set(index, clientIndex);
        }
        return forwardBlockEvent(eventType, data, payload, index, clientIndex);
      }

      case "content_block_delta":
      case "content_block_stop": {
        if (!payload) break;
        const index = payload.index;
        if (typeof index !== "number") break;

        if (suppressedIndices.has(index)) {
          return ""; // Don't forward recall block events
        }

        // Blocks whose start was never seen fall back to the same formula.
        const clientIndex =
          clientIndexByUpstream.get(index) ?? (index - suppressedBelow(index));
        return forwardBlockEvent(eventType, data, payload, index, clientIndex);
      }

      case "message_delta":
      case "message_stop": {
        if (suppressedIndices.size > 0) {
          // Hold back — caller decides whether to forward or replace
          heldBack += formatSSEEvent(eventType, data);
          return "";
        }
        break;
      }

      // message_start, ping, etc. — forward unchanged
    }

    return formatSSEEvent(eventType, data);
  }

  return {
    processEvent,
    getResponse: () => inner.getResponse(),
    isDone: () => inner.isDone(),
    hasRecall: () => suppressedIndices.size > 0,
    hasOtherTools: () => otherToolIndices.size > 0,
    recallBlockIndex: () => firstSuppressedIndex,
    clientBlockCount: () => clientIndexByUpstream.size,
    heldBackEvents: () => heldBack,
  };
}
679
-
/**
 * Consume an Anthropic SSE streaming Response and return the accumulated
 * GatewayResponse. Useful when the response needs to be translated to another
 * protocol format (e.g. OpenAI) after the pipeline produces Anthropic SSE.
 *
 * The body is read in full before parsing. CRLF and lone CR line endings are
 * converted to LF first, so events are split correctly whichever line ending
 * the body uses. Blocks without a `data:` line are skipped.
 *
 * @param response - Response whose body is an Anthropic SSE stream.
 * @returns The response accumulated from every event in the body.
 */
export async function accumulateSSEResponse(
  response: Response,
): Promise<GatewayResponse> {
  const accumulator = createStreamAccumulator();
  // Without normalization a CRLF body contains no "\n\n" and would be parsed
  // as one oversized event.
  const text = (await response.text()).replace(/\r\n?/g, "\n");

  for (const block of text.split("\n\n")) {
    if (!block.trim()) continue;

    let eventType = "message";
    const dataLines: string[] = [];
    for (const line of block.split("\n")) {
      if (line.startsWith("event:")) {
        eventType = line.slice(6).trim();
      } else if (line.startsWith("data:")) {
        dataLines.push(line.slice(5).trimStart());
      }
    }

    if (dataLines.length > 0) {
      accumulator.processEvent(eventType, dataLines.join("\n"));
    }
  }

  return accumulator.getResponse();
}