elasticdash-sdk 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,7 +51,7 @@ function extractPromptSnippet(body) {
51
51
  }
52
52
  return undefined;
53
53
  }
54
- function extractUsage(provider, body) {
54
+ function extractUsage(provider, body, url) {
55
55
  if (provider === 'openai' || provider === 'grok' || provider === 'kimi') {
56
56
  const u = body.usage;
57
57
  if (!u)
@@ -70,9 +70,26 @@ function extractUsage(provider, body) {
70
70
  return undefined;
71
71
  return { inputTokens: u.promptTokenCount, outputTokens: u.candidatesTokenCount, totalTokens: u.totalTokenCount };
72
72
  }
73
+ if (provider === 'bedrock') {
74
+ const kind = url ? bedrockApiKind(url) : undefined;
75
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown';
76
+ if (kind === 'converse' || kind === 'converse-stream') {
77
+ const u = body.usage;
78
+ if (!u)
79
+ return undefined;
80
+ return { inputTokens: u.inputTokens, outputTokens: u.outputTokens, totalTokens: u.totalTokens ?? (u.inputTokens ?? 0) + (u.outputTokens ?? 0) };
81
+ }
82
+ if (vendor === 'anthropic') {
83
+ const u = body.usage;
84
+ if (!u)
85
+ return undefined;
86
+ return { inputTokens: u.input_tokens, outputTokens: u.output_tokens, totalTokens: (u.input_tokens ?? 0) + (u.output_tokens ?? 0) };
87
+ }
88
+ return undefined;
89
+ }
73
90
  return undefined;
74
91
  }
75
- function extractAssistantMessage(provider, body) {
92
+ function extractAssistantMessage(provider, body, url) {
76
93
  if (provider === 'openai' || provider === 'grok' || provider === 'kimi') {
77
94
  const choices = body.choices;
78
95
  if (Array.isArray(choices) && choices.length > 0) {
@@ -95,6 +112,19 @@ function extractAssistantMessage(provider, body) {
95
112
  return content;
96
113
  }
97
114
  }
115
+ if (provider === 'bedrock') {
116
+ const kind = url ? bedrockApiKind(url) : undefined;
117
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown';
118
+ if (kind === 'converse' || kind === 'converse-stream') {
119
+ const output = body.output;
120
+ const msg = output?.message;
121
+ if (msg && typeof msg === 'object')
122
+ return msg;
123
+ }
124
+ else if (vendor === 'anthropic' && Array.isArray(body.content)) {
125
+ return { role: 'assistant', content: body.content };
126
+ }
127
+ }
98
128
  return null;
99
129
  }
100
130
  /** URL patterns for known AI providers */
@@ -104,6 +134,7 @@ const AI_PATTERNS = {
104
134
  gemini: /https?:\/\/generativelanguage\.googleapis\.com\/.*\/models\/[^\/:]+:(generateContent|streamGenerateContent)/,
105
135
  grok: /https?:\/\/api\.x\.ai\/v1\/(chat\/)?completions/,
106
136
  kimi: /https?:\/\/api\.moonshot\.ai\/v1\/(chat\/)?completions/,
137
+ bedrock: /https?:\/\/bedrock-runtime\.[^./]+\.amazonaws\.com\/model\/[^/]+\/(invoke|invoke-with-response-stream|converse|converse-stream)/,
107
138
  };
108
139
  /** Detect which provider (if any) a URL belongs to */
109
140
  function detectProvider(url) {
@@ -113,17 +144,118 @@ function detectProvider(url) {
113
144
  }
114
145
  return null;
115
146
  }
116
- /** Extract model name from request body or URL (for Gemini) */
147
/**
 * Classify a Bedrock runtime URL by the API action that terminates its path.
 *
 * @internal exported for tests only
 * @param {string} url - Request URL, e.g. `.../model/<id>/converse-stream`.
 * @returns {'converse-stream'|'converse'|'invoke-stream'|'invoke'|undefined}
 *   The API kind, or `undefined` when no known action ends the path.
 */
export function bedrockApiKind(url) {
    // The action must be followed by the end of the URL, a query string or a
    // fragment, so it is never confused with a longer path segment.
    const match = /\/(converse-stream|converse|invoke-with-response-stream|invoke)(?:[?#]|$)/.exec(String(url));
    if (!match)
        return undefined;
    // Only the long InvokeModelWithResponseStream action is renamed.
    return match[1] === 'invoke-with-response-stream' ? 'invoke-stream' : match[1];
}
159
/**
 * Pull the model identifier out of a Bedrock runtime URL.
 *
 * The identifier is the path segment between `/model/` and the next `/`.
 * It is percent-decoded when possible; a malformed escape sequence leaves
 * the raw segment untouched instead of throwing.
 *
 * @internal — exported for tests only
 * @param {string} url - Request URL containing `/model/<id>/<action>`.
 * @returns {string} The decoded model id, or `'unknown'` when the URL has no
 *   `/model/<id>/` segment.
 */
export function extractBedrockModelId(url) {
    const match = /\/model\/([^/]+)\//.exec(url);
    if (!match)
        return 'unknown';
    const [, rawId] = match;
    try {
        return decodeURIComponent(rawId);
    }
    catch {
        // decodeURIComponent throws URIError on malformed escapes.
        return rawId;
    }
}
171
/**
 * Map a Bedrock model identifier to the vendor that publishes the model.
 *
 * Accepts plain model ids (`anthropic.claude-...`), cross-region inference
 * profile ids (`us.anthropic.claude-...`) and ARNs whose resource part ends
 * in either of those forms.
 *
 * @internal — exported for tests only
 * @param {string} modelId - Model id, inference profile id, or ARN.
 * @returns {'anthropic'|'amazon'|'cohere'|'meta'|'mistral'|'ai21'|'stability'|'unknown'}
 *   The vendor, or `'unknown'` when it cannot be determined.
 */
export function bedrockVendor(modelId) {
    if (typeof modelId !== 'string')
        return 'unknown';
    // ARNs carry the model / profile id after the final '/', e.g.
    // arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-v2
    const resource = modelId.startsWith('arn:')
        ? modelId.slice(modelId.lastIndexOf('/') + 1)
        : modelId;
    // Strip cross-region inference profile prefixes (us., eu., apac., global., etc.)
    const stripped = resource.replace(/^(us-gov|us|eu|apac|ap|au|jp|global)\./, '');
    const [vendor] = stripped.split('.');
    const knownVendors = new Set(['anthropic', 'amazon', 'cohere', 'meta', 'mistral', 'ai21', 'stability']);
    return knownVendors.has(vendor) ? vendor : 'unknown';
}
187
/**
 * Extract model name from request body or URL (for Gemini/Bedrock).
 *
 * @param {string} provider - Provider key as returned by provider detection.
 * @param {object} body - Parsed request body; may be empty or missing.
 * @param {string} url - Request URL.
 * @returns {string} The model name, or `'unknown'` when it cannot be found.
 */
function extractModel(provider, body, url) {
    if (provider === 'gemini') {
        // URL shape: .../models/gemini-1.5-pro:generateContent
        const match = /\/models\/([^/:]+):/.exec(url);
        return match ? match[1] : 'unknown';
    }
    if (provider === 'bedrock') {
        // Bedrock carries the model id in the URL path, not in the body.
        return extractBedrockModelId(url);
    }
    // Every other provider names the model in the request body.
    return typeof body?.model === 'string' ? body.model : 'unknown';
}
125
199
  /** Extract prompt text from request body */
126
- function extractPrompt(provider, body) {
200
+ function extractPrompt(provider, body, url) {
201
+ if (provider === 'bedrock') {
202
+ const kind = url ? bedrockApiKind(url) : undefined;
203
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown';
204
+ if (kind === 'converse' || kind === 'converse-stream') {
205
+ let systemPrefix = '';
206
+ if (Array.isArray(body.system)) {
207
+ systemPrefix = body.system
208
+ .map((b) => (b && typeof b === 'object' ? String(b.text ?? '') : String(b)))
209
+ .filter(Boolean)
210
+ .map((t) => `system: ${t}`)
211
+ .join('\n');
212
+ if (systemPrefix)
213
+ systemPrefix += '\n';
214
+ }
215
+ const messages = body.messages;
216
+ if (Array.isArray(messages)) {
217
+ const msgText = messages
218
+ .map((m) => {
219
+ if (!m || typeof m !== 'object')
220
+ return String(m);
221
+ const msg = m;
222
+ let content = msg.content;
223
+ if (Array.isArray(content)) {
224
+ content = content
225
+ .map((b) => (b && typeof b === 'object' ? String(b.text ?? '') : String(b)))
226
+ .filter(Boolean)
227
+ .join('');
228
+ }
229
+ return `${msg.role}: ${content}`;
230
+ })
231
+ .join('\n');
232
+ return systemPrefix + msgText;
233
+ }
234
+ return systemPrefix;
235
+ }
236
+ if (vendor === 'anthropic') {
237
+ // Same body shape as anthropic — fall through to the anthropic branch below.
238
+ provider = 'anthropic';
239
+ }
240
+ else {
241
+ // Best-effort for other vendors
242
+ if (typeof body.prompt === 'string')
243
+ return body.prompt;
244
+ if (typeof body.inputText === 'string')
245
+ return body.inputText;
246
+ if (Array.isArray(body.messages)) {
247
+ return body.messages
248
+ .map((m) => {
249
+ if (!m || typeof m !== 'object')
250
+ return String(m);
251
+ const msg = m;
252
+ return `${msg.role ?? 'user'}: ${String(msg.content ?? '')}`;
253
+ })
254
+ .join('\n');
255
+ }
256
+ return '';
257
+ }
258
+ }
127
259
  if (provider === 'openai' || provider === 'anthropic' || provider === 'grok' || provider === 'kimi') {
128
260
  let systemPrefix = '';
129
261
  // Anthropic supports a top-level `system` parameter
@@ -203,11 +335,65 @@ function extractPrompt(provider, body) {
203
335
  return '';
204
336
  }
205
337
  /** Extract completion text from response body */
206
- function extractCompletion(provider, responseBody) {
338
+ function extractCompletion(provider, responseBody, url) {
207
339
  // Handle buffered streaming format
208
340
  if (responseBody.streamed === true && typeof responseBody.completion === 'string') {
209
341
  return responseBody.completion;
210
342
  }
343
+ if (provider === 'bedrock') {
344
+ const kind = url ? bedrockApiKind(url) : undefined;
345
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown';
346
+ if (kind === 'converse' || kind === 'converse-stream') {
347
+ const output = responseBody.output;
348
+ const msg = output?.message;
349
+ if (msg && Array.isArray(msg.content)) {
350
+ return msg.content
351
+ .map((b) => (b && typeof b === 'object' ? String(b.text ?? '') : ''))
352
+ .filter(Boolean)
353
+ .join('');
354
+ }
355
+ return '';
356
+ }
357
+ if (vendor === 'anthropic') {
358
+ const content = responseBody.content;
359
+ if (Array.isArray(content)) {
360
+ return content
361
+ .map((block) => {
362
+ if (block && typeof block === 'object') {
363
+ const b = block;
364
+ if (b.type === 'text' && typeof b.text === 'string')
365
+ return b.text;
366
+ }
367
+ return '';
368
+ })
369
+ .filter(Boolean)
370
+ .join('');
371
+ }
372
+ return '';
373
+ }
374
+ // Other vendors — best-effort
375
+ if (typeof responseBody.completion === 'string')
376
+ return responseBody.completion; // legacy Claude
377
+ if (typeof responseBody.outputText === 'string')
378
+ return responseBody.outputText; // Titan
379
+ if (typeof responseBody.generation === 'string')
380
+ return responseBody.generation; // Llama
381
+ if (Array.isArray(responseBody.results)) {
382
+ // Titan / Cohere results array
383
+ return responseBody.results
384
+ .map((r) => (r && typeof r === 'object' ? String(r.outputText ?? r.text ?? '') : ''))
385
+ .filter(Boolean)
386
+ .join('');
387
+ }
388
+ if (Array.isArray(responseBody.choices)) {
389
+ // Mistral
390
+ const first = responseBody.choices[0];
391
+ const msg = first?.message;
392
+ if (msg && typeof msg.content === 'string')
393
+ return msg.content;
394
+ }
395
+ return '';
396
+ }
211
397
  if (provider === 'openai' || provider === 'grok' || provider === 'kimi') {
212
398
  const choices = responseBody.choices;
213
399
  if (Array.isArray(choices) && choices.length > 0) {
@@ -265,6 +451,226 @@ function extractCompletion(provider, responseBody) {
265
451
  }
266
452
  return '';
267
453
  }
454
+ // ---------------------------------------------------------------------------
455
+ // Bedrock binary eventstream support (application/vnd.amazon.eventstream)
456
+ // ---------------------------------------------------------------------------
457
+ //
458
+ // Wire format: 12-byte prelude (total_length | headers_length | prelude_crc),
459
+ // headers TLV block, JSON payload, 4-byte message CRC. We ignore CRCs on read
460
+ // (we are observing, not validating). On write we compute CRC-32 so that the
461
+ // synthesized stream is acceptable to strict consumers like the AWS SDK.
462
/**
 * Compute the CRC-32 checksum (reflected polynomial 0xEDB88320) of a byte sequence.
 *
 * @param {Uint8Array} bytes - Bytes to checksum.
 * @param {number} [init=0xFFFFFFFF] - Initial register value. This is the raw
 *   register, not a previously returned checksum.
 * @returns {number} The checksum as an unsigned 32-bit integer.
 */
function crc32(bytes, init = 0xFFFFFFFF) {
    let crc = init;
    for (let i = 0; i < bytes.length; i++) {
        crc ^= bytes[i];
        for (let bit = 0; bit < 8; bit++) {
            // -(crc & 1) is all ones when the low bit is set, so the
            // polynomial is XORed in only for odd register values.
            crc = (crc >>> 1) ^ (0xEDB88320 & -(crc & 1));
        }
    }
    // Final inversion; >>> 0 reinterprets the signed result as unsigned.
    return (crc ^ 0xFFFFFFFF) >>> 0;
}
472
/**
 * Join a list of byte chunks into one contiguous Uint8Array.
 *
 * @param {Uint8Array[]} chunks - Chunks in the order they should appear.
 * @returns {Uint8Array} A new array holding every chunk back to back.
 */
function concatChunks(chunks) {
    const totalLength = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
    const out = new Uint8Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
        out.set(chunk, offset);
        offset += chunk.length;
    }
    return out;
}
484
/**
 * Split a buffered `application/vnd.amazon.eventstream` body into frames.
 *
 * Each message is: 12-byte prelude (total length, headers length, prelude
 * CRC), a headers block, the payload, and a 4-byte message CRC. CRCs are not
 * verified. Only string-valued headers are returned; other header types are
 * skipped. Parsing stops at the first message whose total length is
 * implausible; a message with an inconsistent headers length is dropped.
 *
 * @internal — exported for tests only
 * @param {Uint8Array} buf - The complete buffered stream.
 * @returns {{ headers: Record<string, string>, payload: Uint8Array }[]}
 *   Frames in stream order; `payload` is a view into `buf`.
 */
export function parseBedrockEventStream(buf) {
    const PRELUDE_LEN = 12;
    const CRC_LEN = 4;
    const MIN_MESSAGE_LEN = PRELUDE_LEN + CRC_LEN;
    // Value sizes of the fixed-width header types:
    // bool true/false, byte, short, int, long, timestamp, uuid.
    const fixedLenByType = { 0: 0, 1: 0, 2: 1, 3: 2, 4: 4, 5: 8, 8: 8, 9: 16 };
    const frames = [];
    const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
    const decoder = new TextDecoder();
    let offset = 0;
    while (offset + PRELUDE_LEN <= buf.length) {
        const totalLen = view.getUint32(offset, false);
        const headersLen = view.getUint32(offset + 4, false);
        if (totalLen < MIN_MESSAGE_LEN || totalLen > buf.length - offset)
            break;
        if (headersLen > totalLen - MIN_MESSAGE_LEN) {
            // Headers cannot extend past the message; drop this frame
            // rather than read outside it.
            offset += totalLen;
            continue;
        }
        const headers = {};
        let h = offset + PRELUDE_LEN;
        const hEnd = h + headersLen;
        while (h < hEnd) {
            const nameLen = buf[h];
            h += 1;
            // The name and the following type byte must fit in the block.
            if (h + nameLen + 1 > hEnd)
                break;
            const name = decoder.decode(buf.subarray(h, h + nameLen));
            h += nameLen;
            const type = buf[h];
            h += 1;
            if (type === 6 || type === 7) {
                // byte_array (6) and string (7) carry a 2-byte length prefix.
                if (h + 2 > hEnd)
                    break;
                const valueLen = view.getUint16(h, false);
                h += 2;
                if (h + valueLen > hEnd)
                    break;
                if (type === 7)
                    headers[name] = decoder.decode(buf.subarray(h, h + valueLen));
                h += valueLen;
            }
            else {
                const skipLen = fixedLenByType[type];
                if (skipLen === undefined)
                    break; // unknown type: the value width is unknowable
                h += skipLen;
            }
        }
        const payloadStart = offset + PRELUDE_LEN + headersLen;
        const payloadEnd = offset + totalLen - CRC_LEN;
        frames.push({ headers, payload: buf.subarray(payloadStart, payloadEnd) });
        offset += totalLen;
    }
    return frames;
}
532
/**
 * Encode one `application/vnd.amazon.eventstream` message carrying a JSON payload.
 *
 * Layout: 12-byte prelude (total length, headers length, prelude CRC-32),
 * three string headers (`:event-type`, `:content-type`, `:message-type`),
 * the JSON payload, and a trailing CRC-32 over everything before it.
 *
 * @internal — exported for tests only
 * @param {string} eventType - Value for the `:event-type` header.
 * @param {*} payloadObj - Value serialised with `JSON.stringify` as the payload.
 * @returns {Uint8Array} The encoded message.
 * @throws {RangeError} If a header name exceeds 255 bytes or a header value
 *   exceeds 65535 bytes, which the length fields cannot represent.
 */
export function encodeBedrockFrame(eventType, payloadObj) {
    const encoder = new TextEncoder();
    const payload = encoder.encode(JSON.stringify(payloadObj));
    // One header: name length (1 byte), name, type 7 (string),
    // value length (2 bytes, big-endian), value.
    const writeHeader = (name, value) => {
        const nameBytes = encoder.encode(name);
        const valueBytes = encoder.encode(value);
        if (nameBytes.length > 0xFF || valueBytes.length > 0xFFFF) {
            // The length fields would silently wrap and corrupt the frame.
            throw new RangeError(`eventstream header "${name}" is too long to encode`);
        }
        const headerOut = new Uint8Array(1 + nameBytes.length + 1 + 2 + valueBytes.length);
        const headerView = new DataView(headerOut.buffer);
        let p = 0;
        headerOut[p++] = nameBytes.length;
        headerOut.set(nameBytes, p);
        p += nameBytes.length;
        headerOut[p++] = 7;
        headerView.setUint16(p, valueBytes.length, false);
        p += 2;
        headerOut.set(valueBytes, p);
        return headerOut;
    };
    const headers = concatChunks([
        writeHeader(':event-type', eventType),
        writeHeader(':content-type', 'application/json'),
        writeHeader(':message-type', 'event'),
    ]);
    const totalLen = 12 + headers.length + payload.length + 4;
    const out = new Uint8Array(totalLen);
    const dv = new DataView(out.buffer);
    dv.setUint32(0, totalLen, false);
    dv.setUint32(4, headers.length, false);
    // The prelude CRC covers only the two length fields.
    dv.setUint32(8, crc32(out.subarray(0, 8)), false);
    out.set(headers, 12);
    out.set(payload, 12 + headers.length);
    // The message CRC covers everything before its own four bytes.
    dv.setUint32(totalLen - 4, crc32(out.subarray(0, totalLen - 4)), false);
    return out;
}
569
/**
 * Drain a Bedrock binary eventstream and recover the completion text and
 * token usage from its frames.
 *
 * For `invoke-with-response-stream` URLs each frame's JSON payload wraps the
 * model's own event as base64 in a `bytes` field. For converse-stream the
 * payload is the event body itself, identified by the `:event-type` header
 * or by a key of the same name.
 *
 * @internal — exported for tests only
 * @param {string} url - Request URL; decides the API kind and model vendor.
 * @param {ReadableStream<Uint8Array>} stream - The response body to consume.
 * @returns {Promise<{ completion: string, usage: object | undefined }>}
 *   Concatenated text deltas and the last usage seen, if any.
 */
export async function bufferBedrockEventStream(url, stream) {
    const reader = stream.getReader();
    const chunks = [];
    try {
        for (;;) {
            const { done, value } = await reader.read();
            if (done)
                break;
            if (value)
                chunks.push(value);
        }
    }
    finally {
        reader.releaseLock();
    }
    const frames = parseBedrockEventStream(concatChunks(chunks));
    const isInvoke = bedrockApiKind(url) === 'invoke-stream';
    const vendor = bedrockVendor(extractBedrockModelId(url));
    const decoder = new TextDecoder();
    // atob() yields one character per byte, not decoded text, so the bytes
    // must go through a UTF-8 decoder or non-ASCII output is corrupted.
    const decodeBase64Utf8 = (b64) => (typeof atob === 'function'
        ? decoder.decode(Uint8Array.from(atob(b64), (ch) => ch.charCodeAt(0)))
        : Buffer.from(b64, 'base64').toString('utf8'));
    // Later Anthropic usage events may omit fields reported earlier; keep the
    // earlier values instead of overwriting them with undefined.
    // NOTE(review): assumes extractUsage('anthropic', …) returns
    // { inputTokens, outputTokens, totalTokens } like the branches visible
    // in this file — confirm.
    const mergeUsage = (prev, next) => {
        if (!next)
            return prev;
        if (!prev)
            return next;
        const merged = { ...prev };
        for (const [key, val] of Object.entries(next)) {
            if (val !== undefined)
                merged[key] = val;
        }
        const nextWasPartial = next.inputTokens === undefined || next.outputTokens === undefined;
        if (nextWasPartial && typeof merged.inputTokens === 'number' && typeof merged.outputTokens === 'number') {
            merged.totalTokens = merged.inputTokens + merged.outputTokens;
        }
        return merged;
    };
    let completion = '';
    let usage;
    for (const frame of frames) {
        let obj;
        try {
            obj = JSON.parse(decoder.decode(frame.payload));
        }
        catch {
            continue;
        }
        // A payload such as `null` or a bare number carries no event.
        if (obj === null || typeof obj !== 'object')
            continue;
        const eventType = frame.headers[':event-type'];
        if (isInvoke && typeof obj.bytes === 'string') {
            // InvokeModelWithResponseStream wraps the underlying model's SSE-equivalent
            // JSON event in a base64 'bytes' field per frame.
            try {
                const decoded = JSON.parse(decodeBase64Utf8(obj.bytes));
                if (vendor === 'anthropic') {
                    if (decoded.type === 'content_block_delta') {
                        const d = decoded.delta;
                        if (d?.type === 'text_delta' && typeof d.text === 'string')
                            completion += d.text;
                    }
                    if (decoded.type === 'message_delta') {
                        const u = decoded.usage;
                        if (u)
                            usage = mergeUsage(usage, extractUsage('anthropic', { usage: u }));
                    }
                    if (decoded.type === 'message_start') {
                        const msg = decoded.message;
                        if (msg?.usage)
                            usage = mergeUsage(usage, extractUsage('anthropic', { usage: msg.usage }));
                    }
                }
                else {
                    // Best-effort generic
                    if (typeof decoded.outputText === 'string')
                        completion += decoded.outputText;
                    if (typeof decoded.generation === 'string')
                        completion += decoded.generation;
                }
            }
            catch { /* skip an undecodable inner event */ }
            continue;
        }
        // Converse-stream: payload IS the event body; the discriminator lives
        // in the :event-type header. Some SDK serialisations also nest the body
        // under a key matching the event type — handle both shapes.
        if (eventType === 'contentBlockDelta' || obj.contentBlockDelta) {
            const body = obj.contentBlockDelta ?? obj;
            const delta = body.delta;
            if (typeof delta?.text === 'string')
                completion += delta.text;
        }
        if (eventType === 'metadata' || obj.metadata) {
            const body = obj.metadata ?? obj;
            const u = body.usage;
            if (u) {
                usage = {
                    inputTokens: u.inputTokens,
                    outputTokens: u.outputTokens,
                    totalTokens: u.totalTokens ?? (u.inputTokens ?? 0) + (u.outputTokens ?? 0),
                };
            }
        }
    }
    return { completion, usage };
}
656
/**
 * Content-Type for a synthesized streaming response, per provider.
 *
 * @param {string} provider - Provider key.
 * @returns {string} `application/json` for Gemini,
 *   `application/vnd.amazon.eventstream` for Bedrock, and
 *   `text/event-stream` for every other provider.
 */
function streamContentType(provider) {
    switch (provider) {
        case 'gemini':
            return 'application/json';
        case 'bedrock':
            return 'application/vnd.amazon.eventstream';
        default:
            return 'text/event-stream';
    }
}
664
/**
 * Unified streaming buffer: dispatches to the binary Bedrock eventstream parser
 * or the text SSE/NDJSON parser based on provider.
 *
 * @param {string} provider - Provider key.
 * @param {string} url - Request URL; only the Bedrock path uses it.
 * @param {ReadableStream<Uint8Array>} stream - Response body to consume.
 * @returns {Promise<{ completion: string, usage?: object }>} The buffered
 *   completion. `usage` is set only by the Bedrock path; the SSE path
 *   returns the completion alone.
 */
async function bufferAIStream(provider, url, stream) {
    if (provider === 'bedrock') {
        return bufferBedrockEventStream(url, stream);
    }
    const completion = await bufferSSEStream(provider, stream);
    return { completion };
}
268
674
  /** Buffer a streaming SSE/NDJSON response to extract the completion text */
269
675
  async function bufferSSEStream(provider, stream) {
270
676
  const decoder = new TextDecoder();
@@ -387,7 +793,7 @@ function extractStreamUsage(provider, rawSSE) {
387
793
  return undefined;
388
794
  }
389
795
  /** Build a minimal non-streaming JSON response body from a completion string (for replay) */
390
- function synthesizeCompletionJSON(provider, completion) {
796
+ function synthesizeCompletionJSON(provider, completion, url) {
391
797
  if (provider === 'gemini') {
392
798
  return {
393
799
  candidates: [{ content: { parts: [{ text: completion }], role: 'model' }, finishReason: 'STOP' }],
@@ -403,6 +809,29 @@ function synthesizeCompletionJSON(provider, completion) {
403
809
  stop_sequence: null,
404
810
  };
405
811
  }
812
+ if (provider === 'bedrock') {
813
+ const kind = url ? bedrockApiKind(url) : undefined;
814
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown';
815
+ if (kind === 'converse' || kind === 'converse-stream') {
816
+ return {
817
+ output: { message: { role: 'assistant', content: [{ text: completion }] } },
818
+ stopReason: 'end_turn',
819
+ usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
820
+ };
821
+ }
822
+ if (vendor === 'anthropic') {
823
+ return {
824
+ id: 'replay',
825
+ type: 'message',
826
+ role: 'assistant',
827
+ content: [{ type: 'text', text: completion }],
828
+ stop_reason: 'end_turn',
829
+ stop_sequence: null,
830
+ };
831
+ }
832
+ // Generic fallback
833
+ return { outputText: completion };
834
+ }
406
835
  // OpenAI / Grok / Kimi format
407
836
  return {
408
837
  id: 'replay',
@@ -411,8 +840,43 @@ function synthesizeCompletionJSON(provider, completion) {
411
840
  };
412
841
  }
413
842
  /** Build a minimal SSE/NDJSON ReadableStream from a completion string (for replay) */
414
- function synthesizeSSEStream(provider, completion) {
843
+ function synthesizeSSEStream(provider, completion, url) {
415
844
  const encoder = new TextEncoder();
845
+ if (provider === 'bedrock') {
846
+ const kind = url ? bedrockApiKind(url) : undefined;
847
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown';
848
+ return new ReadableStream({
849
+ start(ctrl) {
850
+ if (kind === 'converse-stream') {
851
+ ctrl.enqueue(encodeBedrockFrame('messageStart', { role: 'assistant' }));
852
+ ctrl.enqueue(encodeBedrockFrame('contentBlockDelta', { contentBlockIndex: 0, delta: { text: completion } }));
853
+ ctrl.enqueue(encodeBedrockFrame('contentBlockStop', { contentBlockIndex: 0 }));
854
+ ctrl.enqueue(encodeBedrockFrame('messageStop', { stopReason: 'end_turn' }));
855
+ ctrl.enqueue(encodeBedrockFrame('metadata', { usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }, metrics: { latencyMs: 0 } }));
856
+ }
857
+ else if (vendor === 'anthropic') {
858
+ // InvokeModelWithResponseStream wraps the inner Anthropic SSE event JSON in { bytes }
859
+ const wrap = (eventType, inner) => {
860
+ const innerJson = JSON.stringify(inner);
861
+ const b64 = typeof btoa === 'function' ? btoa(innerJson) : Buffer.from(innerJson, 'utf8').toString('base64');
862
+ return encodeBedrockFrame(eventType, { bytes: b64 });
863
+ };
864
+ ctrl.enqueue(wrap('chunk', { type: 'message_start', message: { id: 'replay', type: 'message', role: 'assistant', content: [], stop_reason: null, stop_sequence: null } }));
865
+ ctrl.enqueue(wrap('chunk', { type: 'content_block_start', index: 0, content_block: { type: 'text', text: '' } }));
866
+ ctrl.enqueue(wrap('chunk', { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: completion } }));
867
+ ctrl.enqueue(wrap('chunk', { type: 'content_block_stop', index: 0 }));
868
+ ctrl.enqueue(wrap('chunk', { type: 'message_delta', delta: { stop_reason: 'end_turn', stop_sequence: null } }));
869
+ ctrl.enqueue(wrap('chunk', { type: 'message_stop' }));
870
+ }
871
+ else {
872
+ // Generic fallback: one chunk wrapping the completion text
873
+ const b64 = typeof btoa === 'function' ? btoa(JSON.stringify({ outputText: completion })) : Buffer.from(JSON.stringify({ outputText: completion }), 'utf8').toString('base64');
874
+ ctrl.enqueue(encodeBedrockFrame('chunk', { bytes: b64 }));
875
+ }
876
+ ctrl.close();
877
+ },
878
+ });
879
+ }
416
880
  return new ReadableStream({
417
881
  start(ctrl) {
418
882
  if (provider === 'gemini') {
@@ -483,30 +947,42 @@ export function installAIInterceptor() {
483
947
  url, provider, model: capturedModel, messages: capturedMessages,
484
948
  body: capturedReq, promptSnippet: capturedSnippet,
485
949
  };
486
- const isStreaming = capturedReq.stream === true;
950
+ const isStreaming = provider === 'bedrock'
951
+ ? (bedrockApiKind(url) ?? '').endsWith('-stream')
952
+ : capturedReq.stream === true;
487
953
  try {
488
954
  const cloned = response.clone();
489
955
  if (!isStreaming) {
490
956
  // Non-streaming: parse JSON response for usage
491
957
  const responseBody = await cloned.json();
492
- captured.usage = extractUsage(provider, responseBody);
958
+ captured.usage = extractUsage(provider, responseBody, url);
493
959
  }
494
960
  else if (cloned.body) {
495
- // Streaming: read the raw SSE text to extract usage from final events
496
- try {
497
- const decoder = new TextDecoder();
498
- const reader = cloned.body.getReader();
499
- let rawSSE = '';
500
- for (;;) {
501
- const { done, value } = await reader.read();
502
- if (done)
503
- break;
504
- rawSSE += decoder.decode(value, { stream: true });
961
+ if (provider === 'bedrock') {
962
+ // Binary eventstream: parse frames to extract usage
963
+ try {
964
+ const { usage } = await bufferBedrockEventStream(url, cloned.body);
965
+ captured.usage = usage;
505
966
  }
506
- reader.releaseLock();
507
- captured.usage = extractStreamUsage(provider, rawSSE);
967
+ catch { /* parse failed */ }
968
+ }
969
+ else {
970
+ // SSE/NDJSON text: extract usage from final events
971
+ try {
972
+ const decoder = new TextDecoder();
973
+ const reader = cloned.body.getReader();
974
+ let rawSSE = '';
975
+ for (;;) {
976
+ const { done, value } = await reader.read();
977
+ if (done)
978
+ break;
979
+ rawSSE += decoder.decode(value, { stream: true });
980
+ }
981
+ reader.releaseLock();
982
+ captured.usage = extractStreamUsage(provider, rawSSE);
983
+ }
984
+ catch { /* stream read failed */ }
508
985
  }
509
- catch { /* stream read failed */ }
510
986
  }
511
987
  }
512
988
  catch {
@@ -534,17 +1010,28 @@ export function installAIInterceptor() {
534
1010
  if (rawBody && typeof rawBody === 'string') {
535
1011
  const body = JSON.parse(rawBody);
536
1012
  model = extractModel(provider, body, url);
537
- prompt = extractPrompt(provider, body);
538
- isStreaming = body.stream === true;
1013
+ prompt = extractPrompt(provider, body, url);
1014
+ isStreaming = provider === 'bedrock'
1015
+ ? (bedrockApiKind(url) ?? '').endsWith('-stream')
1016
+ : body.stream === true;
539
1017
  // Capture full messages array for rich display in the dashboard
540
1018
  if (Array.isArray(body.messages))
541
1019
  messages = body.messages;
542
1020
  else if (Array.isArray(body.contents))
543
1021
  messages = body.contents; // Gemini
544
1022
  }
1023
+ else if (provider === 'bedrock') {
1024
+ // No body parsed but URL alone is enough to set streaming flag
1025
+ isStreaming = (bedrockApiKind(url) ?? '').endsWith('-stream');
1026
+ model = extractModel(provider, {}, url);
1027
+ }
545
1028
  }
546
1029
  catch {
547
1030
  // Ignore parse errors — still pass through
1031
+ if (provider === 'bedrock') {
1032
+ isStreaming = (bedrockApiKind(url) ?? '').endsWith('-stream');
1033
+ model = extractModel(provider, {}, url);
1034
+ }
548
1035
  }
549
1036
  const ctx = getCaptureContext();
550
1037
  // Observability-only mode: no trace handle, no capture context — record via pushTelemetryEvent
@@ -556,9 +1043,9 @@ export function installAIInterceptor() {
556
1043
  const response = await originalFetch(input, init);
557
1044
  if (isStreaming && response.body) {
558
1045
  const [streamForCaller, streamForRecorder] = response.body.tee();
559
- bufferSSEStream(provider, streamForRecorder).then((completion) => {
1046
+ bufferAIStream(provider, url, streamForRecorder).then(({ completion, usage }) => {
560
1047
  const durationMs = rawDateNow() - start;
561
- pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: { streamed: true, completion }, timestamp: start, durationMs });
1048
+ pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: { streamed: true, completion }, timestamp: start, durationMs, ...(usage ? { usage } : {}) });
562
1049
  }).catch(() => {
563
1050
  const durationMs = rawDateNow() - start;
564
1051
  pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: null, streamed: true, streamRaw: '', timestamp: start, durationMs });
@@ -568,8 +1055,8 @@ export function installAIInterceptor() {
568
1055
  try {
569
1056
  const cloned = response.clone();
570
1057
  const responseBody = await cloned.json();
571
- const completion = extractCompletion(provider, responseBody);
572
- const usage = extractUsage(provider, responseBody);
1058
+ const completion = extractCompletion(provider, responseBody, url);
1059
+ const usage = extractUsage(provider, responseBody, url);
573
1060
  const durationMs = rawDateNow() - start;
574
1061
  const event = {
575
1062
  id, type: 'ai', name: model, input: eventInput, output: { completion },
@@ -594,16 +1081,16 @@ export function installAIInterceptor() {
594
1081
  if (frozen && frozen.type === 'ai') {
595
1082
  pushTelemetryEvent(frozen);
596
1083
  const frozenOutput = frozen.output;
597
- const completion = frozenOutput ? extractCompletion(provider, frozenOutput) : '(replayed)';
1084
+ const completion = frozenOutput ? extractCompletion(provider, frozenOutput, url) : '(replayed)';
598
1085
  if (isStreaming) {
599
- return new Response(synthesizeSSEStream(provider, completion), {
1086
+ return new Response(synthesizeSSEStream(provider, completion, url), {
600
1087
  status: 200,
601
- headers: { 'Content-Type': provider === 'gemini' ? 'application/json' : 'text/event-stream' },
1088
+ headers: { 'Content-Type': streamContentType(provider) },
602
1089
  });
603
1090
  }
604
1091
  const body = frozenOutput?.streamed === true
605
- ? synthesizeCompletionJSON(provider, completion)
606
- : (frozenOutput ?? synthesizeCompletionJSON(provider, completion));
1092
+ ? synthesizeCompletionJSON(provider, completion, url)
1093
+ : (frozenOutput ?? synthesizeCompletionJSON(provider, completion, url));
607
1094
  return new Response(JSON.stringify(body), {
608
1095
  status: 200,
609
1096
  headers: { 'Content-Type': 'application/json' },
@@ -614,9 +1101,9 @@ export function installAIInterceptor() {
614
1101
  const response = await originalFetch(input, init);
615
1102
  if (isStreaming && response.body) {
616
1103
  const [streamForCaller, streamForRecorder] = response.body.tee();
617
- bufferSSEStream(provider, streamForRecorder).then((completion) => {
1104
+ bufferAIStream(provider, url, streamForRecorder).then(({ completion, usage }) => {
618
1105
  const durationMs = rawDateNow() - start;
619
- pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: { streamed: true, completion }, timestamp: start, durationMs });
1106
+ pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: { streamed: true, completion }, timestamp: start, durationMs, ...(usage ? { usage } : {}) });
620
1107
  }).catch(() => {
621
1108
  const durationMs = rawDateNow() - start;
622
1109
  pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: null, streamed: true, streamRaw: '', timestamp: start, durationMs });
@@ -626,8 +1113,8 @@ export function installAIInterceptor() {
626
1113
  try {
627
1114
  const cloned = response.clone();
628
1115
  const responseBody = await cloned.json();
629
- const completion = extractCompletion(provider, responseBody);
630
- const usage = extractUsage(provider, responseBody);
1116
+ const completion = extractCompletion(provider, responseBody, url);
1117
+ const usage = extractUsage(provider, responseBody, url);
631
1118
  const durationMs = rawDateNow() - start;
632
1119
  pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: { completion }, timestamp: start, durationMs, ...(usage ? { usage } : {}) });
633
1120
  }
@@ -654,18 +1141,18 @@ export function installAIInterceptor() {
654
1141
  if (isReplayMatch && historicalEvent) {
655
1142
  recorder.record(historicalEvent);
656
1143
  const historicalOutput = historicalEvent.output;
657
- const completion = historicalOutput ? extractCompletion(provider, historicalOutput) : '(replayed)';
1144
+ const completion = historicalOutput ? extractCompletion(provider, historicalOutput, url) : '(replayed)';
658
1145
  traceAtCall.recordLLMStep({ model, provider, prompt, completion, workflowEventId: id });
659
1146
  if (isStreaming) {
660
1147
  // Current caller expects a streaming response — always synthesize SSE
661
- return new Response(synthesizeSSEStream(provider, completion), {
1148
+ return new Response(synthesizeSSEStream(provider, completion, url), {
662
1149
  status: 200,
663
- headers: { 'Content-Type': provider === 'gemini' ? 'application/json' : 'text/event-stream' },
1150
+ headers: { 'Content-Type': streamContentType(provider) },
664
1151
  });
665
1152
  }
666
1153
  if (historicalOutput?.streamed === true) {
667
1154
  // Original was streamed but caller now expects JSON — synthesize a completion response
668
- return new Response(JSON.stringify(synthesizeCompletionJSON(provider, completion)), {
1155
+ return new Response(JSON.stringify(synthesizeCompletionJSON(provider, completion, url)), {
669
1156
  status: 200,
670
1157
  headers: { 'Content-Type': 'application/json' },
671
1158
  });
@@ -680,10 +1167,10 @@ export function installAIInterceptor() {
680
1167
  if (isStreaming) {
681
1168
  if (response.body) {
682
1169
  const [streamForCaller, streamForRecorder] = response.body.tee();
683
- recorder.trackAsync(bufferSSEStream(provider, streamForRecorder).then((completion) => {
1170
+ recorder.trackAsync(bufferAIStream(provider, url, streamForRecorder).then(({ completion, usage }) => {
684
1171
  const durationMs = rawDateNow() - start;
685
1172
  traceAtCall.recordLLMStep({ model, provider, prompt, completion, workflowEventId: id, durationMs });
686
- recorder.record({ id, type: 'ai', name: model, input: { url, provider, model, prompt, messages }, output: { streamed: true, completion }, timestamp: start, durationMs });
1173
+ recorder.record({ id, type: 'ai', name: model, input: { url, provider, model, prompt, messages }, output: { streamed: true, completion }, timestamp: start, durationMs, ...(usage ? { usage } : {}) });
687
1174
  }).catch(() => {
688
1175
  const durationMs = rawDateNow() - start;
689
1176
  traceAtCall.recordLLMStep({ model, provider, prompt, completion: '(streamed-error)', workflowEventId: id, durationMs });
@@ -704,9 +1191,9 @@ export function installAIInterceptor() {
704
1191
  try {
705
1192
  const cloned = response.clone();
706
1193
  const responseBody = await cloned.json();
707
- const completion = extractCompletion(provider, responseBody);
708
- const usage = extractUsage(provider, responseBody);
709
- const assistantMessage = extractAssistantMessage(provider, responseBody);
1194
+ const completion = extractCompletion(provider, responseBody, url);
1195
+ const usage = extractUsage(provider, responseBody, url);
1196
+ const assistantMessage = extractAssistantMessage(provider, responseBody, url);
710
1197
  traceAtCall.recordLLMStep({ model, provider, prompt, completion, workflowEventId: id, durationMs });
711
1198
  recorder.record({ id, type: 'ai', name: model, input: { url, provider, model, prompt, messages }, output: assistantMessage ?? responseBody, timestamp: start, durationMs, usage });
712
1199
  }
@@ -723,7 +1210,7 @@ export function installAIInterceptor() {
723
1210
  const response = await originalFetch(input, init);
724
1211
  if (isStreaming && response.body) {
725
1212
  const [streamForCaller, streamForRecorder] = response.body.tee();
726
- bufferSSEStream(provider, streamForRecorder).then((completion) => {
1213
+ bufferAIStream(provider, url, streamForRecorder).then(({ completion }) => {
727
1214
  traceAtCall.recordLLMStep({ model, provider, prompt, completion });
728
1215
  }).catch(() => {
729
1216
  traceAtCall.recordLLMStep({ model, provider, prompt, completion: '(streamed-error)' });
@@ -734,7 +1221,7 @@ export function installAIInterceptor() {
734
1221
  try {
735
1222
  const cloned = response.clone();
736
1223
  const responseBody = await cloned.json();
737
- const completion = extractCompletion(provider, responseBody);
1224
+ const completion = extractCompletion(provider, responseBody, url);
738
1225
  traceAtCall.recordLLMStep({ model, provider, prompt, completion });
739
1226
  }
740
1227
  catch {