elasticdash-sdk 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,7 +65,7 @@ function extractPromptSnippet(body: Record<string, unknown>): string | undefined
65
65
 
66
66
  type UsageInfo = { inputTokens?: number; outputTokens?: number; totalTokens?: number }
67
67
 
68
- function extractUsage(provider: string, body: Record<string, unknown>): UsageInfo | undefined {
68
+ function extractUsage(provider: string, body: Record<string, unknown>, url?: string): UsageInfo | undefined {
69
69
  if (provider === 'openai' || provider === 'grok' || provider === 'kimi') {
70
70
  const u = body.usage as Record<string, number> | undefined
71
71
  if (!u) return undefined
@@ -81,10 +81,25 @@ function extractUsage(provider: string, body: Record<string, unknown>): UsageInf
81
81
  if (!u) return undefined
82
82
  return { inputTokens: u.promptTokenCount, outputTokens: u.candidatesTokenCount, totalTokens: u.totalTokenCount }
83
83
  }
84
+ if (provider === 'bedrock') {
85
+ const kind = url ? bedrockApiKind(url) : undefined
86
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown'
87
+ if (kind === 'converse' || kind === 'converse-stream') {
88
+ const u = body.usage as Record<string, number> | undefined
89
+ if (!u) return undefined
90
+ return { inputTokens: u.inputTokens, outputTokens: u.outputTokens, totalTokens: u.totalTokens ?? (u.inputTokens ?? 0) + (u.outputTokens ?? 0) }
91
+ }
92
+ if (vendor === 'anthropic') {
93
+ const u = body.usage as Record<string, number> | undefined
94
+ if (!u) return undefined
95
+ return { inputTokens: u.input_tokens, outputTokens: u.output_tokens, totalTokens: (u.input_tokens ?? 0) + (u.output_tokens ?? 0) }
96
+ }
97
+ return undefined
98
+ }
84
99
  return undefined
85
100
  }
86
101
 
87
- function extractAssistantMessage(provider: string, body: Record<string, unknown>): Record<string, unknown> | null {
102
+ function extractAssistantMessage(provider: string, body: Record<string, unknown>, url?: string): Record<string, unknown> | null {
88
103
  if (provider === 'openai' || provider === 'grok' || provider === 'kimi') {
89
104
  const choices = body.choices
90
105
  if (Array.isArray(choices) && choices.length > 0) {
@@ -105,6 +120,17 @@ function extractAssistantMessage(provider: string, body: Record<string, unknown>
105
120
  if (content && typeof content === 'object') return content as Record<string, unknown>
106
121
  }
107
122
  }
123
+ if (provider === 'bedrock') {
124
+ const kind = url ? bedrockApiKind(url) : undefined
125
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown'
126
+ if (kind === 'converse' || kind === 'converse-stream') {
127
+ const output = body.output as Record<string, unknown> | undefined
128
+ const msg = output?.message
129
+ if (msg && typeof msg === 'object') return msg as Record<string, unknown>
130
+ } else if (vendor === 'anthropic' && Array.isArray(body.content)) {
131
+ return { role: 'assistant', content: body.content }
132
+ }
133
+ }
108
134
  return null
109
135
  }
110
136
 
@@ -115,6 +141,7 @@ const AI_PATTERNS: Record<string, RegExp> = {
115
141
  gemini: /https?:\/\/generativelanguage\.googleapis\.com\/.*\/models\/[^\/:]+:(generateContent|streamGenerateContent)/,
116
142
  grok: /https?:\/\/api\.x\.ai\/v1\/(chat\/)?completions/,
117
143
  kimi: /https?:\/\/api\.moonshot\.ai\/v1\/(chat\/)?completions/,
144
+ bedrock: /https?:\/\/bedrock-runtime\.[^./]+\.amazonaws\.com\/model\/[^/]+\/(invoke|invoke-with-response-stream|converse|converse-stream)/,
118
145
  }
119
146
 
120
147
  /** Detect which provider (if any) a URL belongs to */
@@ -125,18 +152,120 @@ function detectProvider(url: string): string | null {
125
152
  return null
126
153
  }
127
154
 
128
- /** Extract model name from request body or URL (for Gemini) */
155
+ /**
156
+ * Bedrock URL routes:
157
+ * POST /model/{modelId}/invoke
158
+ * POST /model/{modelId}/invoke-with-response-stream
159
+ * POST /model/{modelId}/converse
160
+ * POST /model/{modelId}/converse-stream
161
+ */
162
type BedrockApiKind = 'invoke' | 'invoke-stream' | 'converse' | 'converse-stream'

/**
 * Classify a Bedrock runtime URL by the operation suffix it contains.
 *
 * An operation only counts when it is immediately followed by `?` or by the
 * end of the string. Patterns are tried in a fixed order and the first hit
 * wins; `invoke-with-response-stream` is reported as `'invoke-stream'`.
 *
 * @internal — exported for tests only
 * @param url Request URL to inspect.
 * @returns The detected API kind, or `undefined` when no operation matches.
 */
export function bedrockApiKind(url: string): BedrockApiKind | undefined {
  const routes: Array<[RegExp, BedrockApiKind]> = [
    [/\/converse-stream(\?|$)/, 'converse-stream'],
    [/\/converse(\?|$)/, 'converse'],
    [/\/invoke-with-response-stream(\?|$)/, 'invoke-stream'],
    [/\/invoke(\?|$)/, 'invoke'],
  ]
  for (const [pattern, apiKind] of routes) {
    if (pattern.test(url)) return apiKind
  }
  return undefined
}
172
+
173
/**
 * Pull the path segment that sits between `/model/` and the next `/` out of a
 * Bedrock runtime URL and percent-decode it.
 *
 * @internal — exported for tests only
 * @param url Request URL to inspect.
 * @returns The decoded segment; the raw segment when percent-decoding throws;
 *   or the literal `'unknown'` when the URL has no such segment.
 */
export function extractBedrockModelId(url: string): string {
  const found = url.match(/\/model\/([^/]+)\//)
  if (found === null) return 'unknown'

  const rawSegment = found[1]
  try {
    return decodeURIComponent(rawSegment)
  } catch {
    // Malformed percent-encoding: hand back the segment exactly as it appeared.
    return rawSegment
  }
}
179
+
180
type BedrockVendor = 'anthropic' | 'amazon' | 'cohere' | 'meta' | 'mistral' | 'ai21' | 'stability' | 'unknown'

/**
 * Map a Bedrock model identifier to the vendor whose body format it uses.
 *
 * Accepted shapes:
 *   - plain ids:              `anthropic.claude-3-sonnet-20240229-v1:0`
 *   - inference-profile ids:  `<prefix>.anthropic.claude-...` where `<prefix>`
 *     is any single routing token (`us`, `eu`, `apac`, `global`, `us-gov`, ...)
 *   - ARNs whose resource part (after a `/`) is one of the shapes above
 *
 * Rather than keeping an allow-list of routing prefixes (which silently turns
 * every new prefix into `'unknown'`), each `/`-separated segment is split on
 * `.` and its first two tokens are checked against the known vendor names.
 *
 * @internal — exported for tests only
 * @param modelId Model identifier, already percent-decoded.
 * @returns The recognised vendor, or `'unknown'` when no token matches.
 */
export function bedrockVendor(modelId: string): BedrockVendor {
  const knownVendors = ['anthropic', 'amazon', 'cohere', 'meta', 'mistral', 'ai21', 'stability'] as const
  const asVendor = (token: string | undefined): BedrockVendor | undefined =>
    knownVendors.find((v) => v === token)

  for (const segment of modelId.split('/')) {
    const [first, second] = segment.split('.')
    // Vendor in leading position wins; otherwise allow exactly one routing prefix before it.
    const vendor = asVendor(first) ?? asVendor(second)
    if (vendor !== undefined) return vendor
  }
  return 'unknown'
}
198
+
199
/**
 * Resolve the model name for a request.
 *
 * Gemini and Bedrock carry the model in the URL; every other provider is
 * expected to carry it in the request body's `model` field. Falls back to the
 * literal `'unknown'` when nothing usable is found.
 */
function extractModel(provider: string, body: Record<string, unknown>, url: string): string {
  switch (provider) {
    case 'gemini': {
      // URL shape: .../models/gemini-1.5-pro:generateContent
      const found = url.match(/\/models\/([^/:]+):/)
      return found === null ? 'unknown' : found[1]
    }
    case 'bedrock':
      return extractBedrockModelId(url)
    default: {
      const { model } = body
      return typeof model === 'string' ? model : 'unknown'
    }
  }
}
137
211
 
138
212
  /** Extract prompt text from request body */
139
- function extractPrompt(provider: string, body: Record<string, unknown>): string {
213
+ function extractPrompt(provider: string, body: Record<string, unknown>, url?: string): string {
214
+ if (provider === 'bedrock') {
215
+ const kind = url ? bedrockApiKind(url) : undefined
216
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown'
217
+
218
+ if (kind === 'converse' || kind === 'converse-stream') {
219
+ let systemPrefix = ''
220
+ if (Array.isArray(body.system)) {
221
+ systemPrefix = body.system
222
+ .map((b: unknown) => (b && typeof b === 'object' ? String((b as Record<string, unknown>).text ?? '') : String(b)))
223
+ .filter(Boolean)
224
+ .map((t) => `system: ${t}`)
225
+ .join('\n')
226
+ if (systemPrefix) systemPrefix += '\n'
227
+ }
228
+ const messages = body.messages
229
+ if (Array.isArray(messages)) {
230
+ const msgText = messages
231
+ .map((m: unknown) => {
232
+ if (!m || typeof m !== 'object') return String(m)
233
+ const msg = m as Record<string, unknown>
234
+ let content = msg.content
235
+ if (Array.isArray(content)) {
236
+ content = content
237
+ .map((b: unknown) => (b && typeof b === 'object' ? String((b as Record<string, unknown>).text ?? '') : String(b)))
238
+ .filter(Boolean)
239
+ .join('')
240
+ }
241
+ return `${msg.role}: ${content}`
242
+ })
243
+ .join('\n')
244
+ return systemPrefix + msgText
245
+ }
246
+ return systemPrefix
247
+ }
248
+
249
+ if (vendor === 'anthropic') {
250
+ // Same body shape as anthropic — fall through to the anthropic branch below.
251
+ provider = 'anthropic'
252
+ } else {
253
+ // Best-effort for other vendors
254
+ if (typeof body.prompt === 'string') return body.prompt
255
+ if (typeof body.inputText === 'string') return body.inputText
256
+ if (Array.isArray(body.messages)) {
257
+ return body.messages
258
+ .map((m: unknown) => {
259
+ if (!m || typeof m !== 'object') return String(m)
260
+ const msg = m as Record<string, unknown>
261
+ return `${msg.role ?? 'user'}: ${String(msg.content ?? '')}`
262
+ })
263
+ .join('\n')
264
+ }
265
+ return ''
266
+ }
267
+ }
268
+
140
269
  if (provider === 'openai' || provider === 'anthropic' || provider === 'grok' || provider === 'kimi') {
141
270
  let systemPrefix = ''
142
271
  // Anthropic supports a top-level `system` parameter
@@ -215,12 +344,65 @@ function extractPrompt(provider: string, body: Record<string, unknown>): string
215
344
  }
216
345
 
217
346
  /** Extract completion text from response body */
218
- function extractCompletion(provider: string, responseBody: Record<string, unknown>): string {
347
+ function extractCompletion(provider: string, responseBody: Record<string, unknown>, url?: string): string {
219
348
  // Handle buffered streaming format
220
349
  if (responseBody.streamed === true && typeof responseBody.completion === 'string') {
221
350
  return responseBody.completion
222
351
  }
223
352
 
353
+ if (provider === 'bedrock') {
354
+ const kind = url ? bedrockApiKind(url) : undefined
355
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown'
356
+
357
+ if (kind === 'converse' || kind === 'converse-stream') {
358
+ const output = responseBody.output as Record<string, unknown> | undefined
359
+ const msg = output?.message as Record<string, unknown> | undefined
360
+ if (msg && Array.isArray(msg.content)) {
361
+ return msg.content
362
+ .map((b: unknown) => (b && typeof b === 'object' ? String((b as Record<string, unknown>).text ?? '') : ''))
363
+ .filter(Boolean)
364
+ .join('')
365
+ }
366
+ return ''
367
+ }
368
+
369
+ if (vendor === 'anthropic') {
370
+ const content = responseBody.content
371
+ if (Array.isArray(content)) {
372
+ return content
373
+ .map((block: unknown) => {
374
+ if (block && typeof block === 'object') {
375
+ const b = block as Record<string, unknown>
376
+ if (b.type === 'text' && typeof b.text === 'string') return b.text
377
+ }
378
+ return ''
379
+ })
380
+ .filter(Boolean)
381
+ .join('')
382
+ }
383
+ return ''
384
+ }
385
+
386
+ // Other vendors — best-effort
387
+ if (typeof responseBody.completion === 'string') return responseBody.completion // legacy Claude
388
+ if (typeof responseBody.outputText === 'string') return responseBody.outputText // Titan
389
+ if (typeof responseBody.generation === 'string') return responseBody.generation // Llama
390
+ if (Array.isArray(responseBody.results)) {
391
+ // Titan / Cohere results array
392
+ return (responseBody.results as unknown[])
393
+ .map((r) => (r && typeof r === 'object' ? String((r as Record<string, unknown>).outputText ?? (r as Record<string, unknown>).text ?? '') : ''))
394
+ .filter(Boolean)
395
+ .join('')
396
+ }
397
+ if (Array.isArray(responseBody.choices)) {
398
+ // Mistral
399
+ const first = (responseBody.choices as unknown[])[0] as Record<string, unknown> | undefined
400
+ const msg = first?.message as Record<string, unknown> | undefined
401
+ if (msg && typeof msg.content === 'string') return msg.content
402
+ }
403
+ return ''
404
+ }
405
+
224
406
  if (provider === 'openai' || provider === 'grok' || provider === 'kimi') {
225
407
  const choices = responseBody.choices
226
408
  if (Array.isArray(choices) && choices.length > 0) {
@@ -280,6 +462,227 @@ function extractCompletion(provider: string, responseBody: Record<string, unknow
280
462
  return ''
281
463
  }
282
464
 
465
+ // ---------------------------------------------------------------------------
466
+ // Bedrock binary eventstream support (application/vnd.amazon.eventstream)
467
+ // ---------------------------------------------------------------------------
468
+ //
469
+ // Wire format: 12-byte prelude (total_length | headers_length | prelude_crc),
470
+ // headers TLV block, JSON payload, 4-byte message CRC. We ignore CRCs on read
471
+ // (we are observing, not validating). On write we compute CRC-32 so that the
472
+ // synthesized stream is acceptable to strict consumers like the AWS SDK.
473
+
474
/**
 * Bit-at-a-time CRC-32 over `bytes` using the reflected polynomial 0xEDB88320.
 *
 * @param bytes Data to checksum.
 * @param init Starting register value (defaults to 0xFFFFFFFF).
 * @returns The register XORed with 0xFFFFFFFF, as an unsigned 32-bit number.
 */
function crc32(bytes: Uint8Array, init = 0xFFFFFFFF): number {
  let register = init
  for (const byte of bytes) {
    register ^= byte
    let remainingBits = 8
    while (remainingBits-- > 0) {
      const lowBitSet = (register & 1) !== 0
      register >>>= 1
      if (lowBitSet) register ^= 0xEDB88320
    }
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned.
  return (register ^ 0xFFFFFFFF) >>> 0
}
484
+
485
/**
 * Join a list of byte chunks into one freshly allocated `Uint8Array`,
 * preserving chunk order.
 */
function concatChunks(chunks: Uint8Array[]): Uint8Array {
  const totalLength = chunks.reduce((sum, part) => sum + part.length, 0)
  const joined = new Uint8Array(totalLength)
  let cursor = 0
  chunks.forEach((part) => {
    joined.set(part, cursor)
    cursor += part.length
  })
  return joined
}
493
+
494
interface BedrockFrame { headers: Record<string, string>; payload: Uint8Array }

/**
 * Split a fully buffered `application/vnd.amazon.eventstream` body into frames.
 *
 * Frame layout (all integers big-endian):
 *   [total_length:u32][headers_length:u32][prelude_crc:u32][headers][payload][message_crc:u32]
 * Header layout:
 *   [name_len:u8][name][type:u8][value]
 * where string (type 7) and byte_array (type 6) values carry a u16 length
 * prefix and every other supported type has a fixed width.
 *
 * CRCs are not verified. Only string-valued headers are returned; all other
 * header types are skipped over. Parsing stops at the first frame whose
 * declared total length is impossible (shorter than 16 bytes or longer than
 * the remaining buffer). A frame whose declared headers length does not fit
 * inside the frame is dropped and parsing continues with the next frame.
 *
 * @internal — exported for tests only
 * @param buf Concatenated bytes of the response body.
 * @returns Frames in wire order; `payload` is a view into `buf`, not a copy.
 */
export function parseBedrockEventStream(buf: Uint8Array): BedrockFrame[] {
  const PRELUDE_LEN = 12
  const MESSAGE_CRC_LEN = 4
  const TYPE_BYTE_ARRAY = 6
  const TYPE_STRING = 7
  // Fixed-width header values: bool_true, bool_false, byte, short, int, long, timestamp, uuid.
  const fixedLenByType: Record<number, number> = { 0: 0, 1: 0, 2: 1, 3: 2, 4: 4, 5: 8, 8: 8, 9: 16 }

  const frames: BedrockFrame[] = []
  const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength)
  const decoder = new TextDecoder()
  let offset = 0
  while (offset + PRELUDE_LEN <= buf.length) {
    const totalLen = view.getUint32(offset, false)
    const headersLen = view.getUint32(offset + 4, false)
    if (totalLen < PRELUDE_LEN + MESSAGE_CRC_LEN || totalLen > buf.length - offset) break

    // A headers block that overruns the frame means the frame is corrupt: skip it
    // instead of reading header fields out of the payload or past the buffer.
    if (headersLen > totalLen - PRELUDE_LEN - MESSAGE_CRC_LEN) {
      offset += totalLen
      continue
    }

    const headers: Record<string, string> = {}
    let h = offset + PRELUDE_LEN
    const hEnd = h + headersLen
    while (h < hEnd) {
      const nameLen = buf[h]; h += 1
      // The name must be followed by at least the one-byte type tag.
      if (h + nameLen >= hEnd) break
      const name = decoder.decode(buf.subarray(h, h + nameLen)); h += nameLen
      const type = buf[h]; h += 1
      if (type === TYPE_STRING || type === TYPE_BYTE_ARRAY) {
        // Both variable-length types share the u16 length prefix.
        if (h + 2 > hEnd) break
        const valueLen = view.getUint16(h, false); h += 2
        if (h + valueLen > hEnd) break
        if (type === TYPE_STRING) {
          headers[name] = decoder.decode(buf.subarray(h, h + valueLen))
        }
        h += valueLen
      } else {
        const skipLen = fixedLenByType[type]
        // Unknown type tag: its width is unknowable, so stop reading headers for this frame.
        if (skipLen === undefined) break
        h += skipLen
      }
    }

    // Payload boundaries come from the prelude, so they stay correct even when
    // header parsing above bailed out early.
    frames.push({ headers, payload: buf.subarray(hEnd, offset + totalLen - MESSAGE_CRC_LEN) })
    offset += totalLen
  }
  return frames
}
538
+
539
/**
 * Serialise one eventstream frame carrying `payloadObj` as JSON.
 *
 * The frame gets three string headers — `:event-type` (from `eventType`),
 * `:content-type` (`application/json`) and `:message-type` (`event`) — plus a
 * prelude CRC over the first 8 bytes and a message CRC over everything before
 * the trailing 4 bytes. All integers are written big-endian.
 *
 * @internal — exported for tests only
 * @param eventType Value for the `:event-type` header.
 * @param payloadObj Object to JSON-encode as the frame payload.
 * @returns The complete frame bytes.
 */
export function encodeBedrockFrame(eventType: string, payloadObj: Record<string, unknown>): Uint8Array {
  const utf8 = new TextEncoder()
  const body = utf8.encode(JSON.stringify(payloadObj))

  // One header: [name_len:u8][name][type=7 (string)][value_len:u16][value]
  function stringHeader(name: string, value: string): Uint8Array {
    const nameBytes = utf8.encode(name)
    const valueBytes = utf8.encode(value)
    const typeAt = 1 + nameBytes.length
    const valueLenAt = typeAt + 1
    const header = new Uint8Array(valueLenAt + 2 + valueBytes.length)
    header[0] = nameBytes.length
    header.set(nameBytes, 1)
    header[typeAt] = 7
    new DataView(header.buffer).setUint16(valueLenAt, valueBytes.length, false)
    header.set(valueBytes, valueLenAt + 2)
    return header
  }

  const headerPairs: Array<[string, string]> = [
    [':event-type', eventType],
    [':content-type', 'application/json'],
    [':message-type', 'event'],
  ]
  const headerBlock = concatChunks(headerPairs.map(([name, value]) => stringHeader(name, value)))

  const headersAt = 12
  const bodyAt = headersAt + headerBlock.length
  const messageCrcAt = bodyAt + body.length
  const frame = new Uint8Array(messageCrcAt + 4)
  const view = new DataView(frame.buffer)

  // Prelude: total length, headers length, then the CRC of those 8 bytes.
  view.setUint32(0, frame.length, false)
  view.setUint32(4, headerBlock.length, false)
  view.setUint32(8, crc32(frame.subarray(0, 8)), false)

  frame.set(headerBlock, headersAt)
  frame.set(body, bodyAt)

  // Message CRC covers prelude + headers + payload.
  view.setUint32(messageCrcAt, crc32(frame.subarray(0, messageCrcAt)), false)
  return frame
}
577
+
578
/**
 * Drain a Bedrock binary eventstream and collect the completion text and,
 * when present, token usage.
 *
 * The whole stream is read into memory, split into frames, and each frame's
 * payload is parsed as JSON (frames that are not valid JSON are ignored).
 *
 *  - For `invoke-with-response-stream` URLs, a frame whose JSON has a string
 *    `bytes` field is treated as a base64 wrapper around an inner JSON event.
 *    The base64 is decoded to bytes and then decoded as UTF-8, so multi-byte
 *    characters in the completion survive intact. Anthropic models contribute
 *    `content_block_delta` text and `message_start` / `message_delta` usage;
 *    other vendors contribute `outputText` / `generation` strings.
 *  - Otherwise the frame is handled as a converse-stream event:
 *    `contentBlockDelta` text is appended and `metadata.usage` is recorded.
 *    The event body may be the payload itself or nested under a key named
 *    after the event type.
 *
 * @internal — exported for tests only
 * @param url Request URL; used to determine the API kind and model vendor.
 * @param stream Response body stream; its reader lock is released on exit.
 * @returns Concatenated completion text and the last usage record seen, if any.
 */
export async function bufferBedrockEventStream(
  url: string,
  stream: ReadableStream<Uint8Array>,
): Promise<{ completion: string; usage?: UsageInfo }> {
  const reader = stream.getReader()
  const chunks: Uint8Array[] = []
  try {
    for (;;) {
      const { done, value } = await reader.read()
      if (done) break
      chunks.push(value)
    }
  } finally {
    reader.releaseLock()
  }
  const buf = concatChunks(chunks)
  const frames = parseBedrockEventStream(buf)

  const kind = bedrockApiKind(url)
  const vendor = bedrockVendor(extractBedrockModelId(url))
  const isInvoke = kind === 'invoke-stream'
  const decoder = new TextDecoder()

  // base64 → UTF-8 text. `atob` yields one char per *byte*, so its result must
  // be turned back into bytes and UTF-8 decoded; using it directly as text
  // corrupts every non-ASCII character.
  const decodeBase64Utf8 = (b64: string): string => {
    if (typeof atob === 'function') {
      const binary = atob(b64)
      const bytes = new Uint8Array(binary.length)
      for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i)
      return decoder.decode(bytes)
    }
    return Buffer.from(b64, 'base64').toString('utf8')
  }

  let completion = ''
  let usage: UsageInfo | undefined

  for (const frame of frames) {
    let obj: Record<string, unknown>
    try {
      obj = JSON.parse(decoder.decode(frame.payload)) as Record<string, unknown>
    } catch {
      continue
    }

    const eventType = frame.headers[':event-type']

    if (isInvoke && typeof obj.bytes === 'string') {
      // InvokeModelWithResponseStream wraps the underlying model's SSE-equivalent
      // JSON event in a base64 'bytes' field per frame.
      try {
        const decoded = JSON.parse(decodeBase64Utf8(obj.bytes)) as Record<string, unknown>
        if (vendor === 'anthropic') {
          if (decoded.type === 'content_block_delta') {
            const d = decoded.delta as Record<string, unknown> | undefined
            if (d?.type === 'text_delta' && typeof d.text === 'string') completion += d.text
          }
          if (decoded.type === 'message_delta') {
            const u = decoded.usage as Record<string, number> | undefined
            if (u) usage = extractUsage('anthropic', { usage: u }) ?? usage
          }
          if (decoded.type === 'message_start') {
            const msg = decoded.message as Record<string, unknown> | undefined
            if (msg?.usage) usage = extractUsage('anthropic', { usage: msg.usage }) ?? usage
          }
        } else {
          // Best-effort generic
          if (typeof decoded.outputText === 'string') completion += decoded.outputText
          if (typeof decoded.generation === 'string') completion += decoded.generation
        }
      } catch { /* undecodable or non-JSON chunk: skip this frame */ }
      continue
    }

    // Converse-stream: payload IS the event body; the discriminator lives
    // in the :event-type header. Some SDK serialisations also nest the body
    // under a key matching the event type — handle both shapes.
    if (eventType === 'contentBlockDelta' || obj.contentBlockDelta) {
      const body = (obj.contentBlockDelta as Record<string, unknown> | undefined) ?? obj
      const delta = body.delta as Record<string, unknown> | undefined
      if (typeof delta?.text === 'string') completion += delta.text
    }
    if (eventType === 'metadata' || obj.metadata) {
      const body = (obj.metadata as Record<string, unknown> | undefined) ?? obj
      const u = body.usage as Record<string, number> | undefined
      if (u) {
        usage = { inputTokens: u.inputTokens, outputTokens: u.outputTokens, totalTokens: u.totalTokens ?? (u.inputTokens ?? 0) + (u.outputTokens ?? 0) }
      }
    }
  }

  return { completion, usage }
}
664
+
665
/**
 * Pick the `Content-Type` header value for a synthesized streaming response.
 *
 * @param provider Provider key.
 * @returns `application/json` for `gemini`, `application/vnd.amazon.eventstream`
 *   for `bedrock`, and `text/event-stream` for anything else.
 */
function streamContentType(provider: string): string {
  switch (provider) {
    case 'gemini':
      return 'application/json'
    case 'bedrock':
      return 'application/vnd.amazon.eventstream'
    default:
      return 'text/event-stream'
  }
}
671
+
672
/**
 * Buffer a streaming response with the parser that fits the provider.
 *
 * `bedrock` streams go through the binary eventstream parser, which can also
 * report usage. Every other provider goes through the text SSE/NDJSON
 * buffer, which yields the completion only.
 */
async function bufferAIStream(
  provider: string,
  url: string,
  stream: ReadableStream<Uint8Array>,
): Promise<{ completion: string; usage?: UsageInfo }> {
  if (provider !== 'bedrock') {
    return { completion: await bufferSSEStream(provider, stream) }
  }
  return bufferBedrockEventStream(url, stream)
}
685
+
283
686
  /** Buffer a streaming SSE/NDJSON response to extract the completion text */
284
687
  async function bufferSSEStream(
285
688
  provider: string,
@@ -397,6 +800,7 @@ function extractStreamUsage(provider: string, rawSSE: string): UsageInfo | undef
397
800
  function synthesizeCompletionJSON(
398
801
  provider: string,
399
802
  completion: string,
803
+ url?: string,
400
804
  ): Record<string, unknown> {
401
805
  if (provider === 'gemini') {
402
806
  return {
@@ -413,6 +817,29 @@ function synthesizeCompletionJSON(
413
817
  stop_sequence: null,
414
818
  }
415
819
  }
820
+ if (provider === 'bedrock') {
821
+ const kind = url ? bedrockApiKind(url) : undefined
822
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown'
823
+ if (kind === 'converse' || kind === 'converse-stream') {
824
+ return {
825
+ output: { message: { role: 'assistant', content: [{ text: completion }] } },
826
+ stopReason: 'end_turn',
827
+ usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
828
+ }
829
+ }
830
+ if (vendor === 'anthropic') {
831
+ return {
832
+ id: 'replay',
833
+ type: 'message',
834
+ role: 'assistant',
835
+ content: [{ type: 'text', text: completion }],
836
+ stop_reason: 'end_turn',
837
+ stop_sequence: null,
838
+ }
839
+ }
840
+ // Generic fallback
841
+ return { outputText: completion }
842
+ }
416
843
  // OpenAI / Grok / Kimi format
417
844
  return {
418
845
  id: 'replay',
@@ -425,9 +852,44 @@ function synthesizeCompletionJSON(
425
852
  function synthesizeSSEStream(
426
853
  provider: string,
427
854
  completion: string,
855
+ url?: string,
428
856
  ): ReadableStream<Uint8Array> {
429
857
  const encoder = new TextEncoder()
430
858
 
859
+ if (provider === 'bedrock') {
860
+ const kind = url ? bedrockApiKind(url) : undefined
861
+ const vendor = url ? bedrockVendor(extractBedrockModelId(url)) : 'unknown'
862
+ return new ReadableStream<Uint8Array>({
863
+ start(ctrl) {
864
+ if (kind === 'converse-stream') {
865
+ ctrl.enqueue(encodeBedrockFrame('messageStart', { role: 'assistant' }))
866
+ ctrl.enqueue(encodeBedrockFrame('contentBlockDelta', { contentBlockIndex: 0, delta: { text: completion } }))
867
+ ctrl.enqueue(encodeBedrockFrame('contentBlockStop', { contentBlockIndex: 0 }))
868
+ ctrl.enqueue(encodeBedrockFrame('messageStop', { stopReason: 'end_turn' }))
869
+ ctrl.enqueue(encodeBedrockFrame('metadata', { usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }, metrics: { latencyMs: 0 } }))
870
+ } else if (vendor === 'anthropic') {
871
+ // InvokeModelWithResponseStream wraps the inner Anthropic SSE event JSON in { bytes }
872
+ const wrap = (eventType: string, inner: Record<string, unknown>) => {
873
+ const innerJson = JSON.stringify(inner)
874
+ const b64 = typeof btoa === 'function' ? btoa(innerJson) : Buffer.from(innerJson, 'utf8').toString('base64')
875
+ return encodeBedrockFrame(eventType, { bytes: b64 })
876
+ }
877
+ ctrl.enqueue(wrap('chunk', { type: 'message_start', message: { id: 'replay', type: 'message', role: 'assistant', content: [], stop_reason: null, stop_sequence: null } }))
878
+ ctrl.enqueue(wrap('chunk', { type: 'content_block_start', index: 0, content_block: { type: 'text', text: '' } }))
879
+ ctrl.enqueue(wrap('chunk', { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: completion } }))
880
+ ctrl.enqueue(wrap('chunk', { type: 'content_block_stop', index: 0 }))
881
+ ctrl.enqueue(wrap('chunk', { type: 'message_delta', delta: { stop_reason: 'end_turn', stop_sequence: null } }))
882
+ ctrl.enqueue(wrap('chunk', { type: 'message_stop' }))
883
+ } else {
884
+ // Generic fallback: one chunk wrapping the completion text
885
+ const b64 = typeof btoa === 'function' ? btoa(JSON.stringify({ outputText: completion })) : Buffer.from(JSON.stringify({ outputText: completion }), 'utf8').toString('base64')
886
+ ctrl.enqueue(encodeBedrockFrame('chunk', { bytes: b64 }))
887
+ }
888
+ ctrl.close()
889
+ },
890
+ })
891
+ }
892
+
431
893
  return new ReadableStream<Uint8Array>({
432
894
  start(ctrl) {
433
895
  if (provider === 'gemini') {
@@ -503,27 +965,37 @@ export function installAIInterceptor(): void {
503
965
  url, provider, model: capturedModel, messages: capturedMessages,
504
966
  body: capturedReq, promptSnippet: capturedSnippet,
505
967
  }
506
- const isStreaming = capturedReq.stream === true
968
+ const isStreaming = provider === 'bedrock'
969
+ ? (bedrockApiKind(url) ?? '').endsWith('-stream')
970
+ : capturedReq.stream === true
507
971
  try {
508
972
  const cloned = response.clone()
509
973
  if (!isStreaming) {
510
974
  // Non-streaming: parse JSON response for usage
511
975
  const responseBody = await cloned.json() as Record<string, unknown>
512
- captured.usage = extractUsage(provider, responseBody)
976
+ captured.usage = extractUsage(provider, responseBody, url)
513
977
  } else if (cloned.body) {
514
- // Streaming: read the raw SSE text to extract usage from final events
515
- try {
516
- const decoder = new TextDecoder()
517
- const reader = cloned.body.getReader()
518
- let rawSSE = ''
519
- for (;;) {
520
- const { done, value } = await reader.read()
521
- if (done) break
522
- rawSSE += decoder.decode(value, { stream: true })
523
- }
524
- reader.releaseLock()
525
- captured.usage = extractStreamUsage(provider, rawSSE)
526
- } catch { /* stream read failed */ }
978
+ if (provider === 'bedrock') {
979
+ // Binary eventstream: parse frames to extract usage
980
+ try {
981
+ const { usage } = await bufferBedrockEventStream(url, cloned.body)
982
+ captured.usage = usage
983
+ } catch { /* parse failed */ }
984
+ } else {
985
+ // SSE/NDJSON text: extract usage from final events
986
+ try {
987
+ const decoder = new TextDecoder()
988
+ const reader = cloned.body.getReader()
989
+ let rawSSE = ''
990
+ for (;;) {
991
+ const { done, value } = await reader.read()
992
+ if (done) break
993
+ rawSSE += decoder.decode(value, { stream: true })
994
+ }
995
+ reader.releaseLock()
996
+ captured.usage = extractStreamUsage(provider, rawSSE)
997
+ } catch { /* stream read failed */ }
998
+ }
527
999
  }
528
1000
  } catch {
529
1001
  // Response body not available — usage won't be captured
@@ -555,14 +1027,24 @@ export function installAIInterceptor(): void {
555
1027
  if (rawBody && typeof rawBody === 'string') {
556
1028
  const body = JSON.parse(rawBody) as Record<string, unknown>
557
1029
  model = extractModel(provider, body, url)
558
- prompt = extractPrompt(provider, body)
559
- isStreaming = body.stream === true
1030
+ prompt = extractPrompt(provider, body, url)
1031
+ isStreaming = provider === 'bedrock'
1032
+ ? (bedrockApiKind(url) ?? '').endsWith('-stream')
1033
+ : body.stream === true
560
1034
  // Capture full messages array for rich display in the dashboard
561
1035
  if (Array.isArray(body.messages)) messages = body.messages
562
1036
  else if (Array.isArray(body.contents)) messages = body.contents // Gemini
1037
+ } else if (provider === 'bedrock') {
1038
+ // No body parsed but URL alone is enough to set streaming flag
1039
+ isStreaming = (bedrockApiKind(url) ?? '').endsWith('-stream')
1040
+ model = extractModel(provider, {}, url)
563
1041
  }
564
1042
  } catch {
565
1043
  // Ignore parse errors — still pass through
1044
+ if (provider === 'bedrock') {
1045
+ isStreaming = (bedrockApiKind(url) ?? '').endsWith('-stream')
1046
+ model = extractModel(provider, {}, url)
1047
+ }
566
1048
  }
567
1049
 
568
1050
  const ctx = getCaptureContext()
@@ -578,9 +1060,9 @@ export function installAIInterceptor(): void {
578
1060
 
579
1061
  if (isStreaming && response.body) {
580
1062
  const [streamForCaller, streamForRecorder] = response.body.tee()
581
- bufferSSEStream(provider, streamForRecorder).then((completion) => {
1063
+ bufferAIStream(provider, url, streamForRecorder).then(({ completion, usage }) => {
582
1064
  const durationMs = rawDateNow() - start
583
- pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: { streamed: true, completion }, timestamp: start, durationMs })
1065
+ pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: { streamed: true, completion }, timestamp: start, durationMs, ...(usage ? { usage } : {}) })
584
1066
  }).catch(() => {
585
1067
  const durationMs = rawDateNow() - start
586
1068
  pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: null, streamed: true, streamRaw: '', timestamp: start, durationMs })
@@ -591,8 +1073,8 @@ export function installAIInterceptor(): void {
591
1073
  try {
592
1074
  const cloned = response.clone()
593
1075
  const responseBody = await cloned.json() as Record<string, unknown>
594
- const completion = extractCompletion(provider, responseBody)
595
- const usage = extractUsage(provider, responseBody)
1076
+ const completion = extractCompletion(provider, responseBody, url)
1077
+ const usage = extractUsage(provider, responseBody, url)
596
1078
  const durationMs = rawDateNow() - start
597
1079
  const event: WorkflowEvent = {
598
1080
  id, type: 'ai', name: model, input: eventInput, output: { completion },
@@ -619,18 +1101,18 @@ export function installAIInterceptor(): void {
619
1101
  if (frozen && frozen.type === 'ai') {
620
1102
  pushTelemetryEvent(frozen)
621
1103
  const frozenOutput = frozen.output as Record<string, unknown> | null
622
- const completion = frozenOutput ? extractCompletion(provider, frozenOutput) : '(replayed)'
1104
+ const completion = frozenOutput ? extractCompletion(provider, frozenOutput, url) : '(replayed)'
623
1105
 
624
1106
  if (isStreaming) {
625
- return new Response(synthesizeSSEStream(provider, completion), {
1107
+ return new Response(synthesizeSSEStream(provider, completion, url), {
626
1108
  status: 200,
627
- headers: { 'Content-Type': provider === 'gemini' ? 'application/json' : 'text/event-stream' },
1109
+ headers: { 'Content-Type': streamContentType(provider) },
628
1110
  })
629
1111
  }
630
1112
 
631
1113
  const body = frozenOutput?.streamed === true
632
- ? synthesizeCompletionJSON(provider, completion)
633
- : (frozenOutput ?? synthesizeCompletionJSON(provider, completion))
1114
+ ? synthesizeCompletionJSON(provider, completion, url)
1115
+ : (frozenOutput ?? synthesizeCompletionJSON(provider, completion, url))
634
1116
  return new Response(JSON.stringify(body), {
635
1117
  status: 200,
636
1118
  headers: { 'Content-Type': 'application/json' },
@@ -643,9 +1125,9 @@ export function installAIInterceptor(): void {
643
1125
 
644
1126
  if (isStreaming && response.body) {
645
1127
  const [streamForCaller, streamForRecorder] = response.body.tee()
646
- bufferSSEStream(provider, streamForRecorder).then((completion) => {
1128
+ bufferAIStream(provider, url, streamForRecorder).then(({ completion, usage }) => {
647
1129
  const durationMs = rawDateNow() - start
648
- pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: { streamed: true, completion }, timestamp: start, durationMs })
1130
+ pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: { streamed: true, completion }, timestamp: start, durationMs, ...(usage ? { usage } : {}) })
649
1131
  }).catch(() => {
650
1132
  const durationMs = rawDateNow() - start
651
1133
  pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: null, streamed: true, streamRaw: '', timestamp: start, durationMs })
@@ -656,8 +1138,8 @@ export function installAIInterceptor(): void {
656
1138
  try {
657
1139
  const cloned = response.clone()
658
1140
  const responseBody = await cloned.json() as Record<string, unknown>
659
- const completion = extractCompletion(provider, responseBody)
660
- const usage = extractUsage(provider, responseBody)
1141
+ const completion = extractCompletion(provider, responseBody, url)
1142
+ const usage = extractUsage(provider, responseBody, url)
661
1143
  const durationMs = rawDateNow() - start
662
1144
  pushTelemetryEvent({ id, type: 'ai', name: model, input: eventInput, output: { completion }, timestamp: start, durationMs, ...(usage ? { usage } : {}) })
663
1145
  } catch {
@@ -687,20 +1169,20 @@ export function installAIInterceptor(): void {
687
1169
  if (isReplayMatch && historicalEvent) {
688
1170
  recorder.record(historicalEvent)
689
1171
  const historicalOutput = historicalEvent.output as Record<string, unknown> | null
690
- const completion = historicalOutput ? extractCompletion(provider, historicalOutput) : '(replayed)'
1172
+ const completion = historicalOutput ? extractCompletion(provider, historicalOutput, url) : '(replayed)'
691
1173
  traceAtCall.recordLLMStep({ model, provider, prompt, completion, workflowEventId: id })
692
1174
 
693
1175
  if (isStreaming) {
694
1176
  // Current caller expects a streaming response — always synthesize SSE
695
- return new Response(synthesizeSSEStream(provider, completion), {
1177
+ return new Response(synthesizeSSEStream(provider, completion, url), {
696
1178
  status: 200,
697
- headers: { 'Content-Type': provider === 'gemini' ? 'application/json' : 'text/event-stream' },
1179
+ headers: { 'Content-Type': streamContentType(provider) },
698
1180
  })
699
1181
  }
700
1182
 
701
1183
  if (historicalOutput?.streamed === true) {
702
1184
  // Original was streamed but caller now expects JSON — synthesize a completion response
703
- return new Response(JSON.stringify(synthesizeCompletionJSON(provider, completion)), {
1185
+ return new Response(JSON.stringify(synthesizeCompletionJSON(provider, completion, url)), {
704
1186
  status: 200,
705
1187
  headers: { 'Content-Type': 'application/json' },
706
1188
  })
@@ -722,10 +1204,10 @@ export function installAIInterceptor(): void {
722
1204
  if (response.body) {
723
1205
  const [streamForCaller, streamForRecorder] = response.body.tee()
724
1206
  recorder.trackAsync(
725
- bufferSSEStream(provider, streamForRecorder).then((completion) => {
1207
+ bufferAIStream(provider, url, streamForRecorder).then(({ completion, usage }) => {
726
1208
  const durationMs = rawDateNow() - start
727
1209
  traceAtCall.recordLLMStep({ model, provider, prompt, completion, workflowEventId: id, durationMs })
728
- recorder.record({ id, type: 'ai', name: model, input: { url, provider, model, prompt, messages }, output: { streamed: true, completion }, timestamp: start, durationMs })
1210
+ recorder.record({ id, type: 'ai', name: model, input: { url, provider, model, prompt, messages }, output: { streamed: true, completion }, timestamp: start, durationMs, ...(usage ? { usage } : {}) })
729
1211
  }).catch(() => {
730
1212
  const durationMs = rawDateNow() - start
731
1213
  traceAtCall.recordLLMStep({ model, provider, prompt, completion: '(streamed-error)', workflowEventId: id, durationMs })
@@ -745,9 +1227,9 @@ export function installAIInterceptor(): void {
745
1227
  try {
746
1228
  const cloned = response.clone()
747
1229
  const responseBody = await cloned.json() as Record<string, unknown>
748
- const completion = extractCompletion(provider, responseBody)
749
- const usage = extractUsage(provider, responseBody)
750
- const assistantMessage = extractAssistantMessage(provider, responseBody)
1230
+ const completion = extractCompletion(provider, responseBody, url)
1231
+ const usage = extractUsage(provider, responseBody, url)
1232
+ const assistantMessage = extractAssistantMessage(provider, responseBody, url)
751
1233
  traceAtCall.recordLLMStep({ model, provider, prompt, completion, workflowEventId: id, durationMs })
752
1234
  recorder.record({ id, type: 'ai', name: model, input: { url, provider, model, prompt, messages }, output: assistantMessage ?? responseBody, timestamp: start, durationMs, usage })
753
1235
  } catch {
@@ -766,7 +1248,7 @@ export function installAIInterceptor(): void {
766
1248
 
767
1249
  if (isStreaming && response.body) {
768
1250
  const [streamForCaller, streamForRecorder] = response.body.tee()
769
- bufferSSEStream(provider, streamForRecorder).then((completion) => {
1251
+ bufferAIStream(provider, url, streamForRecorder).then(({ completion }) => {
770
1252
  traceAtCall.recordLLMStep({ model, provider, prompt, completion })
771
1253
  }).catch(() => {
772
1254
  traceAtCall.recordLLMStep({ model, provider, prompt, completion: '(streamed-error)' })
@@ -776,7 +1258,7 @@ export function installAIInterceptor(): void {
776
1258
  try {
777
1259
  const cloned = response.clone()
778
1260
  const responseBody = await cloned.json() as Record<string, unknown>
779
- const completion = extractCompletion(provider, responseBody)
1261
+ const completion = extractCompletion(provider, responseBody, url)
780
1262
  traceAtCall.recordLLMStep({ model, provider, prompt, completion })
781
1263
  } catch {
782
1264
  traceAtCall.recordLLMStep({ model, provider, prompt, completion: '' })