bedrock-wrapper 2.4.5 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,7 @@ import { bedrock_models } from "./bedrock-models.js";
15
15
  import {
16
16
  BedrockRuntimeClient,
17
17
  InvokeModelCommand, InvokeModelWithResponseStreamCommand,
18
+ ConverseCommand, ConverseStreamCommand,
18
19
  } from "@aws-sdk/client-bedrock-runtime";
19
20
  // helper functions
20
21
  import {
@@ -81,14 +82,82 @@ function processReasoningTags(text, awsModel) {
81
82
  return text.replace(/<reasoning>[\s\S]*?<\/reasoning>/g, '').trim();
82
83
  }
83
84
 
84
- export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging = false } = {} ) {
85
- const { region, accessKeyId, secretAccessKey } = awsCreds;
86
- let { messages, model, max_tokens, stream, temperature, top_p, include_thinking_data, stop, stop_sequences } = openaiChatCompletionsCreateObject;
87
-
88
-
89
- let {awsModelId, awsModel} = findAwsModelWithId(model);
85
/**
 * Convert OpenAI-style chat messages into the shape used by the Bedrock Converse API.
 *
 * System messages are collected separately (Converse takes them in a dedicated
 * `system` array); every other message becomes `{ role, content: [...] }` where
 * content is a list of `{ text }` / `{ image }` blocks.
 *
 * @param {Array<{role: string, content: string | Array<object>}>} messages - Chat messages.
 * @returns {Promise<{messages: Array<object>, system: Array<{text: string}>}>}
 *   Converse-formatted messages and system prompts. Messages that end up with
 *   no content blocks are omitted.
 */
async function convertToConverseFormat(messages) {
    const converseMessages = [];
    const systemPrompts = [];

    // A text block is only emitted for a non-empty string. The previous
    // `item.text || item` fallback placed the whole item object into `text`
    // whenever `text` was missing or empty.
    const isUsableText = (value) => typeof value === 'string' && value.length > 0;

    // Image formats accepted by the Converse ImageBlock.
    const supportedImageFormats = ['png', 'jpeg', 'gif', 'webp'];

    // Derive the Converse image format from a MIME type such as "image/png".
    // Falls back to 'jpeg' (the value that used to be hard-coded) when the
    // media type is absent or not one of the supported formats.
    const imageFormatFromMediaType = (mediaType) => {
        if (typeof mediaType !== 'string') {
            return 'jpeg';
        }
        const subtype = mediaType.toLowerCase().split(';')[0].split('/')[1];
        const normalized = subtype === 'jpg' ? 'jpeg' : subtype;
        return supportedImageFormats.includes(normalized) ? normalized : 'jpeg';
    };

    for (const msg of messages ?? []) {
        if (msg.role === "system") {
            // System messages are handled separately in Converse API
            if (typeof msg.content === 'string') {
                if (isUsableText(msg.content)) {
                    systemPrompts.push({ text: msg.content });
                }
            } else if (Array.isArray(msg.content)) {
                // Join the text parts of a structured system message
                const textContent = msg.content
                    .filter((item) => item.type === 'text' && isUsableText(item.text))
                    .map((item) => item.text)
                    .join('\n');
                if (textContent) {
                    systemPrompts.push({ text: textContent });
                }
            }
            continue;
        }

        // Convert user and assistant messages
        const content = [];

        if (typeof msg.content === 'string') {
            // Skip blank strings: an empty text block is still "empty content"
            if (isUsableText(msg.content)) {
                content.push({ text: msg.content });
            }
        } else if (Array.isArray(msg.content)) {
            for (const item of msg.content) {
                if (item.type === 'text') {
                    if (isUsableText(item.text)) {
                        content.push({ text: item.text });
                    }
                } else if (item.type === 'image') {
                    // Inline base64 image (item.source.data)
                    if (item.source && item.source.data) {
                        content.push({
                            image: {
                                format: imageFormatFromMediaType(item.source.media_type),
                                source: {
                                    bytes: Buffer.from(item.source.data, 'base64'),
                                },
                            },
                        });
                    }
                } else if (item.type === 'image_url') {
                    // image_url may be a plain string or an object with a `url` field
                    const imageUrl = typeof item.image_url === 'string'
                        ? item.image_url
                        : item.image_url?.url;
                    if (!imageUrl) {
                        continue;
                    }
                    // processImage is defined elsewhere in this file; its result is
                    // decoded as base64 and labelled 'jpeg', as before.
                    // NOTE(review): presumably processImage normalizes to JPEG — confirm.
                    const processedImage = await processImage(imageUrl);
                    content.push({
                        image: {
                            format: 'jpeg',
                            source: {
                                bytes: Buffer.from(processedImage, 'base64'),
                            },
                        },
                    });
                }
            }
        }

        // Only add messages with actual content (Converse API doesn't allow empty content)
        if (content.length > 0) {
            converseMessages.push({
                role: msg.role,
                content: content,
            });
        }
    }

    return { messages: converseMessages, system: systemPrompts };
}
90
158
 
91
- // cleanup message content before formatting prompt message
159
+ // Process messages for Invoke API (complex model-specific formatting)
160
+ async function processMessagesForInvoke(messages, awsModel) {
92
161
  let message_cleaned = [];
93
162
  let system_message = "";
94
163
 
@@ -149,15 +218,17 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
149
218
  message_cleaned.push({role: "assistant", content: ""});
150
219
  }
151
220
  }
221
+
222
+ return { message_cleaned, system_message };
223
+ }
152
224
 
153
- let prompt;
154
-
155
- // format prompt message from message array
225
+ // Build prompt for Invoke API (model-specific formatting)
226
+ function buildInvokePrompt(message_cleaned, awsModel) {
156
227
  if (awsModel.messages_api) {
157
228
  // convert message array to prompt object if model supports messages api
158
- prompt = message_cleaned;
229
+ return message_cleaned;
159
230
  } else {
160
- prompt = awsModel.bos_text;
231
+ let prompt = awsModel.bos_text;
161
232
  let eom_text_inserted = false;
162
233
 
163
234
  for (let i = 0; i < message_cleaned.length; i++) {
@@ -213,45 +284,64 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
213
284
  prompt += `\n${awsModel.eom_text}`;
214
285
  }
215
286
  }
287
+ return prompt;
216
288
  }
289
+ }
217
290
 
218
- // Add logging to see the final prompt
219
- if (logging) {
220
- console.log("\nFinal formatted prompt:", prompt);
291
/**
 * Enforce a model's "mutually exclusive" parameter restrictions on a request
 * parameter object.
 *
 * For each group listed in `awsModel.parameter_restrictions.mutually_exclusive`,
 * if more than one member is set (neither undefined nor null) only the
 * highest-priority one is kept; `temperature` outranks `top_p`, and names not in
 * the priority list rank after both. The name `top_p` also covers the camelCase
 * `topP` key, both when detecting and when removing.
 *
 * @param {object} params - Request parameters; modified in place.
 * @param {object} awsModel - Model definition, optionally carrying `parameter_restrictions`.
 * @returns {object} The same `params` object.
 */
function applyParameterRestrictions(params, awsModel) {
    const exclusiveGroups = awsModel.parameter_restrictions?.mutually_exclusive;
    if (!exclusiveGroups) {
        return params;
    }

    const hasValue = (key) => params[key] !== undefined && params[key] !== null;

    // 'top_p' is considered supplied under either spelling
    const isSupplied = (name) => (
        name === 'top_p' ? hasValue('top_p') || hasValue('topP') : hasValue(name)
    );

    // Lower rank wins; names outside the list share the lowest priority
    const priorityOrder = ['temperature', 'top_p'];
    const rankOf = (name) => {
        const position = priorityOrder.indexOf(name);
        return position === -1 ? priorityOrder.length : position;
    };

    for (const group of exclusiveGroups) {
        const supplied = group.filter(isSupplied);
        if (supplied.length <= 1) {
            continue;
        }

        // Everything after the top-ranked entry gets removed
        const [, ...losers] = supplied.sort((a, b) => rankOf(a) - rankOf(b));
        for (const name of losers) {
            delete params[name];
            if (name === 'top_p') {
                // Remove the camelCase variant as well
                delete params['topP'];
            }
        }
    }

    return params;
}
248
341
 
249
- // if (logging) {
250
- // console.log("\nMax tokens:", max_gen_tokens);
251
- // }
252
-
253
- // Format the request payload using the model's native structure.
254
- const request = awsModel.messages_api ? (() => {
342
+ // Build request object for Invoke API (model-specific)
343
+ function buildInvokeRequest(prompt, awsModel, max_gen_tokens, temperature, top_p, stop_sequences, stop, system_message) {
344
+ if (awsModel.messages_api) {
255
345
  // Check if this is a Nova model (has schemaVersion in special_request_schema)
256
346
  if (awsModel.special_request_schema?.schemaVersion === "messages-v1") {
257
347
  // Nova model format - convert messages to Nova's expected format
@@ -289,17 +379,24 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
289
379
  });
290
380
 
291
381
  const stopSequencesValue = stop_sequences || stop;
382
+
383
+ // Build inference config with parameter restrictions
384
+ let inferenceConfig = {
385
+ [awsModel.max_tokens_param_name]: max_gen_tokens,
386
+ temperature: temperature,
387
+ topP: top_p,
388
+ ...(awsModel.stop_sequences_param_name && stopSequencesValue && {
389
+ [awsModel.stop_sequences_param_name]: Array.isArray(stopSequencesValue) ? stopSequencesValue : [stopSequencesValue]
390
+ })
391
+ };
392
+
393
+ // Apply parameter restrictions
394
+ inferenceConfig = applyParameterRestrictions(inferenceConfig, awsModel);
395
+
292
396
  const novaRequest = {
293
397
  ...awsModel.special_request_schema,
294
398
  messages: novaMessages,
295
- inferenceConfig: {
296
- [awsModel.max_tokens_param_name]: max_gen_tokens,
297
- temperature: temperature,
298
- topP: top_p,
299
- ...(awsModel.stop_sequences_param_name && stopSequencesValue && {
300
- [awsModel.stop_sequences_param_name]: Array.isArray(stopSequencesValue) ? stopSequencesValue : [stopSequencesValue]
301
- })
302
- }
399
+ inferenceConfig: inferenceConfig
303
400
  };
304
401
 
305
402
  // Add system message if present
@@ -311,7 +408,9 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
311
408
  } else {
312
409
  // Standard messages API format (Claude, etc.)
313
410
  const stopSequencesValue = stop_sequences || stop;
314
- return {
411
+
412
+ // Build request with parameter restrictions
413
+ let request = {
315
414
  messages: prompt,
316
415
  ...(awsModel.system_as_separate_field && system_message && { system: system_message }),
317
416
  [awsModel.max_tokens_param_name]: max_gen_tokens,
@@ -322,48 +421,47 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
322
421
  }),
323
422
  ...awsModel.special_request_schema
324
423
  };
424
+
425
+ // Apply parameter restrictions
426
+ request = applyParameterRestrictions(request, awsModel);
427
+
428
+ return request;
325
429
  }
326
- })() : {
327
- prompt: typeof prompt === 'string' ? prompt : {
328
- messages: prompt.map(msg => ({
329
- role: msg.role,
330
- content: Array.isArray(msg.content) ?
331
- msg.content.map(item =>
332
- item.type === 'text' ? item.text : item
333
- ).join('\n') :
334
- msg.content
335
- }))
336
- },
337
- // Optional inference parameters:
338
- [awsModel.max_tokens_param_name]: max_gen_tokens,
339
- temperature: temperature,
340
- top_p: top_p,
341
- ...(() => {
342
- const stopSequencesValue = stop_sequences || stop;
343
- return awsModel.stop_sequences_param_name && stopSequencesValue ? {
344
- [awsModel.stop_sequences_param_name]: Array.isArray(stopSequencesValue) ? stopSequencesValue : [stopSequencesValue]
345
- } : {};
346
- })(),
347
- ...awsModel.special_request_schema
348
- };
349
-
350
- // Create a Bedrock Runtime client in the AWS Region of your choice
351
- const client = new BedrockRuntimeClient({
352
- region: region,
353
- credentials: {
354
- accessKeyId: accessKeyId,
355
- secretAccessKey: secretAccessKey,
356
- },
357
- });
358
-
359
- if (logging) {
360
- console.log("\nFinal request:", JSON.stringify(request, null, 2));
430
+ } else {
431
+ // Build request for non-messages API models (Llama, etc.)
432
+ let request = {
433
+ prompt: typeof prompt === 'string' ? prompt : {
434
+ messages: prompt.map(msg => ({
435
+ role: msg.role,
436
+ content: Array.isArray(msg.content) ?
437
+ msg.content.map(item =>
438
+ item.type === 'text' ? item.text : item
439
+ ).join('\n') :
440
+ msg.content
441
+ }))
442
+ },
443
+ // Optional inference parameters:
444
+ [awsModel.max_tokens_param_name]: max_gen_tokens,
445
+ temperature: temperature,
446
+ top_p: top_p,
447
+ ...(() => {
448
+ const stopSequencesValue = stop_sequences || stop;
449
+ return awsModel.stop_sequences_param_name && stopSequencesValue ? {
450
+ [awsModel.stop_sequences_param_name]: Array.isArray(stopSequencesValue) ? stopSequencesValue : [stopSequencesValue]
451
+ } : {};
452
+ })(),
453
+ ...awsModel.special_request_schema
454
+ };
455
+
456
+ // Apply parameter restrictions
457
+ request = applyParameterRestrictions(request, awsModel);
458
+
459
+ return request;
361
460
  }
461
+ }
362
462
 
363
- // Check if model supports streaming, override stream parameter if not
364
- const modelSupportsStreaming = awsModel.streaming_supported !== false;
365
- const shouldStream = stream && modelSupportsStreaming;
366
-
463
+ // Execute Invoke API call (streaming and non-streaming)
464
+ async function* executeInvokeAPI(client, request, awsModelId, shouldStream, awsModel, include_thinking_data) {
367
465
  if (shouldStream) {
368
466
  const responseStream = await client.send(
369
467
  new InvokeModelWithResponseStreamCommand({
@@ -452,7 +550,248 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
452
550
  result = "";
453
551
  }
454
552
  yield result;
455
- }
553
+ }
554
+ }
555
+
556
/**
 * Clamp a configured thinking budget against the response token limit.
 *
 * The budget is capped at 80% of `maxGenTokens` and then raised to at least
 * 1024. A non-numeric budget (e.g. undefined) fails both comparisons and is
 * returned unchanged.
 *
 * @param {number|undefined} requestedBudget - Budget from the model definition.
 * @param {number} maxGenTokens - Response token limit for this call.
 * @returns {number|undefined} The clamped budget.
 */
function clampThinkingBudget(requestedBudget, maxGenTokens) {
    let budget = requestedBudget;
    if (budget > (maxGenTokens * 0.8)) {
        budget = Math.floor(maxGenTokens * 0.8);
    }
    if (budget < 1024) {
        budget = 1024;
    }
    return budget;
}

/**
 * Send an OpenAI-style chat completion request to Amazon Bedrock and yield the
 * generated text.
 *
 * Two request paths are supported:
 *  - Converse API (`useConverseAPI: true`): messages are converted with
 *    `convertToConverseFormat` and sent via ConverseCommand / ConverseStreamCommand.
 *  - Invoke API (default): messages, prompt and request body are built with the
 *    model-specific helpers and executed via `executeInvokeAPI`.
 *
 * When the model definition enables thinking, temperature is forced to 1,
 * top_p is unset and the thinking budget is clamped (see clampThinkingBudget).
 * Thinking output, when requested with `include_thinking_data`, is wrapped in
 * `<think>…</think>` on the Converse path.
 *
 * @param {{region: string, accessKeyId: string, secretAccessKey: string}} awsCreds - AWS credentials and region.
 * @param {object} openaiChatCompletionsCreateObject - Request fields: messages, model,
 *   max_tokens, stream, temperature, top_p, include_thinking_data, stop, stop_sequences.
 * @param {{logging?: boolean, useConverseAPI?: boolean}} [options] - `logging` prints the
 *   prompt/request/response to the console; `useConverseAPI` selects the Converse path.
 * @yields {string} Text chunks (streaming) or the complete text (non-streaming).
 */
export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging = false, useConverseAPI = false } = {} ) {
    const { region, accessKeyId, secretAccessKey } = awsCreds;
    const { messages, model, max_tokens, stream, include_thinking_data, stop, stop_sequences } = openaiChatCompletionsCreateObject;
    let { temperature, top_p } = openaiChatCompletionsCreateObject;

    let { awsModelId, awsModel } = findAwsModelWithId(model);

    // Create a Bedrock Runtime client
    const client = new BedrockRuntimeClient({
        region: region,
        credentials: {
            accessKeyId: accessKeyId,
            secretAccessKey: secretAccessKey,
        },
    });

    // Calculate max tokens (shared between both APIs). A missing max_tokens fails
    // the comparison, so the model's maximum is used.
    let max_gen_tokens = max_tokens <= awsModel.max_supported_response_tokens ? max_tokens : awsModel.max_supported_response_tokens;

    // Stream only when requested and the model does not opt out of streaming
    const modelSupportsStreaming = awsModel.streaming_supported !== false;
    const shouldStream = stream && modelSupportsStreaming;

    const thinkingEnabled = awsModel.special_request_schema?.thinking?.type === "enabled";

    // ============================
    // CONVERSE API PATH (SIMPLIFIED)
    // ============================
    if (useConverseAPI) {
        // Convert messages to Converse API format (no model-specific complexity)
        const { messages: converseMessages, system: systemPrompts } = await convertToConverseFormat(messages);

        // Build inference configuration
        let inferenceConfig = {
            maxTokens: max_gen_tokens,
            temperature: temperature,
            ...(top_p !== undefined && { topP: top_p }),
        };

        // Apply parameter restrictions for Converse API
        inferenceConfig = applyParameterRestrictions(inferenceConfig, awsModel);

        // Handle thinking mode
        let budget_tokens;
        if (thinkingEnabled) {
            inferenceConfig.temperature = 1; // temperature must be 1 for thinking
            delete inferenceConfig.topP;     // top_p must be unset for thinking

            budget_tokens = clampThinkingBudget(
                awsModel.special_request_schema?.thinking?.budget_tokens,
                max_gen_tokens
            );

            // Ensure max tokens is sufficient for thinking
            if (inferenceConfig.maxTokens <= budget_tokens) {
                inferenceConfig.maxTokens = Math.floor(budget_tokens * 1.2);
            }
        }

        // Add stop sequences if provided (unified format)
        const stopSequencesValue = stop_sequences || stop;
        if (stopSequencesValue) {
            inferenceConfig.stopSequences = Array.isArray(stopSequencesValue) ?
                stopSequencesValue : [stopSequencesValue];
        }

        // Build the Converse API request (simple, unified format)
        const converseRequest = {
            modelId: awsModelId,
            messages: converseMessages,
            inferenceConfig: inferenceConfig,
        };

        // Add system prompts if any
        if (systemPrompts.length > 0) {
            converseRequest.system = systemPrompts;
        }

        // Thinking configuration travels in the model-specific extra fields
        if (thinkingEnabled) {
            converseRequest.additionalModelRequestFields = {
                thinking: {
                    type: "enabled",
                    budget_tokens: budget_tokens,
                },
            };

            if (awsModel.special_request_schema?.anthropic_beta) {
                converseRequest.additionalModelRequestFields.anthropic_beta = awsModel.special_request_schema.anthropic_beta;
            }
        }

        if (logging) {
            console.log("\nConverse API request:", JSON.stringify(converseRequest, null, 2));
        }

        if (shouldStream) {
            // Use ConverseStream for streaming responses
            const responseStream = await client.send(new ConverseStreamCommand(converseRequest));

            let is_thinking = false;
            const should_think = include_thinking_data && thinkingEnabled;

            for await (const event of responseStream.stream) {
                if (!event.contentBlockDelta) {
                    continue;
                }
                const delta = event.contentBlockDelta.delta;
                const text = delta?.text;

                // Thinking text may arrive as reasoningContent or as a plain `thinking` field
                const thinkingText = delta?.reasoningContent?.reasoningText?.text || delta?.thinking;

                if (should_think && thinkingText) {
                    if (!is_thinking) {
                        // First thinking chunk opens the tag
                        is_thinking = true;
                        yield `<think>${thinkingText}`;
                    } else {
                        yield thinkingText;
                    }
                } else if (text) {
                    if (is_thinking) {
                        // First regular text after thinking closes the tag
                        is_thinking = false;
                        yield `</think>\n\n${text}`;
                    } else {
                        const processedText = processReasoningTags(text, awsModel);
                        if (processedText) {
                            yield processedText;
                        }
                    }
                }
            }

            // Close thinking tag if still open
            if (is_thinking) {
                yield "</think>";
            }
        } else {
            // Use Converse for non-streaming responses
            const response = await client.send(new ConverseCommand(converseRequest));

            if (logging) {
                console.log("\nConverse API response:", JSON.stringify(response, null, 2));
            }

            // Extract text and thinking from response
            if (response.output && response.output.message && response.output.message.content) {
                const collectThinking = include_thinking_data && thinkingEnabled;
                let thinking_result = "";
                let text_result = "";

                for (const contentBlock of response.output.message.content) {
                    // Thinking data delivered as reasoningContent
                    if (collectThinking && contentBlock.reasoningContent) {
                        const reasoningText = contentBlock.reasoningContent.reasoningText?.text;
                        if (reasoningText) {
                            thinking_result += reasoningText;
                        }
                    }

                    // Also check for the plain `thinking` field format
                    if (collectThinking && contentBlock.thinking) {
                        thinking_result += contentBlock.thinking;
                    }

                    // Extract regular text content
                    if (contentBlock.text) {
                        text_result += contentBlock.text;
                    }
                }

                text_result = processReasoningTags(text_result, awsModel);

                // Prefix the answer with the thinking block when there is one
                const result = thinking_result ? `<think>${thinking_result}</think>\n\n${text_result}` : text_result;

                if (result) {
                    yield result;
                }
            }
        }
        return; // Exit early when using Converse API
    }

    // ============================
    // INVOKE API PATH (COMPLEX, MODEL-SPECIFIC)
    // ============================

    // Process messages for Invoke API (complex, model-specific)
    const { message_cleaned, system_message } = await processMessagesForInvoke(messages, awsModel);

    // Build prompt for Invoke API (complex, model-specific)
    const prompt = buildInvokePrompt(message_cleaned, awsModel);

    if (logging) {
        console.log("\nFinal formatted prompt:", prompt);
    }

    // Handle thinking mode adjustments (Invoke API specific)
    if (thinkingEnabled) {
        // temperature may only be set to 1 when thinking is enabled
        temperature = 1;
        // top_p must be unset when thinking is enabled
        top_p = undefined;

        const thinkingConfig = awsModel.special_request_schema.thinking;
        if (thinkingConfig.budget_tokens) {
            // budget_tokens can not be greater than 80% of max_gen_tokens
            const budget_tokens = clampThinkingBudget(thinkingConfig.budget_tokens, max_gen_tokens);

            // Put the clamped budget on a per-call copy of the model definition.
            // Writing it back into the object returned by findAwsModelWithId would
            // leak this call's clamp into later calls if that object is shared.
            awsModel = {
                ...awsModel,
                special_request_schema: {
                    ...awsModel.special_request_schema,
                    thinking: { ...thinkingConfig, budget_tokens },
                },
            };

            // max_gen_tokens has to be greater than budget_tokens
            if (max_gen_tokens <= budget_tokens) {
                // make max_gen_tokens 20% greater than budget_tokens
                max_gen_tokens = Math.floor(budget_tokens * 1.2);
            }
        }
    }

    // Build request for Invoke API (complex, model-specific)
    const request = buildInvokeRequest(prompt, awsModel, max_gen_tokens, temperature, top_p, stop_sequences, stop, system_message);

    if (logging) {
        console.log("\nFinal request:", JSON.stringify(request, null, 2));
    }

    // Execute Invoke API call (complex, model-specific response parsing)
    yield* executeInvokeAPI(client, request, awsModelId, shouldStream, awsModel, include_thinking_data);
}
457
796
 
458
797