bedrock-wrapper 2.4.5 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,6 +1,17 @@
1
1
  # Changelog
2
2
  All notable changes to this project will be documented in this file.
3
3
 
4
+ ## [2.5.0] - 2025-08-12 (Converse API)
5
+ ### Added
6
+ - Support for Converse API (streaming and non-streaming)
7
+
8
+ ### Technical Details
9
+ - **Model Configuration**: All models use standard messages API format
10
+ - **API Compatibility**: Supports OpenAI-style requests
11
+ - **Response Processing**: Automatic reasoning tag handling based on model variant
12
+ - **Streaming Fallback**: Automatic detection and fallback to non-streaming for unsupported models
13
+ - **Testing Coverage**: Full integration with existing test suites and interactive example
14
+
4
15
  ## [2.4.5] - 2025-08-06 (GPT-OSS Models)
5
16
  ### Added
6
17
  - Support for OpenAI GPT-OSS models on AWS Bedrock
package/README.md CHANGED
@@ -48,7 +48,7 @@ Bedrock Wrapper is an npm package that simplifies the integration of existing Op
48
48
  };
49
49
  ```
50
50
 
51
- the `messages` variable should be in openai's role/content format
51
+ the `messages` variable should be in openai's role/content format (not all models support system prompts)
52
52
  ```javascript
53
53
  messages = [
54
54
  {
@@ -98,6 +98,24 @@ Bedrock Wrapper is an npm package that simplifies the integration of existing Op
98
98
  console.log(`\n\completeResponse:\n${completeResponse}\n`); // ⇠ do stuff with the complete response
99
99
  }
100
100
 
101
+ 5. **NEW: Using the Converse API (optional)**
102
+
103
+ You can now optionally use AWS Bedrock's Converse API instead of the Invoke API by passing `useConverseAPI: true` in the options parameter:
104
+ ```javascript
105
+ // Use the Converse API for unified request/response format across all models
106
+ for await (const chunk of bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { useConverseAPI: true })) {
107
+ completeResponse += chunk;
108
+ process.stdout.write(chunk);
109
+ }
110
+ ```
111
+
112
+ The Converse API provides:
113
+ - **Consistent API**: Single request/response format across all models
114
+ - **Simplified conversation management**: Better handling of multi-turn conversations
115
+ - **System prompts**: Cleaner separation of system instructions
116
+ - **Tool use support**: Native support for function calling (where supported)
117
+ - **Unified multimodal**: Consistent handling of text and image inputs
118
+
101
119
  ---
102
120
 
103
121
  ### Supported Models
@@ -150,7 +168,7 @@ Please modify the `bedrock_models.js` file and submit a PR 🏆 or create an Iss
150
168
 
151
169
  ### Image Support
152
170
 
153
- For models with image support (Claude 4 series, Claude 3.7 Sonnet, Claude 3.5 Sonnet, Claude 3 Haiku, Nova Pro, and Nova Lite), you can include images in your messages using the following format:
171
+ For models with image support (Claude 4 series, Claude 3.7 Sonnet, Claude 3.5 Sonnet, Claude 3 Haiku, Nova Pro, and Nova Lite), you can include images in your messages using the following format (not all models support system prompts):
154
172
 
155
173
  ```javascript
156
174
  messages = [
@@ -239,6 +257,44 @@ const result = await bedrockWrapper(awsCreds, {
239
257
 
240
258
  ---
241
259
 
260
+ ### 🧪 Testing
261
+
262
+ The package includes comprehensive test suites to verify functionality:
263
+
264
+ ```bash
265
+ # Test all models with both APIs (Comparison)
266
+ npm run test
267
+
268
+ # Test all models with the Invoke API
269
+ npm run test:invoke
270
+
271
+ # Test all models with the Converse API
272
+ npm run test:converse
273
+
274
+ # Test vision/multimodal capabilities with Both APIs (Comparison)
275
+ npm run test-vision
276
+
277
+ # Test vision/multimodal capabilities with Invoke API
278
+ npm run test-vision:invoke
279
+
280
+ # Test vision/multimodal capabilities with Converse API
281
+ npm run test-vision:converse
282
+
283
+ # Test stop sequences functionality with Both APIs (Comparison)
284
+ npm run test-stop
285
+
286
+ # Test stop sequences functionality with Invoke API
287
+ npm run test-stop:invoke
288
+
289
+ # Test stop sequences functionality with Converse API
290
+ npm run test-stop:converse
291
+
292
+ # Interactive testing
293
+ npm run interactive
294
+ ```
295
+
296
+ ---
297
+
242
298
  ### 📢 P.S.
243
299
 
244
300
  In case you missed it at the beginning of this doc, for an even easier setup, use the 🔀 [Bedrock Proxy Endpoint](https://github.com/jparkerweb/bedrock-proxy-endpoint) project to spin up your own custom OpenAI server endpoint (using the standard `baseUrl`, and `apiKey` params).
@@ -15,6 +15,7 @@ import { bedrock_models } from "./bedrock-models.js";
15
15
  import {
16
16
  BedrockRuntimeClient,
17
17
  InvokeModelCommand, InvokeModelWithResponseStreamCommand,
18
+ ConverseCommand, ConverseStreamCommand,
18
19
  } from "@aws-sdk/client-bedrock-runtime";
19
20
  // helper functions
20
21
  import {
@@ -81,14 +82,82 @@ function processReasoningTags(text, awsModel) {
81
82
  return text.replace(/<reasoning>[\s\S]*?<\/reasoning>/g, '').trim();
82
83
  }
83
84
 
84
- export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging = false } = {} ) {
85
- const { region, accessKeyId, secretAccessKey } = awsCreds;
86
- let { messages, model, max_tokens, stream, temperature, top_p, include_thinking_data, stop, stop_sequences } = openaiChatCompletionsCreateObject;
87
-
88
-
89
- let {awsModelId, awsModel} = findAwsModelWithId(model);
85
+ // Convert messages to Converse API format
86
+ async function convertToConverseFormat(messages) {
87
+ const converseMessages = [];
88
+ let systemPrompts = [];
89
+
90
+ for (const msg of messages) {
91
+ if (msg.role === "system") {
92
+ // System messages are handled separately in Converse API
93
+ if (typeof msg.content === 'string') {
94
+ systemPrompts.push({ text: msg.content });
95
+ } else if (Array.isArray(msg.content)) {
96
+ // Extract text from content array
97
+ const textContent = msg.content
98
+ .filter(item => item.type === 'text')
99
+ .map(item => item.text || item)
100
+ .join('\n');
101
+ if (textContent) {
102
+ systemPrompts.push({ text: textContent });
103
+ }
104
+ }
105
+ } else {
106
+ // Convert user and assistant messages
107
+ let content = [];
108
+
109
+ if (typeof msg.content === 'string') {
110
+ content = [{ text: msg.content }];
111
+ } else if (Array.isArray(msg.content)) {
112
+ for (const item of msg.content) {
113
+ if (item.type === 'text') {
114
+ content.push({ text: item.text || item });
115
+ } else if (item.type === 'image') {
116
+ // Handle image content
117
+ if (item.source && item.source.data) {
118
+ content.push({
119
+ image: {
120
+ format: 'jpeg',
121
+ source: {
122
+ bytes: Buffer.from(item.source.data, 'base64')
123
+ }
124
+ }
125
+ });
126
+ }
127
+ } else if (item.type === 'image_url') {
128
+ // Process image URL to base64
129
+ const processedImage = await processImage(
130
+ typeof item.image_url === 'string' ?
131
+ item.image_url :
132
+ item.image_url.url
133
+ );
134
+ content.push({
135
+ image: {
136
+ format: 'jpeg',
137
+ source: {
138
+ bytes: Buffer.from(processedImage, 'base64')
139
+ }
140
+ }
141
+ });
142
+ }
143
+ }
144
+ }
145
+
146
+ // Only add messages with actual content (Converse API doesn't allow empty content)
147
+ if (content.length > 0) {
148
+ converseMessages.push({
149
+ role: msg.role,
150
+ content: content
151
+ });
152
+ }
153
+ }
154
+ }
155
+
156
+ return { messages: converseMessages, system: systemPrompts };
157
+ }
90
158
 
91
- // cleanup message content before formatting prompt message
159
+ // Process messages for Invoke API (complex model-specific formatting)
160
+ async function processMessagesForInvoke(messages, awsModel) {
92
161
  let message_cleaned = [];
93
162
  let system_message = "";
94
163
 
@@ -149,15 +218,17 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
149
218
  message_cleaned.push({role: "assistant", content: ""});
150
219
  }
151
220
  }
221
+
222
+ return { message_cleaned, system_message };
223
+ }
152
224
 
153
- let prompt;
154
-
155
- // format prompt message from message array
225
+ // Build prompt for Invoke API (model-specific formatting)
226
+ function buildInvokePrompt(message_cleaned, awsModel) {
156
227
  if (awsModel.messages_api) {
157
228
  // convert message array to prompt object if model supports messages api
158
- prompt = message_cleaned;
229
+ return message_cleaned;
159
230
  } else {
160
- prompt = awsModel.bos_text;
231
+ let prompt = awsModel.bos_text;
161
232
  let eom_text_inserted = false;
162
233
 
163
234
  for (let i = 0; i < message_cleaned.length; i++) {
@@ -213,45 +284,13 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
213
284
  prompt += `\n${awsModel.eom_text}`;
214
285
  }
215
286
  }
287
+ return prompt;
216
288
  }
289
+ }
217
290
 
218
- // Add logging to see the final prompt
219
- if (logging) {
220
- console.log("\nFinal formatted prompt:", prompt);
221
- }
222
-
223
- let max_gen_tokens = max_tokens <= awsModel.max_supported_response_tokens ? max_tokens : awsModel.max_supported_response_tokens;
224
-
225
- if (awsModel.special_request_schema?.thinking?.type === "enabled") {
226
- // temperature may only be set to 1 when thinking is enabled
227
- temperature = 1;
228
- // top_p must be unset when thinking is enabled
229
- top_p = undefined;
230
- // bugget_tokens can not be greater than 80% of max_gen_tokens
231
- let budget_tokens = awsModel.special_request_schema?.thinking?.budget_tokens;
232
- if (budget_tokens > (max_gen_tokens * 0.8)) {
233
- budget_tokens = Math.floor(max_gen_tokens * 0.8);
234
- }
235
- if (budget_tokens < 1024) {
236
- budget_tokens = 1024;
237
- }
238
- // if awsModel.special_request_schema?.thinking?.budget_tokens, set it to budget_tokens
239
- if (awsModel.special_request_schema?.thinking?.budget_tokens) {
240
- awsModel.special_request_schema.thinking.budget_tokens = budget_tokens;
241
- // max_gen_tokens has to be greater than budget_tokens
242
- if (max_gen_tokens <= budget_tokens) {
243
- // make max_gen_tokens 20% greater than budget_tokens
244
- max_gen_tokens = Math.floor(budget_tokens * 1.2);
245
- }
246
- }
247
- }
248
-
249
- // if (logging) {
250
- // console.log("\nMax tokens:", max_gen_tokens);
251
- // }
252
-
253
- // Format the request payload using the model's native structure.
254
- const request = awsModel.messages_api ? (() => {
291
+ // Build request object for Invoke API (model-specific)
292
+ function buildInvokeRequest(prompt, awsModel, max_gen_tokens, temperature, top_p, stop_sequences, stop, system_message) {
293
+ if (awsModel.messages_api) {
255
294
  // Check if this is a Nova model (has schemaVersion in special_request_schema)
256
295
  if (awsModel.special_request_schema?.schemaVersion === "messages-v1") {
257
296
  // Nova model format - convert messages to Nova's expected format
@@ -323,47 +362,35 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
323
362
  ...awsModel.special_request_schema
324
363
  };
325
364
  }
326
- })() : {
327
- prompt: typeof prompt === 'string' ? prompt : {
328
- messages: prompt.map(msg => ({
329
- role: msg.role,
330
- content: Array.isArray(msg.content) ?
331
- msg.content.map(item =>
332
- item.type === 'text' ? item.text : item
333
- ).join('\n') :
334
- msg.content
335
- }))
336
- },
337
- // Optional inference parameters:
338
- [awsModel.max_tokens_param_name]: max_gen_tokens,
339
- temperature: temperature,
340
- top_p: top_p,
341
- ...(() => {
342
- const stopSequencesValue = stop_sequences || stop;
343
- return awsModel.stop_sequences_param_name && stopSequencesValue ? {
344
- [awsModel.stop_sequences_param_name]: Array.isArray(stopSequencesValue) ? stopSequencesValue : [stopSequencesValue]
345
- } : {};
346
- })(),
347
- ...awsModel.special_request_schema
348
- };
349
-
350
- // Create a Bedrock Runtime client in the AWS Region of your choice
351
- const client = new BedrockRuntimeClient({
352
- region: region,
353
- credentials: {
354
- accessKeyId: accessKeyId,
355
- secretAccessKey: secretAccessKey,
356
- },
357
- });
358
-
359
- if (logging) {
360
- console.log("\nFinal request:", JSON.stringify(request, null, 2));
365
+ } else {
366
+ return {
367
+ prompt: typeof prompt === 'string' ? prompt : {
368
+ messages: prompt.map(msg => ({
369
+ role: msg.role,
370
+ content: Array.isArray(msg.content) ?
371
+ msg.content.map(item =>
372
+ item.type === 'text' ? item.text : item
373
+ ).join('\n') :
374
+ msg.content
375
+ }))
376
+ },
377
+ // Optional inference parameters:
378
+ [awsModel.max_tokens_param_name]: max_gen_tokens,
379
+ temperature: temperature,
380
+ top_p: top_p,
381
+ ...(() => {
382
+ const stopSequencesValue = stop_sequences || stop;
383
+ return awsModel.stop_sequences_param_name && stopSequencesValue ? {
384
+ [awsModel.stop_sequences_param_name]: Array.isArray(stopSequencesValue) ? stopSequencesValue : [stopSequencesValue]
385
+ } : {};
386
+ })(),
387
+ ...awsModel.special_request_schema
388
+ };
361
389
  }
390
+ }
362
391
 
363
- // Check if model supports streaming, override stream parameter if not
364
- const modelSupportsStreaming = awsModel.streaming_supported !== false;
365
- const shouldStream = stream && modelSupportsStreaming;
366
-
392
+ // Execute Invoke API call (streaming and non-streaming)
393
+ async function* executeInvokeAPI(client, request, awsModelId, shouldStream, awsModel, include_thinking_data) {
367
394
  if (shouldStream) {
368
395
  const responseStream = await client.send(
369
396
  new InvokeModelWithResponseStreamCommand({
@@ -452,7 +479,245 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
452
479
  result = "";
453
480
  }
454
481
  yield result;
455
- }
482
+ }
483
+ }
484
+
485
+ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging = false, useConverseAPI = false } = {} ) {
486
+ const { region, accessKeyId, secretAccessKey } = awsCreds;
487
+ let { messages, model, max_tokens, stream, temperature, top_p, include_thinking_data, stop, stop_sequences } = openaiChatCompletionsCreateObject;
488
+
489
+ let {awsModelId, awsModel} = findAwsModelWithId(model);
490
+
491
+ // Create a Bedrock Runtime client
492
+ const client = new BedrockRuntimeClient({
493
+ region: region,
494
+ credentials: {
495
+ accessKeyId: accessKeyId,
496
+ secretAccessKey: secretAccessKey,
497
+ },
498
+ });
499
+
500
+ // Calculate max tokens (shared between both APIs)
501
+ let max_gen_tokens = max_tokens <= awsModel.max_supported_response_tokens ? max_tokens : awsModel.max_supported_response_tokens;
502
+
503
+ // Check if model supports streaming
504
+ const modelSupportsStreaming = awsModel.streaming_supported !== false;
505
+ const shouldStream = stream && modelSupportsStreaming;
506
+
507
+ // ============================
508
+ // CONVERSE API PATH (SIMPLIFIED)
509
+ // ============================
510
+ if (useConverseAPI) {
511
+ // Convert messages to Converse API format (no model-specific complexity)
512
+ const { messages: converseMessages, system: systemPrompts } = await convertToConverseFormat(messages);
513
+
514
+ // Build inference configuration (handle thinking mode for Claude models)
515
+ const inferenceConfig = {
516
+ maxTokens: max_gen_tokens,
517
+ temperature: temperature,
518
+ ...(top_p !== undefined && { topP: top_p })
519
+ };
520
+
521
+ // Handle thinking mode for Claude models
522
+ let budget_tokens;
523
+ if (awsModel.special_request_schema?.thinking?.type === "enabled") {
524
+ // Apply thinking mode constraints for Converse API
525
+ inferenceConfig.temperature = 1; // temperature must be 1 for thinking
526
+ delete inferenceConfig.topP; // top_p must be unset for thinking
527
+
528
+ // Calculate thinking budget configuration
529
+ budget_tokens = awsModel.special_request_schema?.thinking?.budget_tokens;
530
+ if (budget_tokens > (max_gen_tokens * 0.8)) {
531
+ budget_tokens = Math.floor(max_gen_tokens * 0.8);
532
+ }
533
+ if (budget_tokens < 1024) {
534
+ budget_tokens = 1024;
535
+ }
536
+
537
+ // Ensure max tokens is sufficient for thinking
538
+ if (inferenceConfig.maxTokens <= budget_tokens) {
539
+ inferenceConfig.maxTokens = Math.floor(budget_tokens * 1.2);
540
+ }
541
+ }
542
+
543
+ // Add stop sequences if provided (unified format)
544
+ const stopSequencesValue = stop_sequences || stop;
545
+ if (stopSequencesValue) {
546
+ inferenceConfig.stopSequences = Array.isArray(stopSequencesValue) ?
547
+ stopSequencesValue : [stopSequencesValue];
548
+ }
549
+
550
+ // Build the Converse API request (simple, unified format)
551
+ const converseRequest = {
552
+ modelId: awsModelId,
553
+ messages: converseMessages,
554
+ inferenceConfig: inferenceConfig
555
+ };
556
+
557
+ // Add system prompts if any
558
+ if (systemPrompts.length > 0) {
559
+ converseRequest.system = systemPrompts;
560
+ }
561
+
562
+ // Add thinking configuration for Claude models
563
+ if (awsModel.special_request_schema?.thinking?.type === "enabled") {
564
+ converseRequest.additionalModelRequestFields = {
565
+ thinking: {
566
+ type: "enabled",
567
+ budget_tokens: budget_tokens
568
+ }
569
+ };
570
+
571
+ if (awsModel.special_request_schema?.anthropic_beta) {
572
+ converseRequest.additionalModelRequestFields.anthropic_beta = awsModel.special_request_schema.anthropic_beta;
573
+ }
574
+ }
575
+
576
+ if (logging) {
577
+ console.log("\nConverse API request:", JSON.stringify(converseRequest, null, 2));
578
+ }
579
+
580
+ if (shouldStream) {
581
+ // Use ConverseStream for streaming responses
582
+ const responseStream = await client.send(new ConverseStreamCommand(converseRequest));
583
+
584
+ let is_thinking = false;
585
+ let should_think = include_thinking_data && awsModel.special_request_schema?.thinking?.type === "enabled";
586
+
587
+ for await (const event of responseStream.stream) {
588
+ if (event.contentBlockDelta) {
589
+ const text = event.contentBlockDelta.delta?.text;
590
+ const thinking = event.contentBlockDelta.delta?.thinking;
591
+ const reasoningContent = event.contentBlockDelta.delta?.reasoningContent;
592
+
593
+ // Handle Claude thinking data (streaming) - check both reasoningContent and thinking
594
+ const thinkingText = reasoningContent?.reasoningText?.text || thinking;
595
+ if (should_think && thinkingText) {
596
+ if (!is_thinking) {
597
+ is_thinking = true;
598
+ yield `<think>${thinkingText}`;
599
+ } else {
600
+ yield thinkingText;
601
+ }
602
+ }
603
+ // Handle regular text content
604
+ else if (text) {
605
+ // End thinking mode if we were in it
606
+ if (is_thinking) {
607
+ is_thinking = false;
608
+ yield `</think>\n\n${text}`;
609
+ } else {
610
+ // Process reasoning tags for GPT-OSS models only
611
+ const processedText = processReasoningTags(text, awsModel);
612
+ if (processedText) {
613
+ yield processedText;
614
+ }
615
+ }
616
+ }
617
+ }
618
+ }
619
+
620
+ // Close thinking tag if still open
621
+ if (is_thinking) {
622
+ yield "</think>";
623
+ }
624
+ } else {
625
+ // Use Converse for non-streaming responses
626
+ const response = await client.send(new ConverseCommand(converseRequest));
627
+
628
+ if (logging) {
629
+ console.log("\nConverse API response:", JSON.stringify(response, null, 2));
630
+ }
631
+
632
+ // Extract text and thinking from response (handle Claude thinking)
633
+ if (response.output && response.output.message && response.output.message.content) {
634
+ let thinking_result = "";
635
+ let text_result = "";
636
+
637
+ for (const contentBlock of response.output.message.content) {
638
+ // Extract thinking data for Claude models (from reasoningContent)
639
+ if (include_thinking_data && contentBlock.reasoningContent &&
640
+ awsModel.special_request_schema?.thinking?.type === "enabled") {
641
+ const reasoningText = contentBlock.reasoningContent.reasoningText?.text;
642
+ if (reasoningText) {
643
+ thinking_result += reasoningText;
644
+ }
645
+ }
646
+
647
+ // Also check for legacy thinking field format
648
+ if (include_thinking_data && contentBlock.thinking &&
649
+ awsModel.special_request_schema?.thinking?.type === "enabled") {
650
+ thinking_result += contentBlock.thinking;
651
+ }
652
+
653
+ // Extract regular text content
654
+ if (contentBlock.text) {
655
+ text_result += contentBlock.text;
656
+ }
657
+ }
658
+
659
+ // Process reasoning tags for GPT-OSS models
660
+ text_result = processReasoningTags(text_result, awsModel);
661
+
662
+ // Combine thinking and text for Claude models
663
+ let result = thinking_result ? `<think>${thinking_result}</think>\n\n${text_result}` : text_result;
664
+
665
+ if (result) {
666
+ yield result;
667
+ }
668
+ }
669
+ }
670
+ return; // Exit early when using Converse API
671
+ }
672
+
673
+ // ============================
674
+ // INVOKE API PATH (COMPLEX, MODEL-SPECIFIC)
675
+ // ============================
676
+
677
+ // Process messages for Invoke API (complex, model-specific)
678
+ const { message_cleaned, system_message } = await processMessagesForInvoke(messages, awsModel);
679
+
680
+ // Build prompt for Invoke API (complex, model-specific)
681
+ const prompt = buildInvokePrompt(message_cleaned, awsModel);
682
+
683
+ if (logging) {
684
+ console.log("\nFinal formatted prompt:", prompt);
685
+ }
686
+
687
+ // Handle thinking mode adjustments (Invoke API specific)
688
+ if (awsModel.special_request_schema?.thinking?.type === "enabled") {
689
+ // temperature may only be set to 1 when thinking is enabled
690
+ temperature = 1;
691
+ // top_p must be unset when thinking is enabled
692
+ top_p = undefined;
693
+ // budget_tokens can not be greater than 80% of max_gen_tokens
694
+ let budget_tokens = awsModel.special_request_schema?.thinking?.budget_tokens;
695
+ if (budget_tokens > (max_gen_tokens * 0.8)) {
696
+ budget_tokens = Math.floor(max_gen_tokens * 0.8);
697
+ }
698
+ if (budget_tokens < 1024) {
699
+ budget_tokens = 1024;
700
+ }
701
+ // if awsModel.special_request_schema?.thinking?.budget_tokens, set it to budget_tokens
702
+ if (awsModel.special_request_schema?.thinking?.budget_tokens) {
703
+ awsModel.special_request_schema.thinking.budget_tokens = budget_tokens;
704
+ // max_gen_tokens has to be greater than budget_tokens
705
+ if (max_gen_tokens <= budget_tokens) {
706
+ // make max_gen_tokens 20% greater than budget_tokens
707
+ max_gen_tokens = Math.floor(budget_tokens * 1.2);
708
+ }
709
+ }
710
+ }
711
+
712
+ // Build request for Invoke API (complex, model-specific)
713
+ const request = buildInvokeRequest(prompt, awsModel, max_gen_tokens, temperature, top_p, stop_sequences, stop, system_message);
714
+
715
+ if (logging) {
716
+ console.log("\nFinal request:", JSON.stringify(request, null, 2));
717
+ }
718
+
719
+ // Execute Invoke API call (complex, model-specific response parsing)
720
+ yield* executeInvokeAPI(client, request, awsModelId, shouldStream, awsModel, include_thinking_data);
456
721
  }
457
722
 
458
723