bedrock-wrapper 2.4.5 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/README.md +58 -2
- package/bedrock-wrapper.js +354 -89
- package/example-converse-api.js +116 -0
- package/interactive-example.js +18 -10
- package/logs/e4cf59ef-9d22-45bf-9c6c-53e3cb9efda3/notification.json +58 -0
- package/logs/e4cf59ef-9d22-45bf-9c6c-53e3cb9efda3/post_tool_use.json +7977 -0
- package/logs/e4cf59ef-9d22-45bf-9c6c-53e3cb9efda3/pre_tool_use.json +2541 -0
- package/logs/e4cf59ef-9d22-45bf-9c6c-53e3cb9efda3/stop.json +86 -0
- package/logs/e4cf59ef-9d22-45bf-9c6c-53e3cb9efda3/user_prompt_submit.json +86 -0
- package/package.json +12 -5
- package/test-converse-api.js +347 -0
- package/test-models.js +96 -20
- package/test-stop-sequences.js +167 -43
- package/test-vision.js +88 -28
- package/logs/7aa436f5-0b5d-44bd-8860-e1a898f87df2/notification.json +0 -65
- package/logs/7aa436f5-0b5d-44bd-8860-e1a898f87df2/post_tool_use.json +0 -5194
- package/logs/7aa436f5-0b5d-44bd-8860-e1a898f87df2/pre_tool_use.json +0 -1919
- package/logs/7aa436f5-0b5d-44bd-8860-e1a898f87df2/stop.json +0 -44
- package/logs/7aa436f5-0b5d-44bd-8860-e1a898f87df2/user_prompt_submit.json +0 -44
package/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
|
3
3
|
|
|
4
|
+
## [2.5.0] - 2025-08-12 (Converse API)
|
|
5
|
+
### Added
|
|
6
|
+
- Support for Converse API (streaming and non-streaming)
|
|
7
|
+
|
|
8
|
+
### Technical Details
|
|
9
|
+
- **Model Configuration**: All models use standard messages API format
|
|
10
|
+
- **API Compatibility**: Supports OpenAI-style requests
|
|
11
|
+
- **Response Processing**: Automatic reasoning tag handling based on model variant
|
|
12
|
+
- **Streaming Fallback**: Automatic detection and fallback to non-streaming for unsupported models
|
|
13
|
+
- **Testing Coverage**: Full integration with existing test suites and interactive example
|
|
14
|
+
|
|
4
15
|
## [2.4.5] - 2025-08-06 (GPT-OSS Models)
|
|
5
16
|
### Added
|
|
6
17
|
- Support for OpenAI GPT-OSS models on AWS Bedrock
|
package/README.md
CHANGED
|
@@ -48,7 +48,7 @@ Bedrock Wrapper is an npm package that simplifies the integration of existing Op
|
|
|
48
48
|
};
|
|
49
49
|
```
|
|
50
50
|
|
|
51
|
-
the `messages` variable should be in openai's role/content format
|
|
51
|
+
the `messages` variable should be in openai's role/content format (not all models support system prompts)
|
|
52
52
|
```javascript
|
|
53
53
|
messages = [
|
|
54
54
|
{
|
|
@@ -98,6 +98,24 @@ Bedrock Wrapper is an npm package that simplifies the integration of existing Op
|
|
|
98
98
|
console.log(`\n\completeResponse:\n${completeResponse}\n`); // ← do stuff with the complete response
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
+
5. **NEW: Using the Converse API (optional)**
|
|
102
|
+
|
|
103
|
+
You can now optionally use AWS Bedrock's Converse API instead of the Invoke API by passing `useConverseAPI: true` in the options parameter:
|
|
104
|
+
```javascript
|
|
105
|
+
// Use the Converse API for unified request/response format across all models
|
|
106
|
+
for await (const chunk of bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { useConverseAPI: true })) {
|
|
107
|
+
completeResponse += chunk;
|
|
108
|
+
process.stdout.write(chunk);
|
|
109
|
+
}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
The Converse API provides:
|
|
113
|
+
- **Consistent API**: Single request/response format across all models
|
|
114
|
+
- **Simplified conversation management**: Better handling of multi-turn conversations
|
|
115
|
+
- **System prompts**: Cleaner separation of system instructions
|
|
116
|
+
- **Tool use support**: Native support for function calling (where supported)
|
|
117
|
+
- **Unified multimodal**: Consistent handling of text and image inputs
|
|
118
|
+
|
|
101
119
|
---
|
|
102
120
|
|
|
103
121
|
### Supported Models
|
|
@@ -150,7 +168,7 @@ Please modify the `bedrock_models.js` file and submit a PR ๐ or create an Iss
|
|
|
150
168
|
|
|
151
169
|
### Image Support
|
|
152
170
|
|
|
153
|
-
For models with image support (Claude 4 series, Claude 3.7 Sonnet, Claude 3.5 Sonnet, Claude 3 Haiku, Nova Pro, and Nova Lite), you can include images in your messages using the following format:
|
|
171
|
+
For models with image support (Claude 4 series, Claude 3.7 Sonnet, Claude 3.5 Sonnet, Claude 3 Haiku, Nova Pro, and Nova Lite), you can include images in your messages using the following format (not all models support system prompts):
|
|
154
172
|
|
|
155
173
|
```javascript
|
|
156
174
|
messages = [
|
|
@@ -239,6 +257,44 @@ const result = await bedrockWrapper(awsCreds, {
|
|
|
239
257
|
|
|
240
258
|
---
|
|
241
259
|
|
|
260
|
+
### 🧪 Testing
|
|
261
|
+
|
|
262
|
+
The package includes comprehensive test suites to verify functionality:
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
# Test all models with Both APIs (Comparison)
|
|
266
|
+
npm run test
|
|
267
|
+
|
|
268
|
+
# Test all models with the Invoke API
|
|
269
|
+
npm run test:invoke
|
|
270
|
+
|
|
271
|
+
# Test all models with the Converse API
|
|
272
|
+
npm run test:converse
|
|
273
|
+
|
|
274
|
+
# Test vision/multimodal capabilities with Both APIs (Comparison)
|
|
275
|
+
npm run test-vision
|
|
276
|
+
|
|
277
|
+
# Test vision/multimodal capabilities with Invoke API
|
|
278
|
+
npm run test-vision:invoke
|
|
279
|
+
|
|
280
|
+
# Test vision/multimodal capabilities with Converse API
|
|
281
|
+
npm run test-vision:converse
|
|
282
|
+
|
|
283
|
+
# Test stop sequences functionality with Both APIs (Comparison)
|
|
284
|
+
npm run test-stop
|
|
285
|
+
|
|
286
|
+
# Test stop sequences functionality with Invoke API
|
|
287
|
+
npm run test-stop:invoke
|
|
288
|
+
|
|
289
|
+
# Test stop sequences functionality with Converse API
|
|
290
|
+
npm run test-stop:converse
|
|
291
|
+
|
|
292
|
+
# Interactive testing
|
|
293
|
+
npm run interactive
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
---
|
|
297
|
+
|
|
242
298
|
### 📢 P.S.
|
|
243
299
|
|
|
244
300
|
In case you missed it at the beginning of this doc, for an even easier setup, use the 🔀 [Bedrock Proxy Endpoint](https://github.com/jparkerweb/bedrock-proxy-endpoint) project to spin up your own custom OpenAI server endpoint (using the standard `baseUrl` and `apiKey` params).
|
package/bedrock-wrapper.js
CHANGED
|
@@ -15,6 +15,7 @@ import { bedrock_models } from "./bedrock-models.js";
|
|
|
15
15
|
import {
|
|
16
16
|
BedrockRuntimeClient,
|
|
17
17
|
InvokeModelCommand, InvokeModelWithResponseStreamCommand,
|
|
18
|
+
ConverseCommand, ConverseStreamCommand,
|
|
18
19
|
} from "@aws-sdk/client-bedrock-runtime";
|
|
19
20
|
// helper functions
|
|
20
21
|
import {
|
|
@@ -81,14 +82,82 @@ function processReasoningTags(text, awsModel) {
|
|
|
81
82
|
return text.replace(/<reasoning>[\s\S]*?<\/reasoning>/g, '').trim();
|
|
82
83
|
}
|
|
83
84
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
85
|
+
// Convert messages to Converse API format
|
|
86
|
+
async function convertToConverseFormat(messages) {
|
|
87
|
+
const converseMessages = [];
|
|
88
|
+
let systemPrompts = [];
|
|
89
|
+
|
|
90
|
+
for (const msg of messages) {
|
|
91
|
+
if (msg.role === "system") {
|
|
92
|
+
// System messages are handled separately in Converse API
|
|
93
|
+
if (typeof msg.content === 'string') {
|
|
94
|
+
systemPrompts.push({ text: msg.content });
|
|
95
|
+
} else if (Array.isArray(msg.content)) {
|
|
96
|
+
// Extract text from content array
|
|
97
|
+
const textContent = msg.content
|
|
98
|
+
.filter(item => item.type === 'text')
|
|
99
|
+
.map(item => item.text || item)
|
|
100
|
+
.join('\n');
|
|
101
|
+
if (textContent) {
|
|
102
|
+
systemPrompts.push({ text: textContent });
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
} else {
|
|
106
|
+
// Convert user and assistant messages
|
|
107
|
+
let content = [];
|
|
108
|
+
|
|
109
|
+
if (typeof msg.content === 'string') {
|
|
110
|
+
content = [{ text: msg.content }];
|
|
111
|
+
} else if (Array.isArray(msg.content)) {
|
|
112
|
+
for (const item of msg.content) {
|
|
113
|
+
if (item.type === 'text') {
|
|
114
|
+
content.push({ text: item.text || item });
|
|
115
|
+
} else if (item.type === 'image') {
|
|
116
|
+
// Handle image content
|
|
117
|
+
if (item.source && item.source.data) {
|
|
118
|
+
content.push({
|
|
119
|
+
image: {
|
|
120
|
+
format: 'jpeg',
|
|
121
|
+
source: {
|
|
122
|
+
bytes: Buffer.from(item.source.data, 'base64')
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
});
|
|
126
|
+
}
|
|
127
|
+
} else if (item.type === 'image_url') {
|
|
128
|
+
// Process image URL to base64
|
|
129
|
+
const processedImage = await processImage(
|
|
130
|
+
typeof item.image_url === 'string' ?
|
|
131
|
+
item.image_url :
|
|
132
|
+
item.image_url.url
|
|
133
|
+
);
|
|
134
|
+
content.push({
|
|
135
|
+
image: {
|
|
136
|
+
format: 'jpeg',
|
|
137
|
+
source: {
|
|
138
|
+
bytes: Buffer.from(processedImage, 'base64')
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
});
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Only add messages with actual content (Converse API doesn't allow empty content)
|
|
147
|
+
if (content.length > 0) {
|
|
148
|
+
converseMessages.push({
|
|
149
|
+
role: msg.role,
|
|
150
|
+
content: content
|
|
151
|
+
});
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
return { messages: converseMessages, system: systemPrompts };
|
|
157
|
+
}
|
|
90
158
|
|
|
91
|
-
|
|
159
|
+
// Process messages for Invoke API (complex model-specific formatting)
|
|
160
|
+
async function processMessagesForInvoke(messages, awsModel) {
|
|
92
161
|
let message_cleaned = [];
|
|
93
162
|
let system_message = "";
|
|
94
163
|
|
|
@@ -149,15 +218,17 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
|
|
|
149
218
|
message_cleaned.push({role: "assistant", content: ""});
|
|
150
219
|
}
|
|
151
220
|
}
|
|
221
|
+
|
|
222
|
+
return { message_cleaned, system_message };
|
|
223
|
+
}
|
|
152
224
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
// format prompt message from message array
|
|
225
|
+
// Build prompt for Invoke API (model-specific formatting)
|
|
226
|
+
function buildInvokePrompt(message_cleaned, awsModel) {
|
|
156
227
|
if (awsModel.messages_api) {
|
|
157
228
|
// convert message array to prompt object if model supports messages api
|
|
158
|
-
|
|
229
|
+
return message_cleaned;
|
|
159
230
|
} else {
|
|
160
|
-
prompt = awsModel.bos_text;
|
|
231
|
+
let prompt = awsModel.bos_text;
|
|
161
232
|
let eom_text_inserted = false;
|
|
162
233
|
|
|
163
234
|
for (let i = 0; i < message_cleaned.length; i++) {
|
|
@@ -213,45 +284,13 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
|
|
|
213
284
|
prompt += `\n${awsModel.eom_text}`;
|
|
214
285
|
}
|
|
215
286
|
}
|
|
287
|
+
return prompt;
|
|
216
288
|
}
|
|
289
|
+
}
|
|
217
290
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
let max_gen_tokens = max_tokens <= awsModel.max_supported_response_tokens ? max_tokens : awsModel.max_supported_response_tokens;
|
|
224
|
-
|
|
225
|
-
if (awsModel.special_request_schema?.thinking?.type === "enabled") {
|
|
226
|
-
// temperature may only be set to 1 when thinking is enabled
|
|
227
|
-
temperature = 1;
|
|
228
|
-
// top_p must be unset when thinking is enabled
|
|
229
|
-
top_p = undefined;
|
|
230
|
-
// bugget_tokens can not be greater than 80% of max_gen_tokens
|
|
231
|
-
let budget_tokens = awsModel.special_request_schema?.thinking?.budget_tokens;
|
|
232
|
-
if (budget_tokens > (max_gen_tokens * 0.8)) {
|
|
233
|
-
budget_tokens = Math.floor(max_gen_tokens * 0.8);
|
|
234
|
-
}
|
|
235
|
-
if (budget_tokens < 1024) {
|
|
236
|
-
budget_tokens = 1024;
|
|
237
|
-
}
|
|
238
|
-
// if awsModel.special_request_schema?.thinking?.budget_tokens, set it to budget_tokens
|
|
239
|
-
if (awsModel.special_request_schema?.thinking?.budget_tokens) {
|
|
240
|
-
awsModel.special_request_schema.thinking.budget_tokens = budget_tokens;
|
|
241
|
-
// max_gen_tokens has to be greater than budget_tokens
|
|
242
|
-
if (max_gen_tokens <= budget_tokens) {
|
|
243
|
-
// make max_gen_tokens 20% greater than budget_tokens
|
|
244
|
-
max_gen_tokens = Math.floor(budget_tokens * 1.2);
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
// if (logging) {
|
|
250
|
-
// console.log("\nMax tokens:", max_gen_tokens);
|
|
251
|
-
// }
|
|
252
|
-
|
|
253
|
-
// Format the request payload using the model's native structure.
|
|
254
|
-
const request = awsModel.messages_api ? (() => {
|
|
291
|
+
// Build request object for Invoke API (model-specific)
|
|
292
|
+
function buildInvokeRequest(prompt, awsModel, max_gen_tokens, temperature, top_p, stop_sequences, stop, system_message) {
|
|
293
|
+
if (awsModel.messages_api) {
|
|
255
294
|
// Check if this is a Nova model (has schemaVersion in special_request_schema)
|
|
256
295
|
if (awsModel.special_request_schema?.schemaVersion === "messages-v1") {
|
|
257
296
|
// Nova model format - convert messages to Nova's expected format
|
|
@@ -323,47 +362,35 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
|
|
|
323
362
|
...awsModel.special_request_schema
|
|
324
363
|
};
|
|
325
364
|
}
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
msg.content
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
// Create a Bedrock Runtime client in the AWS Region of your choice
|
|
351
|
-
const client = new BedrockRuntimeClient({
|
|
352
|
-
region: region,
|
|
353
|
-
credentials: {
|
|
354
|
-
accessKeyId: accessKeyId,
|
|
355
|
-
secretAccessKey: secretAccessKey,
|
|
356
|
-
},
|
|
357
|
-
});
|
|
358
|
-
|
|
359
|
-
if (logging) {
|
|
360
|
-
console.log("\nFinal request:", JSON.stringify(request, null, 2));
|
|
365
|
+
} else {
|
|
366
|
+
return {
|
|
367
|
+
prompt: typeof prompt === 'string' ? prompt : {
|
|
368
|
+
messages: prompt.map(msg => ({
|
|
369
|
+
role: msg.role,
|
|
370
|
+
content: Array.isArray(msg.content) ?
|
|
371
|
+
msg.content.map(item =>
|
|
372
|
+
item.type === 'text' ? item.text : item
|
|
373
|
+
).join('\n') :
|
|
374
|
+
msg.content
|
|
375
|
+
}))
|
|
376
|
+
},
|
|
377
|
+
// Optional inference parameters:
|
|
378
|
+
[awsModel.max_tokens_param_name]: max_gen_tokens,
|
|
379
|
+
temperature: temperature,
|
|
380
|
+
top_p: top_p,
|
|
381
|
+
...(() => {
|
|
382
|
+
const stopSequencesValue = stop_sequences || stop;
|
|
383
|
+
return awsModel.stop_sequences_param_name && stopSequencesValue ? {
|
|
384
|
+
[awsModel.stop_sequences_param_name]: Array.isArray(stopSequencesValue) ? stopSequencesValue : [stopSequencesValue]
|
|
385
|
+
} : {};
|
|
386
|
+
})(),
|
|
387
|
+
...awsModel.special_request_schema
|
|
388
|
+
};
|
|
361
389
|
}
|
|
390
|
+
}
|
|
362
391
|
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
const shouldStream = stream && modelSupportsStreaming;
|
|
366
|
-
|
|
392
|
+
// Execute Invoke API call (streaming and non-streaming)
|
|
393
|
+
async function* executeInvokeAPI(client, request, awsModelId, shouldStream, awsModel, include_thinking_data) {
|
|
367
394
|
if (shouldStream) {
|
|
368
395
|
const responseStream = await client.send(
|
|
369
396
|
new InvokeModelWithResponseStreamCommand({
|
|
@@ -452,7 +479,245 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
|
|
|
452
479
|
result = "";
|
|
453
480
|
}
|
|
454
481
|
yield result;
|
|
455
|
-
}
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging = false, useConverseAPI = false } = {} ) {
|
|
486
|
+
const { region, accessKeyId, secretAccessKey } = awsCreds;
|
|
487
|
+
let { messages, model, max_tokens, stream, temperature, top_p, include_thinking_data, stop, stop_sequences } = openaiChatCompletionsCreateObject;
|
|
488
|
+
|
|
489
|
+
let {awsModelId, awsModel} = findAwsModelWithId(model);
|
|
490
|
+
|
|
491
|
+
// Create a Bedrock Runtime client
|
|
492
|
+
const client = new BedrockRuntimeClient({
|
|
493
|
+
region: region,
|
|
494
|
+
credentials: {
|
|
495
|
+
accessKeyId: accessKeyId,
|
|
496
|
+
secretAccessKey: secretAccessKey,
|
|
497
|
+
},
|
|
498
|
+
});
|
|
499
|
+
|
|
500
|
+
// Calculate max tokens (shared between both APIs)
|
|
501
|
+
let max_gen_tokens = max_tokens <= awsModel.max_supported_response_tokens ? max_tokens : awsModel.max_supported_response_tokens;
|
|
502
|
+
|
|
503
|
+
// Check if model supports streaming
|
|
504
|
+
const modelSupportsStreaming = awsModel.streaming_supported !== false;
|
|
505
|
+
const shouldStream = stream && modelSupportsStreaming;
|
|
506
|
+
|
|
507
|
+
// ============================
|
|
508
|
+
// CONVERSE API PATH (SIMPLIFIED)
|
|
509
|
+
// ============================
|
|
510
|
+
if (useConverseAPI) {
|
|
511
|
+
// Convert messages to Converse API format (no model-specific complexity)
|
|
512
|
+
const { messages: converseMessages, system: systemPrompts } = await convertToConverseFormat(messages);
|
|
513
|
+
|
|
514
|
+
// Build inference configuration (handle thinking mode for Claude models)
|
|
515
|
+
const inferenceConfig = {
|
|
516
|
+
maxTokens: max_gen_tokens,
|
|
517
|
+
temperature: temperature,
|
|
518
|
+
...(top_p !== undefined && { topP: top_p })
|
|
519
|
+
};
|
|
520
|
+
|
|
521
|
+
// Handle thinking mode for Claude models
|
|
522
|
+
let budget_tokens;
|
|
523
|
+
if (awsModel.special_request_schema?.thinking?.type === "enabled") {
|
|
524
|
+
// Apply thinking mode constraints for Converse API
|
|
525
|
+
inferenceConfig.temperature = 1; // temperature must be 1 for thinking
|
|
526
|
+
delete inferenceConfig.topP; // top_p must be unset for thinking
|
|
527
|
+
|
|
528
|
+
// Calculate thinking budget configuration
|
|
529
|
+
budget_tokens = awsModel.special_request_schema?.thinking?.budget_tokens;
|
|
530
|
+
if (budget_tokens > (max_gen_tokens * 0.8)) {
|
|
531
|
+
budget_tokens = Math.floor(max_gen_tokens * 0.8);
|
|
532
|
+
}
|
|
533
|
+
if (budget_tokens < 1024) {
|
|
534
|
+
budget_tokens = 1024;
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
// Ensure max tokens is sufficient for thinking
|
|
538
|
+
if (inferenceConfig.maxTokens <= budget_tokens) {
|
|
539
|
+
inferenceConfig.maxTokens = Math.floor(budget_tokens * 1.2);
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
// Add stop sequences if provided (unified format)
|
|
544
|
+
const stopSequencesValue = stop_sequences || stop;
|
|
545
|
+
if (stopSequencesValue) {
|
|
546
|
+
inferenceConfig.stopSequences = Array.isArray(stopSequencesValue) ?
|
|
547
|
+
stopSequencesValue : [stopSequencesValue];
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
// Build the Converse API request (simple, unified format)
|
|
551
|
+
const converseRequest = {
|
|
552
|
+
modelId: awsModelId,
|
|
553
|
+
messages: converseMessages,
|
|
554
|
+
inferenceConfig: inferenceConfig
|
|
555
|
+
};
|
|
556
|
+
|
|
557
|
+
// Add system prompts if any
|
|
558
|
+
if (systemPrompts.length > 0) {
|
|
559
|
+
converseRequest.system = systemPrompts;
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
// Add thinking configuration for Claude models
|
|
563
|
+
if (awsModel.special_request_schema?.thinking?.type === "enabled") {
|
|
564
|
+
converseRequest.additionalModelRequestFields = {
|
|
565
|
+
thinking: {
|
|
566
|
+
type: "enabled",
|
|
567
|
+
budget_tokens: budget_tokens
|
|
568
|
+
}
|
|
569
|
+
};
|
|
570
|
+
|
|
571
|
+
if (awsModel.special_request_schema?.anthropic_beta) {
|
|
572
|
+
converseRequest.additionalModelRequestFields.anthropic_beta = awsModel.special_request_schema.anthropic_beta;
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
if (logging) {
|
|
577
|
+
console.log("\nConverse API request:", JSON.stringify(converseRequest, null, 2));
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
if (shouldStream) {
|
|
581
|
+
// Use ConverseStream for streaming responses
|
|
582
|
+
const responseStream = await client.send(new ConverseStreamCommand(converseRequest));
|
|
583
|
+
|
|
584
|
+
let is_thinking = false;
|
|
585
|
+
let should_think = include_thinking_data && awsModel.special_request_schema?.thinking?.type === "enabled";
|
|
586
|
+
|
|
587
|
+
for await (const event of responseStream.stream) {
|
|
588
|
+
if (event.contentBlockDelta) {
|
|
589
|
+
const text = event.contentBlockDelta.delta?.text;
|
|
590
|
+
const thinking = event.contentBlockDelta.delta?.thinking;
|
|
591
|
+
const reasoningContent = event.contentBlockDelta.delta?.reasoningContent;
|
|
592
|
+
|
|
593
|
+
// Handle Claude thinking data (streaming) - check both reasoningContent and thinking
|
|
594
|
+
const thinkingText = reasoningContent?.reasoningText?.text || thinking;
|
|
595
|
+
if (should_think && thinkingText) {
|
|
596
|
+
if (!is_thinking) {
|
|
597
|
+
is_thinking = true;
|
|
598
|
+
yield `<think>${thinkingText}`;
|
|
599
|
+
} else {
|
|
600
|
+
yield thinkingText;
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
// Handle regular text content
|
|
604
|
+
else if (text) {
|
|
605
|
+
// End thinking mode if we were in it
|
|
606
|
+
if (is_thinking) {
|
|
607
|
+
is_thinking = false;
|
|
608
|
+
yield `</think>\n\n${text}`;
|
|
609
|
+
} else {
|
|
610
|
+
// Process reasoning tags for GPT-OSS models only
|
|
611
|
+
const processedText = processReasoningTags(text, awsModel);
|
|
612
|
+
if (processedText) {
|
|
613
|
+
yield processedText;
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
// Close thinking tag if still open
|
|
621
|
+
if (is_thinking) {
|
|
622
|
+
yield "</think>";
|
|
623
|
+
}
|
|
624
|
+
} else {
|
|
625
|
+
// Use Converse for non-streaming responses
|
|
626
|
+
const response = await client.send(new ConverseCommand(converseRequest));
|
|
627
|
+
|
|
628
|
+
if (logging) {
|
|
629
|
+
console.log("\nConverse API response:", JSON.stringify(response, null, 2));
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
// Extract text and thinking from response (handle Claude thinking)
|
|
633
|
+
if (response.output && response.output.message && response.output.message.content) {
|
|
634
|
+
let thinking_result = "";
|
|
635
|
+
let text_result = "";
|
|
636
|
+
|
|
637
|
+
for (const contentBlock of response.output.message.content) {
|
|
638
|
+
// Extract thinking data for Claude models (from reasoningContent)
|
|
639
|
+
if (include_thinking_data && contentBlock.reasoningContent &&
|
|
640
|
+
awsModel.special_request_schema?.thinking?.type === "enabled") {
|
|
641
|
+
const reasoningText = contentBlock.reasoningContent.reasoningText?.text;
|
|
642
|
+
if (reasoningText) {
|
|
643
|
+
thinking_result += reasoningText;
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
// Also check for legacy thinking field format
|
|
648
|
+
if (include_thinking_data && contentBlock.thinking &&
|
|
649
|
+
awsModel.special_request_schema?.thinking?.type === "enabled") {
|
|
650
|
+
thinking_result += contentBlock.thinking;
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
// Extract regular text content
|
|
654
|
+
if (contentBlock.text) {
|
|
655
|
+
text_result += contentBlock.text;
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
// Process reasoning tags for GPT-OSS models
|
|
660
|
+
text_result = processReasoningTags(text_result, awsModel);
|
|
661
|
+
|
|
662
|
+
// Combine thinking and text for Claude models
|
|
663
|
+
let result = thinking_result ? `<think>${thinking_result}</think>\n\n${text_result}` : text_result;
|
|
664
|
+
|
|
665
|
+
if (result) {
|
|
666
|
+
yield result;
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
return; // Exit early when using Converse API
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
// ============================
|
|
674
|
+
// INVOKE API PATH (COMPLEX, MODEL-SPECIFIC)
|
|
675
|
+
// ============================
|
|
676
|
+
|
|
677
|
+
// Process messages for Invoke API (complex, model-specific)
|
|
678
|
+
const { message_cleaned, system_message } = await processMessagesForInvoke(messages, awsModel);
|
|
679
|
+
|
|
680
|
+
// Build prompt for Invoke API (complex, model-specific)
|
|
681
|
+
const prompt = buildInvokePrompt(message_cleaned, awsModel);
|
|
682
|
+
|
|
683
|
+
if (logging) {
|
|
684
|
+
console.log("\nFinal formatted prompt:", prompt);
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
// Handle thinking mode adjustments (Invoke API specific)
|
|
688
|
+
if (awsModel.special_request_schema?.thinking?.type === "enabled") {
|
|
689
|
+
// temperature may only be set to 1 when thinking is enabled
|
|
690
|
+
temperature = 1;
|
|
691
|
+
// top_p must be unset when thinking is enabled
|
|
692
|
+
top_p = undefined;
|
|
693
|
+
// budget_tokens can not be greater than 80% of max_gen_tokens
|
|
694
|
+
let budget_tokens = awsModel.special_request_schema?.thinking?.budget_tokens;
|
|
695
|
+
if (budget_tokens > (max_gen_tokens * 0.8)) {
|
|
696
|
+
budget_tokens = Math.floor(max_gen_tokens * 0.8);
|
|
697
|
+
}
|
|
698
|
+
if (budget_tokens < 1024) {
|
|
699
|
+
budget_tokens = 1024;
|
|
700
|
+
}
|
|
701
|
+
// if awsModel.special_request_schema?.thinking?.budget_tokens, set it to budget_tokens
|
|
702
|
+
if (awsModel.special_request_schema?.thinking?.budget_tokens) {
|
|
703
|
+
awsModel.special_request_schema.thinking.budget_tokens = budget_tokens;
|
|
704
|
+
// max_gen_tokens has to be greater than budget_tokens
|
|
705
|
+
if (max_gen_tokens <= budget_tokens) {
|
|
706
|
+
// make max_gen_tokens 20% greater than budget_tokens
|
|
707
|
+
max_gen_tokens = Math.floor(budget_tokens * 1.2);
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
// Build request for Invoke API (complex, model-specific)
|
|
713
|
+
const request = buildInvokeRequest(prompt, awsModel, max_gen_tokens, temperature, top_p, stop_sequences, stop, system_message);
|
|
714
|
+
|
|
715
|
+
if (logging) {
|
|
716
|
+
console.log("\nFinal request:", JSON.stringify(request, null, 2));
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
// Execute Invoke API call (complex, model-specific response parsing)
|
|
720
|
+
yield* executeInvokeAPI(client, request, awsModelId, shouldStream, awsModel, include_thinking_data);
|
|
456
721
|
}
|
|
457
722
|
|
|
458
723
|
|