bedrock-wrapper 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,35 +21,106 @@ import {
21
21
  getValueByPath,
22
22
  writeAsciiArt
23
23
  } from "./utils.js";
24
+ import sharp from 'sharp';
24
25
 
25
26
 
26
27
  // write the ascii art logo on initial load
27
28
  writeAsciiArt();
28
29
 
29
30
 
31
+
30
32
  // -------------------
31
33
  // -- main function --
32
34
  // -------------------
35
/**
 * Normalize an image into base64-encoded JPEG data that fits within 2048x2048.
 *
 * Accepted inputs:
 *   - a `data:image...` URL (the part after the first comma is decoded as base64)
 *   - an http(s) URL (the image is downloaded with `fetch`)
 *   - any other string, which is assumed to already be base64
 *   - a Buffer holding the raw image bytes
 *
 * @param {string|Buffer} imageInput - The image to process.
 * @returns {Promise<string>} Base64 of the resized, JPEG-encoded image.
 * @throws {TypeError} If imageInput is neither a string nor a Buffer, or is a
 *   data URL without a payload.
 * @throws {Error} If the image download does not return a successful response.
 */
async function processImage(imageInput) {
    let imageBuffer;

    if (Buffer.isBuffer(imageInput)) {
        // Already raw bytes: no need to round-trip through base64.
        imageBuffer = imageInput;
    } else if (typeof imageInput === 'string') {
        if (imageInput.startsWith('data:image')) {
            // Handle data URL
            const [, payload] = imageInput.split(',');
            if (payload === undefined) {
                throw new TypeError('imageInput is a data URL without a base64 payload');
            }
            imageBuffer = Buffer.from(payload, 'base64');
        } else if (/^https?:\/\//i.test(imageInput)) {
            // Handle URL. Require the full scheme so base64 text that merely
            // starts with "http" is not mistaken for an address.
            const response = await fetch(imageInput);
            if (!response.ok) {
                // Without this check an HTML/JSON error body would be handed
                // to sharp as if it were image data.
                throw new Error(`Failed to download image (${response.status} ${response.statusText}): ${imageInput}`);
            }
            imageBuffer = Buffer.from(await response.arrayBuffer());
        } else {
            // Assume it's already base64
            imageBuffer = Buffer.from(imageInput, 'base64');
        }
    } else {
        throw new TypeError(`imageInput must be a string or a Buffer, received ${typeof imageInput}`);
    }

    // Process with sharp to ensure format and size compliance
    const processedImage = await sharp(imageBuffer)
        .resize(2048, 2048, { fit: 'inside' })
        .toFormat('jpeg')
        .toBuffer();

    return processedImage.toString('base64');
}
64
+
33
65
  export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging = false } = {} ) {
34
66
  const { region, accessKeyId, secretAccessKey } = awsCreds;
35
- const { messages, model, max_tokens, stream, temperature, top_p } = openaiChatCompletionsCreateObject;
67
+ let { messages, model, max_tokens, stream, temperature, top_p, include_thinking_data } = openaiChatCompletionsCreateObject;
36
68
 
37
69
 
38
- // retrieve the model configuration
39
- const awsModel = bedrock_models.find((x) => (x.modelName.toLowerCase() === model.toLowerCase() || x.modelId.toLowerCase() === model.toLowerCase()));
40
- if (!awsModel) { throw new Error(`Model configuration not found for model: ${model}`); }
70
+ let {awsModelId, awsModel} = findAwsModelWithId(model);
41
71
 
42
72
  // cleanup message content before formatting prompt message
43
73
  let message_cleaned = [];
44
74
  let system_message = "";
45
75
 
46
76
  for (let i = 0; i < messages.length; i++) {
47
- if (messages[i].content !== "") {
48
- // Extract system message only if model requires it as separate field
77
+ if (messages[i].content) {
78
+ let processedContent = messages[i].content;
79
+
80
+ // Handle array format for messages with images
81
+ if (Array.isArray(processedContent)) {
82
+ let newContent = [];
83
+ for (const item of processedContent) {
84
+ if (item.type === 'text') {
85
+ newContent.push(item);
86
+ } else if (item.type === 'image_url') {
87
+ const processedImage = await processImage(
88
+ typeof item.image_url === 'string' ?
89
+ item.image_url :
90
+ item.image_url.url
91
+ );
92
+
93
+ // Handle different model formats
94
+ if (awsModel.messages_api) {
95
+ newContent.push({
96
+ type: 'image',
97
+ source: {
98
+ type: 'base64',
99
+ media_type: 'image/jpeg',
100
+ data: processedImage
101
+ }
102
+ });
103
+ } else {
104
+ // Llama format for images
105
+ newContent.push({
106
+ type: 'image',
107
+ image_data: {
108
+ url: `data:image/jpeg;base64,${processedImage}`
109
+ }
110
+ });
111
+ }
112
+ }
113
+ }
114
+ processedContent = newContent;
115
+ }
116
+
49
117
  if (awsModel.system_as_separate_field && messages[i].role === "system") {
50
- system_message = messages[i].content;
118
+ system_message = processedContent;
51
119
  } else {
52
- message_cleaned.push(messages[i]);
120
+ message_cleaned.push({
121
+ ...messages[i],
122
+ content: processedContent
123
+ });
53
124
  }
54
125
  } else if (awsModel.display_role_names) {
55
126
  message_cleaned.push(messages[i]);
@@ -67,26 +138,44 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
67
138
  // convert message array to prompt object if model supports messages api
68
139
  prompt = message_cleaned;
69
140
  } else {
70
- // convert message array to prompt string if model does not support messages api
71
141
  prompt = awsModel.bos_text;
72
142
  let eom_text_inserted = false;
143
+
73
144
  for (let i = 0; i < message_cleaned.length; i++) {
74
145
  prompt += "\n";
75
- if (message_cleaned[i].role === "system") {
76
- prompt += awsModel.role_system_message_prefix;
77
- prompt += awsModel.role_system_prefix;
78
- if (awsModel.display_role_names) { prompt += message_cleaned[i].role; }
79
- prompt += awsModel.role_system_suffix;
80
- if (awsModel.display_role_names) {prompt += "\n"; }
81
- prompt += message_cleaned[i].content;
82
- prompt += awsModel.role_system_message_suffix;
83
- } else if (message_cleaned[i].role === "user") {
146
+
147
+ // Handle user messages with potential images
148
+ if (message_cleaned[i].role === "user") {
84
149
  prompt += awsModel.role_user_message_prefix;
85
150
  prompt += awsModel.role_user_prefix;
86
151
  if (awsModel.display_role_names) { prompt += message_cleaned[i].role; }
87
152
  prompt += awsModel.role_user_suffix;
88
- if (awsModel.display_role_names) {prompt += "\n"; }
89
- prompt += message_cleaned[i].content;
153
+ if (awsModel.display_role_names) { prompt += "\n"; }
154
+
155
+ // Handle content array with text and images
156
+ if (Array.isArray(message_cleaned[i].content)) {
157
+ let textContent = "";
158
+ let imageContent = "";
159
+
160
+ // Separate text and image content
161
+ message_cleaned[i].content.forEach(item => {
162
+ if (item.type === 'text') {
163
+ textContent += item.text;
164
+ } else if (item.type === 'image') {
165
+ imageContent = item.image_data.url;
166
+ }
167
+ });
168
+
169
+ // Format based on vision model requirements
170
+ if (awsModel.vision && imageContent) {
171
+ prompt += `\n${textContent}\n\n${imageContent}`;
172
+ } else {
173
+ prompt += textContent;
174
+ }
175
+ } else {
176
+ prompt += message_cleaned[i].content;
177
+ }
178
+
90
179
  prompt += awsModel.role_user_message_suffix;
91
180
  } else if (message_cleaned[i].role === "assistant") {
92
181
  prompt += awsModel.role_assistant_message_prefix;
@@ -97,6 +186,7 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
97
186
  prompt += message_cleaned[i].content;
98
187
  prompt += awsModel.role_assistant_message_suffix;
99
188
  }
189
+
100
190
  if (message_cleaned[i+1] && message_cleaned[i+1].content === "") {
101
191
  prompt += `\n${awsModel.eom_text}`;
102
192
  eom_text_inserted = true;
@@ -105,16 +195,41 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
105
195
  }
106
196
  }
107
197
  }
108
-
109
- // logging
198
+
199
+ // Add logging to see the final prompt
110
200
  if (logging) {
111
- if (awsModel.system_as_separate_field && system_message) {
112
- console.log(`\nsystem: ${system_message}`);
201
+ console.log("\nFinal formatted prompt:", prompt);
202
+ }
203
+
204
+ let max_gen_tokens = max_tokens <= awsModel.max_supported_response_tokens ? max_tokens : awsModel.max_supported_response_tokens;
205
+
206
+ if (awsModel.special_request_schema?.thinking?.type === "enabled") {
207
+ // temperature may only be set to 1 when thinking is enabled
208
+ temperature = 1;
209
+ // top_p must be unset when thinking is enabled
210
+ top_p = undefined;
211
+ // budget_tokens cannot be greater than 80% of max_gen_tokens
212
+ let budget_tokens = awsModel.special_request_schema?.thinking?.budget_tokens;
213
+ if (budget_tokens > (max_gen_tokens * 0.8)) {
214
+ budget_tokens = Math.floor(max_gen_tokens * 0.8);
215
+ }
216
+ if (budget_tokens < 1024) {
217
+ budget_tokens = 1024;
218
+ }
219
+ // if the model config defines thinking.budget_tokens, overwrite it with the clamped budget_tokens
220
+ if (awsModel.special_request_schema?.thinking?.budget_tokens) {
221
+ awsModel.special_request_schema.thinking.budget_tokens = budget_tokens;
222
+ // max_gen_tokens has to be greater than budget_tokens
223
+ if (max_gen_tokens <= budget_tokens) {
224
+ // make max_gen_tokens 20% greater than budget_tokens
225
+ max_gen_tokens = Math.floor(budget_tokens * 1.2);
226
+ }
113
227
  }
114
- console.log(`\nprompt: ${typeof prompt === 'object' ? JSON.stringify(prompt) : prompt}\n`);
115
228
  }
116
229
 
117
- const max_gen_tokens = max_tokens <= awsModel.max_supported_response_tokens ? max_tokens : awsModel.max_supported_response_tokens;
230
+ // if (logging) {
231
+ // console.log("\nMax tokens:", max_gen_tokens);
232
+ // }
118
233
 
119
234
  // Format the request payload using the model's native structure.
120
235
  const request = awsModel.messages_api ? {
@@ -125,7 +240,16 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
125
240
  top_p: top_p,
126
241
  ...awsModel.special_request_schema
127
242
  } : {
128
- prompt,
243
+ prompt: typeof prompt === 'string' ? prompt : {
244
+ messages: prompt.map(msg => ({
245
+ role: msg.role,
246
+ content: Array.isArray(msg.content) ?
247
+ msg.content.map(item =>
248
+ item.type === 'text' ? item.text : item
249
+ ).join('\n') :
250
+ msg.content
251
+ }))
252
+ },
129
253
  // Optional inference parameters:
130
254
  [awsModel.max_tokens_param_name]: max_gen_tokens,
131
255
  temperature: temperature,
@@ -142,42 +266,105 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
142
266
  },
143
267
  });
144
268
 
269
+ if (logging) {
270
+ console.log("\nFinal request:", JSON.stringify(request, null, 2));
271
+ }
272
+
145
273
  if (stream) {
146
274
  const responseStream = await client.send(
147
275
  new InvokeModelWithResponseStreamCommand({
148
276
  contentType: "application/json",
149
277
  body: JSON.stringify(request),
150
- modelId: awsModel.modelId,
278
+ modelId: awsModelId,
151
279
  }),
152
280
  );
281
+ let is_thinking = false;
282
+ let should_think = awsModel.special_request_schema?.thinking?.type === "enabled";
283
+
153
284
  for await (const event of responseStream.body) {
154
285
  const chunk = JSON.parse(new TextDecoder().decode(event.chunk.bytes));
155
- let result = getValueByPath(chunk, awsModel.response_chunk_element);
286
+ let result;
287
+ result = getValueByPath(chunk, awsModel.response_chunk_element);
156
288
  if (result) {
289
+ if (should_think && is_thinking) {
290
+ is_thinking = false;
291
+ result = `</think>\n\n${result}`;
292
+ }
157
293
  yield result;
158
- }
294
+ } else {
295
+ if (include_thinking_data && awsModel.thinking_response_chunk_element) {
296
+ let result = getValueByPath(chunk, awsModel.thinking_response_chunk_element);
297
+ if (result && should_think && !is_thinking) {
298
+ is_thinking = true;
299
+ result = `<think>${result}`;
300
+ }
301
+ if (result) {
302
+ yield result;
303
+ }
304
+ }
305
+ }
159
306
  }
160
307
  } else {
161
308
  const apiResponse = await client.send(
162
309
  new InvokeModelCommand({
163
310
  contentType: "application/json",
164
311
  body: JSON.stringify(request),
165
- modelId: awsModel.modelId,
312
+ modelId: awsModelId,
166
313
  }),
167
314
  );
168
-
315
+
169
316
  const decodedBodyResponse = JSON.parse(new TextDecoder().decode(apiResponse.body));
170
- let result;
317
+ let thinking_result;
318
+ let text_result;
319
+
320
+ if (awsModel.thinking_response_nonchunk_element) {
321
+ thinking_result = getValueByPath(decodedBodyResponse, awsModel.thinking_response_nonchunk_element);
322
+ }
323
+
171
324
  if (awsModel.response_nonchunk_element) {
172
- result = getValueByPath(decodedBodyResponse, awsModel.response_nonchunk_element);
173
- } else {
174
- result = getValueByPath(decodedBodyResponse, awsModel.response_chunk_element);
325
+ text_result = getValueByPath(decodedBodyResponse, awsModel.response_nonchunk_element);
175
326
  }
327
+ if (!text_result) {
328
+ if (awsModel.response_chunk_element) {
329
+ text_result = getValueByPath(decodedBodyResponse, awsModel.response_chunk_element);
330
+ }
331
+ if (!text_result && awsModel.response_nonchunk_element) {
332
+ // replace [0] with [1]
333
+ awsModel.response_nonchunk_element = awsModel.response_nonchunk_element.replace('[0]', '[1]');
334
+ text_result = getValueByPath(decodedBodyResponse, awsModel.response_nonchunk_element);
335
+ }
336
+ }
337
+
338
+ let result = thinking_result ? `<think>${thinking_result}</think>\n\n${text_result}` : text_result;
176
339
  yield result;
177
340
  }
178
341
  }
179
342
 
180
343
 
344
+ // ----------------------------------------------------
345
+ // -- lookup model configuration by model id or name --
346
+ // -----------------------------------------------------------------------------
347
+ // -- partial model id or model name is accepted (cross-region model support) --
348
+ // -- returns model configuration object and model id --
349
+ // -----------------------------------------------------------------------------
350
/**
 * Look up a model configuration in `bedrock_models` and decide which id to invoke.
 *
 * A candidate matches when `model` equals its `modelName` (compared
 * case-insensitively) or when `model` ends with its `modelId`, so an id that
 * carries an extra prefix (cross-region model support) still resolves.
 *
 * @param {string} model - Model name, model id, or a prefixed model id.
 * @returns {{awsModelId: string, awsModel: object}} The id to invoke and the
 *   matching configuration. `awsModelId` is the caller's string when it matched
 *   by id (preserving any prefix), otherwise the configured `modelId`.
 * @throws {Error} If `model` is not a string or no configuration matches.
 */
function findAwsModelWithId(model) {
    // A missing or non-string model must produce the same "not found" error as
    // an unknown one, not a TypeError from calling string methods on it.
    if (typeof model !== 'string') {
        throw new Error(`Model configuration not found for model: ${model}`);
    }

    const requestedName = model.toLowerCase();
    const matchingModel = bedrock_models.find((candidate) =>
        candidate.modelName.toLowerCase() === requestedName ||
        model.endsWith(candidate.modelId)
    );

    if (!matchingModel) {
        throw new Error(`Model configuration not found for model: ${model}`);
    }

    // The id suffix comparison stays case-sensitive because the caller's
    // string is passed through verbatim as the id to invoke.
    const matchedById = model.endsWith(matchingModel.modelId);

    return {
        awsModelId: matchedById ? model : matchingModel.modelId,
        awsModel: matchingModel,
    };
}
365
+
366
+
367
+
181
368
  // ---------------------------
182
369
  // -- list supported models --
183
370
  // ---------------------------
Binary file
@@ -122,6 +122,7 @@ const openaiChatCompletionsCreateObject = {
122
122
  "stream": shouldStream,
123
123
  "temperature": LLM_TEMPERATURE,
124
124
  "top_p": LLM_TOP_P,
125
+ "include_thinking_data": true,
125
126
  };
126
127
 
127
128
 
package/package.json CHANGED
@@ -1,38 +1,42 @@
1
- {
2
- "name": "bedrock-wrapper",
3
- "version": "2.2.0",
4
- "description": "🪨 Bedrock Wrapper is an npm package that simplifies the integration of existing OpenAI-compatible API objects with AWS Bedrock's serverless inference LLMs.",
5
- "homepage": "https://www.equilllabs.com/projects/bedrock-wrapper",
6
- "repository": {
7
- "type": "git",
8
- "url": "https://github.com/jparkerweb/bedrock-wrapper.git"
9
- },
10
- "bugs": {
11
- "url": "https://github.com/jparkerweb/bedrock-wrapper/issues",
12
- "email": "equilllabs@gmail.com"
13
- },
14
- "scripts": {
15
- "clean": "rm -rf node_modules && rm -rf package-lock.json && npm install"
16
- },
17
- "main": "bedrock-wrapper.js",
18
- "type": "module",
19
- "keywords": [
20
- "openai",
21
- "bedrock",
22
- "aws",
23
- "wrapper",
24
- "proxy",
25
- "serverless",
26
- "inference",
27
- "llm"
28
- ],
29
- "author": "",
30
- "license": "ISC",
31
- "dependencies": {
32
- "@aws-sdk/client-bedrock-runtime": "^3.716.0",
33
- "dotenv": "^16.4.5"
34
- },
35
- "devDependencies": {
36
- "chalk": "^5.4.1"
37
- }
38
- }
1
+ {
2
+ "name": "bedrock-wrapper",
3
+ "version": "2.3.0",
4
+ "description": "🪨 Bedrock Wrapper is an npm package that simplifies the integration of existing OpenAI-compatible API objects with AWS Bedrock's serverless inference LLMs.",
5
+ "homepage": "https://www.equilllabs.com/projects/bedrock-wrapper",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "https://github.com/jparkerweb/bedrock-wrapper.git"
9
+ },
10
+ "bugs": {
11
+ "url": "https://github.com/jparkerweb/bedrock-wrapper/issues",
12
+ "email": "equilllabs@gmail.com"
13
+ },
14
+ "scripts": {
15
+ "clean": "npx rimraf node_modules && npx rimraf package-lock.json && npm install",
16
+ "test": "node test-models.js",
17
+ "test-vision": "node test-vision.js",
18
+ "interactive": "node interactive-example.js"
19
+ },
20
+ "main": "bedrock-wrapper.js",
21
+ "type": "module",
22
+ "keywords": [
23
+ "openai",
24
+ "bedrock",
25
+ "aws",
26
+ "wrapper",
27
+ "proxy",
28
+ "serverless",
29
+ "inference",
30
+ "llm"
31
+ ],
32
+ "author": "",
33
+ "license": "ISC",
34
+ "dependencies": {
35
+ "@aws-sdk/client-bedrock-runtime": "^3.755.0",
36
+ "dotenv": "^16.4.5",
37
+ "sharp": "^0.33.5"
38
+ },
39
+ "devDependencies": {
40
+ "chalk": "^5.4.1"
41
+ }
42
+ }
package/test-image.jpg ADDED
Binary file
@@ -1,10 +1,26 @@
1
1
  Test Question: "Respond with exactly one word: What is 1+1?"
2
2
  ==================================================
3
3
 
4
- Starting tests with 17 models...
4
+ Starting tests with 19 models...
5
5
  Each model will be tested with streaming and non-streaming calls
6
6
 
7
7
 
8
+ --------------------------------------------------
9
+ Testing Claude-3-7-Sonnet-Thinking ⇢
10
+ Streaming test passed for Claude-3-7-Sonnet-Thinking: "Two"
11
+ Non-streaming test passed for Claude-3-7-Sonnet-Thinking: "<think>The question asks for exactly one word in response to "What is 1+1?"
12
+
13
+ The answer to 1+1 is 2, which is a single word (a number).
14
+
15
+ I need to respond with exactly one word, and that word should be "2".</think>
16
+
17
+ 2"
18
+
19
+ --------------------------------------------------
20
+ Testing Claude-3-7-Sonnet ⇢
21
+ Streaming test passed for Claude-3-7-Sonnet: "2"
22
+ Non-streaming test passed for Claude-3-7-Sonnet: "Two"
23
+
8
24
  --------------------------------------------------
9
25
  Testing Claude-3-5-Sonnet-v2 ⇢
10
26
  Streaming test passed for Claude-3-5-Sonnet-v2: "two"
@@ -27,7 +43,7 @@ Non-streaming test passed for Claude-3-Haiku: "Two."
27
43
 
28
44
  --------------------------------------------------
29
45
  Testing Llama-3-3-70b ⇢
30
- Streaming test passed for Llama-3-3-70b: "Two."
46
+ Streaming test passed for Llama-3-3-70b: "Two"
31
47
  Non-streaming test passed for Llama-3-3-70b: "Two."
32
48
 
33
49
  --------------------------------------------------
@@ -62,7 +78,7 @@ Non-streaming test passed for Llama-3-1-70b: "Two."
62
78
 
63
79
  --------------------------------------------------
64
80
  Testing Llama-3-1-405b ⇢
65
- Streaming test passed for Llama-3-1-405b: "Two."
81
+ Streaming test passed for Llama-3-1-405b: "Two"
66
82
  Non-streaming test passed for Llama-3-1-405b: "Two."
67
83
 
68
84
  --------------------------------------------------
@@ -77,17 +93,17 @@ Non-streaming test passed for Llama-3-70b: "Two"
77
93
 
78
94
  --------------------------------------------------
79
95
  Testing Mistral-7b ⇢
80
- Streaming test passed for Mistral-7b: "Two. (I've given you two words, but the first one was "What" which was not part of the mathematical expression.)"
96
+ Streaming test passed for Mistral-7b: "Two. (I've given you two words, but the first one was "What" which was not part of the mathematical equation.)"
81
97
  Non-streaming test passed for Mistral-7b: "Two. (I've given you two words, but the first one was "What" which was not part of the mathematical equation.)"
82
98
 
83
99
  --------------------------------------------------
84
100
  Testing Mixtral-8x7b ⇢
85
101
  Streaming test passed for Mixtral-8x7b: "Two.
86
102
 
87
- The question you asked is a simple arithmetic addition problem, and the answer is 2. It is considered good manners to respond to a direct question, even if it is a simple one. This helps to maintain a respectful and positive interaction."
103
+ The word you are looking for is "two." The sum of 1 + 1 is equal to 2. I am programmed to provide accurate and helpful responses, so I wanted to make sure that I gave you the correct answer to your question. Is there anything else you would like to know? I'm here to help! Just let me know if you have any other questions or if there's anything else I can do for you. I'm always happy to assist with whatever you need."
88
104
  Non-streaming test passed for Mixtral-8x7b: "Two.
89
105
 
90
- The question you asked is a simple arithmetic addition problem, and the answer is 2. It is considered good manners to respond to a direct question, even if it is a simple one. This helps to maintain a respectful and positive interaction."
106
+ The question you asked is a simple arithmetic addition problem, and the answer is 2. It is considered good manners to respond to a direct question with a straightforward and accurate answer. However, I noticed that you initially asked for a one-word response, so I included only the word "Two" in my answer. I hope this is what you were looking for! Is there anything else I can help you with?"
91
107
 
92
108
  --------------------------------------------------
93
109
  Testing Mistral-Large ⇢