bedrock-wrapper 2.4.4 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/README.md +63 -2
- package/bedrock-models.js +78 -0
- package/bedrock-wrapper.js +378 -85
- package/example-converse-api.js +116 -0
- package/interactive-example.js +18 -10
- package/logs/e4cf59ef-9d22-45bf-9c6c-53e3cb9efda3/notification.json +58 -0
- package/logs/e4cf59ef-9d22-45bf-9c6c-53e3cb9efda3/post_tool_use.json +7977 -0
- package/logs/e4cf59ef-9d22-45bf-9c6c-53e3cb9efda3/pre_tool_use.json +2541 -0
- package/logs/e4cf59ef-9d22-45bf-9c6c-53e3cb9efda3/stop.json +86 -0
- package/logs/e4cf59ef-9d22-45bf-9c6c-53e3cb9efda3/user_prompt_submit.json +86 -0
- package/package.json +12 -5
- package/test-converse-api.js +347 -0
- package/test-models.js +96 -20
- package/test-stop-sequences.js +171 -43
- package/test-vision.js +88 -28
package/test-models.js
CHANGED
|
@@ -51,7 +51,7 @@ async function logOutput(message, type = 'info', writeToFile = true ) {
|
|
|
51
51
|
}
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
async function testModel(model, awsCreds, testMessage, isStreaming) {
|
|
54
|
+
async function testModel(model, awsCreds, testMessage, isStreaming, useConverseAPI, apiName) {
|
|
55
55
|
const messages = [{ role: "user", content: testMessage }];
|
|
56
56
|
const openaiChatCompletionsCreateObject = {
|
|
57
57
|
messages,
|
|
@@ -60,17 +60,18 @@ async function testModel(model, awsCreds, testMessage, isStreaming) {
|
|
|
60
60
|
stream: isStreaming,
|
|
61
61
|
temperature: LLM_TEMPERATURE,
|
|
62
62
|
top_p: LLM_TOP_P,
|
|
63
|
+
include_thinking_data: true,
|
|
63
64
|
};
|
|
64
65
|
|
|
65
66
|
let completeResponse = "";
|
|
66
67
|
|
|
67
68
|
try {
|
|
68
69
|
if (isStreaming) {
|
|
69
|
-
for await (const chunk of bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: true })) {
|
|
70
|
+
for await (const chunk of bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: true, useConverseAPI })) {
|
|
70
71
|
completeResponse += chunk;
|
|
71
72
|
}
|
|
72
73
|
} else {
|
|
73
|
-
const response = await bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: true });
|
|
74
|
+
const response = await bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: true, useConverseAPI });
|
|
74
75
|
for await (const data of response) {
|
|
75
76
|
completeResponse += data;
|
|
76
77
|
}
|
|
@@ -88,12 +89,24 @@ async function testModel(model, awsCreds, testMessage, isStreaming) {
|
|
|
88
89
|
}
|
|
89
90
|
|
|
90
91
|
async function main() {
|
|
92
|
+
// Check for command-line arguments
|
|
93
|
+
const args = process.argv.slice(2);
|
|
94
|
+
const testBothAPIs = args.includes('--both') || args.includes('--compare');
|
|
95
|
+
const useConverseOnly = args.includes('--converse');
|
|
96
|
+
|
|
91
97
|
const testMessage = "Respond with exactly one word: What is 1+1?";
|
|
92
98
|
|
|
99
|
+
// Determine test mode
|
|
100
|
+
let testMode = "Invoke API";
|
|
101
|
+
if (useConverseOnly) testMode = "Converse API";
|
|
102
|
+
if (testBothAPIs) testMode = "Both APIs (Comparison)";
|
|
103
|
+
|
|
93
104
|
// Clear output file and add header
|
|
94
105
|
await fs.writeFile('test-models-output.txt',
|
|
106
|
+
`Test Mode: ${testMode}\n` +
|
|
95
107
|
`Test Question: "${testMessage}"\n` +
|
|
96
|
-
|
|
108
|
+
`Test Date: ${new Date().toISOString()}\n` +
|
|
109
|
+
`${'='.repeat(60)}\n\n`
|
|
97
110
|
);
|
|
98
111
|
|
|
99
112
|
const supportedModels = await listBedrockWrapperSupportedModels();
|
|
@@ -105,7 +118,7 @@ async function main() {
|
|
|
105
118
|
});
|
|
106
119
|
|
|
107
120
|
console.clear();
|
|
108
|
-
await logOutput(`Starting tests with ${availableModels.length} models...`, 'info');
|
|
121
|
+
await logOutput(`Starting tests with ${availableModels.length} models using ${testMode}...`, 'info');
|
|
109
122
|
await logOutput(`Each model will be tested with streaming and non-streaming calls\n`, 'info');
|
|
110
123
|
|
|
111
124
|
const awsCreds = {
|
|
@@ -115,30 +128,93 @@ async function main() {
|
|
|
115
128
|
};
|
|
116
129
|
|
|
117
130
|
for (const model of availableModels) {
|
|
118
|
-
await logOutput(`\n${'-'.repeat(
|
|
131
|
+
await logOutput(`\n${'-'.repeat(60)}\nTesting ${model} ⇢`, 'running');
|
|
132
|
+
|
|
133
|
+
if (testBothAPIs) {
|
|
134
|
+
// Test both APIs and compare
|
|
135
|
+
await logOutput(`\n📡 Testing with Invoke API:`, 'info');
|
|
136
|
+
|
|
137
|
+
// Invoke API streaming test
|
|
138
|
+
const invokeStreamResult = await testModel(model, awsCreds, testMessage, true, false, "Invoke API");
|
|
139
|
+
if (invokeStreamResult.success) {
|
|
140
|
+
await logOutput(`✓ Invoke API Streaming: "${invokeStreamResult.response}"`, 'success');
|
|
141
|
+
} else {
|
|
142
|
+
await logOutput(`✗ Invoke API Streaming: ${invokeStreamResult.error}`, 'error');
|
|
143
|
+
}
|
|
119
144
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
145
|
+
// Invoke API non-streaming test
|
|
146
|
+
const invokeNonStreamResult = await testModel(model, awsCreds, testMessage, false, false, "Invoke API");
|
|
147
|
+
if (invokeNonStreamResult.success) {
|
|
148
|
+
await logOutput(`✓ Invoke API Non-streaming: "${invokeNonStreamResult.response}"`, 'success');
|
|
149
|
+
} else {
|
|
150
|
+
await logOutput(`✗ Invoke API Non-streaming: ${invokeNonStreamResult.error}`, 'error');
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
await logOutput(`\n📡 Testing with Converse API:`, 'info');
|
|
154
|
+
|
|
155
|
+
// Converse API streaming test
|
|
156
|
+
const converseStreamResult = await testModel(model, awsCreds, testMessage, true, true, "Converse API");
|
|
157
|
+
if (converseStreamResult.success) {
|
|
158
|
+
await logOutput(`✓ Converse API Streaming: "${converseStreamResult.response}"`, 'success');
|
|
159
|
+
} else {
|
|
160
|
+
await logOutput(`✗ Converse API Streaming: ${converseStreamResult.error}`, 'error');
|
|
161
|
+
}
|
|
127
162
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
163
|
+
// Converse API non-streaming test
|
|
164
|
+
const converseNonStreamResult = await testModel(model, awsCreds, testMessage, false, true, "Converse API");
|
|
165
|
+
if (converseNonStreamResult.success) {
|
|
166
|
+
await logOutput(`✓ Converse API Non-streaming: "${converseNonStreamResult.response}"`, 'success');
|
|
167
|
+
} else {
|
|
168
|
+
await logOutput(`✗ Converse API Non-streaming: ${converseNonStreamResult.error}`, 'error');
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Compare results
|
|
172
|
+
const invokeSuccess = invokeStreamResult.success && invokeNonStreamResult.success;
|
|
173
|
+
const converseSuccess = converseStreamResult.success && converseNonStreamResult.success;
|
|
174
|
+
|
|
175
|
+
if (invokeSuccess && converseSuccess) {
|
|
176
|
+
await logOutput(`🔍 Both APIs successful for ${model}`, 'success');
|
|
177
|
+
} else if (invokeSuccess || converseSuccess) {
|
|
178
|
+
await logOutput(`⚠ Partial success for ${model}`, 'warning');
|
|
179
|
+
} else {
|
|
180
|
+
await logOutput(`❌ Both APIs failed for ${model}`, 'error');
|
|
181
|
+
}
|
|
182
|
+
|
|
132
183
|
} else {
|
|
133
|
-
|
|
184
|
+
// Test single API
|
|
185
|
+
const useConverseAPI = useConverseOnly;
|
|
186
|
+
const apiName = useConverseAPI ? "Converse API" : "Invoke API";
|
|
187
|
+
|
|
188
|
+
// Test streaming
|
|
189
|
+
const streamResult = await testModel(model, awsCreds, testMessage, true, useConverseAPI, apiName);
|
|
190
|
+
if (streamResult.success) {
|
|
191
|
+
await logOutput(`✓ ${apiName} Streaming: "${streamResult.response}"`, 'success');
|
|
192
|
+
} else {
|
|
193
|
+
await logOutput(`✗ ${apiName} Streaming: ${streamResult.error}`, 'error');
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// Test non-streaming
|
|
197
|
+
const nonStreamResult = await testModel(model, awsCreds, testMessage, false, useConverseAPI, apiName);
|
|
198
|
+
if (nonStreamResult.success) {
|
|
199
|
+
await logOutput(`✓ ${apiName} Non-streaming: "${nonStreamResult.response}"`, 'success');
|
|
200
|
+
} else {
|
|
201
|
+
await logOutput(`✗ ${apiName} Non-streaming: ${nonStreamResult.error}`, 'error');
|
|
202
|
+
}
|
|
134
203
|
}
|
|
135
204
|
|
|
136
|
-
console.log('');
|
|
205
|
+
console.log('\n' + '-'.repeat(40));
|
|
137
206
|
}
|
|
138
207
|
|
|
139
|
-
await logOutput('
|
|
208
|
+
await logOutput('\nTesting complete! Check test-models-output.txt for full test results.', 'info', false);
|
|
140
209
|
}
|
|
141
210
|
|
|
211
|
+
// Add usage info
|
|
212
|
+
console.log('Model Test Usage:');
|
|
213
|
+
console.log(' npm run test # Test with Invoke API (default)');
|
|
214
|
+
console.log(' npm run test -- --converse # Test with Converse API only');
|
|
215
|
+
console.log(' npm run test -- --both # Test both APIs and compare');
|
|
216
|
+
console.log('\n');
|
|
217
|
+
|
|
142
218
|
main().catch(async (error) => {
|
|
143
219
|
await logOutput(`Fatal Error: ${error.message}`, 'error');
|
|
144
220
|
console.error(error);
|
package/test-stop-sequences.js
CHANGED
|
@@ -49,7 +49,7 @@ async function logOutput(message, type = 'info', writeToFile = true ) {
|
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
51
|
|
|
52
|
-
async function testStopSequence(model, awsCreds, testCase, isStreaming) {
|
|
52
|
+
async function testStopSequence(model, awsCreds, testCase, isStreaming, useConverseAPI, apiName) {
|
|
53
53
|
const messages = [{ role: "user", content: testCase.prompt }];
|
|
54
54
|
const openaiChatCompletionsCreateObject = {
|
|
55
55
|
messages,
|
|
@@ -65,11 +65,11 @@ async function testStopSequence(model, awsCreds, testCase, isStreaming) {
|
|
|
65
65
|
|
|
66
66
|
try {
|
|
67
67
|
if (isStreaming) {
|
|
68
|
-
for await (const chunk of bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: false })) {
|
|
68
|
+
for await (const chunk of bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: false, useConverseAPI })) {
|
|
69
69
|
completeResponse += chunk;
|
|
70
70
|
}
|
|
71
71
|
} else {
|
|
72
|
-
const response = await bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: false });
|
|
72
|
+
const response = await bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: false, useConverseAPI });
|
|
73
73
|
for await (const data of response) {
|
|
74
74
|
completeResponse += data;
|
|
75
75
|
}
|
|
@@ -159,10 +159,21 @@ const stopSequenceTestCases = [
|
|
|
159
159
|
];
|
|
160
160
|
|
|
161
161
|
async function main() {
|
|
162
|
+
// Check for command-line arguments
|
|
163
|
+
const args = process.argv.slice(2);
|
|
164
|
+
const testBothAPIs = args.includes('--both') || args.includes('--compare');
|
|
165
|
+
const useConverseOnly = args.includes('--converse');
|
|
166
|
+
|
|
167
|
+
// Determine test mode
|
|
168
|
+
let testMode = "Invoke API";
|
|
169
|
+
if (useConverseOnly) testMode = "Converse API";
|
|
170
|
+
if (testBothAPIs) testMode = "Both APIs (Comparison)";
|
|
171
|
+
|
|
162
172
|
// Clear output file and add header
|
|
163
173
|
const timestamp = new Date().toISOString();
|
|
164
174
|
await fs.writeFile('test-stop-sequences-output.txt',
|
|
165
|
-
`Stop Sequences Test Results - ${
|
|
175
|
+
`Stop Sequences Test Results - ${testMode}\n` +
|
|
176
|
+
`Test Date: ${timestamp}\n` +
|
|
166
177
|
`${'='.repeat(80)}\n\n` +
|
|
167
178
|
`This test validates that stop sequences work correctly across all models.\n` +
|
|
168
179
|
`Each model is tested with multiple stop sequence scenarios.\n\n`
|
|
@@ -174,7 +185,7 @@ async function main() {
|
|
|
174
185
|
});
|
|
175
186
|
|
|
176
187
|
console.clear();
|
|
177
|
-
await logOutput(`Starting stop sequences tests with ${availableModels.length} models...`, 'info');
|
|
188
|
+
await logOutput(`Starting stop sequences tests with ${availableModels.length} models using ${testMode}...`, 'info');
|
|
178
189
|
await logOutput(`Testing ${stopSequenceTestCases.length} different stop sequence scenarios\n`, 'info');
|
|
179
190
|
|
|
180
191
|
const awsCreds = {
|
|
@@ -193,6 +204,10 @@ async function main() {
|
|
|
193
204
|
"Claude-3-Haiku",
|
|
194
205
|
"Nova-Pro",
|
|
195
206
|
"Nova-Lite",
|
|
207
|
+
"GPT-OSS-120B",
|
|
208
|
+
"GPT-OSS-120B-Thinking",
|
|
209
|
+
"GPT-OSS-20B",
|
|
210
|
+
"GPT-OSS-20B-Thinking",
|
|
196
211
|
"Llama-3-3-70b",
|
|
197
212
|
"Mistral-7b"
|
|
198
213
|
].filter(m => availableModels.includes(m));
|
|
@@ -204,50 +219,133 @@ async function main() {
|
|
|
204
219
|
await logOutput(`Testing ${model}`, 'running');
|
|
205
220
|
await logOutput(`${'='.repeat(60)}`, 'info');
|
|
206
221
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
222
|
+
if (testBothAPIs) {
|
|
223
|
+
modelResults[model] = {
|
|
224
|
+
invoke: { streaming: { passed: 0, failed: 0 }, nonStreaming: { passed: 0, failed: 0 } },
|
|
225
|
+
converse: { streaming: { passed: 0, failed: 0 }, nonStreaming: { passed: 0, failed: 0 } }
|
|
226
|
+
};
|
|
227
|
+
} else {
|
|
228
|
+
modelResults[model] = {
|
|
229
|
+
streaming: { passed: 0, failed: 0 },
|
|
230
|
+
nonStreaming: { passed: 0, failed: 0 }
|
|
231
|
+
};
|
|
232
|
+
}
|
|
211
233
|
|
|
212
234
|
for (const testCase of stopSequenceTestCases) {
|
|
213
235
|
await logOutput(`\n▶ Test Case: ${testCase.name}`, 'info');
|
|
214
236
|
await logOutput(` Prompt: "${testCase.prompt.substring(0, 50)}..."`, 'info');
|
|
215
237
|
await logOutput(` Stop sequences: [${testCase.stopSequences.join(', ')}]`, 'info');
|
|
216
238
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
239
|
+
if (testBothAPIs) {
|
|
240
|
+
// Test both APIs and compare
|
|
241
|
+
await logOutput(`\n 📡 Testing with Invoke API:`, 'info');
|
|
242
|
+
|
|
243
|
+
// Invoke API streaming test
|
|
244
|
+
const invokeStreamResult = await testStopSequence(model, awsCreds, testCase, true, false, "Invoke API");
|
|
245
|
+
if (invokeStreamResult.success) {
|
|
246
|
+
if (invokeStreamResult.stoppedCorrectly) {
|
|
247
|
+
await logOutput(` ✓ Invoke Streaming: PASSED - ${invokeStreamResult.analysis}`, 'success');
|
|
248
|
+
modelResults[model].invoke.streaming.passed++;
|
|
249
|
+
} else {
|
|
250
|
+
await logOutput(` ✗ Invoke Streaming: FAILED - ${invokeStreamResult.analysis}`, 'warning');
|
|
251
|
+
modelResults[model].invoke.streaming.failed++;
|
|
252
|
+
}
|
|
253
|
+
await logOutput(` Response: "${invokeStreamResult.response.substring(0, 100)}..."`, 'info');
|
|
225
254
|
} else {
|
|
226
|
-
await logOutput(`
|
|
227
|
-
modelResults[model].streaming.failed++;
|
|
255
|
+
await logOutput(` ✗ Invoke Streaming: ERROR - ${invokeStreamResult.error}`, 'error');
|
|
256
|
+
modelResults[model].invoke.streaming.failed++;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
// Invoke API non-streaming test
|
|
260
|
+
const invokeNonStreamResult = await testStopSequence(model, awsCreds, testCase, false, false, "Invoke API");
|
|
261
|
+
if (invokeNonStreamResult.success) {
|
|
262
|
+
if (invokeNonStreamResult.stoppedCorrectly) {
|
|
263
|
+
await logOutput(` ✓ Invoke Non-streaming: PASSED - ${invokeNonStreamResult.analysis}`, 'success');
|
|
264
|
+
modelResults[model].invoke.nonStreaming.passed++;
|
|
265
|
+
} else {
|
|
266
|
+
await logOutput(` ✗ Invoke Non-streaming: FAILED - ${invokeNonStreamResult.analysis}`, 'warning');
|
|
267
|
+
modelResults[model].invoke.nonStreaming.failed++;
|
|
268
|
+
}
|
|
269
|
+
await logOutput(` Response: "${invokeNonStreamResult.response.substring(0, 100)}..."`, 'info');
|
|
270
|
+
} else {
|
|
271
|
+
await logOutput(` ✗ Invoke Non-streaming: ERROR - ${invokeNonStreamResult.error}`, 'error');
|
|
272
|
+
modelResults[model].invoke.nonStreaming.failed++;
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
await logOutput(`\n 📡 Testing with Converse API:`, 'info');
|
|
276
|
+
|
|
277
|
+
// Converse API streaming test
|
|
278
|
+
const converseStreamResult = await testStopSequence(model, awsCreds, testCase, true, true, "Converse API");
|
|
279
|
+
if (converseStreamResult.success) {
|
|
280
|
+
if (converseStreamResult.stoppedCorrectly) {
|
|
281
|
+
await logOutput(` ✓ Converse Streaming: PASSED - ${converseStreamResult.analysis}`, 'success');
|
|
282
|
+
modelResults[model].converse.streaming.passed++;
|
|
283
|
+
} else {
|
|
284
|
+
await logOutput(` ✗ Converse Streaming: FAILED - ${converseStreamResult.analysis}`, 'warning');
|
|
285
|
+
modelResults[model].converse.streaming.failed++;
|
|
286
|
+
}
|
|
287
|
+
await logOutput(` Response: "${converseStreamResult.response.substring(0, 100)}..."`, 'info');
|
|
288
|
+
} else {
|
|
289
|
+
await logOutput(` ✗ Converse Streaming: ERROR - ${converseStreamResult.error}`, 'error');
|
|
290
|
+
modelResults[model].converse.streaming.failed++;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
// Converse API non-streaming test
|
|
294
|
+
const converseNonStreamResult = await testStopSequence(model, awsCreds, testCase, false, true, "Converse API");
|
|
295
|
+
if (converseNonStreamResult.success) {
|
|
296
|
+
if (converseNonStreamResult.stoppedCorrectly) {
|
|
297
|
+
await logOutput(` ✓ Converse Non-streaming: PASSED - ${converseNonStreamResult.analysis}`, 'success');
|
|
298
|
+
modelResults[model].converse.nonStreaming.passed++;
|
|
299
|
+
} else {
|
|
300
|
+
await logOutput(` ✗ Converse Non-streaming: FAILED - ${converseNonStreamResult.analysis}`, 'warning');
|
|
301
|
+
modelResults[model].converse.nonStreaming.failed++;
|
|
302
|
+
}
|
|
303
|
+
await logOutput(` Response: "${converseNonStreamResult.response.substring(0, 100)}..."`, 'info');
|
|
304
|
+
} else {
|
|
305
|
+
await logOutput(` ✗ Converse Non-streaming: ERROR - ${converseNonStreamResult.error}`, 'error');
|
|
306
|
+
modelResults[model].converse.nonStreaming.failed++;
|
|
228
307
|
}
|
|
229
|
-
|
|
308
|
+
|
|
230
309
|
} else {
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
310
|
+
// Test single API
|
|
311
|
+
const useConverseAPI = useConverseOnly;
|
|
312
|
+
const apiName = useConverseAPI ? "Converse API" : "Invoke API";
|
|
313
|
+
|
|
314
|
+
// Test streaming
|
|
315
|
+
await logOutput(` Testing streaming with ${apiName}...`, 'info');
|
|
316
|
+
const streamResult = await testStopSequence(model, awsCreds, testCase, true, useConverseAPI, apiName);
|
|
317
|
+
|
|
318
|
+
if (streamResult.success) {
|
|
319
|
+
if (streamResult.stoppedCorrectly) {
|
|
320
|
+
await logOutput(` ✓ Streaming: PASSED - ${streamResult.analysis}`, 'success');
|
|
321
|
+
modelResults[model].streaming.passed++;
|
|
322
|
+
} else {
|
|
323
|
+
await logOutput(` ✗ Streaming: FAILED - ${streamResult.analysis}`, 'warning');
|
|
324
|
+
modelResults[model].streaming.failed++;
|
|
325
|
+
}
|
|
326
|
+
await logOutput(` Response: "${streamResult.response.substring(0, 100)}..."`, 'info');
|
|
327
|
+
} else {
|
|
328
|
+
await logOutput(` ✗ Streaming: ERROR - ${streamResult.error}`, 'error');
|
|
329
|
+
modelResults[model].streaming.failed++;
|
|
330
|
+
}
|
|
234
331
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
332
|
+
// Test non-streaming
|
|
333
|
+
await logOutput(` Testing non-streaming with ${apiName}...`, 'info');
|
|
334
|
+
const nonStreamResult = await testStopSequence(model, awsCreds, testCase, false, useConverseAPI, apiName);
|
|
335
|
+
|
|
336
|
+
if (nonStreamResult.success) {
|
|
337
|
+
if (nonStreamResult.stoppedCorrectly) {
|
|
338
|
+
await logOutput(` ✓ Non-streaming: PASSED - ${nonStreamResult.analysis}`, 'success');
|
|
339
|
+
modelResults[model].nonStreaming.passed++;
|
|
340
|
+
} else {
|
|
341
|
+
await logOutput(` ✗ Non-streaming: FAILED - ${nonStreamResult.analysis}`, 'warning');
|
|
342
|
+
modelResults[model].nonStreaming.failed++;
|
|
343
|
+
}
|
|
344
|
+
await logOutput(` Response: "${nonStreamResult.response.substring(0, 100)}..."`, 'info');
|
|
243
345
|
} else {
|
|
244
|
-
await logOutput(` ✗ Non-streaming:
|
|
346
|
+
await logOutput(` ✗ Non-streaming: ERROR - ${nonStreamResult.error}`, 'error');
|
|
245
347
|
modelResults[model].nonStreaming.failed++;
|
|
246
348
|
}
|
|
247
|
-
await logOutput(` Response: "${nonStreamResult.response.substring(0, 100)}..."`, 'info');
|
|
248
|
-
} else {
|
|
249
|
-
await logOutput(` ✗ Non-streaming: ERROR - ${nonStreamResult.error}`, 'error');
|
|
250
|
-
modelResults[model].nonStreaming.failed++;
|
|
251
349
|
}
|
|
252
350
|
}
|
|
253
351
|
}
|
|
@@ -258,19 +356,49 @@ async function main() {
|
|
|
258
356
|
await logOutput(`${'='.repeat(80)}\n`, 'info');
|
|
259
357
|
|
|
260
358
|
for (const [model, results] of Object.entries(modelResults)) {
|
|
261
|
-
const streamingRate = (results.streaming.passed / (results.streaming.passed + results.streaming.failed) * 100).toFixed(1);
|
|
262
|
-
const nonStreamingRate = (results.nonStreaming.passed / (results.nonStreaming.passed + results.nonStreaming.failed) * 100).toFixed(1);
|
|
263
|
-
|
|
264
359
|
await logOutput(`${model}:`, 'info');
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
360
|
+
|
|
361
|
+
if (testBothAPIs) {
|
|
362
|
+
// Both APIs summary
|
|
363
|
+
const invokeStreamingRate = (results.invoke.streaming.passed / (results.invoke.streaming.passed + results.invoke.streaming.failed) * 100).toFixed(1);
|
|
364
|
+
const invokeNonStreamingRate = (results.invoke.nonStreaming.passed / (results.invoke.nonStreaming.passed + results.invoke.nonStreaming.failed) * 100).toFixed(1);
|
|
365
|
+
const converseStreamingRate = (results.converse.streaming.passed / (results.converse.streaming.passed + results.converse.streaming.failed) * 100).toFixed(1);
|
|
366
|
+
const converseNonStreamingRate = (results.converse.nonStreaming.passed / (results.converse.nonStreaming.passed + results.converse.nonStreaming.failed) * 100).toFixed(1);
|
|
367
|
+
|
|
368
|
+
await logOutput(` Invoke API:`, 'info');
|
|
369
|
+
await logOutput(` Streaming: ${results.invoke.streaming.passed}/${results.invoke.streaming.passed + results.invoke.streaming.failed} passed (${invokeStreamingRate}%)`,
|
|
370
|
+
invokeStreamingRate > 80 ? 'success' : 'warning');
|
|
371
|
+
await logOutput(` Non-streaming: ${results.invoke.nonStreaming.passed}/${results.invoke.nonStreaming.passed + results.invoke.nonStreaming.failed} passed (${invokeNonStreamingRate}%)`,
|
|
372
|
+
invokeNonStreamingRate > 80 ? 'success' : 'warning');
|
|
373
|
+
|
|
374
|
+
await logOutput(` Converse API:`, 'info');
|
|
375
|
+
await logOutput(` Streaming: ${results.converse.streaming.passed}/${results.converse.streaming.passed + results.converse.streaming.failed} passed (${converseStreamingRate}%)`,
|
|
376
|
+
converseStreamingRate > 80 ? 'success' : 'warning');
|
|
377
|
+
await logOutput(` Non-streaming: ${results.converse.nonStreaming.passed}/${results.converse.nonStreaming.passed + results.converse.nonStreaming.failed} passed (${converseNonStreamingRate}%)`,
|
|
378
|
+
converseNonStreamingRate > 80 ? 'success' : 'warning');
|
|
379
|
+
|
|
380
|
+
} else {
|
|
381
|
+
// Single API summary
|
|
382
|
+
const streamingRate = (results.streaming.passed / (results.streaming.passed + results.streaming.failed) * 100).toFixed(1);
|
|
383
|
+
const nonStreamingRate = (results.nonStreaming.passed / (results.nonStreaming.passed + results.nonStreaming.failed) * 100).toFixed(1);
|
|
384
|
+
|
|
385
|
+
await logOutput(` Streaming: ${results.streaming.passed}/${results.streaming.passed + results.streaming.failed} passed (${streamingRate}%)`,
|
|
386
|
+
streamingRate > 80 ? 'success' : 'warning');
|
|
387
|
+
await logOutput(` Non-streaming: ${results.nonStreaming.passed}/${results.nonStreaming.passed + results.nonStreaming.failed} passed (${nonStreamingRate}%)`,
|
|
388
|
+
nonStreamingRate > 80 ? 'success' : 'warning');
|
|
389
|
+
}
|
|
269
390
|
}
|
|
270
391
|
|
|
271
392
|
await logOutput('\nTesting complete! Check test-stop-sequences-output.txt for full results.', 'info', false);
|
|
272
393
|
}
|
|
273
394
|
|
|
395
|
+
// Add usage info
|
|
396
|
+
console.log('Stop Sequences Test Usage:');
|
|
397
|
+
console.log(' npm run test-stop # Test with Invoke API (default)');
|
|
398
|
+
console.log(' npm run test-stop -- --converse # Test with Converse API only');
|
|
399
|
+
console.log(' npm run test-stop -- --both # Test both APIs and compare');
|
|
400
|
+
console.log('\n');
|
|
401
|
+
|
|
274
402
|
main().catch(async (error) => {
|
|
275
403
|
await logOutput(`Fatal Error: ${error.message}`, 'error');
|
|
276
404
|
console.error(error);
|
package/test-vision.js
CHANGED
|
@@ -37,7 +37,37 @@ async function logOutput(message, type = 'info', writeToFile = true) {
|
|
|
37
37
|
}
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
+
async function testVisionModel(model, messages, useConverseAPI, apiName) {
|
|
41
|
+
const openaiChatCompletionsCreateObject = {
|
|
42
|
+
messages,
|
|
43
|
+
model,
|
|
44
|
+
max_tokens: 1000,
|
|
45
|
+
stream: true,
|
|
46
|
+
temperature: 0.7
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
try {
|
|
50
|
+
console.log(`\nSending request to ${model} using ${apiName}...`);
|
|
51
|
+
|
|
52
|
+
let response = "";
|
|
53
|
+
for await (const chunk of bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: false, useConverseAPI })) {
|
|
54
|
+
response += chunk;
|
|
55
|
+
process.stdout.write(chunk);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
return { success: true, response: response.trim() };
|
|
59
|
+
|
|
60
|
+
} catch (error) {
|
|
61
|
+
return { success: false, error: error.message };
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
40
65
|
async function testVisionCapabilities() {
|
|
66
|
+
// Check for command-line arguments
|
|
67
|
+
const args = process.argv.slice(2);
|
|
68
|
+
const testBothAPIs = args.includes('--both') || args.includes('--compare');
|
|
69
|
+
const useConverseOnly = args.includes('--converse');
|
|
70
|
+
|
|
41
71
|
// Read and convert image to base64
|
|
42
72
|
const imageBuffer = await fs.readFile('./test-image.jpg');
|
|
43
73
|
const base64Image = imageBuffer.toString('base64');
|
|
@@ -68,58 +98,88 @@ async function testVisionCapabilities() {
|
|
|
68
98
|
.filter(model => model.vision === true)
|
|
69
99
|
.map(model => model.modelName);
|
|
70
100
|
|
|
101
|
+
// Determine test mode
|
|
102
|
+
let testMode = "Invoke API";
|
|
103
|
+
if (useConverseOnly) testMode = "Converse API";
|
|
104
|
+
if (testBothAPIs) testMode = "Both APIs (Comparison)";
|
|
105
|
+
|
|
71
106
|
// Clear output file and add header
|
|
72
107
|
await fs.writeFile('test-vision-models-output.txt',
|
|
73
|
-
`Vision Test Results\n` +
|
|
108
|
+
`Vision Test Results - ${testMode}\n` +
|
|
74
109
|
`Test Question: "${testPrompt}"\n` +
|
|
75
110
|
`Test Date: ${new Date().toISOString()}\n` +
|
|
76
|
-
`${'='.repeat(
|
|
111
|
+
`${'='.repeat(60)}\n\n`
|
|
77
112
|
);
|
|
78
113
|
|
|
79
114
|
console.clear();
|
|
80
|
-
await logOutput(`Starting vision tests with ${visionModels.length} models...`, 'info');
|
|
115
|
+
await logOutput(`Starting vision tests with ${visionModels.length} models using ${testMode}...`, 'info');
|
|
81
116
|
await logOutput(`Testing image description capabilities\n`, 'info');
|
|
82
117
|
|
|
83
118
|
for (const model of visionModels) {
|
|
84
|
-
await logOutput(`\n${'-'.repeat(
|
|
119
|
+
await logOutput(`\n${'-'.repeat(60)}\nTesting ${model} ⇢`, 'running');
|
|
85
120
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
121
|
+
if (testBothAPIs) {
|
|
122
|
+
// Test both APIs and compare
|
|
123
|
+
await logOutput(`\n📡 Testing with Invoke API:`, 'info');
|
|
124
|
+
const invokeResult = await testVisionModel(model, messages, false, "Invoke API");
|
|
125
|
+
|
|
126
|
+
if (invokeResult.success) {
|
|
127
|
+
await logOutput(`✓ Invoke API: Success`, 'success');
|
|
128
|
+
await logOutput(`Response: ${invokeResult.response.substring(0, 150)}...\n`, 'info');
|
|
129
|
+
} else {
|
|
130
|
+
await logOutput(`✗ Invoke API: ${invokeResult.error}`, 'error');
|
|
131
|
+
}
|
|
96
132
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
133
|
+
await logOutput(`📡 Testing with Converse API:`, 'info');
|
|
134
|
+
const converseResult = await testVisionModel(model, messages, true, "Converse API");
|
|
135
|
+
|
|
136
|
+
if (converseResult.success) {
|
|
137
|
+
await logOutput(`✓ Converse API: Success`, 'success');
|
|
138
|
+
await logOutput(`Response: ${converseResult.response.substring(0, 150)}...\n`, 'info');
|
|
139
|
+
} else {
|
|
140
|
+
await logOutput(`✗ Converse API: ${converseResult.error}`, 'error');
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Compare results
|
|
144
|
+
if (invokeResult.success && converseResult.success) {
|
|
145
|
+
await logOutput(`🔍 Both APIs successful for ${model}`, 'success');
|
|
146
|
+
} else if (invokeResult.success || converseResult.success) {
|
|
147
|
+
await logOutput(`⚠ Partial success for ${model}`, 'warning');
|
|
148
|
+
} else {
|
|
149
|
+
await logOutput(`❌ Both APIs failed for ${model}`, 'error');
|
|
101
150
|
}
|
|
102
151
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
152
|
+
} else {
|
|
153
|
+
// Test single API
|
|
154
|
+
const useConverseAPI = useConverseOnly;
|
|
155
|
+
const apiName = useConverseAPI ? "Converse API" : "Invoke API";
|
|
106
156
|
|
|
107
|
-
|
|
108
|
-
const errorMessage = `Error with ${model}: ${error.message}`;
|
|
109
|
-
await logOutput(errorMessage, 'error');
|
|
157
|
+
const result = await testVisionModel(model, messages, useConverseAPI, apiName);
|
|
110
158
|
|
|
111
|
-
|
|
112
|
-
|
|
159
|
+
if (result.success) {
|
|
160
|
+
await logOutput(`\n✓ ${apiName}: Success`, 'success');
|
|
161
|
+
await logOutput(`Response: ${result.response}\n`, 'info', true);
|
|
162
|
+
} else {
|
|
163
|
+
await logOutput(`\n✗ ${apiName}: ${result.error}`, 'error');
|
|
164
|
+
|
|
165
|
+
// Log the full error details to file
|
|
113
166
|
await fs.appendFile('test-vision-models-output.txt',
|
|
114
|
-
`Error details: ${
|
|
167
|
+
`Error details: ${result.error}\n\n`
|
|
115
168
|
);
|
|
116
169
|
}
|
|
117
170
|
}
|
|
118
171
|
|
|
119
|
-
console.log("\n
|
|
172
|
+
console.log("\n" + "-".repeat(40));
|
|
120
173
|
}
|
|
121
174
|
|
|
122
175
|
await logOutput('\nVision testing complete! Check test-vision-models-output.txt for full results.', 'info', false);
|
|
123
176
|
}
|
|
124
177
|
|
|
178
|
+
// Add usage info
|
|
179
|
+
console.log('Vision Test Usage:');
|
|
180
|
+
console.log(' npm run test-vision # Test with Invoke API (default)');
|
|
181
|
+
console.log(' npm run test-vision -- --converse # Test with Converse API only');
|
|
182
|
+
console.log(' npm run test-vision -- --both # Test both APIs and compare');
|
|
183
|
+
console.log('\n');
|
|
184
|
+
|
|
125
185
|
testVisionCapabilities().catch(console.error);
|