bedrock-wrapper 2.4.2 → 2.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -4
- package/README.md +19 -10
- package/bedrock-models.js +62 -14
- package/bedrock-wrapper.js +20 -1
- package/package.json +4 -3
- package/test-stop-sequences.js +277 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,20 +1,66 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
|
3
3
|
|
|
4
|
+
## [2.4.4] - 2025-08-05 (Claude 4.1 Opus)
|
|
5
|
+
### Added
|
|
6
|
+
- Support for Claude 4.1 Opus models
|
|
7
|
+
- Claude-4-1-Opus
|
|
8
|
+
- Claude-4-1-Opus-Thinking
|
|
9
|
+
|
|
10
|
+
## [2.4.3] - 2025-07-31 (Stop Sequences Fixes)
|
|
11
|
+
### Fixed
|
|
12
|
+
- **Critical Discovery**: Removed stop sequences support from Llama models
|
|
13
|
+
- AWS Bedrock does not support stop sequences for Llama models (confirmed via official AWS documentation)
|
|
14
|
+
- Llama models only support: `prompt`, `temperature`, `top_p`, `max_gen_len`, `images`
|
|
15
|
+
- This is an AWS Bedrock limitation, not a wrapper limitation
|
|
16
|
+
- Fixed Nova model configuration conflicts that were causing stop sequence inconsistencies
|
|
17
|
+
- Removed conflicting empty `inferenceConfig: {}` from Nova model configurations
|
|
18
|
+
- Improved error handling for empty responses when stop sequences trigger early
|
|
19
|
+
|
|
20
|
+
### Updated
|
|
21
|
+
- **Documentation corrections**
|
|
22
|
+
- Corrected stop sequences support claims (removed "all models support" language)
|
|
23
|
+
- Added accurate model-specific support matrix with sequence limits
|
|
24
|
+
- Added comprehensive stop sequences support table with AWS documentation references
|
|
25
|
+
- **Model Support Matrix** now clearly documented:
|
|
26
|
+
- ✅ Claude models: Full support (up to 8,191 sequences)
|
|
27
|
+
- ✅ Nova models: Full support (up to 4 sequences)
|
|
28
|
+
- ✅ Mistral models: Full support (up to 10 sequences)
|
|
29
|
+
- ❌ Llama models: Not supported (AWS Bedrock limitation)
|
|
30
|
+
|
|
31
|
+
### Technical Details
|
|
32
|
+
- Based on comprehensive research of official AWS Bedrock documentation
|
|
33
|
+
- All changes maintain full backward compatibility
|
|
34
|
+
- Test results show significant improvements in stop sequences reliability for supported models
|
|
35
|
+
- Added detailed explanations to help users understand AWS Bedrock's actual capabilities
|
|
36
|
+
|
|
4
37
|
## [2.4.2] - 2025-07-31 (Stop Sequences Support)
|
|
5
38
|
### Added
|
|
6
|
-
- Stop sequences support for
|
|
39
|
+
- Stop sequences support for compatible models
|
|
7
40
|
- OpenAI-compatible `stop` and `stop_sequences` parameters
|
|
8
41
|
- Automatic string-to-array conversion for compatibility
|
|
9
|
-
- Model-specific parameter mapping (stop_sequences for Claude, stopSequences for Nova, stop for
|
|
42
|
+
- Model-specific parameter mapping (stop_sequences for Claude, stopSequences for Nova, stop for Mistral)
|
|
10
43
|
- Enhanced request building logic to include stop sequences in appropriate API formats
|
|
11
|
-
- Comprehensive stop sequences testing and validation
|
|
44
|
+
- Comprehensive stop sequences testing and validation with `npm run test-stop`
|
|
45
|
+
|
|
46
|
+
### Fixed
|
|
47
|
+
- **Critical Discovery**: Removed stop sequences support from Llama models
|
|
48
|
+
- AWS Bedrock does not support stop sequences for Llama models (confirmed via official documentation)
|
|
49
|
+
- Llama models only support: `prompt`, `temperature`, `top_p`, `max_gen_len`, `images`
|
|
50
|
+
- This is an AWS Bedrock limitation, not a wrapper limitation
|
|
51
|
+
- Fixed Nova model configuration conflicts that were causing stop sequence inconsistencies
|
|
52
|
+
- Improved error handling for empty responses when stop sequences trigger early
|
|
12
53
|
|
|
13
54
|
### Technical Details
|
|
14
|
-
-
|
|
55
|
+
- **Model Support Matrix**:
|
|
56
|
+
- ✅ Claude models: Full support (up to 8,191 sequences)
|
|
57
|
+
- ✅ Nova models: Full support (up to 4 sequences)
|
|
58
|
+
- ✅ Mistral models: Full support (up to 10 sequences)
|
|
59
|
+
- ❌ Llama models: Not supported (AWS Bedrock limitation)
|
|
15
60
|
- Updated request construction for both messages API and prompt-based models
|
|
16
61
|
- Supports both single string and array formats for stop sequences
|
|
17
62
|
- Maintains full backward compatibility with existing API usage
|
|
63
|
+
- Added comprehensive documentation in README.md and CLAUDE.md explaining support limitations
|
|
18
64
|
|
|
19
65
|
## [2.4.0] - 2025-07-24 (AWS Nova Models)
|
|
20
66
|
### Added
|
package/README.md
CHANGED
|
@@ -104,19 +104,21 @@ Bedrock Wrapper is an npm package that simplifies the integration of existing Op
|
|
|
104
104
|
|
|
105
105
|
| modelName | AWS Model Id | Image |
|
|
106
106
|
|----------------------------|----------------------------------------------|-------|
|
|
107
|
-
| Claude-4-Opus | us.anthropic.claude-opus-4-20250514-v1:0 | ✅ |
|
|
108
|
-
| Claude-4-Opus-Thinking | us.anthropic.claude-opus-4-20250514-v1:0 | ✅ |
|
|
109
|
-
| Claude-4-Sonnet | us.anthropic.claude-sonnet-4-20250514-v1:0 | ✅ |
|
|
110
|
-
| Claude-4-Sonnet-Thinking | us.anthropic.claude-sonnet-4-20250514-v1:0 | ✅ |
|
|
107
|
+
| Claude-4-1-Opus | us.anthropic.claude-opus-4-1-20250805-v1:0 | ✅ |
|
|
108
|
+
| Claude-4-1-Opus-Thinking | us.anthropic.claude-opus-4-1-20250805-v1:0 | ✅ |
|
|
109
|
+
| Claude-4-Opus | us.anthropic.claude-opus-4-20250514-v1:0 | ✅ |
|
|
110
|
+
| Claude-4-Opus-Thinking | us.anthropic.claude-opus-4-20250514-v1:0 | ✅ |
|
|
111
|
+
| Claude-4-Sonnet | us.anthropic.claude-sonnet-4-20250514-v1:0 | ✅ |
|
|
112
|
+
| Claude-4-Sonnet-Thinking | us.anthropic.claude-sonnet-4-20250514-v1:0 | ✅ |
|
|
111
113
|
| Claude-3-7-Sonnet-Thinking | us.anthropic.claude-3-7-sonnet-20250219-v1:0 | ✅ |
|
|
112
114
|
| Claude-3-7-Sonnet | us.anthropic.claude-3-7-sonnet-20250219-v1:0 | ✅ |
|
|
113
115
|
| Claude-3-5-Sonnet-v2 | anthropic.claude-3-5-sonnet-20241022-v2:0 | ✅ |
|
|
114
116
|
| Claude-3-5-Sonnet | anthropic.claude-3-5-sonnet-20240620-v1:0 | ✅ |
|
|
115
117
|
| Claude-3-5-Haiku | anthropic.claude-3-5-haiku-20241022-v1:0 | ❌ |
|
|
116
118
|
| Claude-3-Haiku | anthropic.claude-3-haiku-20240307-v1:0 | ✅ |
|
|
117
|
-
| Nova-Pro | us.amazon.nova-pro-v1:0
|
|
118
|
-
| Nova-Lite | us.amazon.nova-lite-v1:0
|
|
119
|
-
| Nova-Micro | us.amazon.nova-micro-v1:0
|
|
119
|
+
| Nova-Pro | us.amazon.nova-pro-v1:0 | ✅ |
|
|
120
|
+
| Nova-Lite | us.amazon.nova-lite-v1:0 | ✅ |
|
|
121
|
+
| Nova-Micro | us.amazon.nova-micro-v1:0 | ❌ |
|
|
120
122
|
| Llama-3-3-70b | us.meta.llama3-3-70b-instruct-v1:0 | ❌ |
|
|
121
123
|
| Llama-3-2-1b | us.meta.llama3-2-1b-instruct-v1:0 | ❌ |
|
|
122
124
|
| Llama-3-2-3b | us.meta.llama3-2-3b-instruct-v1:0 | ❌ |
|
|
@@ -192,7 +194,7 @@ You can include multiple images in a single message by adding more image_url obj
|
|
|
192
194
|
|
|
193
195
|
### Stop Sequences
|
|
194
196
|
|
|
195
|
-
|
|
197
|
+
Stop sequences are custom text sequences that cause the model to stop generating text. This is useful for controlling where the model stops its response.
|
|
196
198
|
|
|
197
199
|
```javascript
|
|
198
200
|
const openaiChatCompletionsCreateObject = {
|
|
@@ -205,11 +207,16 @@ const openaiChatCompletionsCreateObject = {
|
|
|
205
207
|
};
|
|
206
208
|
```
|
|
207
209
|
|
|
210
|
+
**Model Support:**
|
|
211
|
+
- ✅ **Claude models**: Fully supported (up to 8,191 sequences)
|
|
212
|
+
- ✅ **Nova models**: Fully supported (up to 4 sequences)
|
|
213
|
+
- ✅ **Mistral models**: Fully supported (up to 10 sequences)
|
|
214
|
+
- ❌ **Llama models**: Not supported (AWS Bedrock limitation)
|
|
215
|
+
|
|
208
216
|
**Features:**
|
|
209
217
|
- Compatible with OpenAI's `stop` parameter (single string or array)
|
|
210
218
|
- Also accepts `stop_sequences` parameter for explicit usage
|
|
211
219
|
- Automatic conversion between string and array formats
|
|
212
|
-
- Works with all 26+ supported models (Claude, Nova, Llama, Mistral)
|
|
213
220
|
- Model-specific parameter mapping handled automatically
|
|
214
221
|
|
|
215
222
|
**Example Usage:**
|
|
@@ -217,10 +224,12 @@ const openaiChatCompletionsCreateObject = {
|
|
|
217
224
|
// Stop generation when model tries to output "7"
|
|
218
225
|
const result = await bedrockWrapper(awsCreds, {
|
|
219
226
|
messages: [{ role: "user", content: "Count from 1 to 10" }],
|
|
220
|
-
model: "Claude-3-5-Sonnet",
|
|
227
|
+
model: "Claude-3-5-Sonnet", // Use Claude, Nova, or Mistral models
|
|
221
228
|
stop_sequences: ["7"]
|
|
222
229
|
});
|
|
223
230
|
// Response: "1, 2, 3, 4, 5, 6," (stops before "7")
|
|
231
|
+
|
|
232
|
+
// Note: Llama models will ignore stop sequences due to AWS Bedrock limitations
|
|
224
233
|
```
|
|
225
234
|
|
|
226
235
|
---
|
package/bedrock-models.js
CHANGED
|
@@ -6,6 +6,66 @@
|
|
|
6
6
|
// https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/cross-region-inference
|
|
7
7
|
|
|
8
8
|
export const bedrock_models = [
|
|
9
|
+
{
|
|
10
|
+
// =====================
|
|
11
|
+
// == Claude 4.1 Opus ==
|
|
12
|
+
// =====================
|
|
13
|
+
"modelName": "Claude-4-1-Opus",
|
|
14
|
+
// "modelId": "anthropic.claude-opus-4-1-20250805-v1:0",
|
|
15
|
+
"modelId": "us.anthropic.claude-opus-4-1-20250805-v1:0",
|
|
16
|
+
"vision": true,
|
|
17
|
+
"messages_api": true,
|
|
18
|
+
"system_as_separate_field": true,
|
|
19
|
+
"display_role_names": true,
|
|
20
|
+
"max_tokens_param_name": "max_tokens",
|
|
21
|
+
"max_supported_response_tokens": 131072,
|
|
22
|
+
"stop_sequences_param_name": "stop_sequences",
|
|
23
|
+
"response_chunk_element": "delta.text",
|
|
24
|
+
"response_nonchunk_element": "content[0].text",
|
|
25
|
+
"thinking_response_chunk_element": "delta.thinking",
|
|
26
|
+
"thinking_response_nonchunk_element": "content[0].thinking",
|
|
27
|
+
"special_request_schema": {
|
|
28
|
+
"anthropic_version": "bedrock-2023-05-31",
|
|
29
|
+
"anthropic_beta": ["output-128k-2025-02-19"],
|
|
30
|
+
},
|
|
31
|
+
"image_support": {
|
|
32
|
+
"max_image_size": 20971520, // 20MB
|
|
33
|
+
"supported_formats": ["jpeg", "png", "gif", "webp"],
|
|
34
|
+
"max_images_per_request": 10
|
|
35
|
+
}
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
// ==============================
|
|
39
|
+
// == Claude 4.1 Opus Thinking ==
|
|
40
|
+
// ==============================
|
|
41
|
+
"modelName": "Claude-4-1-Opus-Thinking",
|
|
42
|
+
// "modelId": "anthropic.claude-opus-4-1-20250805-v1:0",
|
|
43
|
+
"modelId": "us.anthropic.claude-opus-4-1-20250805-v1:0",
|
|
44
|
+
"vision": true,
|
|
45
|
+
"messages_api": true,
|
|
46
|
+
"system_as_separate_field": true,
|
|
47
|
+
"display_role_names": true,
|
|
48
|
+
"max_tokens_param_name": "max_tokens",
|
|
49
|
+
"max_supported_response_tokens": 131072,
|
|
50
|
+
"stop_sequences_param_name": "stop_sequences",
|
|
51
|
+
"response_chunk_element": "delta.text",
|
|
52
|
+
"response_nonchunk_element": "content[0].text",
|
|
53
|
+
"thinking_response_chunk_element": "delta.thinking",
|
|
54
|
+
"thinking_response_nonchunk_element": "content[0].thinking",
|
|
55
|
+
"special_request_schema": {
|
|
56
|
+
"anthropic_version": "bedrock-2023-05-31",
|
|
57
|
+
"anthropic_beta": ["output-128k-2025-02-19"],
|
|
58
|
+
"thinking": {
|
|
59
|
+
"type": "enabled",
|
|
60
|
+
"budget_tokens": 16000
|
|
61
|
+
},
|
|
62
|
+
},
|
|
63
|
+
"image_support": {
|
|
64
|
+
"max_image_size": 20971520, // 20MB
|
|
65
|
+
"supported_formats": ["jpeg", "png", "gif", "webp"],
|
|
66
|
+
"max_images_per_request": 10
|
|
67
|
+
}
|
|
68
|
+
},
|
|
9
69
|
{
|
|
10
70
|
// ====================
|
|
11
71
|
// == Claude 4 Opus ==
|
|
@@ -301,7 +361,6 @@ export const bedrock_models = [
|
|
|
301
361
|
"display_role_names": true,
|
|
302
362
|
"max_tokens_param_name": "max_gen_len",
|
|
303
363
|
"max_supported_response_tokens": 2048,
|
|
304
|
-
"stop_sequences_param_name": "stop",
|
|
305
364
|
"response_chunk_element": "generation"
|
|
306
365
|
},
|
|
307
366
|
{
|
|
@@ -330,7 +389,6 @@ export const bedrock_models = [
|
|
|
330
389
|
"display_role_names": true,
|
|
331
390
|
"max_tokens_param_name": "max_gen_len",
|
|
332
391
|
"max_supported_response_tokens": 2048,
|
|
333
|
-
"stop_sequences_param_name": "stop",
|
|
334
392
|
"response_chunk_element": "generation"
|
|
335
393
|
},
|
|
336
394
|
{
|
|
@@ -359,7 +417,6 @@ export const bedrock_models = [
|
|
|
359
417
|
"display_role_names": true,
|
|
360
418
|
"max_tokens_param_name": "max_gen_len",
|
|
361
419
|
"max_supported_response_tokens": 2048,
|
|
362
|
-
"stop_sequences_param_name": "stop",
|
|
363
420
|
"response_chunk_element": "generation"
|
|
364
421
|
},
|
|
365
422
|
{
|
|
@@ -388,7 +445,6 @@ export const bedrock_models = [
|
|
|
388
445
|
"display_role_names": true,
|
|
389
446
|
"max_tokens_param_name": "max_gen_len",
|
|
390
447
|
"max_supported_response_tokens": 2048,
|
|
391
|
-
"stop_sequences_param_name": "stop",
|
|
392
448
|
"response_chunk_element": "generation"
|
|
393
449
|
},
|
|
394
450
|
{
|
|
@@ -417,7 +473,6 @@ export const bedrock_models = [
|
|
|
417
473
|
"display_role_names": true,
|
|
418
474
|
"max_tokens_param_name": "max_gen_len",
|
|
419
475
|
"max_supported_response_tokens": 2048,
|
|
420
|
-
"stop_sequences_param_name": "stop",
|
|
421
476
|
"response_chunk_element": "generation"
|
|
422
477
|
},
|
|
423
478
|
{
|
|
@@ -445,7 +500,6 @@ export const bedrock_models = [
|
|
|
445
500
|
"display_role_names": true,
|
|
446
501
|
"max_tokens_param_name": "max_gen_len",
|
|
447
502
|
"max_supported_response_tokens": 2048,
|
|
448
|
-
"stop_sequences_param_name": "stop",
|
|
449
503
|
"response_chunk_element": "generation"
|
|
450
504
|
},
|
|
451
505
|
{
|
|
@@ -473,7 +527,6 @@ export const bedrock_models = [
|
|
|
473
527
|
"display_role_names": true,
|
|
474
528
|
"max_tokens_param_name": "max_gen_len",
|
|
475
529
|
"max_supported_response_tokens": 2048,
|
|
476
|
-
"stop_sequences_param_name": "stop",
|
|
477
530
|
"response_chunk_element": "generation"
|
|
478
531
|
},
|
|
479
532
|
{
|
|
@@ -501,7 +554,6 @@ export const bedrock_models = [
|
|
|
501
554
|
"display_role_names": true,
|
|
502
555
|
"max_tokens_param_name": "max_gen_len",
|
|
503
556
|
"max_supported_response_tokens": 2048,
|
|
504
|
-
"stop_sequences_param_name": "stop",
|
|
505
557
|
"response_chunk_element": "generation"
|
|
506
558
|
},
|
|
507
559
|
{
|
|
@@ -529,7 +581,6 @@ export const bedrock_models = [
|
|
|
529
581
|
"display_role_names": true,
|
|
530
582
|
"max_tokens_param_name": "max_gen_len",
|
|
531
583
|
"max_supported_response_tokens": 2048,
|
|
532
|
-
"stop_sequences_param_name": "stop",
|
|
533
584
|
"response_chunk_element": "generation"
|
|
534
585
|
},
|
|
535
586
|
{
|
|
@@ -557,7 +608,6 @@ export const bedrock_models = [
|
|
|
557
608
|
"display_role_names": true,
|
|
558
609
|
"max_tokens_param_name": "max_gen_len",
|
|
559
610
|
"max_supported_response_tokens": 2048,
|
|
560
|
-
"stop_sequences_param_name": "stop",
|
|
561
611
|
"response_chunk_element": "generation"
|
|
562
612
|
},
|
|
563
613
|
{
|
|
@@ -576,8 +626,7 @@ export const bedrock_models = [
|
|
|
576
626
|
"response_chunk_element": "contentBlockDelta.delta.text",
|
|
577
627
|
"response_nonchunk_element": "output.message.content[0].text",
|
|
578
628
|
"special_request_schema": {
|
|
579
|
-
"schemaVersion": "messages-v1"
|
|
580
|
-
"inferenceConfig": {}
|
|
629
|
+
"schemaVersion": "messages-v1"
|
|
581
630
|
},
|
|
582
631
|
"image_support": {
|
|
583
632
|
"max_image_size": 5242880, // 5MB per image
|
|
@@ -601,8 +650,7 @@ export const bedrock_models = [
|
|
|
601
650
|
"response_chunk_element": "contentBlockDelta.delta.text",
|
|
602
651
|
"response_nonchunk_element": "output.message.content[0].text",
|
|
603
652
|
"special_request_schema": {
|
|
604
|
-
"schemaVersion": "messages-v1"
|
|
605
|
-
"inferenceConfig": {}
|
|
653
|
+
"schemaVersion": "messages-v1"
|
|
606
654
|
},
|
|
607
655
|
"image_support": {
|
|
608
656
|
"max_image_size": 5242880, // 5MB per image
|
package/bedrock-wrapper.js
CHANGED
|
@@ -406,7 +406,23 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
|
|
|
406
406
|
}
|
|
407
407
|
}
|
|
408
408
|
|
|
409
|
+
// Handle case where stop sequences cause empty content array
|
|
410
|
+
if (!text_result && decodedBodyResponse.stop_reason === "stop_sequence") {
|
|
411
|
+
// If stopped by sequence but no content, return empty string instead of undefined
|
|
412
|
+
text_result = "";
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
// Ensure text_result is a string to prevent 'undefined' from being part of the response
|
|
416
|
+
if (text_result === null || text_result === undefined) {
|
|
417
|
+
text_result = "";
|
|
418
|
+
}
|
|
419
|
+
|
|
409
420
|
let result = thinking_result ? `<think>${thinking_result}</think>\n\n${text_result}` : text_result;
|
|
421
|
+
|
|
422
|
+
// Ensure final result is a string, in case thinking_result was also empty
|
|
423
|
+
if (result === null || result === undefined) {
|
|
424
|
+
result = "";
|
|
425
|
+
}
|
|
410
426
|
yield result;
|
|
411
427
|
}
|
|
412
428
|
}
|
|
@@ -442,7 +458,10 @@ function findAwsModelWithId(model) {
|
|
|
442
458
|
export async function listBedrockWrapperSupportedModels() {
|
|
443
459
|
let supported_models = [];
|
|
444
460
|
for (let i = 0; i < bedrock_models.length; i++) {
|
|
445
|
-
supported_models.push(
|
|
461
|
+
supported_models.push(JSON.stringify({
|
|
462
|
+
modelName: bedrock_models[i].modelName,
|
|
463
|
+
modelId: bedrock_models[i].modelId
|
|
464
|
+
}));
|
|
446
465
|
}
|
|
447
466
|
return supported_models;
|
|
448
467
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bedrock-wrapper",
|
|
3
|
-
"version": "2.4.2",
|
|
3
|
+
"version": "2.4.4",
|
|
4
4
|
"description": "🪨 Bedrock Wrapper is an npm package that simplifies the integration of existing OpenAI-compatible API objects with AWS Bedrock's serverless inference LLMs.",
|
|
5
5
|
"homepage": "https://www.equilllabs.com/projects/bedrock-wrapper",
|
|
6
6
|
"repository": {
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
"clean": "npx rimraf node_modules && npx rimraf package-lock.json && npm install",
|
|
16
16
|
"test": "node test-models.js",
|
|
17
17
|
"test-vision": "node test-vision.js",
|
|
18
|
+
"test-stop": "node test-stop-sequences.js",
|
|
18
19
|
"interactive": "node interactive-example.js"
|
|
19
20
|
},
|
|
20
21
|
"main": "bedrock-wrapper.js",
|
|
@@ -32,11 +33,11 @@
|
|
|
32
33
|
"author": "",
|
|
33
34
|
"license": "ISC",
|
|
34
35
|
"dependencies": {
|
|
35
|
-
"@aws-sdk/client-bedrock-runtime": "^3.
|
|
36
|
+
"@aws-sdk/client-bedrock-runtime": "^3.861.0",
|
|
36
37
|
"dotenv": "^17.2.1",
|
|
37
38
|
"sharp": "^0.34.3"
|
|
38
39
|
},
|
|
39
40
|
"devDependencies": {
|
|
40
|
-
"chalk": "^5.
|
|
41
|
+
"chalk": "^5.5.0"
|
|
41
42
|
}
|
|
42
43
|
}
|
package/test-stop-sequences.js
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
// ================================================================================
|
|
2
|
+
// == AWS Bedrock Stop Sequences Test - Validates stop sequences implementation ==
|
|
3
|
+
// ================================================================================
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------
|
|
6
|
+
// -- import environment variables from .env file or define them here --
|
|
7
|
+
// ---------------------------------------------------------------------
|
|
8
|
+
import dotenv from 'dotenv';
|
|
9
|
+
import fs from 'fs/promises';
|
|
10
|
+
import chalk from 'chalk';
|
|
11
|
+
|
|
12
|
+
// Run dotenv first so values from a local .env file are present in process.env
// by the time the AWS settings are read below.
dotenv.config();

// AWS settings used to build the credentials object for the tests.
// Each one is undefined when the corresponding environment variable is not set.
const {
    AWS_REGION,
    AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY,
} = process.env;
|
|
17
|
+
|
|
18
|
+
// --------------------------------------------
|
|
19
|
+
// -- import functions from bedrock-wrapper --
|
|
20
|
+
// --------------------------------------------
|
|
21
|
+
import {
|
|
22
|
+
bedrockWrapper,
|
|
23
|
+
listBedrockWrapperSupportedModels
|
|
24
|
+
} from "./bedrock-wrapper.js";
|
|
25
|
+
|
|
26
|
+
/**
 * Log one message to the results file and to the console.
 *
 * @param {string} message - Text to log.
 * @param {string} [type='info'] - One of 'success', 'error', 'info', 'running'
 *   or 'warning'; selects the console colour and prefix icon. Any other value
 *   is printed uncoloured instead of being dropped.
 * @param {boolean} [writeToFile=true] - When true, the message is also appended
 *   (with a trailing newline) to test-stop-sequences-output.txt.
 * @returns {Promise<void>} Resolves once the file append (if any) has finished
 *   and the console line has been written.
 */
async function logOutput(message, type = 'info', writeToFile = true ) {
    if (writeToFile) {
        // The file receives the raw message, without colour codes or icons.
        await fs.appendFile('test-stop-sequences-output.txt', message + '\n');
    }

    // Console output is coloured and prefixed according to the message type.
    switch (type) {
        case 'success':
            console.log(chalk.green('✓ ' + message));
            break;
        case 'error':
            console.log(chalk.red('✗ ' + message));
            break;
        case 'info':
            console.log(chalk.blue('ℹ ' + message));
            break;
        case 'running':
            console.log(chalk.yellow(message));
            break;
        case 'warning':
            console.log(chalk.magenta('⚠ ' + message));
            break;
        default:
            // Unknown type: still show the message rather than losing it silently.
            console.log(message);
            break;
    }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Run a single stop-sequence scenario against one model and judge the outcome.
 *
 * @param {string} model - Model name passed through to bedrockWrapper.
 * @param {object} awsCreds - AWS credentials object passed through to bedrockWrapper.
 * @param {object} testCase - Scenario with `prompt`, `stopSequences` and an
 *   optional `expectedBehavior(response)` predicate.
 * @param {boolean} isStreaming - Value sent as the request's `stream` flag.
 * @returns {Promise<object>} On success: `{ success: true, response, stoppedCorrectly, analysis }`
 *   where `response` is the trimmed text. On failure: `{ success: false, error, response: "",
 *   stoppedCorrectly: false, analysis: "Error occurred" }`. Never rejects.
 */
async function testStopSequence(model, awsCreds, testCase, isStreaming) {
    const openaiChatCompletionsCreateObject = {
        messages: [{ role: "user", content: testCase.prompt }],
        model,
        max_tokens: 200,
        stream: isStreaming,
        temperature: 0.1,
        top_p: 0.9,
        stop_sequences: testCase.stopSequences
    };

    try {
        // Both modes are consumed the same way: iterate whatever the wrapper
        // yields and concatenate it. The await keeps this working whether the
        // wrapper hands back the iterable directly or wrapped in a promise.
        const chunks = await bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: false });
        let completeResponse = "";
        for await (const chunk of chunks) {
            completeResponse += chunk;
        }

        let stoppedCorrectly;
        let analysis;
        if (testCase.expectedBehavior) {
            // The scenario supplies its own predicate; it sees the untrimmed text.
            stoppedCorrectly = testCase.expectedBehavior(completeResponse);
            analysis = stoppedCorrectly ?
                "Response stopped at the correct point" :
                "Response did not stop at the expected point";
        } else {
            // Generic heuristic: a short response is taken as evidence of an early stop.
            stoppedCorrectly = completeResponse.length < 100;
            analysis = stoppedCorrectly ?
                "Response appears to have stopped early (good sign)" :
                "Response seems to have continued beyond expected stop point";
        }

        return {
            success: true,
            response: completeResponse.trim(),
            stoppedCorrectly,
            analysis
        };
    } catch (error) {
        return {
            success: false,
            // A thrown value is not guaranteed to be an Error; fall back to its
            // string form so the report never shows "undefined".
            error: error instanceof Error ? error.message : String(error),
            response: "",
            stoppedCorrectly: false,
            analysis: "Error occurred"
        };
    }
}
|
|
111
|
+
|
|
112
|
+
// Test cases designed to validate stop sequences
|
|
113
|
+
/**
 * Split a model response into its alphanumeric words.
 *
 * The scenario checks look for standalone items such as "8" or "J". A plain
 * substring search would also hit those characters inside unrelated words
 * (the "L" in "Let's", the "8" in "18"), so checks go through this set instead.
 *
 * @param {string} response - Raw model output.
 * @returns {Set<string>} Case-sensitive set of words found in the response.
 */
function wordsIn(response) {
    return new Set(response.split(/[^A-Za-z0-9]+/));
}

/**
 * True when `response` contains `required` as a whole word and none of `forbidden`.
 *
 * @param {string} response - Raw model output.
 * @param {string} required - Word that must be present (the last item before the stop point).
 * @param {string[]} forbidden - Words that only appear if generation ran past the stop point.
 * @returns {boolean}
 */
function stoppedBetween(response, required, forbidden) {
    const words = wordsIn(response);
    return words.has(required) && !forbidden.some((word) => words.has(word));
}

// Each scenario has a display name, the prompt sent to the model, the stop
// sequences to request, and a predicate that decides from the response text
// whether generation stopped where it should have.
const stopSequenceTestCases = [
    {
        name: "Number sequence test",
        prompt: "Count from 1 to 10, separated by commas: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10",
        stopSequences: ["7"],
        // Should stop at or before 7, and definitely not continue to 8, 9, 10
        expectedBehavior: (response) => stoppedBetween(response, "6", ["8", "9", "10"])
    },
    {
        name: "Word-based stop test",
        prompt: "List the days of the week in order: Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday",
        stopSequences: ["Friday"],
        // Should stop at or before Friday, and not continue to Saturday/Sunday
        expectedBehavior: (response) => stoppedBetween(response, "Thursday", ["Saturday", "Sunday"])
    },
    {
        name: "Multi-stop sequence test",
        prompt: "Write the alphabet: A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z",
        stopSequences: ["G", "H", "I"],
        // Should stop at any of G, H, or I and not continue beyond
        expectedBehavior: (response) => stoppedBetween(response, "F", ["J", "K", "L"])
    },
    {
        name: "Sentence completion test",
        prompt: "Complete this story: Once upon a time, there was a brave knight who loved to explore. One day, he found a mysterious cave. Inside the cave, he discovered a magical sword. With the sword in hand, he continued deeper into the darkness.",
        stopSequences: ["sword"],
        expectedBehavior: (response) => {
            // Should stop at or shortly after "sword" and not continue the full story
            return response.includes("cave") && response.length < 200; // Shortened response
        }
    },
    {
        name: "Special character stop test",
        prompt: "Generate a list with bullet points:\n• First item\n• Second item\n• Third item\n• Fourth item\n• Fifth item",
        stopSequences: ["• Third"],
        // Should stop at or before "• Third" and not continue to Fourth/Fifth
        expectedBehavior: (response) => stoppedBetween(response, "Second", ["Fourth", "Fifth"])
    }
];
|
|
160
|
+
|
|
161
|
+
/**
 * Shorten text for a log line, adding "..." only when something was cut off.
 *
 * @param {string} text - Text to preview.
 * @param {number} limit - Maximum number of characters to keep.
 * @returns {string}
 */
function previewText(text, limit) {
    return text.length > limit ? `${text.substring(0, limit)}...` : text;
}

/**
 * Percentage of passed tests in a tally, as a number (0 when nothing ran).
 *
 * @param {{passed: number, failed: number}} tally
 * @returns {number}
 */
function passRate(tally) {
    const total = tally.passed + tally.failed;
    return total === 0 ? 0 : (tally.passed / total) * 100;
}

/**
 * Log the outcome of one testStopSequence call and update the matching tally.
 *
 * @param {string} label - "Streaming" or "Non-streaming", used in the log lines.
 * @param {object} outcome - Result object returned by testStopSequence.
 * @param {{passed: number, failed: number}} tally - Counter to increment in place.
 * @returns {Promise<void>}
 */
async function reportOutcome(label, outcome, tally) {
    if (!outcome.success) {
        await logOutput(` ✗ ${label}: ERROR - ${outcome.error}`, 'error');
        tally.failed++;
        return;
    }

    if (outcome.stoppedCorrectly) {
        await logOutput(` ✓ ${label}: PASSED - ${outcome.analysis}`, 'success');
        tally.passed++;
    } else {
        await logOutput(` ✗ ${label}: FAILED - ${outcome.analysis}`, 'warning');
        tally.failed++;
    }
    await logOutput(` Response: "${previewText(outcome.response, 100)}"`, 'info');
}

/**
 * Entry point: run every stop-sequence scenario, in streaming and non-streaming
 * mode, against a fixed list of representative models, then print a per-model
 * summary. Results go to the console and to test-stop-sequences-output.txt.
 *
 * @returns {Promise<void>}
 */
async function main() {
    // Start from a fresh output file containing only the header.
    const timestamp = new Date().toISOString();
    await fs.writeFile('test-stop-sequences-output.txt',
        `Stop Sequences Test Results - ${timestamp}\n` +
        `${'='.repeat(80)}\n\n` +
        `This test validates that stop sequences work correctly across all models.\n` +
        `Each model is tested with multiple stop sequence scenarios.\n\n`
    );

    // The wrapper returns each supported model as a JSON string; keep the names.
    const supportedModels = await listBedrockWrapperSupportedModels();
    const availableModels = supportedModels.map((model) => JSON.parse(model).modelName);

    console.clear();
    await logOutput(`Starting stop sequences tests with ${availableModels.length} models...`, 'info');
    await logOutput(`Testing ${stopSequenceTestCases.length} different stop sequence scenarios\n`, 'info');

    const awsCreds = {
        region: AWS_REGION,
        accessKeyId: AWS_ACCESS_KEY_ID,
        secretAccessKey: AWS_SECRET_ACCESS_KEY,
    };

    // Per-model pass/fail counters, keyed by model name.
    const modelResults = {};

    // Test a subset of models for efficiency (you can test all if needed).
    // Names the wrapper does not list are dropped.
    const modelsToTest = [
        "Claude-4-1-Opus",
        "Claude-3-5-Sonnet-v2",
        "Claude-3-Haiku",
        "Nova-Pro",
        "Nova-Lite",
        "Llama-3-3-70b",
        "Mistral-7b"
    ].filter((name) => availableModels.includes(name));

    await logOutput(`\nTesting ${modelsToTest.length} representative models...\n`, 'info');

    for (const model of modelsToTest) {
        await logOutput(`\n${'='.repeat(60)}`, 'info');
        await logOutput(`Testing ${model}`, 'running');
        await logOutput(`${'='.repeat(60)}`, 'info');

        const tallies = {
            streaming: { passed: 0, failed: 0 },
            nonStreaming: { passed: 0, failed: 0 }
        };
        modelResults[model] = tallies;

        for (const testCase of stopSequenceTestCases) {
            await logOutput(`\n▶ Test Case: ${testCase.name}`, 'info');
            await logOutput(` Prompt: "${previewText(testCase.prompt, 50)}"`, 'info');
            await logOutput(` Stop sequences: [${testCase.stopSequences.join(', ')}]`, 'info');

            await logOutput(` Testing streaming...`, 'info');
            const streamResult = await testStopSequence(model, awsCreds, testCase, true);
            await reportOutcome('Streaming', streamResult, tallies.streaming);

            await logOutput(` Testing non-streaming...`, 'info');
            const nonStreamResult = await testStopSequence(model, awsCreds, testCase, false);
            await reportOutcome('Non-streaming', nonStreamResult, tallies.nonStreaming);
        }
    }

    // Summary
    await logOutput(`\n\n${'='.repeat(80)}`, 'info');
    await logOutput('SUMMARY', 'running');
    await logOutput(`${'='.repeat(80)}\n`, 'info');

    for (const [model, results] of Object.entries(modelResults)) {
        const { streaming, nonStreaming } = results;
        // Keep the rates numeric so the threshold comparison does not rely on
        // string-to-number coercion; format only when printing.
        const streamingRate = passRate(streaming);
        const nonStreamingRate = passRate(nonStreaming);

        await logOutput(`${model}:`, 'info');
        await logOutput(` Streaming: ${streaming.passed}/${streaming.passed + streaming.failed} passed (${streamingRate.toFixed(1)}%)`,
            streamingRate > 80 ? 'success' : 'warning');
        await logOutput(` Non-streaming: ${nonStreaming.passed}/${nonStreaming.passed + nonStreaming.failed} passed (${nonStreamingRate.toFixed(1)}%)`,
            nonStreamingRate > 80 ? 'success' : 'warning');
    }

    // Console only: this line points the reader at the file, so it is not written to it.
    await logOutput('\nTesting complete! Check test-stop-sequences-output.txt for full results.', 'info', false);
}
|
|
273
|
+
|
|
274
|
+
// Script entry point. Any error escaping main() is logged and printed with its
// stack, and the process is marked as failed so `npm run test-stop` does not
// report success after a fatal error.
main().catch(async (error) => {
    await logOutput(`Fatal Error: ${error.message}`, 'error');
    console.error(error);
    process.exitCode = 1;
});
|