bedrock-wrapper 2.4.2 → 2.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,20 +1,66 @@
1
1
  # Changelog
2
2
  All notable changes to this project will be documented in this file.
3
3
 
4
+ ## [2.4.4] - 2025-08-05 (Claude 4.1 Opus)
5
+ ### Added
6
+ - Support for Claude 4.1 Opus models
7
+ - Claude-4-1-Opus
8
+ - Claude-4-1-Opus-Thinking
9
+
10
+ ## [2.4.3] - 2025-07-31 (Stop Sequences Fixes)
11
+ ### Fixed
12
+ - **Critical Discovery**: Removed stop sequences support from Llama models
13
+ - AWS Bedrock does not support stop sequences for Llama models (confirmed via official AWS documentation)
14
+ - Llama models only support: `prompt`, `temperature`, `top_p`, `max_gen_len`, `images`
15
+ - This is an AWS Bedrock limitation, not a wrapper limitation
16
+ - Fixed Nova model configuration conflicts that were causing stop sequence inconsistencies
17
+ - Removed conflicting empty `inferenceConfig: {}` from Nova model configurations
18
+ - Improved error handling for empty responses when stop sequences trigger early
19
+
20
+ ### Updated
21
+ - **Documentation corrections**
22
+ - Corrected stop sequences support claims (removed "all models support" language)
23
+ - Added accurate model-specific support matrix with sequence limits
24
+ - Added comprehensive stop sequences support table with AWS documentation references
25
+ - **Model Support Matrix** now clearly documented:
26
+ - ✅ Claude models: Full support (up to 8,191 sequences)
27
+ - ✅ Nova models: Full support (up to 4 sequences)
28
+ - ✅ Mistral models: Full support (up to 10 sequences)
29
+ - ❌ Llama models: Not supported (AWS Bedrock limitation)
30
+
31
+ ### Technical Details
32
+ - Based on comprehensive research of official AWS Bedrock documentation
33
+ - All changes maintain full backward compatibility
34
+ - Test results show significant improvements in stop sequences reliability for supported models
35
+ - Added detailed explanations to help users understand AWS Bedrock's actual capabilities
36
+
4
37
  ## [2.4.2] - 2025-07-31 (Stop Sequences Support)
5
38
  ### Added
6
- - Stop sequences support for all models
39
+ - Stop sequences support for compatible models
7
40
  - OpenAI-compatible `stop` and `stop_sequences` parameters
8
41
  - Automatic string-to-array conversion for compatibility
9
- - Model-specific parameter mapping (stop_sequences for Claude, stopSequences for Nova, stop for Llama/Mistral)
42
+ - Model-specific parameter mapping (stop_sequences for Claude, stopSequences for Nova, stop for Mistral)
10
43
  - Enhanced request building logic to include stop sequences in appropriate API formats
11
- - Comprehensive stop sequences testing and validation
44
+ - Comprehensive stop sequences testing and validation with `npm run test-stop`
45
+
46
+ ### Fixed
47
+ - **Critical Discovery**: Removed stop sequences support from Llama models
48
+ - AWS Bedrock does not support stop sequences for Llama models (confirmed via official documentation)
49
+ - Llama models only support: `prompt`, `temperature`, `top_p`, `max_gen_len`, `images`
50
+ - This is an AWS Bedrock limitation, not a wrapper limitation
51
+ - Fixed Nova model configuration conflicts that were causing stop sequence inconsistencies
52
+ - Improved error handling for empty responses when stop sequences trigger early
12
53
 
13
54
  ### Technical Details
14
- - Added `stop_sequences_param_name` configuration to all 26+ model definitions
55
+ - **Model Support Matrix**:
56
+ - ✅ Claude models: Full support (up to 8,191 sequences)
57
+ - ✅ Nova models: Full support (up to 4 sequences)
58
+ - ✅ Mistral models: Full support (up to 10 sequences)
59
+ - ❌ Llama models: Not supported (AWS Bedrock limitation)
15
60
  - Updated request construction for both messages API and prompt-based models
16
61
  - Supports both single string and array formats for stop sequences
17
62
  - Maintains full backward compatibility with existing API usage
63
+ - Added comprehensive documentation in README.md and CLAUDE.md explaining support limitations
18
64
 
19
65
  ## [2.4.0] - 2025-07-24 (AWS Nova Models)
20
66
  ### Added
package/README.md CHANGED
@@ -104,19 +104,21 @@ Bedrock Wrapper is an npm package that simplifies the integration of existing Op
104
104
 
105
105
  | modelName | AWS Model Id | Image |
106
106
  |----------------------------|----------------------------------------------|-------|
107
- | Claude-4-Opus | us.anthropic.claude-opus-4-20250514-v1:0 | ✅ |
108
- | Claude-4-Opus-Thinking | us.anthropic.claude-opus-4-20250514-v1:0 | ✅ |
109
- | Claude-4-Sonnet | us.anthropic.claude-sonnet-4-20250514-v1:0 | ✅ |
110
- | Claude-4-Sonnet-Thinking | us.anthropic.claude-sonnet-4-20250514-v1:0 | ✅ |
107
+ | Claude-4-1-Opus | us.anthropic.claude-opus-4-1-20250805-v1:0 | ✅ |
108
+ | Claude-4-1-Opus-Thinking | us.anthropic.claude-opus-4-1-20250805-v1:0 | ✅ |
109
+ | Claude-4-Opus | us.anthropic.claude-opus-4-20250514-v1:0 | ✅ |
110
+ | Claude-4-Opus-Thinking | us.anthropic.claude-opus-4-20250514-v1:0 | ✅ |
111
+ | Claude-4-Sonnet | us.anthropic.claude-sonnet-4-20250514-v1:0 | ✅ |
112
+ | Claude-4-Sonnet-Thinking | us.anthropic.claude-sonnet-4-20250514-v1:0 | ✅ |
111
113
  | Claude-3-7-Sonnet-Thinking | us.anthropic.claude-3-7-sonnet-20250219-v1:0 | ✅ |
112
114
  | Claude-3-7-Sonnet | us.anthropic.claude-3-7-sonnet-20250219-v1:0 | ✅ |
113
115
  | Claude-3-5-Sonnet-v2 | anthropic.claude-3-5-sonnet-20241022-v2:0 | ✅ |
114
116
  | Claude-3-5-Sonnet | anthropic.claude-3-5-sonnet-20240620-v1:0 | ✅ |
115
117
  | Claude-3-5-Haiku | anthropic.claude-3-5-haiku-20241022-v1:0 | ❌ |
116
118
  | Claude-3-Haiku | anthropic.claude-3-haiku-20240307-v1:0 | ✅ |
117
- | Nova-Pro | us.amazon.nova-pro-v1:0 | ✅ |
118
- | Nova-Lite | us.amazon.nova-lite-v1:0 | ✅ |
119
- | Nova-Micro | us.amazon.nova-micro-v1:0 | ❌ |
119
+ | Nova-Pro | us.amazon.nova-pro-v1:0 | ✅ |
120
+ | Nova-Lite | us.amazon.nova-lite-v1:0 | ✅ |
121
+ | Nova-Micro | us.amazon.nova-micro-v1:0 | ❌ |
120
122
  | Llama-3-3-70b | us.meta.llama3-3-70b-instruct-v1:0 | ❌ |
121
123
  | Llama-3-2-1b | us.meta.llama3-2-1b-instruct-v1:0 | ❌ |
122
124
  | Llama-3-2-3b | us.meta.llama3-2-3b-instruct-v1:0 | ❌ |
@@ -192,7 +194,7 @@ You can include multiple images in a single message by adding more image_url obj
192
194
 
193
195
  ### Stop Sequences
194
196
 
195
- All models support stop sequences - custom text sequences that cause the model to stop generating. This is useful for controlling where the model stops its response.
197
+ Stop sequences are custom text sequences that cause the model to stop generating text. This is useful for controlling where the model stops its response.
196
198
 
197
199
  ```javascript
198
200
  const openaiChatCompletionsCreateObject = {
@@ -205,11 +207,16 @@ const openaiChatCompletionsCreateObject = {
205
207
  };
206
208
  ```
207
209
 
210
+ **Model Support:**
211
+ - ✅ **Claude models**: Fully supported (up to 8,191 sequences)
212
+ - ✅ **Nova models**: Fully supported (up to 4 sequences)
213
+ - ✅ **Mistral models**: Fully supported (up to 10 sequences)
214
+ - ❌ **Llama models**: Not supported (AWS Bedrock limitation)
215
+
208
216
  **Features:**
209
217
  - Compatible with OpenAI's `stop` parameter (single string or array)
210
218
  - Also accepts `stop_sequences` parameter for explicit usage
211
219
  - Automatic conversion between string and array formats
212
- - Works with all 26+ supported models (Claude, Nova, Llama, Mistral)
213
220
  - Model-specific parameter mapping handled automatically
214
221
 
215
222
  **Example Usage:**
@@ -217,10 +224,12 @@ const openaiChatCompletionsCreateObject = {
217
224
  // Stop generation when model tries to output "7"
218
225
  const result = await bedrockWrapper(awsCreds, {
219
226
  messages: [{ role: "user", content: "Count from 1 to 10" }],
220
- model: "Claude-3-5-Sonnet",
227
+ model: "Claude-3-5-Sonnet", // Use Claude, Nova, or Mistral models
221
228
  stop_sequences: ["7"]
222
229
  });
223
230
  // Response: "1, 2, 3, 4, 5, 6," (stops before "7")
231
+
232
+ // Note: Llama models will ignore stop sequences due to AWS Bedrock limitations
224
233
  ```
225
234
 
226
235
  ---
package/bedrock-models.js CHANGED
@@ -6,6 +6,66 @@
6
6
  // https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/cross-region-inference
7
7
 
8
8
  export const bedrock_models = [
9
+ {
10
+ // =====================
11
+ // == Claude 4.1 Opus ==
12
+ // =====================
13
+ "modelName": "Claude-4-1-Opus",
14
+ // "modelId": "anthropic.claude-opus-4-1-20250805-v1:0",
15
+ "modelId": "us.anthropic.claude-opus-4-1-20250805-v1:0",
16
+ "vision": true,
17
+ "messages_api": true,
18
+ "system_as_separate_field": true,
19
+ "display_role_names": true,
20
+ "max_tokens_param_name": "max_tokens",
21
+ "max_supported_response_tokens": 131072,
22
+ "stop_sequences_param_name": "stop_sequences",
23
+ "response_chunk_element": "delta.text",
24
+ "response_nonchunk_element": "content[0].text",
25
+ "thinking_response_chunk_element": "delta.thinking",
26
+ "thinking_response_nonchunk_element": "content[0].thinking",
27
+ "special_request_schema": {
28
+ "anthropic_version": "bedrock-2023-05-31",
29
+ "anthropic_beta": ["output-128k-2025-02-19"],
30
+ },
31
+ "image_support": {
32
+ "max_image_size": 20971520, // 20MB
33
+ "supported_formats": ["jpeg", "png", "gif", "webp"],
34
+ "max_images_per_request": 10
35
+ }
36
+ },
37
+ {
38
+ // ==============================
39
+ // == Claude 4.1 Opus Thinking ==
40
+ // ==============================
41
+ "modelName": "Claude-4-1-Opus-Thinking",
42
+ // "modelId": "anthropic.claude-opus-4-1-20250805-v1:0",
43
+ "modelId": "us.anthropic.claude-opus-4-1-20250805-v1:0",
44
+ "vision": true,
45
+ "messages_api": true,
46
+ "system_as_separate_field": true,
47
+ "display_role_names": true,
48
+ "max_tokens_param_name": "max_tokens",
49
+ "max_supported_response_tokens": 131072,
50
+ "stop_sequences_param_name": "stop_sequences",
51
+ "response_chunk_element": "delta.text",
52
+ "response_nonchunk_element": "content[0].text",
53
+ "thinking_response_chunk_element": "delta.thinking",
54
+ "thinking_response_nonchunk_element": "content[0].thinking",
55
+ "special_request_schema": {
56
+ "anthropic_version": "bedrock-2023-05-31",
57
+ "anthropic_beta": ["output-128k-2025-02-19"],
58
+ "thinking": {
59
+ "type": "enabled",
60
+ "budget_tokens": 16000
61
+ },
62
+ },
63
+ "image_support": {
64
+ "max_image_size": 20971520, // 20MB
65
+ "supported_formats": ["jpeg", "png", "gif", "webp"],
66
+ "max_images_per_request": 10
67
+ }
68
+ },
9
69
  {
10
70
  // ====================
11
71
  // == Claude 4 Opus ==
@@ -301,7 +361,6 @@ export const bedrock_models = [
301
361
  "display_role_names": true,
302
362
  "max_tokens_param_name": "max_gen_len",
303
363
  "max_supported_response_tokens": 2048,
304
- "stop_sequences_param_name": "stop",
305
364
  "response_chunk_element": "generation"
306
365
  },
307
366
  {
@@ -330,7 +389,6 @@ export const bedrock_models = [
330
389
  "display_role_names": true,
331
390
  "max_tokens_param_name": "max_gen_len",
332
391
  "max_supported_response_tokens": 2048,
333
- "stop_sequences_param_name": "stop",
334
392
  "response_chunk_element": "generation"
335
393
  },
336
394
  {
@@ -359,7 +417,6 @@ export const bedrock_models = [
359
417
  "display_role_names": true,
360
418
  "max_tokens_param_name": "max_gen_len",
361
419
  "max_supported_response_tokens": 2048,
362
- "stop_sequences_param_name": "stop",
363
420
  "response_chunk_element": "generation"
364
421
  },
365
422
  {
@@ -388,7 +445,6 @@ export const bedrock_models = [
388
445
  "display_role_names": true,
389
446
  "max_tokens_param_name": "max_gen_len",
390
447
  "max_supported_response_tokens": 2048,
391
- "stop_sequences_param_name": "stop",
392
448
  "response_chunk_element": "generation"
393
449
  },
394
450
  {
@@ -417,7 +473,6 @@ export const bedrock_models = [
417
473
  "display_role_names": true,
418
474
  "max_tokens_param_name": "max_gen_len",
419
475
  "max_supported_response_tokens": 2048,
420
- "stop_sequences_param_name": "stop",
421
476
  "response_chunk_element": "generation"
422
477
  },
423
478
  {
@@ -445,7 +500,6 @@ export const bedrock_models = [
445
500
  "display_role_names": true,
446
501
  "max_tokens_param_name": "max_gen_len",
447
502
  "max_supported_response_tokens": 2048,
448
- "stop_sequences_param_name": "stop",
449
503
  "response_chunk_element": "generation"
450
504
  },
451
505
  {
@@ -473,7 +527,6 @@ export const bedrock_models = [
473
527
  "display_role_names": true,
474
528
  "max_tokens_param_name": "max_gen_len",
475
529
  "max_supported_response_tokens": 2048,
476
- "stop_sequences_param_name": "stop",
477
530
  "response_chunk_element": "generation"
478
531
  },
479
532
  {
@@ -501,7 +554,6 @@ export const bedrock_models = [
501
554
  "display_role_names": true,
502
555
  "max_tokens_param_name": "max_gen_len",
503
556
  "max_supported_response_tokens": 2048,
504
- "stop_sequences_param_name": "stop",
505
557
  "response_chunk_element": "generation"
506
558
  },
507
559
  {
@@ -529,7 +581,6 @@ export const bedrock_models = [
529
581
  "display_role_names": true,
530
582
  "max_tokens_param_name": "max_gen_len",
531
583
  "max_supported_response_tokens": 2048,
532
- "stop_sequences_param_name": "stop",
533
584
  "response_chunk_element": "generation"
534
585
  },
535
586
  {
@@ -557,7 +608,6 @@ export const bedrock_models = [
557
608
  "display_role_names": true,
558
609
  "max_tokens_param_name": "max_gen_len",
559
610
  "max_supported_response_tokens": 2048,
560
- "stop_sequences_param_name": "stop",
561
611
  "response_chunk_element": "generation"
562
612
  },
563
613
  {
@@ -576,8 +626,7 @@ export const bedrock_models = [
576
626
  "response_chunk_element": "contentBlockDelta.delta.text",
577
627
  "response_nonchunk_element": "output.message.content[0].text",
578
628
  "special_request_schema": {
579
- "schemaVersion": "messages-v1",
580
- "inferenceConfig": {}
629
+ "schemaVersion": "messages-v1"
581
630
  },
582
631
  "image_support": {
583
632
  "max_image_size": 5242880, // 5MB per image
@@ -601,8 +650,7 @@ export const bedrock_models = [
601
650
  "response_chunk_element": "contentBlockDelta.delta.text",
602
651
  "response_nonchunk_element": "output.message.content[0].text",
603
652
  "special_request_schema": {
604
- "schemaVersion": "messages-v1",
605
- "inferenceConfig": {}
653
+ "schemaVersion": "messages-v1"
606
654
  },
607
655
  "image_support": {
608
656
  "max_image_size": 5242880, // 5MB per image
@@ -406,7 +406,23 @@ export async function* bedrockWrapper(awsCreds, openaiChatCompletionsCreateObjec
406
406
  }
407
407
  }
408
408
 
409
+ // Handle case where stop sequences cause empty content array
410
+ if (!text_result && decodedBodyResponse.stop_reason === "stop_sequence") {
411
+ // If stopped by sequence but no content, return empty string instead of undefined
412
+ text_result = "";
413
+ }
414
+
415
+ // Ensure text_result is a string to prevent 'undefined' from being part of the response
416
+ if (text_result === null || text_result === undefined) {
417
+ text_result = "";
418
+ }
419
+
409
420
  let result = thinking_result ? `<think>${thinking_result}</think>\n\n${text_result}` : text_result;
421
+
422
+ // Ensure final result is a string, in case thinking_result was also empty
423
+ if (result === null || result === undefined) {
424
+ result = "";
425
+ }
410
426
  yield result;
411
427
  }
412
428
  }
@@ -442,7 +458,10 @@ function findAwsModelWithId(model) {
442
458
  export async function listBedrockWrapperSupportedModels() {
443
459
  let supported_models = [];
444
460
  for (let i = 0; i < bedrock_models.length; i++) {
445
- supported_models.push(`{"modelName": ${bedrock_models[i].modelName}, "modelId": ${bedrock_models[i].modelId}}`);
461
+ supported_models.push(JSON.stringify({
462
+ modelName: bedrock_models[i].modelName,
463
+ modelId: bedrock_models[i].modelId
464
+ }));
446
465
  }
447
466
  return supported_models;
448
467
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bedrock-wrapper",
3
- "version": "2.4.2",
3
+ "version": "2.4.4",
4
4
  "description": "🪨 Bedrock Wrapper is an npm package that simplifies the integration of existing OpenAI-compatible API objects with AWS Bedrock's serverless inference LLMs.",
5
5
  "homepage": "https://www.equilllabs.com/projects/bedrock-wrapper",
6
6
  "repository": {
@@ -15,6 +15,7 @@
15
15
  "clean": "npx rimraf node_modules && npx rimraf package-lock.json && npm install",
16
16
  "test": "node test-models.js",
17
17
  "test-vision": "node test-vision.js",
18
+ "test-stop": "node test-stop-sequences.js",
18
19
  "interactive": "node interactive-example.js"
19
20
  },
20
21
  "main": "bedrock-wrapper.js",
@@ -32,11 +33,11 @@
32
33
  "author": "",
33
34
  "license": "ISC",
34
35
  "dependencies": {
35
- "@aws-sdk/client-bedrock-runtime": "^3.857.0",
36
+ "@aws-sdk/client-bedrock-runtime": "^3.861.0",
36
37
  "dotenv": "^17.2.1",
37
38
  "sharp": "^0.34.3"
38
39
  },
39
40
  "devDependencies": {
40
- "chalk": "^5.4.1"
41
+ "chalk": "^5.5.0"
41
42
  }
42
43
  }
@@ -0,0 +1,277 @@
1
+ // ================================================================================
2
+ // == AWS Bedrock Stop Sequences Test - Validates stop sequences implementation ==
3
+ // ================================================================================
4
+
5
+ // ---------------------------------------------------------------------
6
+ // -- import environment variables from .env file or define them here --
7
+ // ---------------------------------------------------------------------
8
+ import dotenv from 'dotenv';
9
+ import fs from 'fs/promises';
10
+ import chalk from 'chalk';
11
+
12
+ dotenv.config();
13
+
14
+ const AWS_REGION = process.env.AWS_REGION;
15
+ const AWS_ACCESS_KEY_ID = process.env.AWS_ACCESS_KEY_ID;
16
+ const AWS_SECRET_ACCESS_KEY = process.env.AWS_SECRET_ACCESS_KEY;
17
+
18
+ // --------------------------------------------
19
+ // -- import functions from bedrock-wrapper --
20
+ // --------------------------------------------
21
+ import {
22
+ bedrockWrapper,
23
+ listBedrockWrapperSupportedModels
24
+ } from "./bedrock-wrapper.js";
25
+
26
+ async function logOutput(message, type = 'info', writeToFile = true ) {
27
+ if (writeToFile) {
28
+ // Log to file
29
+ await fs.appendFile('test-stop-sequences-output.txt', message + '\n');
30
+ }
31
+
32
+ // Log to console with colors
33
+ switch(type) {
34
+ case 'success':
35
+ console.log(chalk.green('✓ ' + message));
36
+ break;
37
+ case 'error':
38
+ console.log(chalk.red('✗ ' + message));
39
+ break;
40
+ case 'info':
41
+ console.log(chalk.blue('ℹ ' + message));
42
+ break;
43
+ case 'running':
44
+ console.log(chalk.yellow(message));
45
+ break;
46
+ case 'warning':
47
+ console.log(chalk.magenta('⚠ ' + message));
48
+ break;
49
+ }
50
+ }
51
+
52
+ async function testStopSequence(model, awsCreds, testCase, isStreaming) {
53
+ const messages = [{ role: "user", content: testCase.prompt }];
54
+ const openaiChatCompletionsCreateObject = {
55
+ messages,
56
+ model,
57
+ max_tokens: 200,
58
+ stream: isStreaming,
59
+ temperature: 0.1,
60
+ top_p: 0.9,
61
+ stop_sequences: testCase.stopSequences
62
+ };
63
+
64
+ let completeResponse = "";
65
+
66
+ try {
67
+ if (isStreaming) {
68
+ for await (const chunk of bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: false })) {
69
+ completeResponse += chunk;
70
+ }
71
+ } else {
72
+ const response = await bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: false });
73
+ for await (const data of response) {
74
+ completeResponse += data;
75
+ }
76
+ }
77
+
78
+ // Analyze if stop sequence worked
79
+ const result = {
80
+ success: true,
81
+ response: completeResponse.trim(),
82
+ stoppedCorrectly: false,
83
+ analysis: ""
84
+ };
85
+
86
+ // Use the expectedBehavior function to determine if stopping worked correctly
87
+ if (testCase.expectedBehavior) {
88
+ result.stoppedCorrectly = testCase.expectedBehavior(completeResponse);
89
+ result.analysis = result.stoppedCorrectly ?
90
+ "Response stopped at the correct point" :
91
+ "Response did not stop at the expected point";
92
+ } else {
93
+ // Generic check - if response is shorter than expected, it probably stopped
94
+ result.stoppedCorrectly = completeResponse.length < 100; // Assume short response means it stopped
95
+ result.analysis = result.stoppedCorrectly ?
96
+ "Response appears to have stopped early (good sign)" :
97
+ "Response seems to have continued beyond expected stop point";
98
+ }
99
+
100
+ return result;
101
+ } catch (error) {
102
+ return {
103
+ success: false,
104
+ error: error.message,
105
+ response: "",
106
+ stoppedCorrectly: false,
107
+ analysis: "Error occurred"
108
+ };
109
+ }
110
+ }
111
+
112
+ // Test cases designed to validate stop sequences
113
+ const stopSequenceTestCases = [
114
+ {
115
+ name: "Number sequence test",
116
+ prompt: "Count from 1 to 10, separated by commas: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10",
117
+ stopSequences: ["7"],
118
+ expectedBehavior: (response) => {
119
+ // Should stop at or before 7, and definitely not continue to 8, 9, 10
120
+ return response.includes("6") && !response.includes("8") && !response.includes("9") && !response.includes("10");
121
+ }
122
+ },
123
+ {
124
+ name: "Word-based stop test",
125
+ prompt: "List the days of the week in order: Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday",
126
+ stopSequences: ["Friday"],
127
+ expectedBehavior: (response) => {
128
+ // Should stop at or before Friday, and not continue to Saturday/Sunday
129
+ return response.includes("Thursday") && !response.includes("Saturday") && !response.includes("Sunday");
130
+ }
131
+ },
132
+ {
133
+ name: "Multi-stop sequence test",
134
+ prompt: "Write the alphabet: A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z",
135
+ stopSequences: ["G", "H", "I"],
136
+ expectedBehavior: (response) => {
137
+ // Should stop at any of G, H, or I and not continue beyond
138
+ return response.includes("F") && !response.includes("J") && !response.includes("K") && !response.includes("L");
139
+ }
140
+ },
141
+ {
142
+ name: "Sentence completion test",
143
+ prompt: "Complete this story: Once upon a time, there was a brave knight who loved to explore. One day, he found a mysterious cave. Inside the cave, he discovered a magical sword. With the sword in hand, he continued deeper into the darkness.",
144
+ stopSequences: ["sword"],
145
+ expectedBehavior: (response) => {
146
+ // Should stop at or shortly after "sword" and not continue the full story
147
+ return response.includes("cave") && response.length < 200; // Shortened response
148
+ }
149
+ },
150
+ {
151
+ name: "Special character stop test",
152
+ prompt: "Generate a list with bullet points:\n• First item\n• Second item\n• Third item\n• Fourth item\n• Fifth item",
153
+ stopSequences: ["• Third"],
154
+ expectedBehavior: (response) => {
155
+ // Should stop at or before "• Third" and not continue to Fourth/Fifth
156
+ return response.includes("Second") && !response.includes("Fourth") && !response.includes("Fifth");
157
+ }
158
+ }
159
+ ];
160
+
161
+ async function main() {
162
+ // Clear output file and add header
163
+ const timestamp = new Date().toISOString();
164
+ await fs.writeFile('test-stop-sequences-output.txt',
165
+ `Stop Sequences Test Results - ${timestamp}\n` +
166
+ `${'='.repeat(80)}\n\n` +
167
+ `This test validates that stop sequences work correctly across all models.\n` +
168
+ `Each model is tested with multiple stop sequence scenarios.\n\n`
169
+ );
170
+
171
+ const supportedModels = await listBedrockWrapperSupportedModels();
172
+ const availableModels = supportedModels.map(model => {
173
+ return JSON.parse(model).modelName;
174
+ });
175
+
176
+ console.clear();
177
+ await logOutput(`Starting stop sequences tests with ${availableModels.length} models...`, 'info');
178
+ await logOutput(`Testing ${stopSequenceTestCases.length} different stop sequence scenarios\n`, 'info');
179
+
180
+ const awsCreds = {
181
+ region: AWS_REGION,
182
+ accessKeyId: AWS_ACCESS_KEY_ID,
183
+ secretAccessKey: AWS_SECRET_ACCESS_KEY,
184
+ };
185
+
186
+ // Track overall results
187
+ const modelResults = {};
188
+
189
+ // Test a subset of models for efficiency (you can test all if needed)
190
+ const modelsToTest = [
191
+ "Claude-4-1-Opus",
192
+ "Claude-3-5-Sonnet-v2",
193
+ "Claude-3-Haiku",
194
+ "Nova-Pro",
195
+ "Nova-Lite",
196
+ "Llama-3-3-70b",
197
+ "Mistral-7b"
198
+ ].filter(m => availableModels.includes(m));
199
+
200
+ await logOutput(`\nTesting ${modelsToTest.length} representative models...\n`, 'info');
201
+
202
+ for (const model of modelsToTest) {
203
+ await logOutput(`\n${'='.repeat(60)}`, 'info');
204
+ await logOutput(`Testing ${model}`, 'running');
205
+ await logOutput(`${'='.repeat(60)}`, 'info');
206
+
207
+ modelResults[model] = {
208
+ streaming: { passed: 0, failed: 0 },
209
+ nonStreaming: { passed: 0, failed: 0 }
210
+ };
211
+
212
+ for (const testCase of stopSequenceTestCases) {
213
+ await logOutput(`\n▶ Test Case: ${testCase.name}`, 'info');
214
+ await logOutput(` Prompt: "${testCase.prompt.substring(0, 50)}..."`, 'info');
215
+ await logOutput(` Stop sequences: [${testCase.stopSequences.join(', ')}]`, 'info');
216
+
217
+ // Test streaming
218
+ await logOutput(` Testing streaming...`, 'info');
219
+ const streamResult = await testStopSequence(model, awsCreds, testCase, true);
220
+
221
+ if (streamResult.success) {
222
+ if (streamResult.stoppedCorrectly) {
223
+ await logOutput(` ✓ Streaming: PASSED - ${streamResult.analysis}`, 'success');
224
+ modelResults[model].streaming.passed++;
225
+ } else {
226
+ await logOutput(` ✗ Streaming: FAILED - ${streamResult.analysis}`, 'warning');
227
+ modelResults[model].streaming.failed++;
228
+ }
229
+ await logOutput(` Response: "${streamResult.response.substring(0, 100)}..."`, 'info');
230
+ } else {
231
+ await logOutput(` ✗ Streaming: ERROR - ${streamResult.error}`, 'error');
232
+ modelResults[model].streaming.failed++;
233
+ }
234
+
235
+ // Test non-streaming
236
+ await logOutput(` Testing non-streaming...`, 'info');
237
+ const nonStreamResult = await testStopSequence(model, awsCreds, testCase, false);
238
+
239
+ if (nonStreamResult.success) {
240
+ if (nonStreamResult.stoppedCorrectly) {
241
+ await logOutput(` ✓ Non-streaming: PASSED - ${nonStreamResult.analysis}`, 'success');
242
+ modelResults[model].nonStreaming.passed++;
243
+ } else {
244
+ await logOutput(` ✗ Non-streaming: FAILED - ${nonStreamResult.analysis}`, 'warning');
245
+ modelResults[model].nonStreaming.failed++;
246
+ }
247
+ await logOutput(` Response: "${nonStreamResult.response.substring(0, 100)}..."`, 'info');
248
+ } else {
249
+ await logOutput(` ✗ Non-streaming: ERROR - ${nonStreamResult.error}`, 'error');
250
+ modelResults[model].nonStreaming.failed++;
251
+ }
252
+ }
253
+ }
254
+
255
+ // Summary
256
+ await logOutput(`\n\n${'='.repeat(80)}`, 'info');
257
+ await logOutput('SUMMARY', 'running');
258
+ await logOutput(`${'='.repeat(80)}\n`, 'info');
259
+
260
+ for (const [model, results] of Object.entries(modelResults)) {
261
+ const streamingRate = (results.streaming.passed / (results.streaming.passed + results.streaming.failed) * 100).toFixed(1);
262
+ const nonStreamingRate = (results.nonStreaming.passed / (results.nonStreaming.passed + results.nonStreaming.failed) * 100).toFixed(1);
263
+
264
+ await logOutput(`${model}:`, 'info');
265
+ await logOutput(` Streaming: ${results.streaming.passed}/${results.streaming.passed + results.streaming.failed} passed (${streamingRate}%)`,
266
+ streamingRate > 80 ? 'success' : 'warning');
267
+ await logOutput(` Non-streaming: ${results.nonStreaming.passed}/${results.nonStreaming.passed + results.nonStreaming.failed} passed (${nonStreamingRate}%)`,
268
+ nonStreamingRate > 80 ? 'success' : 'warning');
269
+ }
270
+
271
+ await logOutput('\nTesting complete! Check test-stop-sequences-output.txt for full results.', 'info', false);
272
+ }
273
+
274
+ main().catch(async (error) => {
275
+ await logOutput(`Fatal Error: ${error.message}`, 'error');
276
+ console.error(error);
277
+ });