bedrock-wrapper 2.4.1 → 2.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +55 -0
- package/README.md +43 -0
- package/bedrock-models.js +18 -4
- package/bedrock-wrapper.js +36 -3
- package/logs/e0b34b2c-ee9a-4813-893a-82d47d3d5141/notification.json +51 -0
- package/logs/e0b34b2c-ee9a-4813-893a-82d47d3d5141/post_tool_use.json +4062 -0
- package/logs/e0b34b2c-ee9a-4813-893a-82d47d3d5141/pre_tool_use.json +1625 -0
- package/logs/e0b34b2c-ee9a-4813-893a-82d47d3d5141/stop.json +65 -0
- package/logs/e0b34b2c-ee9a-4813-893a-82d47d3d5141/subagent_stop.json +9 -0
- package/logs/e0b34b2c-ee9a-4813-893a-82d47d3d5141/user_prompt_submit.json +65 -0
- package/package.json +3 -2
- package/test-stop-sequences.js +276 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
4
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
5
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
6
|
+
"hook_event_name": "Stop",
|
|
7
|
+
"stop_hook_active": false
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
11
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
12
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
13
|
+
"hook_event_name": "Stop",
|
|
14
|
+
"stop_hook_active": false
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
18
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
19
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
20
|
+
"hook_event_name": "Stop",
|
|
21
|
+
"stop_hook_active": false
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
25
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
26
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
27
|
+
"hook_event_name": "Stop",
|
|
28
|
+
"stop_hook_active": false
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
32
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
33
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
34
|
+
"hook_event_name": "Stop",
|
|
35
|
+
"stop_hook_active": false
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
39
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
40
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
41
|
+
"hook_event_name": "Stop",
|
|
42
|
+
"stop_hook_active": false
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
46
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
47
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
48
|
+
"hook_event_name": "Stop",
|
|
49
|
+
"stop_hook_active": false
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
53
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
54
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
55
|
+
"hook_event_name": "Stop",
|
|
56
|
+
"stop_hook_active": false
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
60
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
61
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
62
|
+
"hook_event_name": "Stop",
|
|
63
|
+
"stop_hook_active": false
|
|
64
|
+
}
|
|
65
|
+
]
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
4
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
5
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
6
|
+
"hook_event_name": "SubagentStop",
|
|
7
|
+
"stop_hook_active": false
|
|
8
|
+
}
|
|
9
|
+
]
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
4
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
5
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
6
|
+
"hook_event_name": "UserPromptSubmit",
|
|
7
|
+
"prompt": "hmm, having trouble with the stop sequence, pleae write a test simlar to @test-models.js so we can validate it is working as expected"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
11
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
12
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
13
|
+
"hook_event_name": "UserPromptSubmit",
|
|
14
|
+
"prompt": "it seems you said theat non-streaming responses return \"undefined\" which needs investigation. is that something you are able to investigate?"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
18
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
19
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
20
|
+
"hook_event_name": "UserPromptSubmit",
|
|
21
|
+
"prompt": "stop sequences should work regardless of the complexity of the prompt as it is just a parm to tell the model when to stop generating token, or am I wrong about this?"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
25
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
26
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
27
|
+
"hook_event_name": "UserPromptSubmit",
|
|
28
|
+
"prompt": "please do online research on the AWS Bedrock invoke model calls, as I'm sure it does support stop sequence. we need this working correctly. Think Ultrahard about this."
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
32
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
33
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
34
|
+
"hook_event_name": "UserPromptSubmit",
|
|
35
|
+
"prompt": "ok, lets update any docs @README.md , @CLAUDE.md to reflect our findings and any changes we made"
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
39
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
40
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
41
|
+
"hook_event_name": "UserPromptSubmit",
|
|
42
|
+
"prompt": "update @CHANGELOG.md "
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
46
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
47
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
48
|
+
"hook_event_name": "UserPromptSubmit",
|
|
49
|
+
"prompt": "what I meant was to add a new 2.4.3 entry for our latest changes"
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
53
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
54
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
55
|
+
"hook_event_name": "UserPromptSubmit",
|
|
56
|
+
"prompt": "[Image #1]\nI see this suggestion, what do you think?"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"session_id": "e0b34b2c-ee9a-4813-893a-82d47d3d5141",
|
|
60
|
+
"transcript_path": "C:\\Users\\Justin.Parker\\.claude\\projects\\C--git-bedrock-wrapper\\e0b34b2c-ee9a-4813-893a-82d47d3d5141.jsonl",
|
|
61
|
+
"cwd": "C:\\git\\bedrock-wrapper",
|
|
62
|
+
"hook_event_name": "UserPromptSubmit",
|
|
63
|
+
"prompt": "[Image #1]\n\nhere is another suggestiong, what do you think?"
|
|
64
|
+
}
|
|
65
|
+
]
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bedrock-wrapper",
|
|
3
|
-
"version": "2.4.1",
|
|
3
|
+
"version": "2.4.3",
|
|
4
4
|
"description": "🪨 Bedrock Wrapper is an npm package that simplifies the integration of existing OpenAI-compatible API objects with AWS Bedrock's serverless inference LLMs.",
|
|
5
5
|
"homepage": "https://www.equilllabs.com/projects/bedrock-wrapper",
|
|
6
6
|
"repository": {
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
"clean": "npx rimraf node_modules && npx rimraf package-lock.json && npm install",
|
|
16
16
|
"test": "node test-models.js",
|
|
17
17
|
"test-vision": "node test-vision.js",
|
|
18
|
+
"test-stop": "node test-stop-sequences.js",
|
|
18
19
|
"interactive": "node interactive-example.js"
|
|
19
20
|
},
|
|
20
21
|
"main": "bedrock-wrapper.js",
|
|
@@ -32,7 +33,7 @@
|
|
|
32
33
|
"author": "",
|
|
33
34
|
"license": "ISC",
|
|
34
35
|
"dependencies": {
|
|
35
|
-
"@aws-sdk/client-bedrock-runtime": "^3.
|
|
36
|
+
"@aws-sdk/client-bedrock-runtime": "^3.857.0",
|
|
36
37
|
"dotenv": "^17.2.1",
|
|
37
38
|
"sharp": "^0.34.3"
|
|
38
39
|
},
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
// ================================================================================
|
|
2
|
+
// == AWS Bedrock Stop Sequences Test - Validates stop sequences implementation ==
|
|
3
|
+
// ================================================================================
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------
|
|
6
|
+
// -- import environment variables from .env file or define them here --
|
|
7
|
+
// ---------------------------------------------------------------------
|
|
8
|
+
import dotenv from 'dotenv';
|
|
9
|
+
import fs from 'fs/promises';
|
|
10
|
+
import chalk from 'chalk';
|
|
11
|
+
|
|
12
|
+
dotenv.config();
|
|
13
|
+
|
|
14
|
+
const AWS_REGION = process.env.AWS_REGION;
|
|
15
|
+
const AWS_ACCESS_KEY_ID = process.env.AWS_ACCESS_KEY_ID;
|
|
16
|
+
const AWS_SECRET_ACCESS_KEY = process.env.AWS_SECRET_ACCESS_KEY;
|
|
17
|
+
|
|
18
|
+
// --------------------------------------------
|
|
19
|
+
// -- import functions from bedrock-wrapper --
|
|
20
|
+
// --------------------------------------------
|
|
21
|
+
import {
|
|
22
|
+
bedrockWrapper,
|
|
23
|
+
listBedrockWrapperSupportedModels
|
|
24
|
+
} from "./bedrock-wrapper.js";
|
|
25
|
+
|
|
26
|
+
/**
 * Records a message in the results file and echoes it to the console.
 *
 * @param {string} message - Text to record.
 * @param {string} [type='info'] - One of 'success', 'error', 'info',
 *   'running' or 'warning'; selects the console colour and prefix. Any other
 *   value is printed unstyled.
 * @param {boolean} [writeToFile=true] - When true, the message is appended to
 *   test-stop-sequences-output.txt before it is printed.
 * @returns {Promise<void>} Resolves once the file append (if any) has finished.
 */
async function logOutput(message, type = 'info', writeToFile = true ) {
    if (writeToFile) {
        // Log to file
        await fs.appendFile('test-stop-sequences-output.txt', message + '\n');
    }

    // Log to console with colors
    switch (type) {
        case 'success':
            console.log(chalk.green('✓ ' + message));
            break;
        case 'error':
            console.log(chalk.red('✗ ' + message));
            break;
        case 'info':
            console.log(chalk.blue('ℹ ' + message));
            break;
        case 'running':
            console.log(chalk.yellow(message));
            break;
        case 'warning':
            console.log(chalk.magenta('⚠ ' + message));
            break;
        default:
            // An unrecognised type previously reached the file but printed
            // nothing; show it unstyled so console and file stay in step.
            console.log(message);
            break;
    }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Sends one stop-sequence test prompt to a model and judges whether the
 * response stopped where the test case expects.
 *
 * @param {string} model - Model name placed in the request's `model` field.
 * @param {object} awsCreds - Credentials object handed to bedrockWrapper.
 * @param {{prompt: string, stopSequences: string[], expectedBehavior?: function(string): boolean}} testCase
 *   Prompt, stop sequences, and an optional predicate that receives the full
 *   (untrimmed) response text.
 * @param {boolean} isStreaming - Value of the request's `stream` flag.
 * @returns {Promise<{success: boolean, response: string, stoppedCorrectly: boolean, analysis: string, error?: string}>}
 *   Never rejects: any failure while calling the model is reported as
 *   `success: false` with the message in `error`.
 */
async function testStopSequence(model, awsCreds, testCase, isStreaming) {
    const openaiChatCompletionsCreateObject = {
        messages: [{ role: "user", content: testCase.prompt }],
        model,
        max_tokens: 200,
        stream: isStreaming,
        temperature: 0.1,
        top_p: 0.9,
        stop_sequences: testCase.stopSequences
    };

    try {
        // Both modes are consumed the same way, so one loop serves both.
        // Awaiting first is harmless when the wrapper returns the iterable directly.
        const chunks = await bedrockWrapper(awsCreds, openaiChatCompletionsCreateObject, { logging: false });

        let completeResponse = "";
        for await (const chunk of chunks) {
            // A nullish chunk would otherwise be appended as the literal text
            // "undefined"/"null" and pollute what the predicates inspect.
            if (chunk != null) {
                completeResponse += chunk;
            }
        }

        // Analyze if stop sequence worked
        let stoppedCorrectly;
        let analysis;
        if (testCase.expectedBehavior) {
            // Use the expectedBehavior function to determine if stopping worked correctly
            stoppedCorrectly = Boolean(testCase.expectedBehavior(completeResponse));
            analysis = stoppedCorrectly ?
                "Response stopped at the correct point" :
                "Response did not stop at the expected point";
        } else {
            // Generic check - a short response is assumed to mean the model stopped early
            stoppedCorrectly = completeResponse.length < 100;
            analysis = stoppedCorrectly ?
                "Response appears to have stopped early (good sign)" :
                "Response seems to have continued beyond expected stop point";
        }

        return {
            success: true,
            response: completeResponse.trim(),
            stoppedCorrectly,
            analysis
        };
    } catch (error) {
        return {
            success: false,
            // Thrown values are not guaranteed to be Error instances.
            error: error instanceof Error ? error.message : String(error),
            response: "",
            stoppedCorrectly: false,
            analysis: "Error occurred"
        };
    }
}
|
|
111
|
+
|
|
112
|
+
// Test cases designed to validate stop sequences
|
|
113
|
+
/**
 * Splits a response into its letter/digit tokens so single-character
 * expectations are matched as whole items instead of as substrings.
 * For example "Letters: A, B" yields the tokens Letters, A and B.
 *
 * @param {string} response - Raw model output.
 * @returns {Set<string>} Distinct tokens found in the response.
 */
function stopTestResponseTokens(response) {
    return new Set(response.split(/[^\p{L}\p{N}]+/u).filter(Boolean));
}

// Each entry supplies a prompt, the stop sequences to send, and a predicate
// that receives the full response text and returns true when the output ended
// where expected.
const stopSequenceTestCases = [
    {
        name: "Number sequence test",
        prompt: "Count from 1 to 10, separated by commas: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10",
        stopSequences: ["7"],
        expectedBehavior: (response) => {
            // Should stop at or before 7, and definitely not continue to 8, 9, 10.
            // Whole-token matching keeps "16" from counting as "6".
            const tokens = stopTestResponseTokens(response);
            return tokens.has("6") && !tokens.has("8") && !tokens.has("9") && !tokens.has("10");
        }
    },
    {
        name: "Word-based stop test",
        prompt: "List the days of the week in order: Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday",
        stopSequences: ["Friday"],
        expectedBehavior: (response) => {
            // Should stop at or before Friday, and not continue to Saturday/Sunday
            return response.includes("Thursday") && !response.includes("Saturday") && !response.includes("Sunday");
        }
    },
    {
        name: "Multi-stop sequence test",
        prompt: "Write the alphabet: A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z",
        stopSequences: ["G", "H", "I"],
        expectedBehavior: (response) => {
            // Should stop at any of G, H, or I and not continue beyond.
            // Whole-token matching keeps a capital inside a word (e.g. the "L"
            // in "Letters:") from being mistaken for a listed letter.
            const tokens = stopTestResponseTokens(response);
            return tokens.has("F") && !tokens.has("J") && !tokens.has("K") && !tokens.has("L");
        }
    },
    {
        name: "Sentence completion test",
        prompt: "Complete this story: Once upon a time, there was a brave knight who loved to explore. One day, he found a mysterious cave. Inside the cave, he discovered a magical sword. With the sword in hand, he continued deeper into the darkness.",
        stopSequences: ["sword"],
        expectedBehavior: (response) => {
            // Should stop at or shortly after "sword" and not continue the full story
            return response.includes("cave") && response.length < 200; // Shortened response
        }
    },
    {
        name: "Special character stop test",
        prompt: "Generate a list with bullet points:\n• First item\n• Second item\n• Third item\n• Fourth item\n• Fifth item",
        stopSequences: ["• Third"],
        expectedBehavior: (response) => {
            // Should stop at or before "• Third" and not continue to Fourth/Fifth
            return response.includes("Second") && !response.includes("Fourth") && !response.includes("Fifth");
        }
    }
];
|
|
160
|
+
|
|
161
|
+
/**
 * Logs the outcome of one testStopSequence() call and updates its tally.
 *
 * @param {string} label - "Streaming" or "Non-streaming"; used in the log lines.
 * @param {object} outcome - Result object returned by testStopSequence().
 * @param {{passed: number, failed: number}} tally - Counter mutated in place.
 * @returns {Promise<void>}
 */
async function reportStopSequenceOutcome(label, outcome, tally) {
    if (!outcome.success) {
        await logOutput(` ✗ ${label}: ERROR - ${outcome.error}`, 'error');
        tally.failed++;
        return;
    }

    if (outcome.stoppedCorrectly) {
        await logOutput(` ✓ ${label}: PASSED - ${outcome.analysis}`, 'success');
        tally.passed++;
    } else {
        await logOutput(` ✗ ${label}: FAILED - ${outcome.analysis}`, 'warning');
        tally.failed++;
    }
    await logOutput(` Response: "${outcome.response.substring(0, 100)}..."`, 'info');
}

/**
 * Logs one summary line ("x/y passed (z%)") for a tally.
 *
 * @param {string} label - "Streaming" or "Non-streaming".
 * @param {{passed: number, failed: number}} tally - Final counts for one mode.
 * @returns {Promise<void>}
 */
async function logStopSequenceTally(label, tally) {
    const total = tally.passed + tally.failed;
    // Guard 0/0 so the summary never prints "NaN%", and keep the rate numeric
    // so the threshold check does not depend on string-to-number coercion.
    const rate = total === 0 ? 0 : (tally.passed / total) * 100;
    await logOutput(` ${label}: ${tally.passed}/${total} passed (${rate.toFixed(1)}%)`,
        rate > 80 ? 'success' : 'warning');
}

/**
 * Runs every stop-sequence test case against a representative subset of the
 * supported models, in streaming and non-streaming mode, writing progress and
 * a per-model summary to the console and to test-stop-sequences-output.txt.
 *
 * @returns {Promise<void>}
 */
async function main() {
    // Clear output file and add header
    const timestamp = new Date().toISOString();
    await fs.writeFile('test-stop-sequences-output.txt',
        `Stop Sequences Test Results - ${timestamp}\n` +
        `${'='.repeat(80)}\n\n` +
        `This test validates that stop sequences work correctly across all models.\n` +
        `Each model is tested with multiple stop sequence scenarios.\n\n`
    );

    // Each supported-model entry is a JSON string carrying a modelName field.
    const supportedModels = await listBedrockWrapperSupportedModels();
    const availableModels = supportedModels.map((model) => JSON.parse(model).modelName);

    console.clear();
    await logOutput(`Starting stop sequences tests with ${availableModels.length} models...`, 'info');
    await logOutput(`Testing ${stopSequenceTestCases.length} different stop sequence scenarios\n`, 'info');

    const awsCreds = {
        region: AWS_REGION,
        accessKeyId: AWS_ACCESS_KEY_ID,
        secretAccessKey: AWS_SECRET_ACCESS_KEY,
    };

    // Track overall results
    const modelResults = {};

    // Test a subset of models for efficiency (you can test all if needed)
    const modelsToTest = [
        "Claude-3-5-Sonnet-v2",
        "Claude-3-Haiku",
        "Nova-Pro",
        "Nova-Lite",
        "Llama-3-3-70b",
        "Mistral-7b"
    ].filter((m) => availableModels.includes(m));

    await logOutput(`\nTesting ${modelsToTest.length} representative models...\n`, 'info');

    for (const model of modelsToTest) {
        await logOutput(`\n${'='.repeat(60)}`, 'info');
        await logOutput(`Testing ${model}`, 'running');
        await logOutput(`${'='.repeat(60)}`, 'info');

        const tallies = {
            streaming: { passed: 0, failed: 0 },
            nonStreaming: { passed: 0, failed: 0 }
        };
        modelResults[model] = tallies;

        for (const testCase of stopSequenceTestCases) {
            await logOutput(`\n▶ Test Case: ${testCase.name}`, 'info');
            await logOutput(` Prompt: "${testCase.prompt.substring(0, 50)}..."`, 'info');
            await logOutput(` Stop sequences: [${testCase.stopSequences.join(', ')}]`, 'info');

            // Test streaming
            await logOutput(` Testing streaming...`, 'info');
            const streamResult = await testStopSequence(model, awsCreds, testCase, true);
            await reportStopSequenceOutcome('Streaming', streamResult, tallies.streaming);

            // Test non-streaming
            await logOutput(` Testing non-streaming...`, 'info');
            const nonStreamResult = await testStopSequence(model, awsCreds, testCase, false);
            await reportStopSequenceOutcome('Non-streaming', nonStreamResult, tallies.nonStreaming);
        }
    }

    // Summary
    await logOutput(`\n\n${'='.repeat(80)}`, 'info');
    await logOutput('SUMMARY', 'running');
    await logOutput(`${'='.repeat(80)}\n`, 'info');

    for (const [model, results] of Object.entries(modelResults)) {
        await logOutput(`${model}:`, 'info');
        await logStopSequenceTally('Streaming', results.streaming);
        await logStopSequenceTally('Non-streaming', results.nonStreaming);
    }

    await logOutput('\nTesting complete! Check test-stop-sequences-output.txt for full results.', 'info', false);
}
|
|
272
|
+
|
|
273
|
+
// Script entry point: run the suite and report anything main() did not handle.
main().catch(async (error) => {
    // A rejected main() means the run did not complete; give the shell / CI a
    // non-zero exit status instead of exiting as if it had succeeded.
    process.exitCode = 1;
    try {
        // Thrown values are not guaranteed to be Error instances.
        await logOutput(`Fatal Error: ${error?.message ?? error}`, 'error');
    } finally {
        // Always print the full error, even if writing the log line failed.
        console.error(error);
    }
});
|