ai.matey.testing 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/cjs/fixture-capture.js +169 -0
- package/dist/cjs/fixture-capture.js.map +1 -0
- package/dist/cjs/fixture-helpers.js +240 -0
- package/dist/cjs/fixture-helpers.js.map +1 -0
- package/dist/cjs/fixture-loader.js +196 -0
- package/dist/cjs/fixture-loader.js.map +1 -0
- package/dist/cjs/fixture-types.js +20 -0
- package/dist/cjs/fixture-types.js.map +1 -0
- package/dist/cjs/index.js +66 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/property-testing.js +288 -0
- package/dist/cjs/property-testing.js.map +1 -0
- package/dist/cjs/test-helpers.js +289 -0
- package/dist/cjs/test-helpers.js.map +1 -0
- package/dist/esm/fixture-capture.js +163 -0
- package/dist/esm/fixture-capture.js.map +1 -0
- package/dist/esm/fixture-helpers.js +228 -0
- package/dist/esm/fixture-helpers.js.map +1 -0
- package/dist/esm/fixture-loader.js +154 -0
- package/dist/esm/fixture-loader.js.map +1 -0
- package/dist/esm/fixture-types.js +16 -0
- package/dist/esm/fixture-types.js.map +1 -0
- package/dist/esm/index.js +15 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/property-testing.js +274 -0
- package/dist/esm/property-testing.js.map +1 -0
- package/dist/esm/test-helpers.js +272 -0
- package/dist/esm/test-helpers.js.map +1 -0
- package/dist/types/fixture-capture.d.ts +55 -0
- package/dist/types/fixture-capture.d.ts.map +1 -0
- package/dist/types/fixture-helpers.d.ts +69 -0
- package/dist/types/fixture-helpers.d.ts.map +1 -0
- package/dist/types/fixture-loader.d.ts +40 -0
- package/dist/types/fixture-loader.d.ts.map +1 -0
- package/dist/types/fixture-types.d.ts +76 -0
- package/dist/types/fixture-types.d.ts.map +1 -0
- package/dist/types/index.d.ts +12 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/property-testing.d.ts +79 -0
- package/dist/types/property-testing.d.ts.map +1 -0
- package/dist/types/test-helpers.d.ts +81 -0
- package/dist/types/test-helpers.d.ts.map +1 -0
- package/package.json +68 -0
- package/readme.md +31 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Testing utilities and fixtures
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.propertyMultiTurnAlternates = exports.propertyValidRequest = exports.shrinkChatRequest = exports.forAll = exports.generateChatRequest = exports.generateParameters = exports.generateSystemMessage = exports.generateAssistantMessage = exports.generateUserMessage = exports.generateTextContent = exports.SeededRandom = exports.assertReasonableUsage = exports.estimateTokens = exports.accumulateStreamText = exports.extractToolUsesFromResponse = exports.extractTextFromResponse = exports.buildMultiTurnRequest = exports.buildChatRequest = exports.assertValidStreamSequence = exports.assertResponseHasToolUse = exports.assertResponseHasText = exports.assertValidMessageContent = exports.assertValidMessage = exports.assertValidChatRequest = exports.assertValidStreamChunk = exports.assertValidChatResponse = exports.createConfigurableMock = exports.collectChunksToResponse = exports.extractChunks = exports.extractResponse = exports.extractRequest = exports.validateAgainstFixture = exports.createMocksFromFixtures = exports.replayStreamWithTiming = exports.createMockFromFixture = exports.bulkCapture = exports.createCaptureMiddleware = exports.captureStream = exports.captureChat = exports.loadFixtureCollection = exports.getFixtureCacheStats = exports.clearFixtureCache = exports.findFixtures = exports.loadProviderFixtures = exports.loadFixture = exports.FIXTURES_DIR = exports.isStreamingFixture = exports.isChatFixture = void 0;
|
|
7
|
+
// Type guards
|
|
8
|
+
var fixture_types_js_1 = require("./fixture-types.js");
|
|
9
|
+
Object.defineProperty(exports, "isChatFixture", { enumerable: true, get: function () { return fixture_types_js_1.isChatFixture; } });
|
|
10
|
+
Object.defineProperty(exports, "isStreamingFixture", { enumerable: true, get: function () { return fixture_types_js_1.isStreamingFixture; } });
|
|
11
|
+
// Fixture loading
|
|
12
|
+
var fixture_loader_js_1 = require("./fixture-loader.js");
|
|
13
|
+
Object.defineProperty(exports, "FIXTURES_DIR", { enumerable: true, get: function () { return fixture_loader_js_1.FIXTURES_DIR; } });
|
|
14
|
+
Object.defineProperty(exports, "loadFixture", { enumerable: true, get: function () { return fixture_loader_js_1.loadFixture; } });
|
|
15
|
+
Object.defineProperty(exports, "loadProviderFixtures", { enumerable: true, get: function () { return fixture_loader_js_1.loadProviderFixtures; } });
|
|
16
|
+
Object.defineProperty(exports, "findFixtures", { enumerable: true, get: function () { return fixture_loader_js_1.findFixtures; } });
|
|
17
|
+
Object.defineProperty(exports, "clearFixtureCache", { enumerable: true, get: function () { return fixture_loader_js_1.clearFixtureCache; } });
|
|
18
|
+
Object.defineProperty(exports, "getFixtureCacheStats", { enumerable: true, get: function () { return fixture_loader_js_1.getFixtureCacheStats; } });
|
|
19
|
+
Object.defineProperty(exports, "loadFixtureCollection", { enumerable: true, get: function () { return fixture_loader_js_1.loadFixtureCollection; } });
|
|
20
|
+
var fixture_capture_js_1 = require("./fixture-capture.js");
|
|
21
|
+
Object.defineProperty(exports, "captureChat", { enumerable: true, get: function () { return fixture_capture_js_1.captureChat; } });
|
|
22
|
+
Object.defineProperty(exports, "captureStream", { enumerable: true, get: function () { return fixture_capture_js_1.captureStream; } });
|
|
23
|
+
Object.defineProperty(exports, "createCaptureMiddleware", { enumerable: true, get: function () { return fixture_capture_js_1.createCaptureMiddleware; } });
|
|
24
|
+
Object.defineProperty(exports, "bulkCapture", { enumerable: true, get: function () { return fixture_capture_js_1.bulkCapture; } });
|
|
25
|
+
// Fixture helpers
|
|
26
|
+
var fixture_helpers_js_1 = require("./fixture-helpers.js");
|
|
27
|
+
Object.defineProperty(exports, "createMockFromFixture", { enumerable: true, get: function () { return fixture_helpers_js_1.createMockFromFixture; } });
|
|
28
|
+
Object.defineProperty(exports, "replayStreamWithTiming", { enumerable: true, get: function () { return fixture_helpers_js_1.replayStreamWithTiming; } });
|
|
29
|
+
Object.defineProperty(exports, "createMocksFromFixtures", { enumerable: true, get: function () { return fixture_helpers_js_1.createMocksFromFixtures; } });
|
|
30
|
+
Object.defineProperty(exports, "validateAgainstFixture", { enumerable: true, get: function () { return fixture_helpers_js_1.validateAgainstFixture; } });
|
|
31
|
+
Object.defineProperty(exports, "extractRequest", { enumerable: true, get: function () { return fixture_helpers_js_1.extractRequest; } });
|
|
32
|
+
Object.defineProperty(exports, "extractResponse", { enumerable: true, get: function () { return fixture_helpers_js_1.extractResponse; } });
|
|
33
|
+
Object.defineProperty(exports, "extractChunks", { enumerable: true, get: function () { return fixture_helpers_js_1.extractChunks; } });
|
|
34
|
+
Object.defineProperty(exports, "collectChunksToResponse", { enumerable: true, get: function () { return fixture_helpers_js_1.collectChunksToResponse; } });
|
|
35
|
+
Object.defineProperty(exports, "createConfigurableMock", { enumerable: true, get: function () { return fixture_helpers_js_1.createConfigurableMock; } });
|
|
36
|
+
// Test helpers and assertions
|
|
37
|
+
var test_helpers_js_1 = require("./test-helpers.js");
|
|
38
|
+
Object.defineProperty(exports, "assertValidChatResponse", { enumerable: true, get: function () { return test_helpers_js_1.assertValidChatResponse; } });
|
|
39
|
+
Object.defineProperty(exports, "assertValidStreamChunk", { enumerable: true, get: function () { return test_helpers_js_1.assertValidStreamChunk; } });
|
|
40
|
+
Object.defineProperty(exports, "assertValidChatRequest", { enumerable: true, get: function () { return test_helpers_js_1.assertValidChatRequest; } });
|
|
41
|
+
Object.defineProperty(exports, "assertValidMessage", { enumerable: true, get: function () { return test_helpers_js_1.assertValidMessage; } });
|
|
42
|
+
Object.defineProperty(exports, "assertValidMessageContent", { enumerable: true, get: function () { return test_helpers_js_1.assertValidMessageContent; } });
|
|
43
|
+
Object.defineProperty(exports, "assertResponseHasText", { enumerable: true, get: function () { return test_helpers_js_1.assertResponseHasText; } });
|
|
44
|
+
Object.defineProperty(exports, "assertResponseHasToolUse", { enumerable: true, get: function () { return test_helpers_js_1.assertResponseHasToolUse; } });
|
|
45
|
+
Object.defineProperty(exports, "assertValidStreamSequence", { enumerable: true, get: function () { return test_helpers_js_1.assertValidStreamSequence; } });
|
|
46
|
+
Object.defineProperty(exports, "buildChatRequest", { enumerable: true, get: function () { return test_helpers_js_1.buildChatRequest; } });
|
|
47
|
+
Object.defineProperty(exports, "buildMultiTurnRequest", { enumerable: true, get: function () { return test_helpers_js_1.buildMultiTurnRequest; } });
|
|
48
|
+
Object.defineProperty(exports, "extractTextFromResponse", { enumerable: true, get: function () { return test_helpers_js_1.extractTextFromResponse; } });
|
|
49
|
+
Object.defineProperty(exports, "extractToolUsesFromResponse", { enumerable: true, get: function () { return test_helpers_js_1.extractToolUsesFromResponse; } });
|
|
50
|
+
Object.defineProperty(exports, "accumulateStreamText", { enumerable: true, get: function () { return test_helpers_js_1.accumulateStreamText; } });
|
|
51
|
+
Object.defineProperty(exports, "estimateTokens", { enumerable: true, get: function () { return test_helpers_js_1.estimateTokens; } });
|
|
52
|
+
Object.defineProperty(exports, "assertReasonableUsage", { enumerable: true, get: function () { return test_helpers_js_1.assertReasonableUsage; } });
|
|
53
|
+
// Property-based testing
|
|
54
|
+
var property_testing_js_1 = require("./property-testing.js");
|
|
55
|
+
Object.defineProperty(exports, "SeededRandom", { enumerable: true, get: function () { return property_testing_js_1.SeededRandom; } });
|
|
56
|
+
Object.defineProperty(exports, "generateTextContent", { enumerable: true, get: function () { return property_testing_js_1.generateTextContent; } });
|
|
57
|
+
Object.defineProperty(exports, "generateUserMessage", { enumerable: true, get: function () { return property_testing_js_1.generateUserMessage; } });
|
|
58
|
+
Object.defineProperty(exports, "generateAssistantMessage", { enumerable: true, get: function () { return property_testing_js_1.generateAssistantMessage; } });
|
|
59
|
+
Object.defineProperty(exports, "generateSystemMessage", { enumerable: true, get: function () { return property_testing_js_1.generateSystemMessage; } });
|
|
60
|
+
Object.defineProperty(exports, "generateParameters", { enumerable: true, get: function () { return property_testing_js_1.generateParameters; } });
|
|
61
|
+
Object.defineProperty(exports, "generateChatRequest", { enumerable: true, get: function () { return property_testing_js_1.generateChatRequest; } });
|
|
62
|
+
Object.defineProperty(exports, "forAll", { enumerable: true, get: function () { return property_testing_js_1.forAll; } });
|
|
63
|
+
Object.defineProperty(exports, "shrinkChatRequest", { enumerable: true, get: function () { return property_testing_js_1.shrinkChatRequest; } });
|
|
64
|
+
Object.defineProperty(exports, "propertyValidRequest", { enumerable: true, get: function () { return property_testing_js_1.propertyValidRequest; } });
|
|
65
|
+
Object.defineProperty(exports, "propertyMultiTurnAlternates", { enumerable: true, get: function () { return property_testing_js_1.propertyMultiTurnAlternates; } });
|
|
66
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAYH,cAAc;AACd,uDAAuE;AAA9D,iHAAA,aAAa,OAAA;AAAE,sHAAA,kBAAkB,OAAA;AAE1C,kBAAkB;AAClB,yDAQ6B;AAP3B,iHAAA,YAAY,OAAA;AACZ,gHAAA,WAAW,OAAA;AACX,yHAAA,oBAAoB,OAAA;AACpB,iHAAA,YAAY,OAAA;AACZ,sHAAA,iBAAiB,OAAA;AACjB,yHAAA,oBAAoB,OAAA;AACpB,0HAAA,qBAAqB,OAAA;AAKvB,2DAK8B;AAJ5B,iHAAA,WAAW,OAAA;AACX,mHAAA,aAAa,OAAA;AACb,6HAAA,uBAAuB,OAAA;AACvB,iHAAA,WAAW,OAAA;AAGb,kBAAkB;AAClB,2DAU8B;AAT5B,2HAAA,qBAAqB,OAAA;AACrB,4HAAA,sBAAsB,OAAA;AACtB,6HAAA,uBAAuB,OAAA;AACvB,4HAAA,sBAAsB,OAAA;AACtB,oHAAA,cAAc,OAAA;AACd,qHAAA,eAAe,OAAA;AACf,mHAAA,aAAa,OAAA;AACb,6HAAA,uBAAuB,OAAA;AACvB,4HAAA,sBAAsB,OAAA;AAGxB,8BAA8B;AAC9B,qDAgB2B;AAfzB,0HAAA,uBAAuB,OAAA;AACvB,yHAAA,sBAAsB,OAAA;AACtB,yHAAA,sBAAsB,OAAA;AACtB,qHAAA,kBAAkB,OAAA;AAClB,4HAAA,yBAAyB,OAAA;AACzB,wHAAA,qBAAqB,OAAA;AACrB,2HAAA,wBAAwB,OAAA;AACxB,4HAAA,yBAAyB,OAAA;AACzB,mHAAA,gBAAgB,OAAA;AAChB,wHAAA,qBAAqB,OAAA;AACrB,0HAAA,uBAAuB,OAAA;AACvB,8HAAA,2BAA2B,OAAA;AAC3B,uHAAA,oBAAoB,OAAA;AACpB,iHAAA,cAAc,OAAA;AACd,wHAAA,qBAAqB,OAAA;AAGvB,yBAAyB;AACzB,6DAY+B;AAX7B,mHAAA,YAAY,OAAA;AACZ,0HAAA,mBAAmB,OAAA;AACnB,0HAAA,mBAAmB,OAAA;AACnB,+HAAA,wBAAwB,OAAA;AACxB,4HAAA,qBAAqB,OAAA;AACrB,yHAAA,kBAAkB,OAAA;AAClB,0HAAA,mBAAmB,OAAA;AACnB,6GAAA,MAAM,OAAA;AACN,wHAAA,iBAAiB,OAAA;AACjB,2HAAA,oBAAoB,OAAA;AACpB,kIAAA,2BAA2B,OAAA"}
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Property-based testing utilities - generate random valid inputs
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.SeededRandom = void 0;
|
|
7
|
+
exports.generateTextContent = generateTextContent;
|
|
8
|
+
exports.generateUserMessage = generateUserMessage;
|
|
9
|
+
exports.generateAssistantMessage = generateAssistantMessage;
|
|
10
|
+
exports.generateSystemMessage = generateSystemMessage;
|
|
11
|
+
exports.generateParameters = generateParameters;
|
|
12
|
+
exports.generateChatRequest = generateChatRequest;
|
|
13
|
+
exports.forAll = forAll;
|
|
14
|
+
exports.shrinkChatRequest = shrinkChatRequest;
|
|
15
|
+
exports.propertyValidRequest = propertyValidRequest;
|
|
16
|
+
exports.propertyMultiTurnAlternates = propertyMultiTurnAlternates;
|
|
17
|
+
/**
 * Seeded pseudo-random number generator for reproducible test data.
 *
 * Implements a small linear congruential generator,
 * `seed = (seed * 9301 + 49297) % 233280`, so two instances created with the
 * same seed produce the same sequence of values.
 */
class SeededRandom {
    /** Current generator state; overwritten by every call to `next()`. */
    seed;
    /**
     * @param {number} [seed=Date.now()] - Initial generator state.
     */
    constructor(seed = Date.now()) {
        this.seed = seed;
    }
    /**
     * Advance the generator and return the next value.
     *
     * @returns {number} A value in the range [0, 1).
     */
    next() {
        const modulus = 233280;
        const stepped = (this.seed * 9301 + 49297) % modulus;
        // `%` keeps the sign of the dividend, so a negative seed would leave a
        // negative state and make this method return values below 0. Shift such
        // results back into [0, modulus); non-negative results are untouched, so
        // sequences for non-negative seeds are unchanged.
        this.seed = stepped < 0 ? stepped + modulus : stepped;
        return this.seed / modulus;
    }
    /**
     * Generate a random integer between `min` and `max`, both inclusive.
     *
     * @param {number} min - Lowest value that can be returned.
     * @param {number} max - Highest value that can be returned.
     * @returns {number} An integer in [min, max].
     */
    nextInt(min, max) {
        return Math.floor(this.next() * (max - min + 1)) + min;
    }
    /**
     * Pick a random element from an array.
     *
     * @param {Array} array - Non-empty array to choose from.
     * @returns {*} One element of `array`.
     * @throws {Error} If `array` is empty.
     */
    pick(array) {
        if (array.length === 0) {
            throw new Error('Cannot pick from empty array');
        }
        return array[this.nextInt(0, array.length - 1)];
    }
    /**
     * Generate a random boolean.
     *
     * @param {number} [probability=0.5] - Threshold compared against `next()`;
     *   the result is `true` when the drawn value is below it.
     * @returns {boolean}
     */
    nextBool(probability = 0.5) {
        return this.next() < probability;
    }
}
|
|
54
|
+
exports.SeededRandom = SeededRandom;
|
|
55
|
+
/**
 * Build a text content part whose text is one of a fixed set of sample prompts.
 *
 * @param random - Randomness source; only its `pick(array)` method is called.
 * @returns An object of the form `{ type: 'text', text }`.
 */
function generateTextContent(random) {
    const samplePrompts = [
        'Hello, how are you?',
        'What is the weather today?',
        'Explain quantum computing',
        'Write a haiku about programming',
        'What is 2 + 2?',
        'Tell me a joke',
        'Summarize the main points',
        'Translate this to French',
        'Help me debug this code',
        'What are the benefits?',
    ];
    const text = random.pick(samplePrompts);
    return { type: 'text', text };
}
|
|
76
|
+
/**
 * Build a user message holding between one and three text content parts.
 *
 * The number of parts is drawn first via `random.nextInt(1, 3)`; each part is
 * then produced by `generateTextContent(random)`.
 *
 * @param random - Randomness source passed through to `generateTextContent`.
 * @returns An object of the form `{ role: 'user', content }`.
 */
function generateUserMessage(random) {
    const partCount = random.nextInt(1, 3);
    const parts = [];
    while (parts.length < partCount) {
        parts.push(generateTextContent(random));
    }
    return { role: 'user', content: parts };
}
|
|
90
|
+
/**
 * Build an assistant message containing exactly one text content part.
 *
 * @param random - Randomness source passed through to `generateTextContent`.
 * @returns An object of the form `{ role: 'assistant', content: [part] }`.
 */
function generateAssistantMessage(random) {
    const reply = generateTextContent(random);
    return { role: 'assistant', content: [reply] };
}
|
|
99
|
+
/**
 * Build a system message whose single text part is one of a fixed set of
 * system prompts.
 *
 * @param random - Randomness source; only its `pick(array)` method is called.
 * @returns An object of the form
 *   `{ role: 'system', content: [{ type: 'text', text }] }`.
 */
function generateSystemMessage(random) {
    const prompts = [
        'You are a helpful assistant',
        'You are an expert programmer',
        'You speak like a pirate',
        'You are a teacher',
        'You provide concise answers',
    ];
    const instruction = { type: 'text', text: random.pick(prompts) };
    return { role: 'system', content: [instruction] };
}
|
|
120
|
+
/**
 * Build a random set of request parameters.
 *
 * Draws from `random` in this order: the model via `pick`, the temperature via
 * `next`, `maxTokens` via `nextInt(50, 500)`, then `nextBool()` decides whether
 * a further `next` draw is used for `topP`.
 *
 * @param random - Randomness source providing `pick`, `next`, `nextInt` and
 *   `nextBool`.
 * @returns An object `{ model, temperature, maxTokens, topP }`; `topP` is
 *   present but `undefined` when `nextBool()` returned a falsy value.
 */
function generateParameters(random) {
    const modelIds = [
        'gpt-4',
        'gpt-3.5-turbo',
        'claude-3-5-sonnet-20241022',
        'claude-3-haiku-20240307',
        'gemini-1.5-pro',
        'llama3.2:1b',
    ];
    // Rounds one `random.next()` draw to the nearest multiple of 0.05.
    const drawInTwentieths = () => Math.round(random.next() * 20) / 20;
    const model = random.pick(modelIds);
    const temperature = drawInTwentieths();
    const maxTokens = random.nextInt(50, 500);
    let topP;
    if (random.nextBool()) {
        topP = drawInTwentieths();
    }
    return { model, temperature, maxTokens, topP };
}
|
|
139
|
+
/**
 * Build a random chat request.
 *
 * Steps, in order:
 *  1. When `options.includeSystem` is truthy, `random.nextBool(0.5)` decides
 *     whether a system message is placed first.
 *  2. A message count is drawn with `random.nextInt(minMessages, maxMessages)`
 *     (defaults 1 and 5). The draw always happens, but it is only used when
 *     `options.multiTurn` is truthy, in which case `floor(count / 2)`
 *     user/assistant pairs are appended.
 *  3. A final user message is always appended.
 *
 * @param random - Randomness source handed to the message/parameter generators.
 * @param [options] - Optional settings: `includeSystem`, `minMessages`,
 *   `maxMessages`, `multiTurn`.
 * @returns An object `{ messages, parameters, metadata }` where `metadata`
 *   holds a `gen-NNNN` request id and the current `Date.now()` timestamp.
 */
function generateChatRequest(random, options) {
    const settings = options ?? {};
    const conversation = [];
    const addSystem = settings.includeSystem && random.nextBool(0.5);
    if (addSystem) {
        conversation.push(generateSystemMessage(random));
    }
    const lowerBound = settings.minMessages ?? 1;
    const upperBound = settings.maxMessages ?? 5;
    const messageCount = random.nextInt(lowerBound, upperBound);
    if (settings.multiTurn) {
        // Each remaining turn contributes one user message and one assistant reply.
        for (let remaining = Math.floor(messageCount / 2); remaining > 0; remaining--) {
            conversation.push(generateUserMessage(random));
            conversation.push(generateAssistantMessage(random));
        }
    }
    // The conversation always closes with a user message.
    conversation.push(generateUserMessage(random));
    const parameters = generateParameters(random);
    const requestId = `gen-${random.nextInt(1000, 9999)}`;
    const timestamp = Date.now();
    return {
        messages: conversation,
        parameters,
        metadata: { requestId, timestamp },
    };
}
|
|
171
|
+
/**
 * Property test runner: feed `runs` generated values to `test` and fail if any
 * run throws.
 *
 * Every run is executed even after a failure; the thrown error then reports the
 * first failing run, the seed, the failing value and the total failure count.
 *
 * @param generator - Called as `generator(random)` once per run with a
 *   `SeededRandom` built from the seed; returns the value under test.
 * @param test - Called with each generated value; may be async. A throw or
 *   rejection counts as a failure for that run.
 * @param [options] - Optional `{ runs, seed }`; `runs` defaults to 100 and
 *   `seed` to `Date.now()`.
 * @returns {Promise<void>} Resolves when no run failed.
 * @throws {Error} When at least one run failed. The originally thrown value of
 *   the first failing run is attached as `cause`.
 */
async function forAll(generator, test, options) {
    const runs = options?.runs ?? 100;
    const seed = options?.seed ?? Date.now();
    const random = new SeededRandom(seed);
    const failures = [];
    for (let i = 0; i < runs; i++) {
        const value = generator(random);
        try {
            await test(value);
        }
        catch (error) {
            failures.push({
                run: i,
                value,
                error: error,
            });
        }
    }
    if (failures.length > 0) {
        const firstFailure = failures[0];
        // Anything can be thrown (strings, null, undefined, ...), so do not assume
        // the caught value is an Error with a `message`.
        const thrown = firstFailure.error;
        const errorText = thrown?.message ?? String(thrown);
        // JSON.stringify throws on cyclic structures and BigInt; building the
        // report must never hide the real failure, so fall back to String().
        let valueText;
        try {
            valueText = JSON.stringify(firstFailure.value, null, 2);
        }
        catch {
            valueText = String(firstFailure.value);
        }
        throw new Error(`Property test failed on run ${firstFailure.run + 1}/${runs}\n` +
            `Seed: ${seed}\n` +
            `Value: ${valueText}\n` +
            `Error: ${errorText}\n` +
            `Total failures: ${failures.length}/${runs}`, { cause: thrown });
    }
}
|
|
201
|
+
/**
 * Produce simpler variants of a request, for narrowing down a failing case.
 *
 * Candidates are returned in this order:
 *  1. the request without its last message (only when it has more than one);
 *  2. the request with `temperature` 0.7, `maxTokens` 100 and `topP` undefined;
 *  3. for every message whose `content` is an array with more than one entry,
 *     the request with that message reduced to its first content entry.
 *
 * The input request is not modified; candidates are shallow copies.
 *
 * @param request - Request with a `messages` array and optional `parameters`.
 * @returns {Array} The list of candidate requests.
 */
function shrinkChatRequest(request) {
    const { messages } = request;
    const candidates = [];
    if (messages.length > 1) {
        const withoutLast = messages.slice(0, -1);
        candidates.push({ ...request, messages: withoutLast });
    }
    const neutralParameters = {
        ...request.parameters,
        temperature: 0.7,
        maxTokens: 100,
        topP: undefined,
    };
    candidates.push({ ...request, parameters: neutralParameters });
    for (const [index, message] of messages.entries()) {
        const parts = message.content;
        if (!Array.isArray(parts) || parts.length <= 1) {
            continue;
        }
        const trimmed = Array.from(messages);
        trimmed[index] = { ...message, content: [parts[0]] };
        candidates.push({ ...request, messages: trimmed });
    }
    return candidates;
}
|
|
240
|
+
/**
|
|
241
|
+
* Example property tests
|
|
242
|
+
*/
|
|
243
|
+
/**
 * Property: every request produced by `generateChatRequest(random)` is valid.
 *
 * Runs 100 cases through `forAll`. A request is rejected when it has no
 * messages, when its last message is not from the user, when a defined
 * `temperature` lies outside 0..2, or when a defined `maxTokens` is below 1.
 *
 * @returns {Promise<void>} Settles with the outcome of `forAll`.
 */
async function propertyValidRequest() {
    const produceRequest = (random) => generateChatRequest(random);
    const checkRequest = (request) => {
        const { messages } = request;
        if (messages.length === 0) {
            throw new Error('Request must have at least one message');
        }
        const finalMessage = messages[messages.length - 1];
        if (finalMessage?.role !== 'user') {
            throw new Error('Last message must be from user');
        }
        const temperature = request.parameters?.temperature;
        if (temperature !== undefined && (temperature < 0 || temperature > 2)) {
            throw new Error('Temperature must be between 0 and 2');
        }
        const maxTokens = request.parameters?.maxTokens;
        if (maxTokens !== undefined && maxTokens < 1) {
            throw new Error('maxTokens must be positive');
        }
    };
    await forAll(produceRequest, checkRequest, { runs: 100 });
}
|
|
270
|
+
/**
 * Property: in multi-turn requests, user and assistant messages alternate.
 *
 * Runs 100 cases of `generateChatRequest(random, { multiTurn: true })` through
 * `forAll`. System messages are ignored; for every remaining adjacent pair, a
 * user message must be followed by an assistant message and vice versa.
 *
 * @returns {Promise<void>} Settles with the outcome of `forAll`.
 */
async function propertyMultiTurnAlternates() {
    const produceRequest = (random) => generateChatRequest(random, { multiTurn: true });
    const checkAlternation = (request) => {
        const dialogue = request.messages.filter((message) => message.role !== 'system');
        for (let index = 1; index < dialogue.length; index++) {
            const earlier = dialogue[index - 1];
            const later = dialogue[index];
            if (earlier.role === 'user' && later.role !== 'assistant') {
                throw new Error('User message must be followed by assistant message');
            }
            if (earlier.role === 'assistant' && later.role !== 'user') {
                throw new Error('Assistant message must be followed by user message');
            }
        }
    };
    await forAll(produceRequest, checkAlternation, { runs: 100 });
}
|
|
288
|
+
//# sourceMappingURL=property-testing.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"property-testing.js","sourceRoot":"","sources":["../../src/property-testing.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAkDH,kDAkBC;AAKD,kDAYC;AAKD,4DAKC;AAKD,sDAkBC;AAKD,gDAgBC;AAKD,kDAyCC;AAKD,wBAsCC;AAKD,8CAuCC;AASD,oDA8BC;AAKD,kEAoBC;AA5UD;;GAEG;AACH,MAAa,YAAY;IACf,IAAI,CAAS;IAErB,YAAY,OAAe,IAAI,CAAC,GAAG,EAAE;QACnC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,IAAI;QACF,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,IAAI,GAAG,KAAK,CAAC,GAAG,MAAM,CAAC;QAChD,OAAO,IAAI,CAAC,IAAI,GAAG,MAAM,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,OAAO,CAAC,GAAW,EAAE,GAAW;QAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;IACzD,CAAC;IAED;;OAEG;IACH,IAAI,CAAI,KAAmB;QACzB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAClD,CAAC;QACD,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAE,CAAC;IACnD,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,cAAsB,GAAG;QAChC,OAAO,IAAI,CAAC,IAAI,EAAE,GAAG,WAAW,CAAC;IACnC,CAAC;CACF;AAtCD,oCAsCC;AAED;;GAEG;AACH,SAAgB,mBAAmB,CAAC,MAAoB;IACtD,MAAM,KAAK,GAAG;QACZ,qBAAqB;QACrB,4BAA4B;QAC5B,2BAA2B;QAC3B,iCAAiC;QACjC,gBAAgB;QAChB,gBAAgB;QAChB,2BAA2B;QAC3B,0BAA0B;QAC1B,yBAAyB;QACzB,wBAAwB;KACzB,CAAC;IAEF,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC;KACzB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,mBAAmB,CAAC,MAAoB;IACtD,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC1C,MAAM,OAAO,GAAqB,EAAE,CAAC;IAErC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5C,CAAC;IAED,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,OAAO;KACR,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,wBAAwB,CAAC,MAAoB;IAC3D,OAAO;QACL,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC;KACvC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,qBAAqB,CAAC,MAAoB;IACxD,MAAM,aAAa,GAAG;QACpB,6BAA6B;QAC7B,8BAA8B;QAC9B,yBAAyB;QACzB,mBAAmB;QACnB,6BAA6B;KAC9B,CAAC;IAEF,OAAO;QACL,IAAI,EAAE,QAAQ;QACd
,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC;aACjC;SACF;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,kBAAkB,CAAC,MAAoB;IACrD,MAAM,MAAM,GAAG;QACb,OAAO;QACP,eAAe;QACf,4BAA4B;QAC5B,yBAAyB;QACzB,gBAAgB;QAChB,aAAa;KACd,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC;QAC1B,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE,CAAC,GAAG,EAAE,EAAE,gCAAgC;QAClF,SAAS,EAAE,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,GAAG,CAAC;QAClC,IAAI,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,SAAS;KAC1E,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,mBAAmB,CACjC,MAAoB,EACpB,OAKC;IAED,MAAM,QAAQ,GAAgB,EAAE,CAAC;IAEjC,kCAAkC;IAClC,IAAI,OAAO,EAAE,aAAa,IAAI,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACnD,QAAQ,CAAC,IAAI,CAAC,qBAAqB,CAAC,MAAM,CAAC,CAAC,CAAC;IAC/C,CAAC;IAED,yBAAyB;IACzB,MAAM,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,CAAC,CAAC;IAC9C,MAAM,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,CAAC,CAAC;IAC9C,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC;IAE9D,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;QACvB,0BAA0B;QAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC;QAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,QAAQ,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC;YAC3C,QAAQ,CAAC,IAAI,CAAC,wBAAwB,CAAC,MAAM,CAAC,CAAC,CAAC;QAClD,CAAC;IACH,CAAC;IAED,+BAA+B;IAC/B,QAAQ,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC;IAE3C,OAAO;QACL,QAAQ;QACR,UAAU,EAAE,kBAAkB,CAAC,MAAM,CAAC;QACtC,QAAQ,EAAE;YACR,SAAS,EAAE,OAAO,MAAM,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE;YAC9C,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACtB;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACI,KAAK,UAAU,MAAM,CAC1B,SAAsC,EACtC,IAAwC,EACxC,OAGC;IAED,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,IAAI,GAAG,CAAC;IAClC,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;IACzC,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC;IAEtC,MAAM,QAAQ,GAAmD,EAAE,CAAC;IAEpE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,MAAM,KAAK,GAAG,SAAS,CAA
C,MAAM,CAAC,CAAC;QAEhC,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,QAAQ,CAAC,IAAI,CAAC;gBACZ,GAAG,EAAE,CAAC;gBACN,KAAK;gBACL,KAAK,EAAE,KAAc;aACtB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,YAAY,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC;QAClC,MAAM,IAAI,KAAK,CACb,+BAA+B,YAAY,CAAC,GAAG,GAAG,CAAC,IAAI,IAAI,IAAI;YAC7D,SAAS,IAAI,IAAI;YACjB,UAAU,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI;YACzD,UAAU,YAAY,CAAC,KAAK,CAAC,OAAO,IAAI;YACxC,mBAAmB,QAAQ,CAAC,MAAM,IAAI,IAAI,EAAE,CAC/C,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAgB,iBAAiB,CAAC,OAAsB;IACtD,MAAM,MAAM,GAAoB,EAAE,CAAC;IAEnC,wBAAwB;IACxB,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,MAAM,CAAC,IAAI,CAAC;YACV,GAAG,OAAO;YACV,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;SACxC,CAAC,CAAC;IACL,CAAC;IAED,6BAA6B;IAC7B,MAAM,CAAC,IAAI,CAAC;QACV,GAAG,OAAO;QACV,UAAU,EAAE;YACV,GAAG,OAAO,CAAC,UAAU;YACrB,WAAW,EAAE,GAAG;YAChB,SAAS,EAAE,GAAG;YACd,IAAI,EAAE,SAAS;SAChB;KACF,CAAC,CAAC;IAEH,qCAAqC;IACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACjD,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC;QACrC,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjE,MAAM,WAAW,GAAG,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;YAC1C,WAAW,CAAC,CAAC,CAAC,GAAG;gBACf,GAAG,OAAO;gBACV,OAAO,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC;aAC/B,CAAC;YACF,MAAM,CAAC,IAAI,CAAC;gBACV,GAAG,OAAO;gBACV,QAAQ,EAAE,WAAW;aACtB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AAEH;;GAEG;AACI,KAAK,UAAU,oBAAoB;IACxC,MAAM,MAAM,CACV,CAAC,MAAM,EAAE,EAAE,CAAC,mBAAmB,CAAC,MAAM,CAAC,EACvC,CAAC,OAAO,EAAE,EAAE;QACV,+BAA+B;QAC/B,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;QAC5D,CAAC;QAED,mCAAmC;QACnC,MAAM,WAAW,GAAG,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAClE,IAAI,WAAW,EAAE,IAAI,KAAK
,MAAM,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACpD,CAAC;QAED,uCAAuC;QACvC,IAAI,OAAO,CAAC,UAAU,EAAE,WAAW,KAAK,SAAS,EAAE,CAAC;YAClD,IAAI,OAAO,CAAC,UAAU,CAAC,WAAW,GAAG,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;gBAC7E,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;YACzD,CAAC;QACH,CAAC;QAED,IAAI,OAAO,CAAC,UAAU,EAAE,SAAS,KAAK,SAAS,EAAE,CAAC;YAChD,IAAI,OAAO,CAAC,UAAU,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;gBACrC,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;YAChD,CAAC;QACH,CAAC;IACH,CAAC,EACD,EAAE,IAAI,EAAE,GAAG,EAAE,CACd,CAAC;AACJ,CAAC;AAED;;GAEG;AACI,KAAK,UAAU,2BAA2B;IAC/C,MAAM,MAAM,CACV,CAAC,MAAM,EAAE,EAAE,CAAC,mBAAmB,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAC5D,CAAC,OAAO,EAAE,EAAE;QACV,MAAM,iBAAiB,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAE9E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACtD,MAAM,OAAO,GAAG,iBAAiB,CAAC,CAAC,CAAE,CAAC;YACtC,MAAM,IAAI,GAAG,iBAAiB,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC;YAEvC,IAAI,OAAO,CAAC,IAAI,KAAK,MAAM,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACzD,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;YACxE,CAAC;YACD,IAAI,OAAO,CAAC,IAAI,KAAK,WAAW,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACzD,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;YACxE,CAAC;QACH,CAAC;IACH,CAAC,EACD,EAAE,IAAI,EAAE,GAAG,EAAE,CACd,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Test helper utilities - assertions and test builders
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.assertValidChatResponse = assertValidChatResponse;
|
|
7
|
+
exports.assertValidStreamChunk = assertValidStreamChunk;
|
|
8
|
+
exports.assertValidChatRequest = assertValidChatRequest;
|
|
9
|
+
exports.assertValidMessage = assertValidMessage;
|
|
10
|
+
exports.assertValidMessageContent = assertValidMessageContent;
|
|
11
|
+
exports.assertResponseHasText = assertResponseHasText;
|
|
12
|
+
exports.assertResponseHasToolUse = assertResponseHasToolUse;
|
|
13
|
+
exports.assertValidStreamSequence = assertValidStreamSequence;
|
|
14
|
+
exports.buildChatRequest = buildChatRequest;
|
|
15
|
+
exports.buildMultiTurnRequest = buildMultiTurnRequest;
|
|
16
|
+
exports.extractTextFromResponse = extractTextFromResponse;
|
|
17
|
+
exports.extractToolUsesFromResponse = extractToolUsesFromResponse;
|
|
18
|
+
exports.accumulateStreamText = accumulateStreamText;
|
|
19
|
+
exports.estimateTokens = estimateTokens;
|
|
20
|
+
exports.assertReasonableUsage = assertReasonableUsage;
|
|
21
|
+
const vitest_1 = require("vitest");
|
|
22
|
+
/**
 * Verify that a chat response has the expected overall shape.
 *
 * Checks run in a fixed order and the first failing expectation throws:
 * the assistant message and its non-empty content array, the usage
 * counters, the finish reason, and finally the response metadata.
 *
 * @param {object} response - Chat response to validate.
 */
function assertValidChatResponse(response) {
    const { expect } = vitest_1;
    const allowedFinishReasons = ['stop', 'length', 'tool_use', 'content_filter'];
    expect(response).toBeDefined();
    // Message: must come from the assistant and carry at least one content part.
    const reply = response.message;
    expect(reply).toBeDefined();
    expect(reply.role).toBe('assistant');
    expect(reply.content).toBeDefined();
    expect(Array.isArray(reply.content)).toBe(true);
    expect(reply.content.length).toBeGreaterThan(0);
    // Usage: every counter must be a non-negative number.
    const stats = response.usage;
    expect(stats).toBeDefined();
    if (stats) {
        for (const counter of ['promptTokens', 'completionTokens', 'totalTokens']) {
            expect(stats[counter]).toBeGreaterThanOrEqual(0);
        }
    }
    // Finish reason: present and one of the known values.
    expect(response.finishReason).toBeDefined();
    expect(allowedFinishReasons).toContain(response.finishReason);
    // Metadata: request id and timestamp are both required.
    const meta = response.metadata;
    expect(meta).toBeDefined();
    expect(meta.requestId).toBeDefined();
    expect(meta.timestamp).toBeDefined();
}
|
|
44
|
+
/**
 * Verify that a stream chunk exists and carries a recognised `type`.
 *
 * @param {object} chunk - Stream chunk to validate.
 */
function assertValidStreamChunk(chunk) {
    const { expect } = vitest_1;
    const knownTypes = ['start', 'content', 'tool_use', 'error', 'metadata', 'done'];
    expect(chunk).toBeDefined();
    const kind = chunk.type;
    expect(kind).toBeDefined();
    expect(knownTypes).toContain(kind);
}
|
|
52
|
+
/**
 * Verify that a chat request has the expected overall shape.
 *
 * The request must carry a non-empty `messages` array (each entry is checked
 * with `assertValidMessage`), a `parameters` object, and `metadata` holding
 * both `requestId` and `timestamp`.
 *
 * @param {object} request - Chat request to validate.
 */
function assertValidChatRequest(request) {
    const { expect } = vitest_1;
    expect(request).toBeDefined();
    const turns = request.messages;
    expect(turns).toBeDefined();
    expect(Array.isArray(turns)).toBe(true);
    expect(turns.length).toBeGreaterThan(0);
    // Every message is validated individually before the remaining fields.
    for (let index = 0; index < turns.length; index += 1) {
        assertValidMessage(turns[index]);
    }
    expect(request.parameters).toBeDefined();
    const meta = request.metadata;
    expect(meta).toBeDefined();
    expect(meta.requestId).toBeDefined();
    expect(meta.timestamp).toBeDefined();
}
|
|
69
|
+
/**
 * Verify that a single message has a known role and defined content.
 *
 * When the content is an array it must be non-empty, and each part is
 * checked with `assertValidMessageContent`. Non-array content only has
 * to be defined.
 *
 * @param {object} message - Message to validate.
 */
function assertValidMessage(message) {
    const { expect } = vitest_1;
    const knownRoles = ['user', 'assistant', 'system', 'tool'];
    expect(message).toBeDefined();
    expect(message.role).toBeDefined();
    expect(knownRoles).toContain(message.role);
    const body = message.content;
    expect(body).toBeDefined();
    if (!Array.isArray(body)) {
        return;
    }
    expect(body.length).toBeGreaterThan(0);
    for (let index = 0; index < body.length; index += 1) {
        assertValidMessageContent(body[index]);
    }
}
|
|
84
|
+
/**
 * Verify that one message content part carries the fields its type requires.
 *
 * - `text`: `text` must be defined and a string.
 * - `image`: `source` and `source.type` must be defined.
 * - `tool_use`: `id`, `name` and `input` must be defined.
 * - `tool_result`: `toolUseId` and `content` must be defined.
 *
 * Any other type only has to be defined; no further checks are made.
 *
 * @param {object} content - Content part to validate.
 */
function assertValidMessageContent(content) {
    const { expect } = vitest_1;
    expect(content).toBeDefined();
    const kind = content.type;
    expect(kind).toBeDefined();
    if (kind === 'text') {
        expect(content.text).toBeDefined();
        expect(typeof content.text).toBe('string');
    }
    else if (kind === 'image') {
        expect(content.source).toBeDefined();
        expect(content.source.type).toBeDefined();
    }
    else if (kind === 'tool_use') {
        expect(content.id).toBeDefined();
        expect(content.name).toBeDefined();
        expect(content.input).toBeDefined();
    }
    else if (kind === 'tool_result') {
        expect(content.toolUseId).toBeDefined();
        expect(content.content).toBeDefined();
    }
}
|
|
110
|
+
/**
 * Verify that a response contains a non-empty text part.
 *
 * Only the first part whose type is `text` is inspected.
 *
 * @param {object} response - Chat response to check.
 * @throws {Error} When the message content is not an array.
 */
function assertResponseHasText(response) {
    const { expect } = vitest_1;
    const parts = response.message.content;
    if (!Array.isArray(parts)) {
        throw new Error('Response content must be an array');
    }
    const firstText = parts.find((part) => part.type === 'text');
    expect(firstText).toBeDefined();
    if (firstText?.type !== 'text') {
        return;
    }
    expect(firstText.text).toBeDefined();
    expect(firstText.text.length).toBeGreaterThan(0);
}
|
|
124
|
+
/**
 * Verify that a response contains a tool-use part and finished because of it.
 *
 * Only the first part whose type is `tool_use` is inspected; it must define
 * `id`, `name` and `input`. The response's `finishReason` must be `tool_use`.
 *
 * @param {object} response - Chat response to check.
 * @throws {Error} When the message content is not an array.
 */
function assertResponseHasToolUse(response) {
    const { expect } = vitest_1;
    const parts = response.message.content;
    if (!Array.isArray(parts)) {
        throw new Error('Response content must be an array');
    }
    const call = parts.find((part) => part.type === 'tool_use');
    expect(call).toBeDefined();
    if (call?.type === 'tool_use') {
        for (const field of ['id', 'name', 'input']) {
            expect(call[field]).toBeDefined();
        }
    }
    expect(response.finishReason).toBe('tool_use');
}
|
|
140
|
+
/**
 * Drain a stream and verify that it forms a well-ordered chunk sequence.
 *
 * Every chunk is checked with `assertValidStreamChunk` as it arrives. Once
 * the stream ends, there must be at least one chunk, the first must be of
 * type `start` and the last of type `done`.
 *
 * @param {AsyncIterable<object>} stream - Stream of chunks to consume.
 * @returns {Promise<object[]>} All chunks, in arrival order.
 */
async function assertValidStreamSequence(stream) {
    const { expect } = vitest_1;
    const collected = [];
    for await (const piece of stream) {
        assertValidStreamChunk(piece);
        collected.push(piece);
    }
    const total = collected.length;
    expect(total).toBeGreaterThan(0);
    // The sequence must open with 'start' ...
    const opening = collected[0];
    if (opening) {
        expect(opening.type).toBe('start');
    }
    // ... and close with 'done'.
    const closing = collected[total - 1];
    if (closing) {
        expect(closing.type).toBe('done');
    }
    return collected;
}
|
|
162
|
+
/**
 * Build a single-turn chat request for use in tests.
 *
 * The result holds an optional leading system message, the user message,
 * generation parameters, and metadata whose `requestId` has the form
 * `test-<timestamp>`.
 *
 * @param {string} userMessage - Text of the user message.
 * @param {object} [options] - Optional overrides.
 * @param {string} [options.systemMessage] - When truthy, added as a leading system message.
 * @param {string} [options.model] - Model name; falls back to 'test-model' when falsy.
 * @param {number} [options.temperature] - Defaults to 0.7 when null or undefined.
 * @param {number} [options.maxTokens] - Defaults to 100 when null or undefined.
 * @returns {object} Request with `messages`, `parameters` and `metadata`.
 */
function buildChatRequest(userMessage, options) {
    // Read the clock once so requestId and timestamp can never disagree
    // when the two reads would otherwise straddle a millisecond boundary.
    const now = Date.now();
    const messages = [];
    if (options?.systemMessage) {
        messages.push({
            role: 'system',
            content: [{ type: 'text', text: options.systemMessage }],
        });
    }
    messages.push({
        role: 'user',
        content: [{ type: 'text', text: userMessage }],
    });
    return {
        messages,
        parameters: {
            model: options?.model || 'test-model',
            temperature: options?.temperature ?? 0.7,
            maxTokens: options?.maxTokens ?? 100,
        },
        metadata: {
            requestId: `test-${now}`,
            timestamp: now,
        },
    };
}
|
|
190
|
+
/**
 * Build a multi-turn chat request for use in tests.
 *
 * Each exchange contributes a user message followed by an assistant message;
 * the final user message is appended last. Metadata `requestId` has the form
 * `test-<timestamp>`.
 *
 * @param {Array<{user: string, assistant: string}>} exchanges - Prior turns, oldest first.
 * @param {string} finalUserMessage - Text of the closing user message.
 * @param {object} [options] - Optional overrides.
 * @param {string} [options.model] - Model name; falls back to 'test-model' when falsy.
 * @param {number} [options.temperature] - Defaults to 0.7 when null or undefined.
 * @param {number} [options.maxTokens] - Defaults to 100 when null or undefined.
 * @returns {object} Request with `messages`, `parameters` and `metadata`.
 */
function buildMultiTurnRequest(exchanges, finalUserMessage, options) {
    // Read the clock once so requestId and timestamp can never disagree
    // when the two reads would otherwise straddle a millisecond boundary.
    const now = Date.now();
    const messages = [];
    for (const exchange of exchanges) {
        messages.push({
            role: 'user',
            content: [{ type: 'text', text: exchange.user }],
        });
        messages.push({
            role: 'assistant',
            content: [{ type: 'text', text: exchange.assistant }],
        });
    }
    messages.push({
        role: 'user',
        content: [{ type: 'text', text: finalUserMessage }],
    });
    return {
        messages,
        parameters: {
            model: options?.model || 'test-model',
            temperature: options?.temperature ?? 0.7,
            maxTokens: options?.maxTokens ?? 100,
        },
        metadata: {
            requestId: `test-${now}`,
            timestamp: now,
        },
    };
}
|
|
222
|
+
/**
 * Extract the text of a response as a single string.
 *
 * - Array content: the `text` of every part whose type is `text`, joined
 *   in order with no separator.
 * - String content: returned as-is, so plain-string responses are not
 *   silently reduced to an empty string.
 * - Anything else: an empty string.
 *
 * @param {object} response - Chat response to read.
 * @returns {string} The concatenated text, or '' when there is none.
 */
function extractTextFromResponse(response) {
    const { content } = response.message;
    if (typeof content === 'string') {
        return content;
    }
    if (!Array.isArray(content)) {
        return '';
    }
    return content
        .filter((part) => part.type === 'text')
        .map((part) => part.text)
        .join('');
}
|
|
232
|
+
/**
 * Extract every tool-use part from a response.
 *
 * Each result holds only `id`, `name` and `input`; any other properties on
 * the original part are left out. Order follows the message content.
 *
 * @param {object} response - Chat response to read.
 * @returns {Array<{id: *, name: *, input: *}>} Tool uses, or an empty array
 *   when the content is not an array or contains none.
 */
function extractToolUsesFromResponse(response) {
    const { content } = response.message;
    if (!Array.isArray(content)) {
        return [];
    }
    // The filter already guarantees the type, so the mapping needs no
    // fallback branch and no second filtering pass.
    return content
        .filter((part) => part.type === 'tool_use')
        .map(({ id, name, input }) => ({ id, name, input }));
}
|
|
253
|
+
/**
 * Drain a stream and concatenate the text deltas it carries.
 *
 * Only chunks of type `content` with a truthy `delta` contribute; every
 * other chunk is ignored.
 *
 * @param {AsyncIterable<object>} stream - Stream of chunks to consume.
 * @returns {Promise<string>} The accumulated text, in arrival order.
 */
async function accumulateStreamText(stream) {
    let collected = '';
    for await (const piece of stream) {
        if (piece.type !== 'content' || !piece.delta) {
            continue;
        }
        collected += piece.delta;
    }
    return collected;
}
|
|
265
|
+
/**
 * Roughly estimate how many tokens a piece of text uses.
 *
 * Uses the rule of thumb of about four characters per token, rounding up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens(text) {
    const charsPerToken = 4;
    const { length } = text;
    return Math.ceil(length / charsPerToken);
}
|
|
272
|
+
/**
 * Verify that a response's usage statistics look plausible.
 *
 * All three counters must be strictly positive, the total must equal
 * prompt plus completion tokens, and the total must stay below one million.
 *
 * @param {object} response - Chat response whose `usage` is checked.
 * @throws {Error} When the response has no usage statistics.
 */
function assertReasonableUsage(response) {
    const stats = response.usage;
    if (!stats) {
        throw new Error('Response must have usage statistics');
    }
    const { expect } = vitest_1;
    const { promptTokens, completionTokens, totalTokens } = stats;
    // Each counter must be strictly positive.
    expect(promptTokens).toBeGreaterThan(0);
    expect(completionTokens).toBeGreaterThan(0);
    expect(totalTokens).toBeGreaterThan(0);
    // The total must be the exact sum of its parts.
    expect(totalTokens).toBe(promptTokens + completionTokens);
    // Guard against absurdly large counts.
    expect(totalTokens).toBeLessThan(1000000);
}
|
|
289
|
+
//# sourceMappingURL=test-helpers.js.map
|