@heilgar/pest-core 0.0.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/chunk-T7HWWH52.js +49 -0
- package/dist/index.d.ts +467 -0
- package/dist/index.js +1393 -0
- package/dist/send-P3XIZERN.js +8 -0
- package/package.json +53 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 pest contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/dist/chunk-T7HWWH52.js
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// src/send.ts
|
|
2
|
+
/**
 * Registry of hooks that `send()` notifies after a provider call completes,
 * kept in registration order.
 */
var sendHooks = [];

/**
 * Register a hook that is called with a send entry after every `send()` call.
 *
 * The same function may be registered more than once; each registration is
 * tracked separately and gets its own disposer.
 *
 * @param hook - Callback invoked with the entry describing the completed send.
 * @returns A dispose function that removes this registration. It is idempotent:
 *   calling it again after the hook has been removed is a no-op.
 */
function onSend(hook) {
  sendHooks.push(hook);
  // Without this guard a second dispose call would find (via indexOf) and
  // remove another registration of the same function.
  let disposed = false;
  return () => {
    if (disposed) return;
    disposed = true;
    const idx = sendHooks.indexOf(hook);
    if (idx >= 0) sendHooks.splice(idx, 1);
  };
}
|
|
10
|
+
async function send(provider, message, options) {
|
|
11
|
+
const start = performance.now();
|
|
12
|
+
const response = await provider.call({
|
|
13
|
+
systemPrompt: options?.systemPrompt,
|
|
14
|
+
messages: [{ role: "user", content: message }],
|
|
15
|
+
tools: options?.tools,
|
|
16
|
+
temperature: options?.temperature,
|
|
17
|
+
maxTokens: options?.maxTokens,
|
|
18
|
+
responseFormat: options?.responseFormat
|
|
19
|
+
});
|
|
20
|
+
const latencyMs = performance.now() - start;
|
|
21
|
+
const result = {
|
|
22
|
+
...response,
|
|
23
|
+
latencyMs,
|
|
24
|
+
provider: provider.name,
|
|
25
|
+
model: provider.model
|
|
26
|
+
};
|
|
27
|
+
if (sendHooks.length > 0) {
|
|
28
|
+
const entry = {
|
|
29
|
+
input: message,
|
|
30
|
+
output: response.text,
|
|
31
|
+
systemPrompt: options?.systemPrompt,
|
|
32
|
+
provider: provider.name,
|
|
33
|
+
model: provider.model,
|
|
34
|
+
latencyMs,
|
|
35
|
+
usage: { ...response.usage },
|
|
36
|
+
toolCalls: response.toolCalls,
|
|
37
|
+
timestamp: Date.now()
|
|
38
|
+
};
|
|
39
|
+
for (const hook of sendHooks) {
|
|
40
|
+
hook(entry);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
return result;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
export {
|
|
47
|
+
onSend,
|
|
48
|
+
send
|
|
49
|
+
};
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
import * as v from 'valibot';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
|
|
4
|
+
declare const ProviderConfigSchema: v.ObjectSchema<{
|
|
5
|
+
readonly name: v.StringSchema<undefined>;
|
|
6
|
+
readonly type: v.PicklistSchema<["openai", "anthropic", "gemini", "xai", "ollama"], undefined>;
|
|
7
|
+
readonly model: v.StringSchema<undefined>;
|
|
8
|
+
readonly apiKey: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
|
|
9
|
+
readonly baseUrl: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
|
|
10
|
+
readonly temperature: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
11
|
+
}, undefined>;
|
|
12
|
+
/**
 * Valibot schema for the top-level pest configuration.
 *
 * - `providers`: array of provider configs with a minimum length of 1. The
 *   element schema is `ProviderConfigSchema` itself, so the two cannot drift.
 * - `judge`: optional object whose `provider` is a string
 *   (presumably the `name` of one of `providers` — confirm against the loader).
 * - `pricing`: optional record of string keys to per-1M-token prices in cents
 *   (keys are presumably model names, as `getPricing` takes a model — confirm).
 */
declare const PestConfigSchema: v.ObjectSchema<{
    readonly providers: v.SchemaWithPipe<readonly [
        v.ArraySchema<typeof ProviderConfigSchema, undefined>,
        v.MinLengthAction<ProviderConfig[], 1, undefined>
    ]>;
    readonly judge: v.OptionalSchema<v.ObjectSchema<{
        readonly provider: v.StringSchema<undefined>;
    }, undefined>, undefined>;
    readonly pricing: v.OptionalSchema<v.RecordSchema<v.StringSchema<undefined>, v.ObjectSchema<{
        readonly inputCentsPer1M: v.NumberSchema<undefined>;
        readonly outputCentsPer1M: v.NumberSchema<undefined>;
    }, undefined>, undefined>, undefined>;
}, undefined>;
|
|
36
|
+
type PestConfig = v.InferOutput<typeof PestConfigSchema>;
|
|
37
|
+
type ProviderConfig = v.InferOutput<typeof ProviderConfigSchema>;
|
|
38
|
+
|
|
39
|
+
/** Reset env-loaded flag so loadEnv() can be called again. For testing only. */
|
|
40
|
+
declare function resetEnv(): void;
|
|
41
|
+
/**
|
|
42
|
+
* Load environment variables from .env files into process.env.
|
|
43
|
+
*
|
|
44
|
+
* Files loaded (lowest to highest priority):
|
|
45
|
+
* - `.env` — shared defaults, may be committed
|
|
46
|
+
* - `.env.local` — local overrides, should be gitignored
|
|
47
|
+
*
|
|
48
|
+
* Real environment variables are never overwritten.
|
|
49
|
+
* Files are loaded from the project root (where pest.config.ts or package.json is).
|
|
50
|
+
*
|
|
51
|
+
* This is called automatically by `loadConfig()` and `createProvider()`.
|
|
52
|
+
* Safe to call multiple times — only loads once.
|
|
53
|
+
*/
|
|
54
|
+
declare function loadEnv(cwd?: string): void;
|
|
55
|
+
declare function loadConfig(cwd?: string): Promise<PestConfig>;
|
|
56
|
+
declare function defineConfig(config: PestConfig): PestConfig;
|
|
57
|
+
|
|
58
|
+
interface JudgeResult {
|
|
59
|
+
pass: boolean;
|
|
60
|
+
score: number;
|
|
61
|
+
reasoning: string;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
interface ToolDefinition {
|
|
65
|
+
type: 'function';
|
|
66
|
+
function: {
|
|
67
|
+
name: string;
|
|
68
|
+
description?: string;
|
|
69
|
+
parameters?: Record<string, unknown>;
|
|
70
|
+
};
|
|
71
|
+
}
|
|
72
|
+
interface ToolCall {
|
|
73
|
+
name: string;
|
|
74
|
+
args: Record<string, unknown>;
|
|
75
|
+
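/** Provider-assigned ID. Optional on the call itself, but needed to route a tool result back to its call in multi-turn conversations (tool messages carry a mandatory `toolCallId`). */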
|
|
76
|
+
id?: string;
|
|
77
|
+
}
|
|
78
|
+
type AgenticMessage = {
|
|
79
|
+
role: 'user';
|
|
80
|
+
content: string;
|
|
81
|
+
} | {
|
|
82
|
+
role: 'assistant';
|
|
83
|
+
content: string;
|
|
84
|
+
toolCalls?: ToolCall[];
|
|
85
|
+
} | {
|
|
86
|
+
role: 'tool';
|
|
87
|
+
toolCallName: string;
|
|
88
|
+
toolCallId: string;
|
|
89
|
+
content: string;
|
|
90
|
+
};
|
|
91
|
+
type ToolExecutor = (name: string, args: Record<string, unknown>) => Promise<unknown> | unknown;
|
|
92
|
+
interface SendAgenticOptions extends SendOptions {
|
|
93
|
+
/** Called for each tool invocation. Defaults to returning '[]' (no-op mock). */
|
|
94
|
+
executor?: ToolExecutor;
|
|
95
|
+
/** Max tool-calling iterations before stopping. Default: 10 */
|
|
96
|
+
maxSteps?: number;
|
|
97
|
+
}
|
|
98
|
+
interface ProviderUsage {
|
|
99
|
+
inputTokens: number;
|
|
100
|
+
outputTokens: number;
|
|
101
|
+
totalTokens: number;
|
|
102
|
+
}
|
|
103
|
+
interface ProviderResponse {
|
|
104
|
+
text: string;
|
|
105
|
+
toolCalls: ToolCall[];
|
|
106
|
+
usage: ProviderUsage;
|
|
107
|
+
raw: unknown;
|
|
108
|
+
}
|
|
109
|
+
interface ProviderRequestOptions {
|
|
110
|
+
systemPrompt?: string;
|
|
111
|
+
messages: AgenticMessage[];
|
|
112
|
+
tools?: ToolDefinition[];
|
|
113
|
+
temperature?: number;
|
|
114
|
+
maxTokens?: number;
|
|
115
|
+
responseFormat?: 'text' | 'json';
|
|
116
|
+
}
|
|
117
|
+
interface Provider {
|
|
118
|
+
name: string;
|
|
119
|
+
model: string;
|
|
120
|
+
call(options: ProviderRequestOptions): Promise<ProviderResponse>;
|
|
121
|
+
}
|
|
122
|
+
interface PestResponse extends ProviderResponse {
|
|
123
|
+
latencyMs: number;
|
|
124
|
+
provider: string;
|
|
125
|
+
model: string;
|
|
126
|
+
}
|
|
127
|
+
interface SendOptions {
|
|
128
|
+
systemPrompt?: string;
|
|
129
|
+
tools?: ToolDefinition[];
|
|
130
|
+
temperature?: number;
|
|
131
|
+
maxTokens?: number;
|
|
132
|
+
responseFormat?: 'text' | 'json';
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Resolve the active provider from PEST_PROVIDER env + pest.config.ts.
|
|
137
|
+
*
|
|
138
|
+
* Used in test files so the CLI can switch providers via env:
|
|
139
|
+
* - CLI sets PEST_PROVIDER=gpt4o, then shells out to vitest
|
|
140
|
+
* - Test file calls useProvider() → gets the gpt4o provider
|
|
141
|
+
* - When running vitest directly (no CLI), falls back to first provider in config
|
|
142
|
+
*/
|
|
143
|
+
declare function useProvider(fallbackName?: string): Promise<Provider>;
|
|
144
|
+
/**
|
|
145
|
+
* Resolve system prompt with PEST_SYSTEM_PROMPT env override.
|
|
146
|
+
*
|
|
147
|
+
* Used in test files so the CLI tune command can swap prompts via env:
|
|
148
|
+
* - CLI sets PEST_SYSTEM_PROMPT=<variant>, then shells out to vitest
|
|
149
|
+
* - Test file calls useSystemPrompt("default...") → gets the variant
|
|
150
|
+
* - When running vitest directly, returns the default
|
|
151
|
+
*/
|
|
152
|
+
declare function useSystemPrompt(defaultPrompt: string): string;
|
|
153
|
+
|
|
154
|
+
declare function setJudge(provider: Provider): void;
|
|
155
|
+
declare function getJudge(): Provider | null;
|
|
156
|
+
declare function resolveJudge(options?: {
|
|
157
|
+
judge?: Provider;
|
|
158
|
+
}): Provider;
|
|
159
|
+
|
|
160
|
+
interface MatcherResult {
|
|
161
|
+
pass: boolean;
|
|
162
|
+
message: string;
|
|
163
|
+
score?: number;
|
|
164
|
+
reasoning?: string;
|
|
165
|
+
metadata?: Record<string, unknown>;
|
|
166
|
+
}
|
|
167
|
+
interface SemanticOptions {
|
|
168
|
+
threshold?: number;
|
|
169
|
+
judge?: Provider;
|
|
170
|
+
}
|
|
171
|
+
interface ClassificationOptions {
|
|
172
|
+
categories?: string[];
|
|
173
|
+
judge?: Provider;
|
|
174
|
+
}
|
|
175
|
+
interface RubricConfig {
|
|
176
|
+
criteria: string;
|
|
177
|
+
scoreRange?: [number, number];
|
|
178
|
+
passThreshold?: number;
|
|
179
|
+
}
|
|
180
|
+
declare function containsToolCall(response: PestResponse, name: string, args?: Record<string, unknown>): MatcherResult;
|
|
181
|
+
declare function callsToolsInOrder(response: PestResponse, names: string[]): MatcherResult;
|
|
182
|
+
declare function matchesResponseSchema(response: PestResponse, schema: {
|
|
183
|
+
_run?: unknown;
|
|
184
|
+
'~standard'?: {
|
|
185
|
+
validate: (input: unknown) => {
|
|
186
|
+
issues?: unknown[];
|
|
187
|
+
};
|
|
188
|
+
};
|
|
189
|
+
}): MatcherResult;
|
|
190
|
+
declare function respondsWithinTokens(response: PestResponse, maxTokens: number): MatcherResult;
|
|
191
|
+
declare function containsText(response: PestResponse, text: string): MatcherResult;
|
|
192
|
+
declare function hasToolCallCount(response: PestResponse, count: number): MatcherResult;
|
|
193
|
+
declare function matchesSemanticMeaning(response: PestResponse, expected: string, judge: Provider, options?: SemanticOptions): Promise<MatcherResult>;
|
|
194
|
+
declare function satisfiesCriteria(response: PestResponse, rubric: string | RubricConfig, judge: Provider): Promise<MatcherResult>;
|
|
195
|
+
declare function classifiedAs(response: PestResponse, label: string, judge: Provider, options?: ClassificationOptions): Promise<MatcherResult>;
|
|
196
|
+
declare function doesNotDisclose(response: PestResponse, topic: string, judge: Provider): Promise<MatcherResult>;
|
|
197
|
+
declare function assertConsistent(provider: Provider, message: string, runs: number, options?: {
|
|
198
|
+
threshold?: number;
|
|
199
|
+
judge?: Provider;
|
|
200
|
+
systemPrompt?: string;
|
|
201
|
+
tools?: ToolDefinition[];
|
|
202
|
+
}): Promise<{
|
|
203
|
+
pass: boolean;
|
|
204
|
+
scores: number[];
|
|
205
|
+
reasoning: string[];
|
|
206
|
+
}>;
|
|
207
|
+
|
|
208
|
+
interface ModelPricing {
|
|
209
|
+
inputCentsPer1M: number;
|
|
210
|
+
outputCentsPer1M: number;
|
|
211
|
+
}
|
|
212
|
+
declare function setPricing(pricing: Record<string, ModelPricing>): void;
|
|
213
|
+
declare function resetPricing(): void;
|
|
214
|
+
declare function getPricing(model: string): ModelPricing;
|
|
215
|
+
declare function estimateCostCents(model: string, inputTokens: number, outputTokens: number): number;
|
|
216
|
+
|
|
217
|
+
declare function createProvider(config: ProviderConfig): Provider;
|
|
218
|
+
declare function createProviders(configs: ProviderConfig[]): Map<string, Provider>;
|
|
219
|
+
|
|
220
|
+
interface SendEntry {
|
|
221
|
+
input: string;
|
|
222
|
+
output: string;
|
|
223
|
+
systemPrompt?: string;
|
|
224
|
+
provider: string;
|
|
225
|
+
model: string;
|
|
226
|
+
latencyMs: number;
|
|
227
|
+
usage: ProviderUsage;
|
|
228
|
+
toolCalls: ToolCall[];
|
|
229
|
+
timestamp: number;
|
|
230
|
+
}
|
|
231
|
+
interface MatcherEntry {
|
|
232
|
+
matcher: string;
|
|
233
|
+
pass: boolean;
|
|
234
|
+
score?: number;
|
|
235
|
+
reasoning?: string;
|
|
236
|
+
response?: {
|
|
237
|
+
provider: string;
|
|
238
|
+
model: string;
|
|
239
|
+
latencyMs: number;
|
|
240
|
+
usage: ProviderUsage;
|
|
241
|
+
toolCalls: ToolCall[];
|
|
242
|
+
text: string;
|
|
243
|
+
};
|
|
244
|
+
judgeModel?: string;
|
|
245
|
+
}
|
|
246
|
+
interface TestData {
|
|
247
|
+
testId: string;
|
|
248
|
+
testName?: string;
|
|
249
|
+
startTime: number;
|
|
250
|
+
endTime?: number;
|
|
251
|
+
sends: SendEntry[];
|
|
252
|
+
entries: MatcherEntry[];
|
|
253
|
+
}
|
|
254
|
+
declare function startTest(testId: string, testName?: string): void;
|
|
255
|
+
declare function endTest(testId: string): void;
|
|
256
|
+
declare function recordSend(testId: string, entry: SendEntry): void;
|
|
257
|
+
declare function record(testId: string, entry: MatcherEntry): void;
|
|
258
|
+
declare function getTestData(testId: string): TestData | undefined;
|
|
259
|
+
declare function getAllTestData(): Map<string, TestData>;
|
|
260
|
+
declare function clearAll(): void;
|
|
261
|
+
/**
 * Build a `MatcherEntry` for the matcher named `matcher` from its result.
 *
 * @param matcher - Name stored in the entry's `matcher` field.
 * @param result - Matcher outcome to record.
 * @param response - Optional response snapshot; same shape as `MatcherEntry['response']`.
 * @param judgeModel - Optional model identifier of the judge that produced the result.
 * @returns The assembled entry.
 */
declare function buildMatcherEntry(matcher: string, result: MatcherResult, response?: MatcherEntry['response'], judgeModel?: string): MatcherEntry;
|
|
269
|
+
|
|
270
|
+
type SendHook = (entry: SendEntry) => void;
|
|
271
|
+
/**
|
|
272
|
+
* Register a hook that gets called after every send() call.
|
|
273
|
+
* Used by extensions to capture LLM input/output for reporters.
|
|
274
|
+
* Returns a dispose function to remove the hook.
|
|
275
|
+
*/
|
|
276
|
+
declare function onSend(hook: SendHook): () => void;
|
|
277
|
+
declare function send(provider: Provider, message: string, options?: SendOptions): Promise<PestResponse>;
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* Run a full agentic tool-call loop and return a single PestResponse with all
|
|
281
|
+
* accumulated tool calls across all steps.
|
|
282
|
+
*
|
|
283
|
+
* This lets you use toContainToolCall, toCallToolsInOrder, toHaveToolCallCount
|
|
284
|
+
* on multi-turn tool-calling conversations.
|
|
285
|
+
*
|
|
286
|
+
* @example
|
|
287
|
+
* const res = await sendAgentic(provider, 'List then create a segment', {
|
|
288
|
+
* systemPrompt,
|
|
289
|
+
* tools,
|
|
290
|
+
* executor: async (name, args) => myApp.handleTool(name, args),
|
|
291
|
+
* });
|
|
292
|
+
* expect(res).toCallToolsInOrder(['list_segments', 'create_segment']);
|
|
293
|
+
*/
|
|
294
|
+
declare function sendAgentic(provider: Provider, message: string, options?: SendAgenticOptions): Promise<PestResponse>;
|
|
295
|
+
|
|
296
|
+
/**
|
|
297
|
+
* Convert a Zod schema to a pest ToolDefinition.
|
|
298
|
+
*
|
|
299
|
+
* Eliminates the need to duplicate tool definitions in JSON Schema by hand.
|
|
300
|
+
* Requires zod >= 3.0.0 as a peer dependency.
|
|
301
|
+
*
|
|
302
|
+
* @example
|
|
303
|
+
* const tools = Object.values(TOOL_DEFINITIONS).map(def =>
|
|
304
|
+
* zodTool(def.name, def.description, def.parameters)
|
|
305
|
+
* );
|
|
306
|
+
*/
|
|
307
|
+
declare function zodTool(name: string, description: string, schema: z.ZodTypeAny): ToolDefinition;
|
|
308
|
+
|
|
309
|
+
declare const ansi: {
|
|
310
|
+
reset: string;
|
|
311
|
+
bold: string;
|
|
312
|
+
dim: string;
|
|
313
|
+
green: string;
|
|
314
|
+
red: string;
|
|
315
|
+
yellow: string;
|
|
316
|
+
cyan: string;
|
|
317
|
+
magenta: string;
|
|
318
|
+
};
|
|
319
|
+
declare function formatTokens(n: number): string;
|
|
320
|
+
declare function formatDuration(ms: number): string;
|
|
321
|
+
declare function formatCost(cents: number): string;
|
|
322
|
+
declare function escapeHtml(str: string): string;
|
|
323
|
+
|
|
324
|
+
type TestIdResolver = () => string | undefined;
|
|
325
|
+
/** Result returned by every pest matcher: the verdict plus a thunk producing the message. */
type PestMatcherOutcome = {
    pass: boolean;
    message: () => string;
};
/**
 * Create pest matchers bound to a test-id resolver.
 * Shared by vitest and jest extensions to avoid code duplication.
 *
 * The first seven matchers are synchronous; the last four return a promise
 * and accept an optional `judge` provider through their `options` argument.
 *
 * @param getTestId - Returns the id of the currently running test, or
 *   `undefined` when there is none.
 * @returns An object of matcher functions, each taking the received
 *   `PestResponse` as its first argument.
 */
declare function createPestMatchers(getTestId: TestIdResolver): {
    toContainToolCall(received: PestResponse, name: string, args?: Record<string, unknown>): PestMatcherOutcome;
    toCallToolsInOrder(received: PestResponse, names: string[]): PestMatcherOutcome;
    toMatchResponseSchema(received: PestResponse, schema: unknown): PestMatcherOutcome;
    toRespondWithinTokens(received: PestResponse, maxTokens: number): PestMatcherOutcome;
    toContainText(received: PestResponse, text: string): PestMatcherOutcome;
    toNotContainText(received: PestResponse, text: string): PestMatcherOutcome;
    toHaveToolCallCount(received: PestResponse, count: number): PestMatcherOutcome;
    toMatchSemanticMeaning(received: PestResponse, expected: string, options?: SemanticOptions): Promise<PestMatcherOutcome>;
    toSatisfyCriteria(received: PestResponse, rubric: string | RubricConfig, options?: {
        judge?: Provider;
    }): Promise<PestMatcherOutcome>;
    toBeClassifiedAs(received: PestResponse, label: string, options?: ClassificationOptions): Promise<PestMatcherOutcome>;
    toNotDisclose(received: PestResponse, topic: string, options?: {
        judge?: Provider;
    }): Promise<PestMatcherOutcome>;
};
|
|
379
|
+
|
|
380
|
+
interface ReporterOptions {
|
|
381
|
+
verbose?: boolean;
|
|
382
|
+
showCost?: boolean;
|
|
383
|
+
logFile?: string | false;
|
|
384
|
+
htmlFile?: string | false;
|
|
385
|
+
}
|
|
386
|
+
interface RunStats {
|
|
387
|
+
tests: number;
|
|
388
|
+
totalInputTokens: number;
|
|
389
|
+
totalOutputTokens: number;
|
|
390
|
+
totalLatencyMs: number;
|
|
391
|
+
totalCostCents: number;
|
|
392
|
+
judgeCount: number;
|
|
393
|
+
toolCallCount: number;
|
|
394
|
+
}
|
|
395
|
+
/**
 * One test's record in the run log: returned by `processTestResult` and
 * passed (as an array) to `finishReport`.
 */
interface LogEntry {
    /** Test name. */
    test: string;
    /** Test status string. */
    status: string;
    /** Sends captured for the test. */
    sends: SendEntry[];
    /** Matcher outcomes for the test; same shape as the report's matcher rows. */
    matchers: ReportMatcher[];
}
|
|
407
|
+
/**
 * Outcome of a single test, as passed to `processTestResult`.
 */
interface TestResult {
    /** Test name. */
    name: string;
    /**
     * `'passed'` and `'failed'` are the known statuses; any other string is
     * still accepted. `string & {}` stops the union from collapsing to plain
     * `string`, so the literals remain visible to tooling.
     */
    status: 'passed' | 'failed' | (string & {});
    /** Sends captured for the test. */
    sends: SendEntry[];
    /** Matcher outcomes, each optionally carrying a reduced response snapshot. */
    entries: Array<{
        matcher: string;
        pass: boolean;
        score?: number;
        reasoning?: string;
        judgeModel?: string;
        response?: {
            provider: string;
            model: string;
            latencyMs: number;
            usage: {
                inputTokens: number;
                outputTokens: number;
            };
            toolCalls: Array<{
                name: string;
            }>;
            text: string;
        };
    }>;
}
|
|
432
|
+
declare function emptyStats(): RunStats;
|
|
433
|
+
/** Process a single test result and update stats. Returns a LogEntry. */
|
|
434
|
+
declare function processTestResult(testResult: TestResult, stats: RunStats, options: ReporterOptions): LogEntry;
|
|
435
|
+
/** Print summary and write log/HTML files. */
|
|
436
|
+
declare function finishReport(stats: RunStats, logEntries: LogEntry[], options: ReporterOptions): void;
|
|
437
|
+
|
|
438
|
+
interface ReportMatcher {
|
|
439
|
+
matcher: string;
|
|
440
|
+
pass: boolean;
|
|
441
|
+
score?: number;
|
|
442
|
+
reasoning?: string;
|
|
443
|
+
judgeModel?: string;
|
|
444
|
+
}
|
|
445
|
+
interface ReportTest {
|
|
446
|
+
test: string;
|
|
447
|
+
status: string;
|
|
448
|
+
sends: SendEntry[];
|
|
449
|
+
matchers: ReportMatcher[];
|
|
450
|
+
}
|
|
451
|
+
interface ReportSummary {
|
|
452
|
+
tests: number;
|
|
453
|
+
totalTokens: number;
|
|
454
|
+
inputTokens: number;
|
|
455
|
+
outputTokens: number;
|
|
456
|
+
estimatedCost: string;
|
|
457
|
+
judgeCount: number;
|
|
458
|
+
toolCallCount: number;
|
|
459
|
+
}
|
|
460
|
+
interface ReportData {
|
|
461
|
+
timestamp: string;
|
|
462
|
+
summary: ReportSummary;
|
|
463
|
+
tests: ReportTest[];
|
|
464
|
+
}
|
|
465
|
+
declare function buildHtmlReport(data: ReportData): string;
|
|
466
|
+
|
|
467
|
+
export { type AgenticMessage, type ClassificationOptions, type JudgeResult, type LogEntry, type MatcherEntry, type MatcherResult, type ModelPricing, type PestConfig, type PestResponse, type Provider, type ProviderConfig, type ProviderRequestOptions, type ProviderResponse, type ProviderUsage, type ReporterOptions, type RubricConfig, type RunStats, type SemanticOptions, type SendAgenticOptions, type SendEntry, type SendOptions, type TestData, type TestResult, type ToolCall, type ToolDefinition, type ToolExecutor, ansi, assertConsistent, buildHtmlReport, buildMatcherEntry, callsToolsInOrder, classifiedAs, clearAll, containsText, containsToolCall, createPestMatchers, createProvider, createProviders, defineConfig, doesNotDisclose, emptyStats, endTest, escapeHtml, estimateCostCents, finishReport, formatCost, formatDuration, formatTokens, getAllTestData, getJudge, getPricing, getTestData, hasToolCallCount, loadConfig, loadEnv, matchesResponseSchema, matchesSemanticMeaning, onSend, processTestResult, record, recordSend, resetEnv, resetPricing, resolveJudge, respondsWithinTokens, satisfiesCriteria, send, sendAgentic, setJudge, setPricing, startTest, useProvider, useSystemPrompt, zodTool };
|