@heilgar/pest-core 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 pest contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,49 @@
1
+ // src/send.ts
2
+ var sendHooks = [];
3
+ function onSend(hook) {
4
+ sendHooks.push(hook);
5
+ return () => {
6
+ const idx = sendHooks.indexOf(hook);
7
+ if (idx >= 0) sendHooks.splice(idx, 1);
8
+ };
9
+ }
10
+ async function send(provider, message, options) {
11
+ const start = performance.now();
12
+ const response = await provider.call({
13
+ systemPrompt: options?.systemPrompt,
14
+ messages: [{ role: "user", content: message }],
15
+ tools: options?.tools,
16
+ temperature: options?.temperature,
17
+ maxTokens: options?.maxTokens,
18
+ responseFormat: options?.responseFormat
19
+ });
20
+ const latencyMs = performance.now() - start;
21
+ const result = {
22
+ ...response,
23
+ latencyMs,
24
+ provider: provider.name,
25
+ model: provider.model
26
+ };
27
+ if (sendHooks.length > 0) {
28
+ const entry = {
29
+ input: message,
30
+ output: response.text,
31
+ systemPrompt: options?.systemPrompt,
32
+ provider: provider.name,
33
+ model: provider.model,
34
+ latencyMs,
35
+ usage: { ...response.usage },
36
+ toolCalls: response.toolCalls,
37
+ timestamp: Date.now()
38
+ };
39
+ for (const hook of sendHooks) {
40
+ hook(entry);
41
+ }
42
+ }
43
+ return result;
44
+ }
45
+
46
+ export {
47
+ onSend,
48
+ send
49
+ };
@@ -0,0 +1,467 @@
1
+ import * as v from 'valibot';
2
+ import { z } from 'zod';
3
+
4
+ declare const ProviderConfigSchema: v.ObjectSchema<{
5
+ readonly name: v.StringSchema<undefined>;
6
+ readonly type: v.PicklistSchema<["openai", "anthropic", "gemini", "xai", "ollama"], undefined>;
7
+ readonly model: v.StringSchema<undefined>;
8
+ readonly apiKey: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
9
+ readonly baseUrl: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
10
+ readonly temperature: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
11
+ }, undefined>;
12
+ declare const PestConfigSchema: v.ObjectSchema<{
13
+ readonly providers: v.SchemaWithPipe<readonly [v.ArraySchema<v.ObjectSchema<{
14
+ readonly name: v.StringSchema<undefined>;
15
+ readonly type: v.PicklistSchema<["openai", "anthropic", "gemini", "xai", "ollama"], undefined>;
16
+ readonly model: v.StringSchema<undefined>;
17
+ readonly apiKey: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
18
+ readonly baseUrl: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
19
+ readonly temperature: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
20
+ }, undefined>, undefined>, v.MinLengthAction<{
21
+ name: string;
22
+ type: "openai" | "anthropic" | "gemini" | "xai" | "ollama";
23
+ model: string;
24
+ apiKey?: string | undefined;
25
+ baseUrl?: string | undefined;
26
+ temperature?: number | undefined;
27
+ }[], 1, undefined>]>;
28
+ readonly judge: v.OptionalSchema<v.ObjectSchema<{
29
+ readonly provider: v.StringSchema<undefined>;
30
+ }, undefined>, undefined>;
31
+ readonly pricing: v.OptionalSchema<v.RecordSchema<v.StringSchema<undefined>, v.ObjectSchema<{
32
+ readonly inputCentsPer1M: v.NumberSchema<undefined>;
33
+ readonly outputCentsPer1M: v.NumberSchema<undefined>;
34
+ }, undefined>, undefined>, undefined>;
35
+ }, undefined>;
36
+ type PestConfig = v.InferOutput<typeof PestConfigSchema>;
37
+ type ProviderConfig = v.InferOutput<typeof ProviderConfigSchema>;
38
+
39
+ /** Reset env-loaded flag so loadEnv() can be called again. For testing only. */
40
+ declare function resetEnv(): void;
41
+ /**
42
+ * Load environment variables from .env files into process.env.
43
+ *
44
+ * Files loaded (lowest to highest priority):
45
+ * - `.env` — shared defaults, may be committed
46
+ * - `.env.local` — local overrides, should be gitignored
47
+ *
48
+ * Real environment variables are never overwritten.
49
+ * Files are loaded from the project root (where pest.config.ts or package.json is).
50
+ *
51
+ * This is called automatically by `loadConfig()` and `createProvider()`.
52
+ * Safe to call multiple times — only loads once.
53
+ */
54
+ declare function loadEnv(cwd?: string): void;
55
+ declare function loadConfig(cwd?: string): Promise<PestConfig>;
56
+ declare function defineConfig(config: PestConfig): PestConfig;
57
+
58
+ interface JudgeResult {
59
+ pass: boolean;
60
+ score: number;
61
+ reasoning: string;
62
+ }
63
+
64
+ interface ToolDefinition {
65
+ type: 'function';
66
+ function: {
67
+ name: string;
68
+ description?: string;
69
+ parameters?: Record<string, unknown>;
70
+ };
71
+ }
72
+ /** One tool invocation; ProviderResponse.toolCalls is declared as an array of these. */ interface ToolCall {
73
+ name: string;
74
+ args: Record<string, unknown>;
75
+ /** Provider-assigned ID — required for multi-turn tool result routing (the 'tool' variant of AgenticMessage declares a mandatory string `toolCallId`). Optional on this type, so it may be undefined. */
76
+ id?: string;
77
+ }
78
+ type AgenticMessage = {
79
+ role: 'user';
80
+ content: string;
81
+ } | {
82
+ role: 'assistant';
83
+ content: string;
84
+ toolCalls?: ToolCall[];
85
+ } | {
86
+ role: 'tool';
87
+ toolCallName: string;
88
+ toolCallId: string;
89
+ content: string;
90
+ };
91
+ type ToolExecutor = (name: string, args: Record<string, unknown>) => Promise<unknown> | unknown;
92
+ interface SendAgenticOptions extends SendOptions {
93
+ /** Called for each tool invocation. Defaults to returning '[]' (no-op mock). */
94
+ executor?: ToolExecutor;
95
+ /** Max tool-calling iterations before stopping. Default: 10 */
96
+ maxSteps?: number;
97
+ }
98
+ interface ProviderUsage {
99
+ inputTokens: number;
100
+ outputTokens: number;
101
+ totalTokens: number;
102
+ }
103
+ interface ProviderResponse {
104
+ text: string;
105
+ toolCalls: ToolCall[];
106
+ usage: ProviderUsage;
107
+ raw: unknown;
108
+ }
109
+ interface ProviderRequestOptions {
110
+ systemPrompt?: string;
111
+ messages: AgenticMessage[];
112
+ tools?: ToolDefinition[];
113
+ temperature?: number;
114
+ maxTokens?: number;
115
+ responseFormat?: 'text' | 'json';
116
+ }
117
+ /** Backend driven by send(): send() awaits call() once per message and copies name/model onto its result. */ interface Provider {
118
+ name: string; // reported as `provider` on the send() result and on SendEntry
119
+ model: string; // reported as `model` on the send() result and on SendEntry
120
+ call(options: ProviderRequestOptions): Promise<ProviderResponse>; // send() passes a single user-role message in options.messages
121
+ }
122
+ /** Result type of send() and sendAgentic(). The field notes below describe how send() fills them. */ interface PestResponse extends ProviderResponse {
123
+ latencyMs: number; // performance.now() delta measured around provider.call(), in milliseconds
124
+ provider: string; // copied from Provider.name
125
+ model: string; // copied from Provider.model
126
+ }
127
+ /** Optional per-call settings for send(); each field is forwarded unchanged to provider.call(). */ interface SendOptions {
128
+ systemPrompt?: string; // also copied onto the SendEntry handed to onSend hooks
129
+ tools?: ToolDefinition[];
130
+ temperature?: number;
131
+ maxTokens?: number;
132
+ responseFormat?: 'text' | 'json';
133
+ }
134
+
135
+ /**
136
+ * Resolve the active provider from PEST_PROVIDER env + pest.config.ts.
137
+ *
138
+ * Used in test files so the CLI can switch providers via env:
139
+ * - CLI sets PEST_PROVIDER=gpt4o, then shells out to vitest
140
+ * - Test file calls useProvider() → gets the gpt4o provider
141
+ * - When running vitest directly (no CLI), falls back to first provider in config
142
+ */
143
+ declare function useProvider(fallbackName?: string): Promise<Provider>;
144
+ /**
145
+ * Resolve system prompt with PEST_SYSTEM_PROMPT env override.
146
+ *
147
+ * Used in test files so the CLI tune command can swap prompts via env:
148
+ * - CLI sets PEST_SYSTEM_PROMPT=<variant>, then shells out to vitest
149
+ * - Test file calls useSystemPrompt("default...") → gets the variant
150
+ * - When running vitest directly, returns the default
151
+ */
152
+ declare function useSystemPrompt(defaultPrompt: string): string;
153
+
154
+ declare function setJudge(provider: Provider): void;
155
+ declare function getJudge(): Provider | null;
156
+ declare function resolveJudge(options?: {
157
+ judge?: Provider;
158
+ }): Provider;
159
+
160
+ interface MatcherResult {
161
+ pass: boolean;
162
+ message: string;
163
+ score?: number;
164
+ reasoning?: string;
165
+ metadata?: Record<string, unknown>;
166
+ }
167
+ interface SemanticOptions {
168
+ threshold?: number;
169
+ judge?: Provider;
170
+ }
171
+ interface ClassificationOptions {
172
+ categories?: string[];
173
+ judge?: Provider;
174
+ }
175
+ interface RubricConfig {
176
+ criteria: string;
177
+ scoreRange?: [number, number];
178
+ passThreshold?: number;
179
+ }
180
+ declare function containsToolCall(response: PestResponse, name: string, args?: Record<string, unknown>): MatcherResult;
181
+ declare function callsToolsInOrder(response: PestResponse, names: string[]): MatcherResult;
182
+ declare function matchesResponseSchema(response: PestResponse, schema: {
183
+ _run?: unknown;
184
+ '~standard'?: {
185
+ validate: (input: unknown) => {
186
+ issues?: unknown[];
187
+ };
188
+ };
189
+ }): MatcherResult;
190
+ declare function respondsWithinTokens(response: PestResponse, maxTokens: number): MatcherResult;
191
+ declare function containsText(response: PestResponse, text: string): MatcherResult;
192
+ declare function hasToolCallCount(response: PestResponse, count: number): MatcherResult;
193
+ declare function matchesSemanticMeaning(response: PestResponse, expected: string, judge: Provider, options?: SemanticOptions): Promise<MatcherResult>;
194
+ declare function satisfiesCriteria(response: PestResponse, rubric: string | RubricConfig, judge: Provider): Promise<MatcherResult>;
195
+ declare function classifiedAs(response: PestResponse, label: string, judge: Provider, options?: ClassificationOptions): Promise<MatcherResult>;
196
+ declare function doesNotDisclose(response: PestResponse, topic: string, judge: Provider): Promise<MatcherResult>;
197
+ declare function assertConsistent(provider: Provider, message: string, runs: number, options?: {
198
+ threshold?: number;
199
+ judge?: Provider;
200
+ systemPrompt?: string;
201
+ tools?: ToolDefinition[];
202
+ }): Promise<{
203
+ pass: boolean;
204
+ scores: number[];
205
+ reasoning: string[];
206
+ }>;
207
+
208
+ interface ModelPricing {
209
+ inputCentsPer1M: number;
210
+ outputCentsPer1M: number;
211
+ }
212
+ declare function setPricing(pricing: Record<string, ModelPricing>): void;
213
+ declare function resetPricing(): void;
214
+ declare function getPricing(model: string): ModelPricing;
215
+ declare function estimateCostCents(model: string, inputTokens: number, outputTokens: number): number;
216
+
217
+ declare function createProvider(config: ProviderConfig): Provider;
218
+ declare function createProviders(configs: ProviderConfig[]): Map<string, Provider>;
219
+
220
+ /** Record of one completed send() call, as passed to onSend hooks and accepted by recordSend(). The field notes describe how send() fills them. */ interface SendEntry {
221
+ input: string; // the `message` argument given to send()
222
+ output: string; // the provider response's `text`
223
+ systemPrompt?: string; // options.systemPrompt, when supplied
224
+ provider: string; // Provider.name
225
+ model: string; // Provider.model
226
+ latencyMs: number; // performance.now() delta around provider.call(), in milliseconds
227
+ usage: ProviderUsage; // shallow copy of the response's usage object
228
+ toolCalls: ToolCall[]; // the response's toolCalls array itself (same reference, not a copy)
229
+ timestamp: number; // Date.now() taken when the entry is built, i.e. after the provider call returns
230
+ }
231
+ interface MatcherEntry {
232
+ matcher: string;
233
+ pass: boolean;
234
+ score?: number;
235
+ reasoning?: string;
236
+ response?: {
237
+ provider: string;
238
+ model: string;
239
+ latencyMs: number;
240
+ usage: ProviderUsage;
241
+ toolCalls: ToolCall[];
242
+ text: string;
243
+ };
244
+ judgeModel?: string;
245
+ }
246
+ interface TestData {
247
+ testId: string;
248
+ testName?: string;
249
+ startTime: number;
250
+ endTime?: number;
251
+ sends: SendEntry[];
252
+ entries: MatcherEntry[];
253
+ }
254
+ declare function startTest(testId: string, testName?: string): void;
255
+ declare function endTest(testId: string): void;
256
+ declare function recordSend(testId: string, entry: SendEntry): void;
257
+ declare function record(testId: string, entry: MatcherEntry): void;
258
+ declare function getTestData(testId: string): TestData | undefined;
259
+ declare function getAllTestData(): Map<string, TestData>;
260
+ declare function clearAll(): void;
261
+ declare function buildMatcherEntry(matcher: string, result: MatcherResult, response?: {
262
+ provider: string;
263
+ model: string;
264
+ latencyMs: number;
265
+ usage: ProviderUsage;
266
+ toolCalls: ToolCall[];
267
+ text: string;
268
+ }, judgeModel?: string): MatcherEntry;
269
+
270
+ /** Callback registered through onSend(). send() calls it synchronously and ignores its return value, so a returned promise is not awaited. */ type SendHook = (entry: SendEntry) => void;
271
+ /**
272
+ * Register a hook that gets called after every send() call.
273
+ * Used by extensions to capture LLM input/output for reporters.
274
+ * Returns a dispose function to remove the hook. Hooks run in registration order, after the provider call has returned and before send() resolves; an exception thrown by a hook makes send() reject.
275
+ */
276
+ declare function onSend(hook: SendHook): () => void;
277
+ /** Sends `message` as a single user-role message through provider.call(), forwarding the SendOptions fields, and resolves with the provider response plus latencyMs, provider and model. Hooks registered via onSend() are invoked before the promise resolves. */ declare function send(provider: Provider, message: string, options?: SendOptions): Promise<PestResponse>;
278
+
279
+ /**
280
+ * Run a full agentic tool-call loop and return a single PestResponse with all
281
+ * accumulated tool calls across all steps.
282
+ *
283
+ * This lets you use toContainToolCall, toCallToolsInOrder, toHaveToolCallCount
284
+ * on multi-turn tool-calling conversations.
285
+ *
286
+ * @example
287
+ * const res = await sendAgentic(provider, 'List then create a segment', {
288
+ * systemPrompt,
289
+ * tools,
290
+ * executor: async (name, args) => myApp.handleTool(name, args),
291
+ * });
292
+ * expect(res).toCallToolsInOrder(['list_segments', 'create_segment']);
293
+ */
294
+ declare function sendAgentic(provider: Provider, message: string, options?: SendAgenticOptions): Promise<PestResponse>;
295
+
296
+ /**
297
+ * Convert a Zod schema to a pest ToolDefinition.
298
+ *
299
+ * Eliminates the need to duplicate tool definitions in JSON Schema by hand.
300
+ * Requires zod >= 3.0.0 as a peer dependency.
301
+ *
302
+ * @example
303
+ * const tools = Object.values(TOOL_DEFINITIONS).map(def =>
304
+ * zodTool(def.name, def.description, def.parameters)
305
+ * );
306
+ */
307
+ declare function zodTool(name: string, description: string, schema: z.ZodTypeAny): ToolDefinition;
308
+
309
+ declare const ansi: {
310
+ reset: string;
311
+ bold: string;
312
+ dim: string;
313
+ green: string;
314
+ red: string;
315
+ yellow: string;
316
+ cyan: string;
317
+ magenta: string;
318
+ };
319
+ declare function formatTokens(n: number): string;
320
+ declare function formatDuration(ms: number): string;
321
+ declare function formatCost(cents: number): string;
322
+ declare function escapeHtml(str: string): string;
323
+
324
+ type TestIdResolver = () => string | undefined;
325
+ /**
326
+ * Create pest matchers bound to a test-id resolver.
327
+ * Shared by vitest and jest extensions to avoid code duplication.
328
+ */
329
+ declare function createPestMatchers(getTestId: TestIdResolver): {
330
+ toContainToolCall(received: PestResponse, name: string, args?: Record<string, unknown>): {
331
+ pass: boolean;
332
+ message: () => string;
333
+ };
334
+ toCallToolsInOrder(received: PestResponse, names: string[]): {
335
+ pass: boolean;
336
+ message: () => string;
337
+ };
338
+ toMatchResponseSchema(received: PestResponse, schema: unknown): {
339
+ pass: boolean;
340
+ message: () => string;
341
+ };
342
+ toRespondWithinTokens(received: PestResponse, maxTokens: number): {
343
+ pass: boolean;
344
+ message: () => string;
345
+ };
346
+ toContainText(received: PestResponse, text: string): {
347
+ pass: boolean;
348
+ message: () => string;
349
+ };
350
+ toNotContainText(received: PestResponse, text: string): {
351
+ pass: boolean;
352
+ message: () => string;
353
+ };
354
+ toHaveToolCallCount(received: PestResponse, count: number): {
355
+ pass: boolean;
356
+ message: () => string;
357
+ };
358
+ toMatchSemanticMeaning(received: PestResponse, expected: string, options?: SemanticOptions): Promise<{
359
+ pass: boolean;
360
+ message: () => string;
361
+ }>;
362
+ toSatisfyCriteria(received: PestResponse, rubric: string | RubricConfig, options?: {
363
+ judge?: Provider;
364
+ }): Promise<{
365
+ pass: boolean;
366
+ message: () => string;
367
+ }>;
368
+ toBeClassifiedAs(received: PestResponse, label: string, options?: ClassificationOptions): Promise<{
369
+ pass: boolean;
370
+ message: () => string;
371
+ }>;
372
+ toNotDisclose(received: PestResponse, topic: string, options?: {
373
+ judge?: Provider;
374
+ }): Promise<{
375
+ pass: boolean;
376
+ message: () => string;
377
+ }>;
378
+ };
379
+
380
+ interface ReporterOptions {
381
+ verbose?: boolean;
382
+ showCost?: boolean;
383
+ logFile?: string | false;
384
+ htmlFile?: string | false;
385
+ }
386
+ interface RunStats {
387
+ tests: number;
388
+ totalInputTokens: number;
389
+ totalOutputTokens: number;
390
+ totalLatencyMs: number;
391
+ totalCostCents: number;
392
+ judgeCount: number;
393
+ toolCallCount: number;
394
+ }
395
+ interface LogEntry {
396
+ test: string;
397
+ status: string;
398
+ sends: SendEntry[];
399
+ matchers: Array<{
400
+ matcher: string;
401
+ pass: boolean;
402
+ score?: number;
403
+ reasoning?: string;
404
+ judgeModel?: string;
405
+ }>;
406
+ }
407
/**
 * Outcome of one test as accepted by processTestResult(): the test's name and
 * status, the send() calls captured for it, and the matcher entries recorded
 * against it.
 */
interface TestResult {
  name: string;
  /**
   * 'passed' and 'failed' are the named statuses; any other string is still
   * accepted. Written as `string & {}` because a bare `| string` absorbs the
   * literals and the union collapses to plain `string`.
   */
  status: 'passed' | 'failed' | (string & {});
  sends: SendEntry[];
  entries: Array<{
    matcher: string;
    pass: boolean;
    score?: number;
    reasoning?: string;
    judgeModel?: string;
    /** Snapshot of the response the matcher ran against, when one was captured. */
    response?: {
      provider: string;
      model: string;
      latencyMs: number;
      usage: {
        inputTokens: number;
        outputTokens: number;
      };
      toolCalls: Array<{
        name: string;
      }>;
      text: string;
    };
  }>;
}
432
+ declare function emptyStats(): RunStats;
433
+ /** Process a single test result and update stats. Returns a LogEntry. */
434
+ declare function processTestResult(testResult: TestResult, stats: RunStats, options: ReporterOptions): LogEntry;
435
+ /** Print summary and write log/HTML files. */
436
+ declare function finishReport(stats: RunStats, logEntries: LogEntry[], options: ReporterOptions): void;
437
+
438
+ interface ReportMatcher {
439
+ matcher: string;
440
+ pass: boolean;
441
+ score?: number;
442
+ reasoning?: string;
443
+ judgeModel?: string;
444
+ }
445
+ interface ReportTest {
446
+ test: string;
447
+ status: string;
448
+ sends: SendEntry[];
449
+ matchers: ReportMatcher[];
450
+ }
451
+ interface ReportSummary {
452
+ tests: number;
453
+ totalTokens: number;
454
+ inputTokens: number;
455
+ outputTokens: number;
456
+ estimatedCost: string;
457
+ judgeCount: number;
458
+ toolCallCount: number;
459
+ }
460
+ interface ReportData {
461
+ timestamp: string;
462
+ summary: ReportSummary;
463
+ tests: ReportTest[];
464
+ }
465
+ declare function buildHtmlReport(data: ReportData): string;
466
+
467
+ export { type AgenticMessage, type ClassificationOptions, type JudgeResult, type LogEntry, type MatcherEntry, type MatcherResult, type ModelPricing, type PestConfig, type PestResponse, type Provider, type ProviderConfig, type ProviderRequestOptions, type ProviderResponse, type ProviderUsage, type ReporterOptions, type RubricConfig, type RunStats, type SemanticOptions, type SendAgenticOptions, type SendEntry, type SendOptions, type TestData, type TestResult, type ToolCall, type ToolDefinition, type ToolExecutor, ansi, assertConsistent, buildHtmlReport, buildMatcherEntry, callsToolsInOrder, classifiedAs, clearAll, containsText, containsToolCall, createPestMatchers, createProvider, createProviders, defineConfig, doesNotDisclose, emptyStats, endTest, escapeHtml, estimateCostCents, finishReport, formatCost, formatDuration, formatTokens, getAllTestData, getJudge, getPricing, getTestData, hasToolCallCount, loadConfig, loadEnv, matchesResponseSchema, matchesSemanticMeaning, onSend, processTestResult, record, recordSend, resetEnv, resetPricing, resolveJudge, respondsWithinTokens, satisfiesCriteria, send, sendAgentic, setJudge, setPricing, startTest, useProvider, useSystemPrompt, zodTool };