@prompd/test 0.5.0-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/EvaluatorEngine.d.ts +32 -0
- package/dist/EvaluatorEngine.d.ts.map +1 -0
- package/dist/EvaluatorEngine.js +97 -0
- package/dist/TestDiscovery.d.ts +28 -0
- package/dist/TestDiscovery.d.ts.map +1 -0
- package/dist/TestDiscovery.js +137 -0
- package/dist/TestParser.d.ts +25 -0
- package/dist/TestParser.d.ts.map +1 -0
- package/dist/TestParser.js +187 -0
- package/dist/TestRunner.d.ts +57 -0
- package/dist/TestRunner.d.ts.map +1 -0
- package/dist/TestRunner.js +463 -0
- package/dist/cli-types.d.ts +62 -0
- package/dist/cli-types.d.ts.map +1 -0
- package/dist/cli-types.js +6 -0
- package/dist/evaluators/NlpEvaluator.d.ts +26 -0
- package/dist/evaluators/NlpEvaluator.d.ts.map +1 -0
- package/dist/evaluators/NlpEvaluator.js +145 -0
- package/dist/evaluators/PrmdEvaluator.d.ts +42 -0
- package/dist/evaluators/PrmdEvaluator.d.ts.map +1 -0
- package/dist/evaluators/PrmdEvaluator.js +265 -0
- package/dist/evaluators/ScriptEvaluator.d.ts +19 -0
- package/dist/evaluators/ScriptEvaluator.d.ts.map +1 -0
- package/dist/evaluators/ScriptEvaluator.js +161 -0
- package/dist/evaluators/types.d.ts +19 -0
- package/dist/evaluators/types.d.ts.map +1 -0
- package/dist/evaluators/types.js +5 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +33 -0
- package/dist/reporters/ConsoleReporter.d.ts +17 -0
- package/dist/reporters/ConsoleReporter.d.ts.map +1 -0
- package/dist/reporters/ConsoleReporter.js +85 -0
- package/dist/reporters/JsonReporter.d.ts +11 -0
- package/dist/reporters/JsonReporter.d.ts.map +1 -0
- package/dist/reporters/JsonReporter.js +18 -0
- package/dist/reporters/JunitReporter.d.ts +15 -0
- package/dist/reporters/JunitReporter.d.ts.map +1 -0
- package/dist/reporters/JunitReporter.js +89 -0
- package/dist/reporters/types.d.ts +8 -0
- package/dist/reporters/types.d.ts.map +1 -0
- package/dist/reporters/types.js +5 -0
- package/dist/types.d.ts +115 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/package.json +34 -0
- package/src/EvaluatorEngine.ts +130 -0
- package/src/TestDiscovery.ts +133 -0
- package/src/TestParser.ts +235 -0
- package/src/TestRunner.ts +516 -0
- package/src/cli-types.ts +92 -0
- package/src/evaluators/NlpEvaluator.ts +184 -0
- package/src/evaluators/PrmdEvaluator.ts +284 -0
- package/src/evaluators/ScriptEvaluator.ts +149 -0
- package/src/evaluators/types.ts +24 -0
- package/src/index.ts +76 -0
- package/src/reporters/ConsoleReporter.ts +100 -0
- package/src/reporters/JsonReporter.ts +21 -0
- package/src/reporters/JunitReporter.ts +113 -0
- package/src/reporters/types.ts +9 -0
- package/src/types.ts +133 -0
- package/tsconfig.json +20 -0
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* TestRunner - orchestrates the full test lifecycle:
|
|
4
|
+
* discovery -> compile -> execute -> evaluate -> report
|
|
5
|
+
*
|
|
6
|
+
* Consumes @prompd/cli for compilation and execution.
|
|
7
|
+
* This is the primary public API for @prompd/test.
|
|
8
|
+
*/
|
|
9
|
+
/**
 * TypeScript interop helper: re-exposes property `k` of module `m` on object `o`
 * (under the name `k2`, defaulting to `k`).
 *
 * On engines with Object.create the binding is live: unless the source property
 * is already an accessor on an ES module, or is a non-writable, non-configurable
 * data property, a getter is installed that reads `m[k]` on every access.
 * On legacy engines the value is simply copied.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy fallback: plain one-time copy of the value.
        return function (o, m, k, k2) {
            o[k2 === undefined ? k : k2] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        // The existing descriptor is reused only when it is already safe to
        // forward as-is; otherwise it is replaced by a forwarding getter.
        var reuseDescriptor = false;
        if (descriptor) {
            reuseDescriptor = "get" in descriptor
                ? Boolean(m.__esModule)
                : !(descriptor.writable || descriptor.configurable);
        }
        if (!reuseDescriptor) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, descriptor);
    };
})();
|
|
20
|
+
/**
 * TypeScript interop helper: attaches `v` as the `default` export of namespace
 * object `o`. With Object.create available the property is defined as an
 * enumerable (read-only) data property; otherwise it is assigned directly.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
|
|
25
|
+
/**
 * TypeScript interop helper implementing `import * as ns from "mod"` for
 * CommonJS modules. ES modules (flagged with `__esModule`) are returned as-is;
 * anything else is wrapped in a fresh namespace object whose own properties
 * (except `default`) are bound to the module and whose `default` is the module.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved lazily on first use, then replaced by the chosen implementation.
    var listOwnKeys = function (o) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var key in obj) if (Object.prototype.hasOwnProperty.call(obj, key)) names[names.length] = key;
            return names;
        };
        return listOwnKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var index = 0; index < keys.length; index++) {
                if (keys[index] !== "default") {
                    __createBinding(namespace, mod, keys[index]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
42
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
43
|
+
exports.TestRunner = void 0;
|
|
44
|
+
const path = __importStar(require("path"));
|
|
45
|
+
const fs = __importStar(require("fs"));
|
|
46
|
+
const TestDiscovery_1 = require("./TestDiscovery");
|
|
47
|
+
const EvaluatorEngine_1 = require("./EvaluatorEngine");
|
|
48
|
+
const ConsoleReporter_1 = require("./reporters/ConsoleReporter");
|
|
49
|
+
const JsonReporter_1 = require("./reporters/JsonReporter");
|
|
50
|
+
const JunitReporter_1 = require("./reporters/JunitReporter");
|
|
51
|
+
class TestRunner {
    /**
     * @param cli - Pre-loaded @prompd/cli module (compiler, executor, config manager).
     *   The argument may be omitted at construction time, but every compile or
     *   execute step goes through getCli(), which throws when no module was supplied.
     */
    constructor(cli) {
        this.cliModule = null;
        this.configLoaded = false;
        // Config manager populated by ensureConfig(). executePrompt() falls back
        // to it when the CLI exposes no static ConfigManager.getInstance().
        this.configManager = null;
        this.discovery = new TestDiscovery_1.TestDiscovery();
        if (cli) {
            this.cliModule = cli;
        }
    }
    /**
     * Ensure CLI config is loaded (API keys, provider settings).
     * Runs before execution; a failed load is logged and retried on the next call
     * because `configLoaded` is only set on success.
     */
    async ensureConfig() {
        if (this.configLoaded)
            return;
        const cli = await this.getCli();
        try {
            const configManager = new cli.ConfigManager();
            console.log('[TestRunner] Loading config...');
            // loadConfig() is async — must await it
            if (configManager.loadConfig) {
                await configManager.loadConfig();
                const cfg = configManager.config;
                console.log('[TestRunner] Config loaded:', cfg ? Object.keys(cfg) : 'null');
                // Only the names of providers with a non-empty key are logged, never the keys.
                console.log('[TestRunner] API keys:', cfg?.apiKeys ? Object.keys(cfg.apiKeys).filter((k) => cfg.apiKeys[k]) : 'none');
                console.log('[TestRunner] Default provider:', cfg?.defaultProvider);
            }
            else if (configManager.load) {
                await configManager.load();
                console.log('[TestRunner] Config loaded via load()');
            }
            // Keep the loaded instance so executePrompt() can actually use it.
            this.configManager = configManager;
            this.configLoaded = true;
        }
        catch (err) {
            console.error('[TestRunner] Config load failed:', err);
            // Config may not exist — that's OK for --no-llm runs
        }
    }
    /**
     * Run tests for a target path (file or directory).
     *
     * @param targetPath - File or directory handed to test discovery.
     * @param options - Run options (noLlm, evaluators, provider, model, ...).
     * @param onProgress - Optional callback receiving suite/test/assertion events.
     * @returns `{ suites, summary }`. Discovery errors are counted in
     *   `summary.errors` even when some suites were discovered successfully.
     */
    async run(targetPath, options = {}, onProgress) {
        const startTime = Date.now();
        // 1. Discovery
        const { suites, errors: discoveryErrors } = await this.discovery.discover(targetPath);
        if (discoveryErrors.length > 0 && suites.length === 0) {
            return this.buildErrorResult(discoveryErrors.map(e => e.message), startTime);
        }
        // 2. Run each suite
        const suiteResults = [];
        for (const suite of suites) {
            onProgress?.({ type: 'suite_start', suite: suite.name, testCount: suite.tests.length });
            const results = await this.runSuite(suite, options, onProgress);
            suiteResults.push({
                suite: suite.name,
                testFilePath: suite.testFilePath,
                results,
            });
            onProgress?.({ type: 'suite_complete', suite: suite.name, results });
        }
        // 3. Build summary
        const summary = this.buildSummary(suiteResults, startTime);
        // Partial discovery failures must not be swallowed: count them as errors
        // (same accounting as buildErrorResult) so the run is reported as failing.
        summary.errors += discoveryErrors.length;
        return { suites: suiteResults, summary };
    }
    /**
     * Run tests and return formatted output string.
     *
     * @returns `{ output, exitCode }` where exitCode is 1 when the summary
     *   contains any failure or error and 0 otherwise.
     */
    async runAndReport(targetPath, options = {}, onProgress) {
        const result = await this.run(targetPath, options, onProgress);
        const reporter = this.getReporter(options);
        const output = reporter.report(result);
        const exitCode = (result.summary.failed > 0 || result.summary.errors > 0) ? 1 : 0;
        return { output, exitCode };
    }
    /**
     * Run every test case of a suite sequentially, emitting test_start and
     * test_complete progress events around each one.
     */
    async runSuite(suite, options, onProgress) {
        const results = [];
        const allowedEvaluators = this.resolveAllowedEvaluators(options);
        for (const testCase of suite.tests) {
            onProgress?.({ type: 'test_start', suite: suite.name, testName: testCase.name });
            const result = await this.runTestCase(suite, testCase, allowedEvaluators, options, onProgress);
            results.push(result);
            onProgress?.({ type: 'test_complete', suite: suite.name, testName: testCase.name, result });
        }
        return results;
    }
    /**
     * Assemble a test-result record with the fields every result shares.
     * Keys in `extra` are appended after the common fields; an `assertions`
     * entry in `extra` replaces the default empty list.
     */
    buildTestResult(suite, testCase, status, start, extra = {}) {
        return {
            suite: suite.name,
            testName: testCase.name,
            status,
            duration: Date.now() - start,
            assertions: [],
            ...extra,
        };
    }
    /**
     * Run a single test case: compile -> execute (unless noLlm) -> evaluate.
     * Failures in any stage are converted into a result record instead of
     * propagating, so one broken test cannot abort the whole run.
     */
    async runTestCase(suite, testCase, allowedEvaluators, options, onProgress) {
        const start = Date.now();
        // Step 1: Compile the target .prmd with test params
        let compiledOutput;
        let promptMetadata = {};
        try {
            const compileResult = await this.compileTarget(suite.target, testCase.params, options);
            compiledOutput = compileResult.compiled;
            promptMetadata = compileResult.metadata;
        }
        catch (err) {
            const errorMessage = err instanceof Error ? err.message : String(err);
            // If expect_error is set, compilation failure is a PASS
            if (testCase.expect_error) {
                return this.buildTestResult(suite, testCase, 'pass', start, {
                    error: `Expected error occurred: ${errorMessage}`,
                });
            }
            return this.buildTestResult(suite, testCase, 'error', start, {
                error: `Compilation failed: ${errorMessage}`,
            });
        }
        // If expect_error was set but compilation succeeded, that's a failure
        if (testCase.expect_error) {
            return this.buildTestResult(suite, testCase, 'fail', start, {
                compiledInput: compiledOutput,
                error: 'Expected compilation to fail, but it succeeded',
            });
        }
        // Step 2: Execute against LLM (unless --no-llm)
        let llmOutput = '';
        let provider = 'none';
        let model = 'none';
        let execDuration = 0;
        let usage;
        if (!options.noLlm) {
            try {
                const execResult = await this.executePrompt(compiledOutput, promptMetadata, options);
                llmOutput = execResult.response;
                provider = execResult.provider;
                model = execResult.model;
                execDuration = execResult.duration;
                usage = execResult.usage;
            }
            catch (err) {
                return this.buildTestResult(suite, testCase, 'error', start, {
                    compiledInput: compiledOutput,
                    error: `Execution failed: ${err instanceof Error ? err.message : String(err)}`,
                });
            }
        }
        else {
            // In --no-llm mode, use the compiled output as the "output" for NLP checks
            // This enables structural assertions against the compiled prompt itself
            llmOutput = compiledOutput;
        }
        const execution = !options.noLlm ? { provider, model, duration: execDuration, usage } : undefined;
        // Step 3: Run evaluations
        // A test case without an assertion list is treated as having none.
        const assertionDefs = Array.isArray(testCase.assert) ? testCase.assert : [];
        if (assertionDefs.length === 0) {
            return this.buildTestResult(suite, testCase, 'pass', start, {
                output: llmOutput,
                compiledInput: compiledOutput,
                execution,
            });
        }
        let assertions;
        try {
            const engine = new EvaluatorEngine_1.EvaluatorEngine({
                testFileDir: path.dirname(suite.testFilePath),
                evaluatorPrompt: suite.evaluatorPrompt,
                workspaceRoot: options.workspaceRoot,
                registryUrl: options.registryUrl,
                allowedEvaluators,
                // runAll overrides failFast; failFast defaults to true.
                failFast: options.runAll ? false : (options.failFast !== false),
                cliModule: this.cliModule || undefined,
                provider: options.provider,
                model: options.model,
            });
            const context = {
                prompt: compiledOutput,
                response: llmOutput,
                params: testCase.params,
                metadata: { provider, model, duration: execDuration },
            };
            assertions = await engine.evaluate(assertionDefs, context, (assertion) => {
                onProgress?.({
                    type: 'assertion_complete',
                    suite: suite.name,
                    testName: testCase.name,
                    assertion,
                });
            });
        }
        catch (err) {
            // Mirror the compile/execute stages: an evaluator crash is a test
            // error, not a reason to abort the remaining tests.
            return this.buildTestResult(suite, testCase, 'error', start, {
                output: llmOutput,
                compiledInput: compiledOutput,
                execution,
                error: `Evaluation failed: ${err instanceof Error ? err.message : String(err)}`,
            });
        }
        // Determine overall test status from assertions (error outranks fail)
        const hasFailure = assertions.some(a => a.status === 'fail');
        const hasError = assertions.some(a => a.status === 'error');
        const status = hasError ? 'error' : hasFailure ? 'fail' : 'pass';
        return this.buildTestResult(suite, testCase, status, start, {
            assertions,
            output: llmOutput,
            compiledInput: compiledOutput,
            execution,
        });
    }
    /**
     * Compile a .prmd file and return both the compiled text and metadata
     * (provider, model, temperature, max_tokens from frontmatter).
     *
     * @throws Error when the target file does not exist or compilation yields
     *   no output; compiler-reported errors are included in the message.
     */
    async compileTarget(targetPath, params, options) {
        const cli = await this.getCli();
        const compiler = new cli.PrompdCompiler();
        if (!fs.existsSync(targetPath)) {
            throw new Error(`Target prompt file not found: ${targetPath}`);
        }
        // Use compileWithContext to get both output and frontmatter metadata
        const context = await compiler.compileWithContext(targetPath, {
            outputFormat: 'markdown',
            parameters: params,
            filePath: targetPath,
            workspaceRoot: options.workspaceRoot,
            registryUrl: options.registryUrl,
            fileSystem: new cli.NodeFileSystem(),
        });
        // compileWithContext may return { compiledResult, metadata } or a string
        let compiled;
        let metadata = {};
        if (typeof context === 'string') {
            compiled = context;
        }
        else if (context && typeof context === 'object') {
            compiled = context.compiledResult || '';
            metadata = context.metadata || {};
        }
        else {
            throw new Error('Compilation produced no output');
        }
        if (!compiled) {
            // Surface the compiler's own diagnostics instead of a generic message.
            const reported = context && typeof context === 'object' && Array.isArray(context.errors)
                ? context.errors
                : [];
            const details = reported
                .map((e) => (typeof e === 'string' ? e : (e && e.message) ? String(e.message) : JSON.stringify(e)))
                .join('; ');
            throw new Error(details
                ? `Compilation produced no output: ${details}`
                : 'Compilation produced no output');
        }
        console.log(`[TestRunner] Compiled ${targetPath}`);
        console.log(`[TestRunner] params: ${JSON.stringify(params)}`);
        console.log(`[TestRunner] metadata: ${JSON.stringify(metadata)}`);
        console.log(`[TestRunner] output (${compiled.length} chars): ${compiled.substring(0, 200)}`);
        return { compiled, metadata };
    }
    /**
     * Execute compiled prompt text against an LLM using the executor's callLLM directly.
     * This avoids re-compilation through executeRawText which loses metadata.
     *
     * @returns `{ response, provider, model, duration, usage }`.
     * @throws Error when no API key is configured (except for "ollama") or the
     *   LLM call fails.
     */
    async executePrompt(compiled, metadata, runOptions) {
        await this.ensureConfig();
        const cli = await this.getCli();
        const executor = new cli.PrompdExecutor();
        const start = Date.now();
        // Resolve provider/model from frontmatter metadata + config defaults.
        // Prefer the CLI singleton when it exists; otherwise use the instance
        // that ensureConfig() loaded, so API keys are found either way.
        const sharedManager = cli.ConfigManager?.getInstance
            ? cli.ConfigManager.getInstance()
            : null;
        const configManager = sharedManager || this.configManager;
        const config = configManager?.config || {};
        // Priority: .prmd frontmatter > test run options (UI selector) > config defaults
        const provider = String(metadata.provider || runOptions.provider || config.defaultProvider || 'openai');
        const rawModel = metadata.model || runOptions.model || config.default_model || config.defaultModel || '';
        // Fall back to a sensible default model if none specified
        const model = String(rawModel) || this.getDefaultModel(provider);
        // Non-numeric frontmatter values fall back to the default instead of NaN.
        const numberOr = (value, fallback) => {
            const parsed = Number(value ?? fallback);
            return Number.isFinite(parsed) ? parsed : fallback;
        };
        const temperature = numberOr(metadata.temperature, 0.7);
        const maxTokens = numberOr(metadata.max_tokens, 4096);
        // Get API key from config
        const apiKey = configManager?.getApiKey?.(provider, config) || '';
        // compiled.length is a character count, not a token count.
        console.log(`[TestRunner] Executing: provider=${provider}, model=${model || '(default)'}, chars=${compiled.length}`);
        if (!apiKey && provider !== 'ollama') {
            throw new Error(`No API key configured for provider "${provider}". Check ~/.prompd/config.yaml`);
        }
        try {
            const result = await executor.callLLM(provider, model, compiled, apiKey, temperature, maxTokens);
            if (!result.success) {
                throw new Error(result.error || 'LLM execution failed');
            }
            return {
                response: result.response || result.content || '',
                provider,
                model,
                duration: Date.now() - start,
                usage: result.usage,
            };
        }
        catch (err) {
            const errMsg = err instanceof Error ? err.message : String(err);
            console.error(`[TestRunner] callLLM failed: ${errMsg}`);
            // Re-throw the original Error to keep its stack; wrap non-Error values.
            throw err instanceof Error ? err : new Error(errMsg);
        }
    }
    /**
     * Resolve which evaluator types may run. Defaults to nlp, script and prmd;
     * in noLlm mode the prmd evaluator is always removed.
     */
    resolveAllowedEvaluators(options) {
        if (options.noLlm) {
            // In --no-llm mode, skip prmd evaluators (they require LLM calls)
            const base = options.evaluators || ['nlp', 'script'];
            return base.filter(e => e !== 'prmd');
        }
        return options.evaluators || ['nlp', 'script', 'prmd'];
    }
    /**
     * Pick the reporter for `options.reporter` ('json', 'junit', or console by default).
     */
    getReporter(options) {
        switch (options.reporter) {
            case 'json':
                return new JsonReporter_1.JsonReporter(options.verbose);
            case 'junit':
                return new JunitReporter_1.JunitReporter();
            case 'console':
            default:
                return new ConsoleReporter_1.ConsoleReporter(options.verbose);
        }
    }
    /**
     * Aggregate per-test results into counts, total duration, token usage and
     * the distinct providers/models used. Empty aggregates are reported as
     * `undefined` rather than 0 or an empty list.
     */
    buildSummary(suiteResults, startTime) {
        let total = 0;
        let passed = 0;
        let failed = 0;
        let errors = 0;
        let skipped = 0;
        let totalTokens = 0;
        const providerSet = new Set();
        const modelSet = new Set();
        for (const suite of suiteResults) {
            for (const result of suite.results) {
                total++;
                switch (result.status) {
                    case 'pass':
                        passed++;
                        break;
                    case 'fail':
                        failed++;
                        break;
                    case 'error':
                        errors++;
                        break;
                    case 'skip':
                        skipped++;
                        break;
                }
                if (result.execution) {
                    if (result.execution.provider && result.execution.provider !== 'none') {
                        providerSet.add(result.execution.provider);
                    }
                    if (result.execution.model && result.execution.model !== 'none') {
                        modelSet.add(result.execution.model);
                    }
                    if (result.execution.usage?.totalTokens) {
                        totalTokens += result.execution.usage.totalTokens;
                    }
                }
            }
        }
        return {
            total,
            passed,
            failed,
            errors,
            skipped,
            duration: Date.now() - startTime,
            totalTokens: totalTokens || undefined,
            providers: providerSet.size > 0 ? Array.from(providerSet) : undefined,
            models: modelSet.size > 0 ? Array.from(modelSet) : undefined,
        };
    }
    /**
     * Build the result returned when discovery produced errors and no suites:
     * no suites, and one counted error per message.
     */
    buildErrorResult(errorMessages, startTime) {
        return {
            suites: [],
            summary: {
                total: 0,
                passed: 0,
                failed: 0,
                errors: errorMessages.length,
                skipped: 0,
                duration: Date.now() - startTime,
            },
        };
    }
    /**
     * Default model for a provider name (case-insensitive); unknown providers
     * get 'gpt-4o'. A Map is used so names such as "constructor" cannot resolve
     * to inherited Object.prototype members.
     */
    getDefaultModel(provider) {
        const defaults = new Map([
            ['openai', 'gpt-4o'],
            ['anthropic', 'claude-sonnet-4-20250514'],
            ['groq', 'llama-3.1-70b-versatile'],
            ['google', 'gemini-2.0-flash'],
            ['mistral', 'mistral-large-latest'],
            ['deepseek', 'deepseek-chat'],
        ]);
        return defaults.get(String(provider).toLowerCase()) || 'gpt-4o';
    }
    /**
     * Return the @prompd/cli module passed to the constructor.
     * @throws Error when no module was provided.
     */
    async getCli() {
        if (!this.cliModule) {
            throw new Error('@prompd/cli module not provided. Pass it to the TestRunner constructor: new TestRunner(cliModule)');
        }
        return this.cliModule;
    }
}
|
|
463
|
+
exports.TestRunner = TestRunner;
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared type interfaces for dynamically imported @prompd/cli module.
|
|
3
|
+
* These mirror the CLI's public API without requiring a compile-time dependency.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Shape of the @prompd/cli module as consumed by @prompd/test.
 * Every member is a constructor exposed by the CLI package.
 */
export interface CompilerModule {
    /** Compiler constructor; accepts an optional configuration record. */
    PrompdCompiler: new (config?: Record<string, unknown>) => Compiler;
    /** Executor constructor (no arguments). */
    PrompdExecutor: new () => Executor;
    /**
     * Config manager constructor. TestRunner also probes an optional static
     * `getInstance()` accessor on it, so that accessor is declared here as
     * optional; constructors without it remain valid.
     */
    ConfigManager: (new () => ConfigManagerInstance) & {
        getInstance?(): ConfigManagerInstance;
    };
    /** In-memory file system constructor, optionally seeded with path -> content. */
    MemoryFileSystem: new (files?: Record<string, string>) => MemoryFileSystemInstance;
    /** Node-backed file system constructor (no arguments). */
    NodeFileSystem: new () => NodeFileSystemInstance;
}
|
|
12
|
+
/**
 * Compiler instance exposed by @prompd/cli.
 * Both methods resolve either to the compiled text itself or to a result object.
 */
export interface Compiler {
    /** Compile `sourcePath`; the object form carries `output`, `error` and `metadata`. */
    compile(
        sourcePath: string,
        options: Record<string, unknown>,
    ): Promise<string | {
        output?: string;
        error?: string;
        metadata?: Record<string, unknown>;
    }>;
    /**
     * Compile `sourcePath` and also return compilation context; the object form
     * carries `compiledResult`, `metadata`, `errors` and `warnings`.
     */
    compileWithContext(
        sourcePath: string,
        options: Record<string, unknown>,
    ): Promise<string | {
        compiledResult?: string;
        metadata?: Record<string, unknown>;
        errors?: unknown[];
        warnings?: unknown[];
    }>;
}
|
|
25
|
+
/**
 * Executor instance exposed by @prompd/cli.
 */
export interface Executor {
    /** Execute a prompt file. */
    execute(
        filePath: string,
        options: Record<string, unknown>,
    ): Promise<ExecutorResult>;
    /** Execute already-compiled prompt text. */
    executeRawText(
        compiledText: string,
        options: Record<string, unknown>,
    ): Promise<ExecutorResult>;
    /**
     * Call a provider directly with the given content.
     * The result reports `success`; the text may arrive in `response` or `content`.
     */
    callLLM(
        provider: string,
        model: string,
        content: string,
        apiKey: string,
        temperature?: number,
        maxTokens?: number,
    ): Promise<{
        success: boolean;
        response?: string;
        content?: string;
        error?: string;
        usage?: {
            promptTokens?: number;
            completionTokens?: number;
            totalTokens?: number;
        };
    }>;
}
|
|
40
|
+
/**
 * Result of Executor.execute / Executor.executeRawText. Every field is optional.
 */
export interface ExecutorResult {
    /** Response text, when present. */
    response?: string;
    /** Error message, when present. */
    error?: string;
    /** Token accounting, when present. */
    usage?: {
        promptTokens?: number;
        completionTokens?: number;
        totalTokens?: number;
    };
    /** Provider and model associated with the result, when present. */
    metadata?: {
        provider?: string;
        model?: string;
    };
}
|
|
53
|
+
/**
 * Opaque handle for an instance created by `CompilerModule.MemoryFileSystem`.
 * No members are declared in this file.
 */
export interface MemoryFileSystemInstance {
}
|
|
55
|
+
/**
 * Opaque handle for an instance created by `CompilerModule.NodeFileSystem`.
 * No members are declared in this file.
 */
export interface NodeFileSystemInstance {
}
|
|
57
|
+
/**
 * Config manager instance exposed by @prompd/cli. Every member is optional
 * because different CLI versions expose different loaders.
 *
 * NOTE(review): `config` and `getApiKey` are declared from how TestRunner uses
 * them (`configManager.config`, `configManager.getApiKey(provider, config)`);
 * confirm the exact shapes against the CLI package.
 */
export interface ConfigManagerInstance {
    /**
     * Load configuration. The result may be a promise (TestRunner awaits it),
     * so the return type is `unknown` rather than `void`; callers should await.
     */
    loadConfig?(): unknown;
    /** Alternative loader used when `loadConfig` is absent; may also be async. */
    load?(): unknown;
    /** Accessor for the loaded configuration, if the CLI provides one. */
    getConfig?(): Record<string, unknown> | null;
    /** Loaded configuration (read by TestRunner for apiKeys and provider defaults). */
    config?: Record<string, unknown> | null;
    /** Look up the API key for a provider; a falsy result means "not configured". */
    getApiKey?(provider: string, config?: Record<string, unknown>): string | null | undefined;
}
|
|
62
|
+
//# sourceMappingURL=cli-types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli-types.d.ts","sourceRoot":"","sources":["../src/cli-types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,WAAW,cAAc;IAC7B,cAAc,EAAE,KAAK,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,QAAQ,CAAC;IACnE,cAAc,EAAE,UAAU,QAAQ,CAAC;IACnC,aAAa,EAAE,UAAU,qBAAqB,CAAC;IAC/C,gBAAgB,EAAE,KAAK,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,KAAK,wBAAwB,CAAC;IACnF,cAAc,EAAE,UAAU,sBAAsB,CAAC;CAClD;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC,MAAM,GAAG;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACpC,CAAC,CAAC;IAEH,kBAAkB,CAChB,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC,MAAM,GAAG;QAClB,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC;QACnB,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;KACtB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC;IAE3B,cAAc,CACZ,YAAY,EAAE,MAAM,EACpB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC;IAE3B,OAAO,CACL,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,EACd,WAAW,CAAC,EAAE,MAAM,EACpB,SAAS,CAAC,EAAE,MAAM,GACjB,OAAO,CAAC;QACT,OAAO,EAAE,OAAO,CAAC;QACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE;YACN,YAAY,CAAC,EAAE,MAAM,CAAC;YACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;YAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;SACtB,CAAC;KACH,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE;QACN,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,QAAQ,CAAC,EAAE;QACT,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,CAAC;CACH;AAED,MAAM,WAAW,wBAAwB;CAExC;AAED,MAAM,WAAW,sBAAsB;CAEtC;AAED,MAAM,WAAW,qBAAqB;IACpC,UA
AU,CAAC,IAAI,IAAI,CAAC;IACpB,IAAI,CAAC,IAAI,IAAI,CAAC;IACd,SAAS,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;CAC9C"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NLP Evaluator - local, fast, free, deterministic assertions.
|
|
3
|
+
*
|
|
4
|
+
* Checks: contains, not_contains, matches, max_tokens, min_tokens, starts_with, ends_with
|
|
5
|
+
*/
|
|
6
|
+
import type { Evaluator, EvaluatorContext } from './types';
|
|
7
|
+
import type { AssertionDef, AssertionResult } from '../types';
|
|
8
|
+
/**
 * Declaration of the NLP evaluator (evaluator type "nlp").
 * Only the public surface is typed here; the private members below are listed
 * by name without signatures, as emitted for a declaration file.
 */
export declare class NlpEvaluator implements Evaluator {
    /** Evaluator type identifier. */
    readonly type = "nlp";
    /** Evaluate one assertion definition against the given context. */
    evaluate(assertion: AssertionDef, context: EvaluatorContext): Promise<AssertionResult>;

    private runCheck;

    // Individual check helpers.
    private checkContains;
    private checkNotContains;
    private checkMatches;
    private checkMaxTokens;
    private checkMinTokens;
    private checkStartsWith;
    private checkEndsWith;

    /**
     * Rough token estimation: ~4 characters per token (GPT-family average).
     * Intentionally approximate; use a real tokenizer for precise counts.
     */
    private estimateTokens;
    private toStringArray;
}
|
|
26
|
+
//# sourceMappingURL=NlpEvaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"NlpEvaluator.d.ts","sourceRoot":"","sources":["../../src/evaluators/NlpEvaluator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAC3D,OAAO,KAAK,EAAE,YAAY,EAAE,eAAe,EAAY,MAAM,UAAU,CAAC;AAExE,qBAAa,YAAa,YAAW,SAAS;IAC5C,QAAQ,CAAC,IAAI,SAAS;IAEhB,QAAQ,CAAC,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC;IAwB5F,OAAO,CAAC,QAAQ;IAyBhB,OAAO,CAAC,aAAa;IAiBrB,OAAO,CAAC,gBAAgB;IAiBxB,OAAO,CAAC,YAAY;IAepB,OAAO,CAAC,cAAc;IAetB,OAAO,CAAC,cAAc;IAetB,OAAO,CAAC,eAAe;IAevB,OAAO,CAAC,aAAa;IAerB;;;OAGG;IACH,OAAO,CAAC,cAAc;IAItB,OAAO,CAAC,aAAa;CAKtB"}
|