@prompd/test 0.5.0-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/EvaluatorEngine.d.ts +32 -0
- package/dist/EvaluatorEngine.d.ts.map +1 -0
- package/dist/EvaluatorEngine.js +97 -0
- package/dist/TestDiscovery.d.ts +28 -0
- package/dist/TestDiscovery.d.ts.map +1 -0
- package/dist/TestDiscovery.js +137 -0
- package/dist/TestParser.d.ts +25 -0
- package/dist/TestParser.d.ts.map +1 -0
- package/dist/TestParser.js +187 -0
- package/dist/TestRunner.d.ts +57 -0
- package/dist/TestRunner.d.ts.map +1 -0
- package/dist/TestRunner.js +463 -0
- package/dist/cli-types.d.ts +62 -0
- package/dist/cli-types.d.ts.map +1 -0
- package/dist/cli-types.js +6 -0
- package/dist/evaluators/NlpEvaluator.d.ts +26 -0
- package/dist/evaluators/NlpEvaluator.d.ts.map +1 -0
- package/dist/evaluators/NlpEvaluator.js +145 -0
- package/dist/evaluators/PrmdEvaluator.d.ts +42 -0
- package/dist/evaluators/PrmdEvaluator.d.ts.map +1 -0
- package/dist/evaluators/PrmdEvaluator.js +265 -0
- package/dist/evaluators/ScriptEvaluator.d.ts +19 -0
- package/dist/evaluators/ScriptEvaluator.d.ts.map +1 -0
- package/dist/evaluators/ScriptEvaluator.js +161 -0
- package/dist/evaluators/types.d.ts +19 -0
- package/dist/evaluators/types.d.ts.map +1 -0
- package/dist/evaluators/types.js +5 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +33 -0
- package/dist/reporters/ConsoleReporter.d.ts +17 -0
- package/dist/reporters/ConsoleReporter.d.ts.map +1 -0
- package/dist/reporters/ConsoleReporter.js +85 -0
- package/dist/reporters/JsonReporter.d.ts +11 -0
- package/dist/reporters/JsonReporter.d.ts.map +1 -0
- package/dist/reporters/JsonReporter.js +18 -0
- package/dist/reporters/JunitReporter.d.ts +15 -0
- package/dist/reporters/JunitReporter.d.ts.map +1 -0
- package/dist/reporters/JunitReporter.js +89 -0
- package/dist/reporters/types.d.ts +8 -0
- package/dist/reporters/types.d.ts.map +1 -0
- package/dist/reporters/types.js +5 -0
- package/dist/types.d.ts +115 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/package.json +34 -0
- package/src/EvaluatorEngine.ts +130 -0
- package/src/TestDiscovery.ts +133 -0
- package/src/TestParser.ts +235 -0
- package/src/TestRunner.ts +516 -0
- package/src/cli-types.ts +92 -0
- package/src/evaluators/NlpEvaluator.ts +184 -0
- package/src/evaluators/PrmdEvaluator.ts +284 -0
- package/src/evaluators/ScriptEvaluator.ts +149 -0
- package/src/evaluators/types.ts +24 -0
- package/src/index.ts +76 -0
- package/src/reporters/ConsoleReporter.ts +100 -0
- package/src/reporters/JsonReporter.ts +21 -0
- package/src/reporters/JunitReporter.ts +113 -0
- package/src/reporters/types.ts +9 -0
- package/src/types.ts +133 -0
- package/tsconfig.json +20 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @prompd/test - Prompt testing and evaluation framework
|
|
3
|
+
*
|
|
4
|
+
* Provides test discovery, assertion evaluation, and reporting for .prmd files.
|
|
5
|
+
* Consumes @prompd/cli for compilation and execution.
|
|
6
|
+
*/
|
|
7
|
+
export { TestRunner } from './TestRunner';
|
|
8
|
+
export { TestParser, TestParseError } from './TestParser';
|
|
9
|
+
export { TestDiscovery } from './TestDiscovery';
|
|
10
|
+
export { EvaluatorEngine } from './EvaluatorEngine';
|
|
11
|
+
export { NlpEvaluator } from './evaluators/NlpEvaluator';
|
|
12
|
+
export { ScriptEvaluator } from './evaluators/ScriptEvaluator';
|
|
13
|
+
export { PrmdEvaluator } from './evaluators/PrmdEvaluator';
|
|
14
|
+
export { ConsoleReporter } from './reporters/ConsoleReporter';
|
|
15
|
+
export { JsonReporter } from './reporters/JsonReporter';
|
|
16
|
+
export { JunitReporter } from './reporters/JunitReporter';
|
|
17
|
+
export type { TestSuite, TestCase, AssertionDef, TestResult, TestRunResult, TestSuiteResult, TestRunSummary, TestRunOptions, TestProgressEvent, TestProgressCallback, TestStatus, AssertionStatus, AssertionResult, EvaluatorType, NlpCheck, } from './types';
|
|
18
|
+
export type { Evaluator, EvaluatorContext, } from './evaluators/types';
|
|
19
|
+
export type { Reporter, } from './reporters/types';
|
|
20
|
+
export type { DiscoveryResult, DiscoveryError, } from './TestDiscovery';
|
|
21
|
+
export type { EvaluatorEngineOptions, } from './EvaluatorEngine';
|
|
22
|
+
export type { TestHarness, TestHarnessResult, TestHarnessOptions, TestHarnessProgressEvent, TestHarnessProgressCallback, } from '@prompd/cli';
|
|
23
|
+
export type { PrmdEvaluatorOptions, } from './evaluators/PrmdEvaluator';
|
|
24
|
+
export type { CompilerModule, } from './cli-types';
|
|
25
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAG3D,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAG1D,YAAY,EACV,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,UAAU,EACV,aAAa,EACb,eAAe,EACf,cAAc,EACd,cAAc,EACd,iBAAiB,EACjB,oBAAoB,EACpB,UAAU,EACV,eAAe,EACf,eAAe,EACf,aAAa,EACb,QAAQ,GACT,MAAM,SAAS,CAAC;AAEjB,YAAY,EACV,SAAS,EACT,gBAAgB,GACjB,MAAM,oBAAoB,CAAC;AAE5B,YAAY,EACV,QAAQ,GACT,MAAM,mBAAmB,CAAC;AAE3B,YAAY,EACV,eAAe,EACf,cAAc,GACf,MAAM,iBAAiB,CAAC;AAEzB,YAAY,EACV,sBAAsB,GACvB,MAAM,mBAAmB,CAAC;AAG3B,YAAY,EACV,WAAW,EACX,iBAAiB,EACjB,kBAAkB,EAClB,wBAAwB,EACxB,2BAA2B,GAC5B,MAAM,aAAa,CAAC;AAErB,YAAY,EACV,oBAAoB,GACrB,MAAM,4BAA4B,CAAC;AAEpC,YAAY,EACV,cAAc,GACf,MAAM,aAAa,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* @prompd/test - Prompt testing and evaluation framework
|
|
4
|
+
*
|
|
5
|
+
* Provides test discovery, assertion evaluation, and reporting for .prmd files.
|
|
6
|
+
* Consumes @prompd/cli for compilation and execution.
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.JunitReporter = exports.JsonReporter = exports.ConsoleReporter = exports.PrmdEvaluator = exports.ScriptEvaluator = exports.NlpEvaluator = exports.EvaluatorEngine = exports.TestDiscovery = exports.TestParseError = exports.TestParser = exports.TestRunner = void 0;
|
|
10
|
+
// Core classes
|
|
11
|
+
var TestRunner_1 = require("./TestRunner");
|
|
12
|
+
Object.defineProperty(exports, "TestRunner", { enumerable: true, get: function () { return TestRunner_1.TestRunner; } });
|
|
13
|
+
var TestParser_1 = require("./TestParser");
|
|
14
|
+
Object.defineProperty(exports, "TestParser", { enumerable: true, get: function () { return TestParser_1.TestParser; } });
|
|
15
|
+
Object.defineProperty(exports, "TestParseError", { enumerable: true, get: function () { return TestParser_1.TestParseError; } });
|
|
16
|
+
var TestDiscovery_1 = require("./TestDiscovery");
|
|
17
|
+
Object.defineProperty(exports, "TestDiscovery", { enumerable: true, get: function () { return TestDiscovery_1.TestDiscovery; } });
|
|
18
|
+
var EvaluatorEngine_1 = require("./EvaluatorEngine");
|
|
19
|
+
Object.defineProperty(exports, "EvaluatorEngine", { enumerable: true, get: function () { return EvaluatorEngine_1.EvaluatorEngine; } });
|
|
20
|
+
// Evaluators
|
|
21
|
+
var NlpEvaluator_1 = require("./evaluators/NlpEvaluator");
|
|
22
|
+
Object.defineProperty(exports, "NlpEvaluator", { enumerable: true, get: function () { return NlpEvaluator_1.NlpEvaluator; } });
|
|
23
|
+
var ScriptEvaluator_1 = require("./evaluators/ScriptEvaluator");
|
|
24
|
+
Object.defineProperty(exports, "ScriptEvaluator", { enumerable: true, get: function () { return ScriptEvaluator_1.ScriptEvaluator; } });
|
|
25
|
+
var PrmdEvaluator_1 = require("./evaluators/PrmdEvaluator");
|
|
26
|
+
Object.defineProperty(exports, "PrmdEvaluator", { enumerable: true, get: function () { return PrmdEvaluator_1.PrmdEvaluator; } });
|
|
27
|
+
// Reporters
|
|
28
|
+
var ConsoleReporter_1 = require("./reporters/ConsoleReporter");
|
|
29
|
+
Object.defineProperty(exports, "ConsoleReporter", { enumerable: true, get: function () { return ConsoleReporter_1.ConsoleReporter; } });
|
|
30
|
+
var JsonReporter_1 = require("./reporters/JsonReporter");
|
|
31
|
+
Object.defineProperty(exports, "JsonReporter", { enumerable: true, get: function () { return JsonReporter_1.JsonReporter; } });
|
|
32
|
+
var JunitReporter_1 = require("./reporters/JunitReporter");
|
|
33
|
+
Object.defineProperty(exports, "JunitReporter", { enumerable: true, get: function () { return JunitReporter_1.JunitReporter; } });
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Console Reporter - terminal output with pass/fail formatting.
|
|
3
|
+
*
|
|
4
|
+
* Does NOT use emojis (breaks things per project rules).
|
|
5
|
+
* Uses simple text markers: [PASS], [FAIL], [ERROR], [SKIP].
|
|
6
|
+
*/
|
|
7
|
+
import type { Reporter } from './types';
|
|
8
|
+
import type { TestRunResult } from '../types';
|
|
9
|
+
/**
 * Reporter that renders a test run as plain text for a terminal.
 * Status markers are ASCII-only; no emojis are used.
 */
export declare class ConsoleReporter implements Reporter {
    /** Whether assertion details are listed for every test rather than only failing/erroring ones. */
    private verbose;
    /**
     * @param verbose Optional flag enabling detailed assertion output.
     */
    constructor(verbose?: boolean);
    /**
     * Builds the report text for a completed run.
     * @param result The run to render.
     * @returns The formatted report as a single string.
     */
    report(result: TestRunResult): string;
    private appendAssertionDetail;
    private statusMarker;
    private formatDuration;
}
|
|
17
|
+
//# sourceMappingURL=ConsoleReporter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ConsoleReporter.d.ts","sourceRoot":"","sources":["../../src/reporters/ConsoleReporter.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACxC,OAAO,KAAK,EAAE,aAAa,EAA+B,MAAM,UAAU,CAAC;AAE3E,qBAAa,eAAgB,YAAW,QAAQ;IAC9C,OAAO,CAAC,OAAO,CAAU;gBAEb,OAAO,UAAQ;IAI3B,MAAM,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM;IAwDrC,OAAO,CAAC,qBAAqB;IAW7B,OAAO,CAAC,YAAY;IAUpB,OAAO,CAAC,cAAc;CAKvB"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Console Reporter - terminal output with pass/fail formatting.
|
|
4
|
+
*
|
|
5
|
+
* Does NOT use emojis (breaks things per project rules).
|
|
6
|
+
* Uses simple text markers: [PASS], [FAIL], [ERROR], [SKIP].
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.ConsoleReporter = void 0;
|
|
10
|
+
/**
 * Formats a test run as plain text for terminal output.
 *
 * Only ASCII status markers are emitted: [PASS], [FAIL], [ERR ], [SKIP].
 */
class ConsoleReporter {
    /**
     * @param verbose When true, assertion details (and their reasons) are listed
     *   for every test; otherwise only for tests whose status is 'fail' or 'error'.
     */
    constructor(verbose = false) {
        this.verbose = verbose;
    }
    /**
     * Builds the report: one section per suite followed by a summary block.
     *
     * @param result Object exposing `suites` (each with `suite` and `results`)
     *   and `summary`, as read below.
     * @returns The report as one newline-joined string; nothing is printed here.
     */
    report(result) {
        const lines = [];
        lines.push('');
        lines.push('=== Prompd Test Results ===');
        lines.push('');
        for (const suite of result.suites) {
            lines.push(` ${suite.suite}`);
            for (const test of suite.results) {
                const marker = this.statusMarker(test.status);
                const duration = this.formatDuration(test.duration);
                // Provider/model (and token count when reported) are shown only
                // when execution metadata is attached to the test result.
                const meta = test.execution
                    ? ` [${test.execution.provider}/${test.execution.model}${test.execution.usage?.totalTokens ? ` ${test.execution.usage.totalTokens}tok` : ''}]`
                    : '';
                lines.push(` ${marker} ${test.testName} (${duration})${meta}`);
                if (test.status === 'error' && test.error) {
                    lines.push(` Error: ${test.error}`);
                }
                if (this.verbose || test.status === 'fail' || test.status === 'error') {
                    for (const assertion of test.assertions) {
                        this.appendAssertionDetail(lines, assertion);
                    }
                }
            }
            lines.push('');
        }
        // Summary
        const s = result.summary;
        lines.push('---');
        lines.push(`Tests: ${s.passed} passed, ${s.failed} failed, ${s.errors} errors, ${s.skipped} skipped, ${s.total} total`);
        lines.push(`Time: ${this.formatDuration(s.duration)}`);
        if (s.totalTokens) {
            lines.push(`Tokens: ${s.totalTokens.toLocaleString()}`);
        }
        if (s.models && s.models.length > 0) {
            lines.push(`Models: ${s.models.join(', ')}`);
        }
        // Any failed or errored test makes the whole run a FAIL.
        lines.push(s.failed > 0 || s.errors > 0 ? 'Result: FAIL' : 'Result: PASS');
        lines.push('');
        return lines.join('\n');
    }
    /**
     * Appends one assertion line to `lines`, plus its reason when the assertion
     * did not pass or verbose mode is on.
     */
    appendAssertionDetail(lines, assertion) {
        const marker = this.statusMarker(assertion.status);
        const check = assertion.check ? ` (${assertion.check})` : '';
        const duration = this.formatDuration(assertion.duration);
        lines.push(` ${marker} ${assertion.evaluator}${check} [${duration}]`);
        if (assertion.reason && (assertion.status !== 'pass' || this.verbose)) {
            lines.push(` ${assertion.reason}`);
        }
    }
    /** Maps a status string to its fixed-width text marker; unknown values yield '[????]'. */
    statusMarker(status) {
        switch (status) {
            case 'pass': return '[PASS]';
            case 'fail': return '[FAIL]';
            case 'error': return '[ERR ]';
            case 'skip': return '[SKIP]';
            default: return '[????]';
        }
    }
    /**
     * Formats a duration given in milliseconds as `Nms`, `N.Ns` or `N.Nm`.
     *
     * The value is rounded to whole milliseconds first so fractional timer
     * readings do not leak long decimals into the output. The unit is chosen
     * from the rounded display value, so a duration just under a minute is
     * shown as "1.0m" rather than "60.0s".
     */
    formatDuration(ms) {
        const rounded = Math.round(ms);
        if (rounded < 1000)
            return `${rounded}ms`;
        const seconds = (rounded / 1000).toFixed(1);
        if (Number(seconds) < 60)
            return `${seconds}s`;
        return `${(rounded / 60000).toFixed(1)}m`;
    }
}
|
|
85
|
+
exports.ConsoleReporter = ConsoleReporter;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON Reporter - structured output for programmatic consumption and CI.
|
|
3
|
+
*/
|
|
4
|
+
import type { Reporter } from './types';
|
|
5
|
+
import type { TestRunResult } from '../types';
|
|
6
|
+
/**
 * Reporter that serializes a test run to JSON for programmatic consumers and CI.
 */
export declare class JsonReporter implements Reporter {
    /** Whether the JSON output is indented. */
    private pretty;
    /**
     * @param pretty Optional flag selecting indented output.
     */
    constructor(pretty?: boolean);
    /**
     * @param result The run to serialize.
     * @returns The JSON text.
     */
    report(result: TestRunResult): string;
}
|
|
11
|
+
//# sourceMappingURL=JsonReporter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"JsonReporter.d.ts","sourceRoot":"","sources":["../../src/reporters/JsonReporter.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACxC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAE9C,qBAAa,YAAa,YAAW,QAAQ;IAC3C,OAAO,CAAC,MAAM,CAAU;gBAEZ,MAAM,UAAO;IAIzB,MAAM,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM;CAMtC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* JSON Reporter - structured output for programmatic consumption and CI.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.JsonReporter = void 0;
|
|
7
|
+
/**
 * Serializes a test run result to a JSON string.
 */
class JsonReporter {
    /**
     * @param pretty When truthy (the default), output is indented by two spaces;
     *   otherwise the compact form is produced.
     */
    constructor(pretty = true) {
        this.pretty = pretty;
    }
    /**
     * @param result Value to serialize.
     * @returns The JSON text produced by JSON.stringify.
     */
    report(result) {
        // An undefined indent makes JSON.stringify emit the compact form.
        const indent = this.pretty ? 2 : undefined;
        return JSON.stringify(result, null, indent);
    }
}
|
|
18
|
+
exports.JsonReporter = JsonReporter;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JUnit XML Reporter - generates JUnit-compatible XML for CI systems.
|
|
3
|
+
*
|
|
4
|
+
* Output format follows the JUnit XML schema used by Jenkins, GitHub Actions,
|
|
5
|
+
* Azure DevOps, and most CI platforms.
|
|
6
|
+
*/
|
|
7
|
+
import type { Reporter } from './types';
|
|
8
|
+
import type { TestRunResult } from '../types';
|
|
9
|
+
/**
 * Reporter that renders a test run as JUnit-style XML for CI systems.
 */
export declare class JunitReporter implements Reporter {
    /**
     * @param result The run to render.
     * @returns The XML document as a string.
     */
    report(result: TestRunResult): string;
    private appendTestCase;
    private buildFailureDetail;
    private escapeXml;
}
|
|
15
|
+
//# sourceMappingURL=JunitReporter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"JunitReporter.d.ts","sourceRoot":"","sources":["../../src/reporters/JunitReporter.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACxC,OAAO,KAAK,EAAE,aAAa,EAAc,MAAM,UAAU,CAAC;AAE1D,qBAAa,aAAc,YAAW,QAAQ;IAC5C,MAAM,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM;IAwCrC,OAAO,CAAC,cAAc;IAyCtB,OAAO,CAAC,kBAAkB;IAY1B,OAAO,CAAC,SAAS;CAQlB"}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* JUnit XML Reporter - generates JUnit-compatible XML for CI systems.
|
|
4
|
+
*
|
|
5
|
+
* Output format follows the JUnit XML schema used by Jenkins, GitHub Actions,
|
|
6
|
+
* Azure DevOps, and most CI platforms.
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.JunitReporter = void 0;
|
|
10
|
+
/**
 * Generates a JUnit-style XML document from a test run result.
 *
 * Every piece of dynamic text (names, messages, captured output) goes through
 * escapeXml before it is placed in the document.
 */
class JunitReporter {
    /**
     * Builds the whole document: a <testsuites> root, one <testsuite> per
     * suite and one <testcase> per test.
     *
     * @param result Object exposing `summary` and `suites`, as read below.
     * @returns The XML text, newline-joined.
     */
    report(result) {
        const lines = [];
        lines.push('<?xml version="1.0" encoding="UTF-8"?>');
        // Durations are divided by 1000 and written with three decimals.
        lines.push(`<testsuites tests="${result.summary.total}" ` +
            `failures="${result.summary.failed}" ` +
            `errors="${result.summary.errors}" ` +
            `skipped="${result.summary.skipped}" ` +
            `time="${(result.summary.duration / 1000).toFixed(3)}">`);
        for (const suite of result.suites) {
            const suiteTests = suite.results.length;
            const suiteFailures = suite.results.filter(r => r.status === 'fail').length;
            const suiteErrors = suite.results.filter(r => r.status === 'error').length;
            const suiteSkipped = suite.results.filter(r => r.status === 'skip').length;
            const suiteDuration = suite.results.reduce((sum, r) => sum + r.duration, 0);
            lines.push(` <testsuite name="${this.escapeXml(suite.suite)}" ` +
                `tests="${suiteTests}" ` +
                `failures="${suiteFailures}" ` +
                `errors="${suiteErrors}" ` +
                `skipped="${suiteSkipped}" ` +
                `time="${(suiteDuration / 1000).toFixed(3)}" ` +
                `file="${this.escapeXml(suite.testFilePath)}">`);
            for (const test of suite.results) {
                this.appendTestCase(lines, suite.suite, test);
            }
            lines.push(' </testsuite>');
        }
        lines.push('</testsuites>');
        return lines.join('\n');
    }
    /**
     * Appends the <testcase> element for one test, including a <failure>,
     * <error> or <skipped/> child matching its status and, when the test has
     * output, a <system-out> child.
     */
    appendTestCase(lines, suiteName, test) {
        const time = (test.duration / 1000).toFixed(3);
        lines.push(` <testcase name="${this.escapeXml(test.testName)}" ` +
            `classname="${this.escapeXml(suiteName)}" ` +
            `time="${time}">`);
        if (test.status === 'fail') {
            // The message attribute summarizes only the failed assertions;
            // the element body lists every assertion.
            const failedAssertions = test.assertions.filter(a => a.status === 'fail');
            const message = failedAssertions
                .map(a => `${a.evaluator}${a.check ? `(${a.check})` : ''}: ${a.reason || 'failed'}`)
                .join('; ');
            lines.push(` <failure message="${this.escapeXml(message)}">`);
            lines.push(this.escapeXml(this.buildFailureDetail(test)));
            lines.push(' </failure>');
        }
        if (test.status === 'error') {
            const errorMessage = test.error || 'Unknown error';
            lines.push(` <error message="${this.escapeXml(errorMessage)}">`);
            lines.push(this.escapeXml(errorMessage));
            lines.push(' </error>');
        }
        if (test.status === 'skip') {
            lines.push(' <skipped/>');
        }
        // Include output as system-out if available (capped at 10000 UTF-16 units).
        if (test.output) {
            lines.push(' <system-out>');
            lines.push(this.escapeXml(test.output.substring(0, 10000)));
            lines.push(' </system-out>');
        }
        lines.push(' </testcase>');
    }
    /**
     * Lists every assertion of a test, one per line, prefixed with a marker
     * for its own status so skipped or errored assertions are not shown as
     * failures. Unrecognized statuses fall back to '[FAIL]'.
     */
    buildFailureDetail(test) {
        const markers = { pass: '[PASS]', fail: '[FAIL]', error: '[ERR ]', skip: '[SKIP]' };
        const details = [];
        for (const assertion of test.assertions) {
            const prefix = markers[assertion.status] || '[FAIL]';
            const check = assertion.check ? ` (${assertion.check})` : '';
            details.push(`${prefix} ${assertion.evaluator}${check}: ${assertion.reason || ''}`);
        }
        return details.join('\n');
    }
    /**
     * Makes text safe for XML attribute values and element content.
     *
     * Characters outside the XML 1.0 Char range (most control characters and
     * lone surrogates, e.g. one left behind by truncating in the middle of a
     * surrogate pair) are removed, then the five predefined entities are
     * substituted. '&' is replaced first so the entities produced by the
     * later replacements are not escaped a second time.
     *
     * @param str Text to escape; null/undefined is treated as an empty string.
     */
    escapeXml(str) {
        return String(str ?? '')
            .replace(/[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/gu, '')
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&apos;');
    }
}
|
|
89
|
+
exports.JunitReporter = JunitReporter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/reporters/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAE9C,MAAM,WAAW,QAAQ;IACvB,MAAM,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM,CAAC;CACvC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core type definitions for @prompd/test
|
|
3
|
+
*/
|
|
4
|
+
/** Kinds of evaluator an assertion can be routed to. */
export type EvaluatorType = 'nlp' | 'script' | 'prmd';

/** Names of the checks an assertion may request via `check`. */
export type NlpCheck =
    | 'contains'
    | 'not_contains'
    | 'matches'
    | 'max_tokens'
    | 'min_tokens'
    | 'starts_with'
    | 'ends_with';

/** One assertion as declared in a test case. Only `evaluator` is required. */
export interface AssertionDef {
    evaluator: EvaluatorType;
    // NOTE(review): presumably `check`/`value` apply to 'nlp', `run` to 'script',
    // and `prompt`/`provider`/`model` to 'prmd' - confirm against the evaluators.
    check?: NlpCheck;
    value?: string | string[] | number;
    run?: string;
    prompt?: string;
    provider?: string;
    model?: string;
}

/** A single named test: parameters plus the assertions to evaluate. */
export interface TestCase {
    name: string;
    params: Record<string, unknown>;
    assert: AssertionDef[];
    expect_error?: boolean;
}

/** A group of test cases together with the target and test-file path they belong to. */
export interface TestSuite {
    name: string;
    description?: string;
    target: string;
    testFilePath: string;
    tests: TestCase[];
    evaluatorPrompt?: string;
}

/** Outcome of a whole test. */
export type TestStatus = 'pass' | 'fail' | 'error' | 'skip';

/** Outcome of a single assertion. */
export type AssertionStatus = 'pass' | 'fail' | 'error' | 'skip';

/** Result of evaluating one assertion. */
export interface AssertionResult {
    evaluator: EvaluatorType;
    check?: string;
    status: AssertionStatus;
    reason?: string;
    duration: number;
}

/** Execution details attached to a test result; token usage is optional. */
export interface TestExecutionMetadata {
    provider: string;
    model: string;
    duration: number;
    usage?: {
        promptTokens?: number;
        completionTokens?: number;
        totalTokens?: number;
    };
}

/** Result of running one test case. */
export interface TestResult {
    suite: string;
    testName: string;
    status: TestStatus;
    duration: number;
    assertions: AssertionResult[];
    output?: string;
    compiledInput?: string;
    error?: string;
    execution?: TestExecutionMetadata;
}

/** Aggregate counters for a run. */
export interface TestRunSummary {
    total: number;
    passed: number;
    failed: number;
    errors: number;
    skipped: number;
    duration: number;
    totalTokens?: number;
    providers?: string[];
    models?: string[];
}

/** Complete result of a run: per-suite results plus the summary. */
export interface TestRunResult {
    suites: TestSuiteResult[];
    summary: TestRunSummary;
}

/** Results of all tests belonging to one suite. */
export interface TestSuiteResult {
    suite: string;
    testFilePath: string;
    results: TestResult[];
}

/** Options accepted when starting a run; every field is optional. */
export interface TestRunOptions {
    evaluators?: EvaluatorType[];
    noLlm?: boolean;
    reporter?: 'console' | 'json' | 'junit';
    failFast?: boolean;
    runAll?: boolean;
    verbose?: boolean;
    workspaceRoot?: string;
    registryUrl?: string;
    provider?: string;
    model?: string;
}

/** Progress notification, discriminated by its `type` field. */
export type TestProgressEvent =
    | { type: 'suite_start'; suite: string; testCount: number }
    | { type: 'test_start'; suite: string; testName: string }
    | { type: 'test_complete'; suite: string; testName: string; result: TestResult }
    | { type: 'suite_complete'; suite: string; results: TestResult[] }
    | { type: 'assertion_complete'; suite: string; testName: string; assertion: AssertionResult };

/** Callback invoked with each progress event. */
export type TestProgressCallback = (event: TestProgressEvent) => void;
|
|
115
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,MAAM,aAAa,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEtD,MAAM,MAAM,QAAQ,GAChB,UAAU,GACV,cAAc,GACd,SAAS,GACT,YAAY,GACZ,YAAY,GACZ,aAAa,GACb,WAAW,CAAC;AAIhB,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,aAAa,CAAC;IAEzB,KAAK,CAAC,EAAE,QAAQ,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,MAAM,CAAC;IAEnC,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,MAAM,EAAE,YAAY,EAAE,CAAC;IACvB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAID,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;AAC5D,MAAM,MAAM,eAAe,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;AAEjE,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,aAAa,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,eAAe,CAAC;IACxB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE;QACN,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,UAAU,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;
IAC5B,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,OAAO,EAAE,cAAc,CAAC;CACzB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,UAAU,EAAE,CAAC;CACvB;AAID,MAAM,WAAW,cAAc;IAC7B,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;IAC7B,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,QAAQ,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;IACxC,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,MAAM,MAAM,iBAAiB,GACzB;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GACzD;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,GACvD;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,UAAU,CAAA;CAAE,GAC9E;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,UAAU,EAAE,CAAA;CAAE,GAChE;IAAE,IAAI,EAAE,oBAAoB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,eAAe,CAAA;CAAE,CAAC;AAEhG,MAAM,MAAM,oBAAoB,GAAG,CAAC,KAAK,EAAE,iBAAiB,KAAK,IAAI,CAAC"}
|
package/dist/types.js
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@prompd/test",
|
|
3
|
+
"version": "0.5.0-beta.9",
|
|
4
|
+
"description": "Prompt testing and evaluation framework for Prompd",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"build": "tsc",
|
|
9
|
+
"dev": "tsc --watch",
|
|
10
|
+
"typecheck": "tsc --noEmit",
|
|
11
|
+
"clean": "rm -rf dist"
|
|
12
|
+
},
|
|
13
|
+
"keywords": [
|
|
14
|
+
"test",
|
|
15
|
+
"eval",
|
|
16
|
+
"evaluator",
|
|
17
|
+
"prompt",
|
|
18
|
+
"prompd"
|
|
19
|
+
],
|
|
20
|
+
"author": "Prompd Team",
|
|
21
|
+
"license": "Elastic-2.0",
|
|
22
|
+
"dependencies": {
|
|
23
|
+
"glob": "^10.3.10",
|
|
24
|
+
"yaml": "^2.7.1"
|
|
25
|
+
},
|
|
26
|
+
"peerDependencies": {
|
|
27
|
+
"@prompd/cli": ">=0.5.0-beta.9"
|
|
28
|
+
},
|
|
29
|
+
"devDependencies": {
|
|
30
|
+
"@prompd/cli": "^0.5.0-beta.9",
|
|
31
|
+
"@types/node": "^18.19.17",
|
|
32
|
+
"typescript": "^5.7.3"
|
|
33
|
+
}
|
|
34
|
+
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Routes assertions to the correct evaluator and manages execution order.
|
|
3
|
+
*
|
|
4
|
+
* Execution order: nlp -> script -> prmd (cheap to expensive).
|
|
5
|
+
* Fail-fast by default — stops on first failure unless runAll is set.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { AssertionDef, AssertionResult, EvaluatorType } from './types';
|
|
9
|
+
import type { Evaluator, EvaluatorContext } from './evaluators/types';
|
|
10
|
+
import type { CompilerModule } from './cli-types';
|
|
11
|
+
import { NlpEvaluator } from './evaluators/NlpEvaluator';
|
|
12
|
+
import { ScriptEvaluator } from './evaluators/ScriptEvaluator';
|
|
13
|
+
import { PrmdEvaluator, type PrmdEvaluatorOptions } from './evaluators/PrmdEvaluator';
|
|
14
|
+
|
|
15
|
+
/**
 * Run order of the evaluator types: assertions are sorted ascending by this
 * number, so the type mapped to the lowest value is evaluated first.
 */
const EVALUATOR_PRIORITY: Record<EvaluatorType, number> = { nlp: 0, script: 1, prmd: 2 };
|
|
21
|
+
|
|
22
|
+
/** Construction options for EvaluatorEngine. Only `testFileDir` is required. */
export interface EvaluatorEngineOptions {
  /** Directory handed to the script evaluator and to the prmd evaluator. */
  testFileDir: string;
  /** Forwarded unchanged to the prmd evaluator. */
  evaluatorPrompt?: string;
  /** Forwarded unchanged to the prmd evaluator. */
  workspaceRoot?: string;
  /** Forwarded unchanged to the prmd evaluator. */
  registryUrl?: string;
  /** Evaluator types that may run; when omitted, all of nlp, script and prmd are allowed. */
  allowedEvaluators?: EvaluatorType[];
  /** Stop after the first non-passing assertion. Enabled unless explicitly set to false. */
  failFast?: boolean;
  /** Forwarded unchanged to the prmd evaluator. */
  cliModule?: CompilerModule;
  /** Forwarded unchanged to the prmd evaluator. */
  provider?: string;
  /** Forwarded unchanged to the prmd evaluator. */
  model?: string;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Routes each assertion to the evaluator registered for its type.
 *
 * Assertions are evaluated in ascending EVALUATOR_PRIORITY order
 * (nlp, then script, then prmd). With fail-fast enabled (the default), the
 * first result whose status is not 'pass' ends the run and every assertion
 * still pending is reported as skipped.
 */
export class EvaluatorEngine {
  private evaluators: Map<EvaluatorType, Evaluator>;
  private allowedEvaluators: Set<EvaluatorType>;
  private failFast: boolean;

  /**
   * @param options Engine configuration. `failFast` is on unless explicitly
   *   false; `allowedEvaluators` defaults to all three evaluator types.
   */
  constructor(options: EvaluatorEngineOptions) {
    this.failFast = options.failFast !== false;
    this.allowedEvaluators = new Set(options.allowedEvaluators || ['nlp', 'script', 'prmd']);

    const prmdOptions: PrmdEvaluatorOptions = {
      testFileDir: options.testFileDir,
      evaluatorPrompt: options.evaluatorPrompt,
      workspaceRoot: options.workspaceRoot,
      registryUrl: options.registryUrl,
      cliModule: options.cliModule,
      provider: options.provider,
      model: options.model,
    };

    this.evaluators = new Map<EvaluatorType, Evaluator>([
      ['nlp', new NlpEvaluator()],
      ['script', new ScriptEvaluator(options.testFileDir)],
      ['prmd', new PrmdEvaluator(prmdOptions)],
    ]);
  }

  /**
   * Evaluate all assertions in cost-priority order.
   *
   * @param assertions Assertions to evaluate; the array itself is not reordered.
   * @param context Passed unchanged to every evaluator.
   * @param onResult Optional callback invoked once per produced result,
   *   including synthesized skip/error results, in the order they are produced.
   * @returns One result per assertion, in evaluation (sorted) order.
   */
  async evaluate(
    assertions: AssertionDef[],
    context: EvaluatorContext,
    onResult?: (result: AssertionResult) => void
  ): Promise<AssertionResult[]> {
    const results: AssertionResult[] = [];
    const record = (result: AssertionResult): void => {
      results.push(result);
      onResult?.(result);
    };

    // Sort a copy by evaluator priority (nlp first, prmd last).
    const sorted = [...assertions].sort(
      (a, b) => EVALUATOR_PRIORITY[a.evaluator] - EVALUATOR_PRIORITY[b.evaluator]
    );

    for (const [index, assertion] of sorted.entries()) {
      // Evaluator types excluded by the filter are reported as skipped and
      // never trigger fail-fast.
      if (!this.allowedEvaluators.has(assertion.evaluator)) {
        record(
          this.syntheticResult(
            assertion,
            'skip',
            `Evaluator type "${assertion.evaluator}" skipped by filter`
          )
        );
        continue;
      }

      const evaluator = this.evaluators.get(assertion.evaluator);
      if (!evaluator) {
        record(
          this.syntheticResult(
            assertion,
            'error',
            `No evaluator registered for type "${assertion.evaluator}"`
          )
        );
        continue;
      }

      const result = await evaluator.evaluate(assertion, context);
      record(result);

      // Fail-fast: stop on the first non-passing result and mark everything
      // after the current position as skipped. The loop index is used rather
      // than an identity lookup, which would pick the wrong position when the
      // same assertion object occurs more than once.
      if (this.failFast && result.status !== 'pass') {
        for (const pending of sorted.slice(index + 1)) {
          record(
            this.syntheticResult(pending, 'skip', 'Skipped due to prior failure (fail-fast)')
          );
        }
        break;
      }
    }

    return results;
  }

  /** Builds a zero-duration result for an assertion that was not actually evaluated. */
  private syntheticResult(
    assertion: AssertionDef,
    status: AssertionResult['status'],
    reason: string
  ): AssertionResult {
    return {
      evaluator: assertion.evaluator,
      check: assertion.check,
      status,
      reason,
      duration: 0,
    };
  }
}
|