@prompd/test 0.5.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/EvaluatorEngine.d.ts +32 -0
  2. package/dist/EvaluatorEngine.d.ts.map +1 -0
  3. package/dist/EvaluatorEngine.js +97 -0
  4. package/dist/TestDiscovery.d.ts +28 -0
  5. package/dist/TestDiscovery.d.ts.map +1 -0
  6. package/dist/TestDiscovery.js +137 -0
  7. package/dist/TestParser.d.ts +25 -0
  8. package/dist/TestParser.d.ts.map +1 -0
  9. package/dist/TestParser.js +187 -0
  10. package/dist/TestRunner.d.ts +57 -0
  11. package/dist/TestRunner.d.ts.map +1 -0
  12. package/dist/TestRunner.js +463 -0
  13. package/dist/cli-types.d.ts +62 -0
  14. package/dist/cli-types.d.ts.map +1 -0
  15. package/dist/cli-types.js +6 -0
  16. package/dist/evaluators/NlpEvaluator.d.ts +30 -0
  17. package/dist/evaluators/NlpEvaluator.d.ts.map +1 -0
  18. package/dist/evaluators/NlpEvaluator.js +183 -0
  19. package/dist/evaluators/PrmdEvaluator.d.ts +42 -0
  20. package/dist/evaluators/PrmdEvaluator.d.ts.map +1 -0
  21. package/dist/evaluators/PrmdEvaluator.js +265 -0
  22. package/dist/evaluators/ScriptEvaluator.d.ts +19 -0
  23. package/dist/evaluators/ScriptEvaluator.d.ts.map +1 -0
  24. package/dist/evaluators/ScriptEvaluator.js +163 -0
  25. package/dist/evaluators/types.d.ts +19 -0
  26. package/dist/evaluators/types.d.ts.map +1 -0
  27. package/dist/evaluators/types.js +5 -0
  28. package/dist/index.d.ts +25 -0
  29. package/dist/index.d.ts.map +1 -0
  30. package/dist/index.js +33 -0
  31. package/dist/reporters/ConsoleReporter.d.ts +17 -0
  32. package/dist/reporters/ConsoleReporter.d.ts.map +1 -0
  33. package/dist/reporters/ConsoleReporter.js +85 -0
  34. package/dist/reporters/JsonReporter.d.ts +11 -0
  35. package/dist/reporters/JsonReporter.d.ts.map +1 -0
  36. package/dist/reporters/JsonReporter.js +18 -0
  37. package/dist/reporters/JunitReporter.d.ts +15 -0
  38. package/dist/reporters/JunitReporter.d.ts.map +1 -0
  39. package/dist/reporters/JunitReporter.js +89 -0
  40. package/dist/reporters/types.d.ts +8 -0
  41. package/dist/reporters/types.d.ts.map +1 -0
  42. package/dist/reporters/types.js +5 -0
  43. package/dist/types.d.ts +119 -0
  44. package/dist/types.d.ts.map +1 -0
  45. package/dist/types.js +5 -0
  46. package/package.json +34 -0
  47. package/src/EvaluatorEngine.ts +130 -0
  48. package/src/TestDiscovery.ts +133 -0
  49. package/src/TestParser.ts +235 -0
  50. package/src/TestRunner.ts +516 -0
  51. package/src/cli-types.ts +92 -0
  52. package/src/evaluators/NlpEvaluator.ts +240 -0
  53. package/src/evaluators/PrmdEvaluator.ts +284 -0
  54. package/src/evaluators/ScriptEvaluator.ts +152 -0
  55. package/src/evaluators/types.ts +24 -0
  56. package/src/index.ts +76 -0
  57. package/src/reporters/ConsoleReporter.ts +100 -0
  58. package/src/reporters/JsonReporter.ts +21 -0
  59. package/src/reporters/JunitReporter.ts +113 -0
  60. package/src/reporters/types.ts +9 -0
  61. package/src/types.ts +140 -0
  62. package/tsconfig.json +20 -0
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Evaluator interfaces for @prompd/test
3
+ */
4
+ import type { AssertionDef, AssertionResult } from '../types';
5
+ export interface EvaluatorContext {
6
+ prompt: string;
7
+ response: string;
8
+ params: Record<string, unknown>;
9
+ metadata: {
10
+ provider: string;
11
+ model: string;
12
+ duration: number;
13
+ };
14
+ }
15
+ export interface Evaluator {
16
+ readonly type: string;
17
+ evaluate(assertion: AssertionDef, context: EvaluatorContext): Promise<AssertionResult>;
18
+ }
19
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/evaluators/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAE9D,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,QAAQ,EAAE;QACR,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;CACH;AAED,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CACN,SAAS,EAAE,YAAY,EACvB,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,eAAe,CAAC,CAAC;CAC7B"}
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+ /**
3
+ * Evaluator interfaces for @prompd/test
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,25 @@
1
+ /**
2
+ * @prompd/test - Prompt testing and evaluation framework
3
+ *
4
+ * Provides test discovery, assertion evaluation, and reporting for .prmd files.
5
+ * Consumes @prompd/cli for compilation and execution.
6
+ */
7
+ export { TestRunner } from './TestRunner';
8
+ export { TestParser, TestParseError } from './TestParser';
9
+ export { TestDiscovery } from './TestDiscovery';
10
+ export { EvaluatorEngine } from './EvaluatorEngine';
11
+ export { NlpEvaluator } from './evaluators/NlpEvaluator';
12
+ export { ScriptEvaluator } from './evaluators/ScriptEvaluator';
13
+ export { PrmdEvaluator } from './evaluators/PrmdEvaluator';
14
+ export { ConsoleReporter } from './reporters/ConsoleReporter';
15
+ export { JsonReporter } from './reporters/JsonReporter';
16
+ export { JunitReporter } from './reporters/JunitReporter';
17
+ export type { TestSuite, TestCase, AssertionDef, TestResult, TestRunResult, TestSuiteResult, TestRunSummary, TestRunOptions, TestProgressEvent, TestProgressCallback, TestStatus, AssertionStatus, AssertionResult, EvaluatorType, NlpCheck, } from './types';
18
+ export type { Evaluator, EvaluatorContext, } from './evaluators/types';
19
+ export type { Reporter, } from './reporters/types';
20
+ export type { DiscoveryResult, DiscoveryError, } from './TestDiscovery';
21
+ export type { EvaluatorEngineOptions, } from './EvaluatorEngine';
22
+ export type { TestHarness, TestHarnessResult, TestHarnessOptions, TestHarnessProgressEvent, TestHarnessProgressCallback, } from '@prompd/cli';
23
+ export type { PrmdEvaluatorOptions, } from './evaluators/PrmdEvaluator';
24
+ export type { CompilerModule, } from './cli-types';
25
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAG3D,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAG1D,YAAY,EACV,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,UAAU,EACV,aAAa,EACb,eAAe,EACf,cAAc,EACd,cAAc,EACd,iBAAiB,EACjB,oBAAoB,EACpB,UAAU,EACV,eAAe,EACf,eAAe,EACf,aAAa,EACb,QAAQ,GACT,MAAM,SAAS,CAAC;AAEjB,YAAY,EACV,SAAS,EACT,gBAAgB,GACjB,MAAM,oBAAoB,CAAC;AAE5B,YAAY,EACV,QAAQ,GACT,MAAM,mBAAmB,CAAC;AAE3B,YAAY,EACV,eAAe,EACf,cAAc,GACf,MAAM,iBAAiB,CAAC;AAEzB,YAAY,EACV,sBAAsB,GACvB,MAAM,mBAAmB,CAAC;AAG3B,YAAY,EACV,WAAW,EACX,iBAAiB,EACjB,kBAAkB,EAClB,wBAAwB,EACxB,2BAA2B,GAC5B,MAAM,aAAa,CAAC;AAErB,YAAY,EACV,oBAAoB,GACrB,MAAM,4BAA4B,CAAC;AAEpC,YAAY,EACV,cAAc,GACf,MAAM,aAAa,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,33 @@
1
+ "use strict";
2
+ /**
3
+ * @prompd/test - Prompt testing and evaluation framework
4
+ *
5
+ * Provides test discovery, assertion evaluation, and reporting for .prmd files.
6
+ * Consumes @prompd/cli for compilation and execution.
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.JunitReporter = exports.JsonReporter = exports.ConsoleReporter = exports.PrmdEvaluator = exports.ScriptEvaluator = exports.NlpEvaluator = exports.EvaluatorEngine = exports.TestDiscovery = exports.TestParseError = exports.TestParser = exports.TestRunner = void 0;
10
+ // Core classes
11
+ var TestRunner_1 = require("./TestRunner");
12
+ Object.defineProperty(exports, "TestRunner", { enumerable: true, get: function () { return TestRunner_1.TestRunner; } });
13
+ var TestParser_1 = require("./TestParser");
14
+ Object.defineProperty(exports, "TestParser", { enumerable: true, get: function () { return TestParser_1.TestParser; } });
15
+ Object.defineProperty(exports, "TestParseError", { enumerable: true, get: function () { return TestParser_1.TestParseError; } });
16
+ var TestDiscovery_1 = require("./TestDiscovery");
17
+ Object.defineProperty(exports, "TestDiscovery", { enumerable: true, get: function () { return TestDiscovery_1.TestDiscovery; } });
18
+ var EvaluatorEngine_1 = require("./EvaluatorEngine");
19
+ Object.defineProperty(exports, "EvaluatorEngine", { enumerable: true, get: function () { return EvaluatorEngine_1.EvaluatorEngine; } });
20
+ // Evaluators
21
+ var NlpEvaluator_1 = require("./evaluators/NlpEvaluator");
22
+ Object.defineProperty(exports, "NlpEvaluator", { enumerable: true, get: function () { return NlpEvaluator_1.NlpEvaluator; } });
23
+ var ScriptEvaluator_1 = require("./evaluators/ScriptEvaluator");
24
+ Object.defineProperty(exports, "ScriptEvaluator", { enumerable: true, get: function () { return ScriptEvaluator_1.ScriptEvaluator; } });
25
+ var PrmdEvaluator_1 = require("./evaluators/PrmdEvaluator");
26
+ Object.defineProperty(exports, "PrmdEvaluator", { enumerable: true, get: function () { return PrmdEvaluator_1.PrmdEvaluator; } });
27
+ // Reporters
28
+ var ConsoleReporter_1 = require("./reporters/ConsoleReporter");
29
+ Object.defineProperty(exports, "ConsoleReporter", { enumerable: true, get: function () { return ConsoleReporter_1.ConsoleReporter; } });
30
+ var JsonReporter_1 = require("./reporters/JsonReporter");
31
+ Object.defineProperty(exports, "JsonReporter", { enumerable: true, get: function () { return JsonReporter_1.JsonReporter; } });
32
+ var JunitReporter_1 = require("./reporters/JunitReporter");
33
+ Object.defineProperty(exports, "JunitReporter", { enumerable: true, get: function () { return JunitReporter_1.JunitReporter; } });
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Console Reporter - terminal output with pass/fail formatting.
3
+ *
4
+ * Does NOT use emojis (breaks things per project rules).
5
+ * Uses simple text markers: [PASS], [FAIL], [ERROR], [SKIP].
6
+ */
7
+ import type { Reporter } from './types';
8
+ import type { TestRunResult } from '../types';
9
+ export declare class ConsoleReporter implements Reporter {
10
+ private verbose;
11
+ constructor(verbose?: boolean);
12
+ report(result: TestRunResult): string;
13
+ private appendAssertionDetail;
14
+ private statusMarker;
15
+ private formatDuration;
16
+ }
17
+ //# sourceMappingURL=ConsoleReporter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ConsoleReporter.d.ts","sourceRoot":"","sources":["../../src/reporters/ConsoleReporter.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACxC,OAAO,KAAK,EAAE,aAAa,EAA+B,MAAM,UAAU,CAAC;AAE3E,qBAAa,eAAgB,YAAW,QAAQ;IAC9C,OAAO,CAAC,OAAO,CAAU;gBAEb,OAAO,UAAQ;IAI3B,MAAM,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM;IAwDrC,OAAO,CAAC,qBAAqB;IAW7B,OAAO,CAAC,YAAY;IAUpB,OAAO,CAAC,cAAc;CAKvB"}
@@ -0,0 +1,85 @@
1
+ "use strict";
2
+ /**
3
+ * Console Reporter - terminal output with pass/fail formatting.
4
+ *
5
+ * Does NOT use emojis (breaks things per project rules).
6
+ * Uses simple text markers: [PASS], [FAIL], [ERROR], [SKIP].
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.ConsoleReporter = void 0;
10
+ class ConsoleReporter {
11
+ constructor(verbose = false) {
12
+ this.verbose = verbose;
13
+ }
14
+ report(result) {
15
+ const lines = [];
16
+ lines.push('');
17
+ lines.push('=== Prompd Test Results ===');
18
+ lines.push('');
19
+ for (const suite of result.suites) {
20
+ lines.push(` ${suite.suite}`);
21
+ for (const test of suite.results) {
22
+ const marker = this.statusMarker(test.status);
23
+ const duration = this.formatDuration(test.duration);
24
+ const meta = test.execution
25
+ ? ` [${test.execution.provider}/${test.execution.model}${test.execution.usage?.totalTokens ? ` ${test.execution.usage.totalTokens}tok` : ''}]`
26
+ : '';
27
+ lines.push(` ${marker} ${test.testName} (${duration})${meta}`);
28
+ if (test.status === 'error' && test.error) {
29
+ lines.push(` Error: ${test.error}`);
30
+ }
31
+ if (this.verbose || test.status === 'fail' || test.status === 'error') {
32
+ for (const assertion of test.assertions) {
33
+ this.appendAssertionDetail(lines, assertion);
34
+ }
35
+ }
36
+ }
37
+ lines.push('');
38
+ }
39
+ // Summary
40
+ const s = result.summary;
41
+ lines.push('---');
42
+ lines.push(`Tests: ${s.passed} passed, ${s.failed} failed, ${s.errors} errors, ${s.skipped} skipped, ${s.total} total`);
43
+ lines.push(`Time: ${this.formatDuration(s.duration)}`);
44
+ if (s.totalTokens) {
45
+ lines.push(`Tokens: ${s.totalTokens.toLocaleString()}`);
46
+ }
47
+ if (s.models && s.models.length > 0) {
48
+ lines.push(`Models: ${s.models.join(', ')}`);
49
+ }
50
+ if (s.failed > 0 || s.errors > 0) {
51
+ lines.push('Result: FAIL');
52
+ }
53
+ else {
54
+ lines.push('Result: PASS');
55
+ }
56
+ lines.push('');
57
+ return lines.join('\n');
58
+ }
59
+ appendAssertionDetail(lines, assertion) {
60
+ const marker = this.statusMarker(assertion.status);
61
+ const check = assertion.check ? ` (${assertion.check})` : '';
62
+ const duration = this.formatDuration(assertion.duration);
63
+ lines.push(` ${marker} ${assertion.evaluator}${check} [${duration}]`);
64
+ if (assertion.reason && (assertion.status !== 'pass' || this.verbose)) {
65
+ lines.push(` ${assertion.reason}`);
66
+ }
67
+ }
68
+ statusMarker(status) {
69
+ switch (status) {
70
+ case 'pass': return '[PASS]';
71
+ case 'fail': return '[FAIL]';
72
+ case 'error': return '[ERR ]';
73
+ case 'skip': return '[SKIP]';
74
+ default: return '[????]';
75
+ }
76
+ }
77
+ formatDuration(ms) {
78
+ if (ms < 1000)
79
+ return `${ms}ms`;
80
+ if (ms < 60000)
81
+ return `${(ms / 1000).toFixed(1)}s`;
82
+ return `${(ms / 60000).toFixed(1)}m`;
83
+ }
84
+ }
85
+ exports.ConsoleReporter = ConsoleReporter;
@@ -0,0 +1,11 @@
1
+ /**
2
+ * JSON Reporter - structured output for programmatic consumption and CI.
3
+ */
4
+ import type { Reporter } from './types';
5
+ import type { TestRunResult } from '../types';
6
+ export declare class JsonReporter implements Reporter {
7
+ private pretty;
8
+ constructor(pretty?: boolean);
9
+ report(result: TestRunResult): string;
10
+ }
11
+ //# sourceMappingURL=JsonReporter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"JsonReporter.d.ts","sourceRoot":"","sources":["../../src/reporters/JsonReporter.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACxC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAE9C,qBAAa,YAAa,YAAW,QAAQ;IAC3C,OAAO,CAAC,MAAM,CAAU;gBAEZ,MAAM,UAAO;IAIzB,MAAM,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM;CAMtC"}
@@ -0,0 +1,18 @@
1
+ "use strict";
2
+ /**
3
+ * JSON Reporter - structured output for programmatic consumption and CI.
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.JsonReporter = void 0;
7
+ class JsonReporter {
8
+ constructor(pretty = true) {
9
+ this.pretty = pretty;
10
+ }
11
+ report(result) {
12
+ if (this.pretty) {
13
+ return JSON.stringify(result, null, 2);
14
+ }
15
+ return JSON.stringify(result);
16
+ }
17
+ }
18
+ exports.JsonReporter = JsonReporter;
@@ -0,0 +1,15 @@
1
+ /**
2
+ * JUnit XML Reporter - generates JUnit-compatible XML for CI systems.
3
+ *
4
+ * Output format follows the JUnit XML schema used by Jenkins, GitHub Actions,
5
+ * Azure DevOps, and most CI platforms.
6
+ */
7
+ import type { Reporter } from './types';
8
+ import type { TestRunResult } from '../types';
9
+ export declare class JunitReporter implements Reporter {
10
+ report(result: TestRunResult): string;
11
+ private appendTestCase;
12
+ private buildFailureDetail;
13
+ private escapeXml;
14
+ }
15
+ //# sourceMappingURL=JunitReporter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"JunitReporter.d.ts","sourceRoot":"","sources":["../../src/reporters/JunitReporter.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACxC,OAAO,KAAK,EAAE,aAAa,EAAc,MAAM,UAAU,CAAC;AAE1D,qBAAa,aAAc,YAAW,QAAQ;IAC5C,MAAM,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM;IAwCrC,OAAO,CAAC,cAAc;IAyCtB,OAAO,CAAC,kBAAkB;IAY1B,OAAO,CAAC,SAAS;CAQlB"}
@@ -0,0 +1,89 @@
1
+ "use strict";
2
+ /**
3
+ * JUnit XML Reporter - generates JUnit-compatible XML for CI systems.
4
+ *
5
+ * Output format follows the JUnit XML schema used by Jenkins, GitHub Actions,
6
+ * Azure DevOps, and most CI platforms.
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.JunitReporter = void 0;
10
+ class JunitReporter {
11
+ report(result) {
12
+ const lines = [];
13
+ lines.push('<?xml version="1.0" encoding="UTF-8"?>');
14
+ lines.push(`<testsuites tests="${result.summary.total}" ` +
15
+ `failures="${result.summary.failed}" ` +
16
+ `errors="${result.summary.errors}" ` +
17
+ `skipped="${result.summary.skipped}" ` +
18
+ `time="${(result.summary.duration / 1000).toFixed(3)}">`);
19
+ for (const suite of result.suites) {
20
+ const suiteTests = suite.results.length;
21
+ const suiteFailures = suite.results.filter(r => r.status === 'fail').length;
22
+ const suiteErrors = suite.results.filter(r => r.status === 'error').length;
23
+ const suiteSkipped = suite.results.filter(r => r.status === 'skip').length;
24
+ const suiteDuration = suite.results.reduce((sum, r) => sum + r.duration, 0);
25
+ lines.push(` <testsuite name="${this.escapeXml(suite.suite)}" ` +
26
+ `tests="${suiteTests}" ` +
27
+ `failures="${suiteFailures}" ` +
28
+ `errors="${suiteErrors}" ` +
29
+ `skipped="${suiteSkipped}" ` +
30
+ `time="${(suiteDuration / 1000).toFixed(3)}" ` +
31
+ `file="${this.escapeXml(suite.testFilePath)}">`);
32
+ for (const test of suite.results) {
33
+ this.appendTestCase(lines, suite.suite, test);
34
+ }
35
+ lines.push(' </testsuite>');
36
+ }
37
+ lines.push('</testsuites>');
38
+ return lines.join('\n');
39
+ }
40
+ appendTestCase(lines, suiteName, test) {
41
+ const time = (test.duration / 1000).toFixed(3);
42
+ lines.push(` <testcase name="${this.escapeXml(test.testName)}" ` +
43
+ `classname="${this.escapeXml(suiteName)}" ` +
44
+ `time="${time}">`);
45
+ if (test.status === 'fail') {
46
+ const failedAssertions = test.assertions.filter(a => a.status === 'fail');
47
+ const message = failedAssertions
48
+ .map(a => `${a.evaluator}${a.check ? `(${a.check})` : ''}: ${a.reason || 'failed'}`)
49
+ .join('; ');
50
+ lines.push(` <failure message="${this.escapeXml(message)}">`);
51
+ lines.push(this.escapeXml(this.buildFailureDetail(test)));
52
+ lines.push(' </failure>');
53
+ }
54
+ if (test.status === 'error') {
55
+ const errorMessage = test.error || 'Unknown error';
56
+ lines.push(` <error message="${this.escapeXml(errorMessage)}">`);
57
+ lines.push(this.escapeXml(errorMessage));
58
+ lines.push(' </error>');
59
+ }
60
+ if (test.status === 'skip') {
61
+ lines.push(' <skipped/>');
62
+ }
63
+ // Include output as system-out if available
64
+ if (test.output) {
65
+ lines.push(' <system-out>');
66
+ lines.push(this.escapeXml(test.output.substring(0, 10000)));
67
+ lines.push(' </system-out>');
68
+ }
69
+ lines.push(' </testcase>');
70
+ }
71
+ buildFailureDetail(test) {
72
+ const details = [];
73
+ for (const assertion of test.assertions) {
74
+ const prefix = assertion.status === 'pass' ? '[PASS]' : '[FAIL]';
75
+ const check = assertion.check ? ` (${assertion.check})` : '';
76
+ details.push(`${prefix} ${assertion.evaluator}${check}: ${assertion.reason || ''}`);
77
+ }
78
+ return details.join('\n');
79
+ }
80
+ escapeXml(str) {
81
+ return str
82
+ .replace(/&/g, '&amp;')
83
+ .replace(/</g, '&lt;')
84
+ .replace(/>/g, '&gt;')
85
+ .replace(/"/g, '&quot;')
86
+ .replace(/'/g, '&apos;');
87
+ }
88
+ }
89
+ exports.JunitReporter = JunitReporter;
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Reporter interface for @prompd/test
3
+ */
4
+ import type { TestRunResult } from '../types';
5
+ export interface Reporter {
6
+ report(result: TestRunResult): string;
7
+ }
8
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/reporters/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAE9C,MAAM,WAAW,QAAQ;IACvB,MAAM,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM,CAAC;CACvC"}
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+ /**
3
+ * Reporter interface for @prompd/test
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,119 @@
1
+ /**
2
+ * Core type definitions for @prompd/test
3
+ */
4
+ export type EvaluatorType = 'nlp' | 'script' | 'prmd';
5
+ export type NlpCheck = 'contains' | 'not_contains' | 'matches' | 'max_tokens' | 'min_tokens' | 'max_words' | 'min_words' | 'starts_with' | 'ends_with';
6
+ /** What the evaluator checks: the compiled prompt, the LLM response, or both */
7
+ export type EvaluateTarget = 'prompt' | 'response' | 'both';
8
+ export interface AssertionDef {
9
+ evaluator: EvaluatorType;
10
+ /** What to evaluate: 'prompt' (compiled input), 'response' (LLM output), or 'both'. Defaults to 'response'. */
11
+ evaluate?: EvaluateTarget;
12
+ check?: NlpCheck;
13
+ value?: string | string[] | number;
14
+ run?: string;
15
+ prompt?: string;
16
+ provider?: string;
17
+ model?: string;
18
+ }
19
+ export interface TestCase {
20
+ name: string;
21
+ params: Record<string, unknown>;
22
+ assert: AssertionDef[];
23
+ expect_error?: boolean;
24
+ }
25
+ export interface TestSuite {
26
+ name: string;
27
+ description?: string;
28
+ target: string;
29
+ testFilePath: string;
30
+ tests: TestCase[];
31
+ evaluatorPrompt?: string;
32
+ }
33
+ export type TestStatus = 'pass' | 'fail' | 'error' | 'skip';
34
+ export type AssertionStatus = 'pass' | 'fail' | 'error' | 'skip';
35
+ export interface AssertionResult {
36
+ evaluator: EvaluatorType;
37
+ check?: string;
38
+ status: AssertionStatus;
39
+ reason?: string;
40
+ duration: number;
41
+ }
42
+ export interface TestExecutionMetadata {
43
+ provider: string;
44
+ model: string;
45
+ duration: number;
46
+ usage?: {
47
+ promptTokens?: number;
48
+ completionTokens?: number;
49
+ totalTokens?: number;
50
+ };
51
+ }
52
+ export interface TestResult {
53
+ suite: string;
54
+ testName: string;
55
+ status: TestStatus;
56
+ duration: number;
57
+ assertions: AssertionResult[];
58
+ output?: string;
59
+ compiledInput?: string;
60
+ error?: string;
61
+ execution?: TestExecutionMetadata;
62
+ }
63
+ export interface TestRunSummary {
64
+ total: number;
65
+ passed: number;
66
+ failed: number;
67
+ errors: number;
68
+ skipped: number;
69
+ duration: number;
70
+ totalTokens?: number;
71
+ providers?: string[];
72
+ models?: string[];
73
+ }
74
+ export interface TestRunResult {
75
+ suites: TestSuiteResult[];
76
+ summary: TestRunSummary;
77
+ }
78
+ export interface TestSuiteResult {
79
+ suite: string;
80
+ testFilePath: string;
81
+ results: TestResult[];
82
+ }
83
+ export interface TestRunOptions {
84
+ evaluators?: EvaluatorType[];
85
+ noLlm?: boolean;
86
+ reporter?: 'console' | 'json' | 'junit';
87
+ failFast?: boolean;
88
+ runAll?: boolean;
89
+ verbose?: boolean;
90
+ workspaceRoot?: string;
91
+ registryUrl?: string;
92
+ provider?: string;
93
+ model?: string;
94
+ }
95
+ export type TestProgressEvent = {
96
+ type: 'suite_start';
97
+ suite: string;
98
+ testCount: number;
99
+ } | {
100
+ type: 'test_start';
101
+ suite: string;
102
+ testName: string;
103
+ } | {
104
+ type: 'test_complete';
105
+ suite: string;
106
+ testName: string;
107
+ result: TestResult;
108
+ } | {
109
+ type: 'suite_complete';
110
+ suite: string;
111
+ results: TestResult[];
112
+ } | {
113
+ type: 'assertion_complete';
114
+ suite: string;
115
+ testName: string;
116
+ assertion: AssertionResult;
117
+ };
118
+ export type TestProgressCallback = (event: TestProgressEvent) => void;
119
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,MAAM,aAAa,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEtD,MAAM,MAAM,QAAQ,GAChB,UAAU,GACV,cAAc,GACd,SAAS,GACT,YAAY,GACZ,YAAY,GACZ,WAAW,GACX,WAAW,GACX,aAAa,GACb,WAAW,CAAC;AAIhB,gFAAgF;AAChF,MAAM,MAAM,cAAc,GAAG,QAAQ,GAAG,UAAU,GAAG,MAAM,CAAC;AAE5D,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,aAAa,CAAC;IACzB,+GAA+G;IAC/G,QAAQ,CAAC,EAAE,cAAc,CAAC;IAE1B,KAAK,CAAC,EAAE,QAAQ,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,MAAM,CAAC;IAEnC,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,MAAM,EAAE,YAAY,EAAE,CAAC;IACvB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAID,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;AAC5D,MAAM,MAAM,eAAe,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;AAEjE,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,aAAa,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,eAAe,CAAC;IACxB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE;QACN,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,UAAU,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,OAAO,EAAE,cAAc,CAAC;CACzB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,UAAU,EAAE,CAAC;CACvB;AAID,MAAM,WAAW,cAAc;IAC7B,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;IAC7B,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,QAAQ,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;IACxC,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,MAAM,MAAM,iBAAiB,GACzB;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GACzD;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,GACvD;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,UAAU,CAAA;CAAE,GAC9E;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,UAAU,EAAE,CAAA;CAAE,GAChE;IAAE,IAAI,EAAE,oBAAoB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,eAAe,CAAA;CAAE,CAAC;AAEhG,MAAM,MAAM,oBAAoB,GAAG,CAAC,KAAK,EAAE,iBAAiB,KAAK,IAAI,CAAC"}
package/dist/types.js ADDED
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+ /**
3
+ * Core type definitions for @prompd/test
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
package/package.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "name": "@prompd/test",
3
+ "version": "0.5.0-beta.10",
4
+ "description": "Prompt testing and evaluation framework for Prompd",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "scripts": {
8
+ "build": "tsc",
9
+ "dev": "tsc --watch",
10
+ "typecheck": "tsc --noEmit",
11
+ "clean": "rm -rf dist"
12
+ },
13
+ "keywords": [
14
+ "test",
15
+ "eval",
16
+ "evaluator",
17
+ "prompt",
18
+ "prompd"
19
+ ],
20
+ "author": "Prompd Team",
21
+ "license": "Elastic-2.0",
22
+ "dependencies": {
23
+ "glob": "^10.3.10",
24
+ "yaml": "^2.7.1"
25
+ },
26
+ "peerDependencies": {
27
+ "@prompd/cli": "0.5.0-beta.10"
28
+ },
29
+ "devDependencies": {
30
+ "@prompd/cli": "0.5.0-beta.10",
31
+ "@types/node": "^18.19.17",
32
+ "typescript": "^5.7.3"
33
+ }
34
+ }