@prompd/test 0.5.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/EvaluatorEngine.d.ts +32 -0
  2. package/dist/EvaluatorEngine.d.ts.map +1 -0
  3. package/dist/EvaluatorEngine.js +97 -0
  4. package/dist/TestDiscovery.d.ts +28 -0
  5. package/dist/TestDiscovery.d.ts.map +1 -0
  6. package/dist/TestDiscovery.js +137 -0
  7. package/dist/TestParser.d.ts +25 -0
  8. package/dist/TestParser.d.ts.map +1 -0
  9. package/dist/TestParser.js +187 -0
  10. package/dist/TestRunner.d.ts +57 -0
  11. package/dist/TestRunner.d.ts.map +1 -0
  12. package/dist/TestRunner.js +463 -0
  13. package/dist/cli-types.d.ts +62 -0
  14. package/dist/cli-types.d.ts.map +1 -0
  15. package/dist/cli-types.js +6 -0
  16. package/dist/evaluators/NlpEvaluator.d.ts +26 -0
  17. package/dist/evaluators/NlpEvaluator.d.ts.map +1 -0
  18. package/dist/evaluators/NlpEvaluator.js +145 -0
  19. package/dist/evaluators/PrmdEvaluator.d.ts +42 -0
  20. package/dist/evaluators/PrmdEvaluator.d.ts.map +1 -0
  21. package/dist/evaluators/PrmdEvaluator.js +265 -0
  22. package/dist/evaluators/ScriptEvaluator.d.ts +19 -0
  23. package/dist/evaluators/ScriptEvaluator.d.ts.map +1 -0
  24. package/dist/evaluators/ScriptEvaluator.js +161 -0
  25. package/dist/evaluators/types.d.ts +19 -0
  26. package/dist/evaluators/types.d.ts.map +1 -0
  27. package/dist/evaluators/types.js +5 -0
  28. package/dist/index.d.ts +25 -0
  29. package/dist/index.d.ts.map +1 -0
  30. package/dist/index.js +33 -0
  31. package/dist/reporters/ConsoleReporter.d.ts +17 -0
  32. package/dist/reporters/ConsoleReporter.d.ts.map +1 -0
  33. package/dist/reporters/ConsoleReporter.js +85 -0
  34. package/dist/reporters/JsonReporter.d.ts +11 -0
  35. package/dist/reporters/JsonReporter.d.ts.map +1 -0
  36. package/dist/reporters/JsonReporter.js +18 -0
  37. package/dist/reporters/JunitReporter.d.ts +15 -0
  38. package/dist/reporters/JunitReporter.d.ts.map +1 -0
  39. package/dist/reporters/JunitReporter.js +89 -0
  40. package/dist/reporters/types.d.ts +8 -0
  41. package/dist/reporters/types.d.ts.map +1 -0
  42. package/dist/reporters/types.js +5 -0
  43. package/dist/types.d.ts +115 -0
  44. package/dist/types.d.ts.map +1 -0
  45. package/dist/types.js +5 -0
  46. package/package.json +34 -0
  47. package/src/EvaluatorEngine.ts +130 -0
  48. package/src/TestDiscovery.ts +133 -0
  49. package/src/TestParser.ts +235 -0
  50. package/src/TestRunner.ts +516 -0
  51. package/src/cli-types.ts +92 -0
  52. package/src/evaluators/NlpEvaluator.ts +184 -0
  53. package/src/evaluators/PrmdEvaluator.ts +284 -0
  54. package/src/evaluators/ScriptEvaluator.ts +149 -0
  55. package/src/evaluators/types.ts +24 -0
  56. package/src/index.ts +76 -0
  57. package/src/reporters/ConsoleReporter.ts +100 -0
  58. package/src/reporters/JsonReporter.ts +21 -0
  59. package/src/reporters/JunitReporter.ts +113 -0
  60. package/src/reporters/types.ts +9 -0
  61. package/src/types.ts +133 -0
  62. package/tsconfig.json +20 -0
@@ -0,0 +1,145 @@
1
+ "use strict";
2
+ /**
3
+ * NLP Evaluator - local, fast, free, deterministic assertions.
4
+ *
5
+ * Checks: contains, not_contains, matches, max_tokens, min_tokens, starts_with, ends_with
6
+ */
7
+ Object.defineProperty(exports, "__esModule", { value: true });
8
+ exports.NlpEvaluator = void 0;
9
/**
 * Runs local, deterministic text assertions against a model response.
 *
 * Supported checks: contains, not_contains, matches, max_tokens, min_tokens,
 * starts_with, ends_with. All string comparisons are case-insensitive except
 * `matches`, which uses the regex pattern exactly as written.
 */
class NlpEvaluator {
    constructor() {
        this.type = 'nlp';
    }
    /**
     * Evaluate a single assertion against `context.response`.
     *
     * @param assertion Object carrying `check` (the check name) and `value` (its operand).
     * @param context Evaluation context; only `response` is read here.
     * @returns A result with status 'pass' or 'fail', or 'error' when the check
     *          itself throws (for example an invalid regex pattern).
     */
    async evaluate(assertion, context) {
        const start = Date.now();
        const check = assertion.check;
        try {
            const result = this.runCheck(check, assertion.value, context.response);
            return {
                evaluator: 'nlp',
                check,
                status: result.pass ? 'pass' : 'fail',
                reason: result.reason,
                duration: Date.now() - start,
            };
        }
        catch (err) {
            return {
                evaluator: 'nlp',
                check,
                status: 'error',
                reason: err instanceof Error ? err.message : String(err),
                duration: Date.now() - start,
            };
        }
    }
    /**
     * Dispatch to the implementation of the named check.
     *
     * @returns `{ pass, reason }`; an unknown check name is reported as a failure.
     */
    runCheck(check, value, output) {
        switch (check) {
            case 'contains':
                return this.checkContains(value, output);
            case 'not_contains':
                return this.checkNotContains(value, output);
            case 'matches':
                return this.checkMatches(value, output);
            case 'max_tokens':
                return this.checkMaxTokens(value, output);
            case 'min_tokens':
                return this.checkMinTokens(value, output);
            case 'starts_with':
                return this.checkStartsWith(value, output);
            case 'ends_with':
                return this.checkEndsWith(value, output);
            default:
                return { pass: false, reason: `Unknown NLP check: ${check}` };
        }
    }
    /** Pass when every expected value occurs in the output (case-insensitive). */
    checkContains(value, output) {
        const values = this.toStringArray(value);
        // A missing/empty value would otherwise pass vacuously and hide a
        // misconfigured assertion; the other checks reject a missing value too.
        if (values.length === 0) {
            return { pass: false, reason: '"contains" check requires at least one value' };
        }
        const lower = output.toLowerCase();
        const missing = values.filter(v => !lower.includes(v.toLowerCase()));
        if (missing.length === 0) {
            return { pass: true, reason: `Output contains all expected values` };
        }
        return {
            pass: false,
            reason: `Output missing: ${missing.map(v => `"${v}"`).join(', ')}`,
        };
    }
    /** Pass when none of the excluded values occurs in the output (case-insensitive). */
    checkNotContains(value, output) {
        const values = this.toStringArray(value);
        // Same reasoning as checkContains: no values means nothing was actually checked.
        if (values.length === 0) {
            return { pass: false, reason: '"not_contains" check requires at least one value' };
        }
        const lower = output.toLowerCase();
        const found = values.filter(v => lower.includes(v.toLowerCase()));
        if (found.length === 0) {
            return { pass: true, reason: `Output does not contain any excluded values` };
        }
        return {
            pass: false,
            reason: `Output contains excluded values: ${found.map(v => `"${v}"`).join(', ')}`,
        };
    }
    /**
     * Pass when the output matches the regex pattern given as a string.
     * An invalid pattern makes `new RegExp` throw; `evaluate` reports that as 'error'.
     */
    checkMatches(value, output) {
        if (typeof value !== 'string') {
            return { pass: false, reason: '"matches" check requires a string regex pattern' };
        }
        const regex = new RegExp(value);
        if (regex.test(output)) {
            return { pass: true, reason: `Output matches pattern /${value}/` };
        }
        return { pass: false, reason: `Output does not match pattern /${value}/` };
    }
    /** Pass when the estimated token count is at most `value`. */
    checkMaxTokens(value, output) {
        // typeof NaN is 'number', so NaN must be rejected explicitly.
        if (typeof value !== 'number' || Number.isNaN(value)) {
            return { pass: false, reason: '"max_tokens" check requires a numeric value' };
        }
        const tokenCount = this.estimateTokens(output);
        if (tokenCount <= value) {
            return { pass: true, reason: `Token count ${tokenCount} <= ${value}` };
        }
        return { pass: false, reason: `Token count ${tokenCount} exceeds max ${value}` };
    }
    /** Pass when the estimated token count is at least `value`. */
    checkMinTokens(value, output) {
        // typeof NaN is 'number', so NaN must be rejected explicitly.
        if (typeof value !== 'number' || Number.isNaN(value)) {
            return { pass: false, reason: '"min_tokens" check requires a numeric value' };
        }
        const tokenCount = this.estimateTokens(output);
        if (tokenCount >= value) {
            return { pass: true, reason: `Token count ${tokenCount} >= ${value}` };
        }
        return { pass: false, reason: `Token count ${tokenCount} below min ${value}` };
    }
    /** Pass when the output, ignoring leading whitespace and case, starts with `value`. */
    checkStartsWith(value, output) {
        if (typeof value !== 'string') {
            return { pass: false, reason: '"starts_with" check requires a string value' };
        }
        const trimmed = output.trimStart();
        if (trimmed.toLowerCase().startsWith(value.toLowerCase())) {
            return { pass: true, reason: `Output starts with "${value}"` };
        }
        return { pass: false, reason: `Output does not start with "${value}"` };
    }
    /** Pass when the output, ignoring trailing whitespace and case, ends with `value`. */
    checkEndsWith(value, output) {
        if (typeof value !== 'string') {
            return { pass: false, reason: '"ends_with" check requires a string value' };
        }
        const trimmed = output.trimEnd();
        if (trimmed.toLowerCase().endsWith(value.toLowerCase())) {
            return { pass: true, reason: `Output ends with "${value}"` };
        }
        return { pass: false, reason: `Output does not end with "${value}"` };
    }
    /**
     * Rough token estimation: ~4 characters per token (GPT-family average).
     * This is intentionally approximate — for precise counting, use a tokenizer.
     */
    estimateTokens(text) {
        return Math.ceil(text.length / 4);
    }
    /** Normalize a scalar, array, or missing value into an array of strings. */
    toStringArray(value) {
        if (value === undefined || value === null)
            return [];
        if (Array.isArray(value))
            return value.map(String);
        return [String(value)];
    }
}
145
+ exports.NlpEvaluator = NlpEvaluator;
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Prmd Evaluator - LLM-based evaluation via @prompd/cli.
3
+ *
4
+ * Modes:
5
+ * - prompt: "@scope/pkg@version" -> uses a registry package as the evaluator
6
+ * - prompt: "./path" -> uses a local .prmd file as the evaluator
7
+ * - (no prompt field) -> uses the content block of the .test.prmd
8
+ *
9
+ * The evaluator prompt receives {{prompt}}, {{response}}, and {{params}} variables.
10
+ * Response must start with PASS or FAIL.
11
+ */
12
+ import type { Evaluator, EvaluatorContext } from './types';
13
+ import type { AssertionDef, AssertionResult } from '../types';
14
+ import type { CompilerModule } from '../cli-types';
15
+ export interface PrmdEvaluatorOptions { // Options accepted by the PrmdEvaluator constructor.
16
+ testFileDir: string; // Base directory against which a local `prompt:` file path is resolved.
17
+ evaluatorPrompt?: string; // Evaluator content used when the assertion has no `prompt` field.
18
+ workspaceRoot?: string; // Passed through unchanged to the compiler's compile() options.
19
+ registryUrl?: string; // Passed through unchanged to the compiler's compile() options.
20
+ cliModule?: CompilerModule; // The @prompd/cli module; evaluation fails with an error result if it is not provided.
21
+ provider?: string; // Provider used when the assertion does not name one; falls back to config defaults, then 'openai'.
22
+ model?: string; // Model used when the assertion does not name one; falls back to config defaults, then a per-provider default.
23
+ }
24
+ export declare class PrmdEvaluator implements Evaluator { // LLM-backed evaluator: compiles an evaluator prompt, runs it, and reads a PASS/FAIL verdict.
25
+ readonly type = "prmd";
26
+ private options; // Constructor options, stored as given.
27
+ private cliModule; // Taken from options.cliModule; null when none was supplied.
28
+ constructor(options: PrmdEvaluatorOptions);
29
+ evaluate(assertion: AssertionDef, context: EvaluatorContext): Promise<AssertionResult>; // Failures (including thrown exceptions) are returned as status 'error' results.
30
+ private resolveEvaluatorContent; // Uses assertion.prompt when set, otherwise options.evaluatorPrompt.
31
+ private resolvePromptTarget; // '@'-prefixed values are treated as registry references, anything else as a file path under testFileDir.
32
+ /**
33
+ * Wrap a registry reference as a minimal .prmd that inherits from the evaluator package.
34
+ * The compiler handles resolution, download, and caching.
35
+ */
36
+ private wrapAsInherits;
37
+ private compileEvaluator; // Compiles the evaluator content with prompt/response/params as template parameters.
38
+ private parseEvaluatorResponse; // Reads PASS or FAIL from the first line of the evaluator's reply.
39
+ private getDefaultModel; // Per-provider fallback model name; 'gpt-4o' for unlisted providers.
40
+ private getCli; // Returns the supplied cli module or throws when none was provided.
41
+ }
42
+ //# sourceMappingURL=PrmdEvaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PrmdEvaluator.d.ts","sourceRoot":"","sources":["../../src/evaluators/PrmdEvaluator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAIH,OAAO,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAC3D,OAAO,KAAK,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAC9D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAInD,MAAM,WAAW,oBAAoB;IACnC,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,cAAc,CAAC;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,qBAAa,aAAc,YAAW,SAAS;IAC7C,QAAQ,CAAC,IAAI,UAAU;IACvB,OAAO,CAAC,OAAO,CAAuB;IACtC,OAAO,CAAC,SAAS,CAA+B;gBAEpC,OAAO,EAAE,oBAAoB;IAOnC,QAAQ,CAAC,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC;YA+F9E,uBAAuB;YAUvB,mBAAmB;IAejC;;;OAGG;IACH,OAAO,CAAC,cAAc;YAgBR,gBAAgB;IAyD9B,OAAO,CAAC,sBAAsB;IAwB9B,OAAO,CAAC,eAAe;YAYT,MAAM;CAQrB"}
@@ -0,0 +1,265 @@
1
+ "use strict";
2
+ /**
3
+ * Prmd Evaluator - LLM-based evaluation via @prompd/cli.
4
+ *
5
+ * Modes:
6
+ * - prompt: "@scope/pkg@version" -> uses a registry package as the evaluator
7
+ * - prompt: "./path" -> uses a local .prmd file as the evaluator
8
+ * - (no prompt field) -> uses the content block of the .test.prmd
9
+ *
10
+ * The evaluator prompt receives {{prompt}}, {{response}}, and {{params}} variables.
11
+ * Response must start with PASS or FAIL.
12
+ */
13
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
14
+ if (k2 === undefined) k2 = k;
15
+ var desc = Object.getOwnPropertyDescriptor(m, k);
16
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
17
+ desc = { enumerable: true, get: function() { return m[k]; } };
18
+ }
19
+ Object.defineProperty(o, k2, desc);
20
+ }) : (function(o, m, k, k2) {
21
+ if (k2 === undefined) k2 = k;
22
+ o[k2] = m[k];
23
+ }));
24
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
25
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
26
+ }) : function(o, v) {
27
+ o["default"] = v;
28
+ });
29
+ var __importStar = (this && this.__importStar) || (function () {
30
+ var ownKeys = function(o) {
31
+ ownKeys = Object.getOwnPropertyNames || function (o) {
32
+ var ar = [];
33
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
34
+ return ar;
35
+ };
36
+ return ownKeys(o);
37
+ };
38
+ return function (mod) {
39
+ if (mod && mod.__esModule) return mod;
40
+ var result = {};
41
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
42
+ __setModuleDefault(result, mod);
43
+ return result;
44
+ };
45
+ })();
46
+ Object.defineProperty(exports, "__esModule", { value: true });
47
+ exports.PrmdEvaluator = void 0;
48
+ const path = __importStar(require("path"));
49
+ const fs = __importStar(require("fs"));
50
/**
 * Matches a PASS/FAIL verdict at the start of a line (case-insensitive).
 * Group 1 is the verdict; group 2 is the remaining text after any colons or
 * whitespace. The `\b` makes the verdict a whole word, so replies beginning
 * with "Passwords..." or "Failing..." are not mistaken for a verdict.
 */
const PASS_FAIL_REGEX = /^(PASS|FAIL)\b[:\s]*(.*)/i;
51
/**
 * LLM-based evaluator. Resolves an evaluator prompt (registry package, local
 * file, or the test file's content block), compiles it with the evaluation
 * context as parameters, sends it to the configured provider, and interprets
 * the first line of the reply as PASS or FAIL.
 */
class PrmdEvaluator {
    /**
     * @param options See PrmdEvaluatorOptions; `cliModule` must be supplied for
     *                evaluation to succeed.
     */
    constructor(options) {
        this.type = 'prmd';
        this.cliModule = null;
        this.options = options;
        if (options.cliModule) {
            this.cliModule = options.cliModule;
        }
    }
    /**
     * Run the evaluator prompt for one assertion.
     *
     * @param assertion May carry `prompt`, `provider` and `model` overrides.
     * @param context Evaluation context injected into the evaluator template.
     * @returns 'pass' or 'fail' per the evaluator's verdict; every failure along
     *          the way (including thrown exceptions) becomes an 'error' result.
     */
    async evaluate(assertion, context) {
        const start = Date.now();
        try {
            const evaluatorContent = await this.resolveEvaluatorContent(assertion);
            // NOTE(review): these diagnostics go to stdout via console.log; confirm
            // they cannot interleave with machine-readable reporter output.
            console.log(`[PrmdEvaluator] Resolved evaluator content (${evaluatorContent?.length || 0} chars)`);
            if (!evaluatorContent) {
                return this.errorResult('Could not resolve evaluator prompt content', start);
            }
            console.log(`[PrmdEvaluator] source: ${assertion.prompt || 'content block'}`);
            console.log(`[PrmdEvaluator] preview: ${evaluatorContent.substring(0, 150)}`);
            // Compile the evaluator prompt with context as parameters
            const cli = await this.getCli();
            const compiled = await this.compileEvaluator(cli, evaluatorContent, context);
            console.log(`[PrmdEvaluator] Compiled evaluator (${compiled?.length || 0} chars): ${compiled?.substring(0, 150) || 'null'}`);
            if (!compiled) {
                return this.errorResult('Evaluator prompt compilation failed', start);
            }
            // Execute against LLM using callLLM directly (avoids executeRawText re-compilation)
            const executor = new cli.PrompdExecutor();
            // Resolve provider/model/apiKey — same logic as TestRunner
            const configManager = cli.ConfigManager?.getInstance
                ? cli.ConfigManager.getInstance()
                : null;
            const config = configManager?.config || {};
            // Priority: assertion-level > run options (UI selector) > config defaults
            const provider = assertion.provider || this.options.provider || config.defaultProvider || 'openai';
            const rawModel = assertion.model || this.options.model || config.default_model || config.defaultModel || '';
            const model = rawModel || this.getDefaultModel(provider);
            const apiKey = configManager?.getApiKey?.(provider, config) || '';
            console.log(`[PrmdEvaluator] Executing: provider=${provider}, model=${model}`);
            // 'ollama' is the one provider allowed to run without an API key.
            if (!apiKey && provider !== 'ollama') {
                return this.errorResult(`No API key configured for provider "${provider}"`, start);
            }
            const execResult = await executor.callLLM(provider, model, compiled, apiKey);
            if (!execResult.success) {
                return this.errorResult(execResult.error || 'Evaluator LLM execution failed', start);
            }
            const response = execResult.response || execResult.content || '';
            if (!response) {
                return this.errorResult('No response from evaluator', start);
            }
            // Parse PASS/FAIL from response
            return this.parseEvaluatorResponse(response, Date.now() - start);
        }
        catch (err) {
            return this.errorResult(err instanceof Error ? err.message : String(err), start);
        }
    }
    /** Build an 'error' result whose duration is measured from `start` (ms epoch). */
    errorResult(reason, start) {
        return {
            evaluator: 'prmd',
            status: 'error',
            reason,
            duration: Date.now() - start,
        };
    }
    /**
     * Pick the evaluator prompt source: the assertion's `prompt` field when set,
     * otherwise the content block supplied via options (null when absent).
     */
    async resolveEvaluatorContent(assertion) {
        // If prompt: is specified, resolve it (registry ref, local file)
        if (assertion.prompt) {
            return this.resolvePromptTarget(assertion.prompt);
        }
        // No prompt: field — use the content block of the .test.prmd
        return this.options.evaluatorPrompt || null;
    }
    /**
     * Turn a `prompt:` value into evaluator content.
     *
     * @throws Error when a local file path does not exist.
     */
    async resolvePromptTarget(prompt) {
        // Registry reference: @scope/package@version
        if (prompt.startsWith('@')) {
            return this.wrapAsInherits(prompt);
        }
        // Local file path
        const resolved = path.resolve(this.options.testFileDir, prompt);
        if (!fs.existsSync(resolved)) {
            throw new Error(`Evaluator prompt file not found: ${resolved}`);
        }
        return fs.readFileSync(resolved, 'utf-8');
    }
    /**
     * Wrap a registry reference as a minimal .prmd that inherits from the evaluator package.
     * The compiler handles resolution, download, and caching.
     */
    wrapAsInherits(registryRef) {
        return [
            '---',
            // JSON.stringify yields a double-quoted scalar with quotes, backslashes
            // and control characters escaped, so the reference cannot break out of
            // the YAML value.
            `inherits: ${JSON.stringify(registryRef)}`,
            'parameters:',
            // Each `type` key is nested under its `- name` list item.
            '  - name: prompt',
            '    type: string',
            '  - name: response',
            '    type: string',
            '  - name: params',
            '    type: string',
            '---',
            '',
        ].join('\n');
    }
    /**
     * Compile evaluator content with the evaluation context as template parameters.
     *
     * @param cli The @prompd/cli module (MemoryFileSystem and PrompdCompiler are used).
     * @param content Evaluator content, with or without frontmatter.
     * @param context Supplies `prompt`, `response` and `params`.
     * @returns The compiled text, or null when the compiler produced nothing.
     */
    async compileEvaluator(cli, content, context) {
        // If content doesn't start with frontmatter, wrap it with minimal frontmatter
        // so the compiler can process it. Content blocks from .test.prmd are raw markdown.
        let prmdContent = content;
        if (!content.trimStart().startsWith('---')) {
            prmdContent = [
                '---',
                'id: evaluator',
                'name: "Test Evaluator"',
                'version: 0.0.1',
                'parameters:',
                '  - name: prompt',
                '    type: string',
                '  - name: response',
                '    type: string',
                '  - name: params',
                '    type: object',
                '---',
                '',
                content,
            ].join('\n');
        }
        const memFs = new cli.MemoryFileSystem({ '/evaluator.prmd': prmdContent });
        const compiler = new cli.PrompdCompiler();
        // A test without params must not abort compilation.
        const params = context.params ?? {};
        // Inject evaluation context as template variables
        const parameters = {
            prompt: context.prompt,
            response: context.response,
            params: JSON.stringify(params, null, 2),
        };
        // Also expose individual params via dot notation
        for (const [key, value] of Object.entries(params)) {
            parameters[`params.${key}`] = this.stringifyParam(value);
        }
        const result = await compiler.compile('/evaluator.prmd', {
            outputFormat: 'markdown',
            parameters,
            fileSystem: memFs,
            workspaceRoot: this.options.workspaceRoot,
            registryUrl: this.options.registryUrl,
        });
        // CLI compile() may return a string directly or an object
        if (typeof result === 'string') {
            return result || null;
        }
        return result.output || null;
    }
    /**
     * Render one param value for template substitution. Plain objects are
     * JSON-encoded (String() would give "[object Object]"); arrays and
     * primitives keep their String() form.
     */
    stringifyParam(value) {
        if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
            return JSON.stringify(value);
        }
        return String(value);
    }
    /**
     * Interpret the first line of the evaluator reply as a verdict.
     *
     * @returns 'pass'/'fail' with the rest of the line as the reason, or 'error'
     *          when the line does not match PASS_FAIL_REGEX.
     */
    parseEvaluatorResponse(response, duration) {
        const firstLine = response.trim().split('\n')[0];
        const match = firstLine.match(PASS_FAIL_REGEX);
        if (!match) {
            return {
                evaluator: 'prmd',
                status: 'error',
                reason: `Evaluator response did not start with PASS or FAIL. Got: "${firstLine.substring(0, 100)}"`,
                duration,
            };
        }
        const verdict = match[1].toUpperCase();
        const reason = match[2]?.trim() || undefined;
        return {
            evaluator: 'prmd',
            status: verdict === 'PASS' ? 'pass' : 'fail',
            reason: reason || `Evaluator returned ${verdict}`,
            duration,
        };
    }
    /** Fallback model for a provider name (case-insensitive); 'gpt-4o' when unlisted. */
    getDefaultModel(provider) {
        const defaults = {
            openai: 'gpt-4o',
            anthropic: 'claude-sonnet-4-20250514',
            groq: 'llama-3.1-70b-versatile',
            google: 'gemini-2.0-flash',
            mistral: 'mistral-large-latest',
            deepseek: 'deepseek-chat',
        };
        const key = provider.toLowerCase();
        // Own-property check so names like "constructor" do not resolve to
        // members inherited from Object.prototype.
        return Object.prototype.hasOwnProperty.call(defaults, key) ? defaults[key] : 'gpt-4o';
    }
    /**
     * @returns The cli module supplied at construction.
     * @throws Error when no module was provided.
     */
    async getCli() {
        if (!this.cliModule) {
            throw new Error('@prompd/cli module not provided. Pass it via PrmdEvaluatorOptions.cliModule');
        }
        return this.cliModule;
    }
}
265
+ exports.PrmdEvaluator = PrmdEvaluator;
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Script Evaluator - runs external scripts with stdin/stdout contract.
3
+ *
4
+ * Contract:
5
+ * - Receives JSON on stdin: { prompt, response, params, metadata }
6
+ * - Exit code 0 = PASS, 1 = FAIL, other = ERROR
7
+ * - Stdout = reason (optional)
8
+ */
9
+ import type { Evaluator, EvaluatorContext } from './types';
10
+ import type { AssertionDef, AssertionResult } from '../types';
11
+ export declare class ScriptEvaluator implements Evaluator { // Evaluator that runs an external script and maps its exit code to a result.
12
+ readonly type = "script";
13
+ private testFileDir; // Base directory for resolving the assertion's `run` path; also the script's working directory.
14
+ constructor(testFileDir: string);
15
+ evaluate(assertion: AssertionDef, context: EvaluatorContext): Promise<AssertionResult>; // Reads the script path from assertion.run; problems are returned as 'error' results.
16
+ private runScript; // Spawns the script, writes the context as JSON to its stdin, and collects stdout/stderr.
17
+ private getRunner; // Chooses the launcher command from the script's file extension.
18
+ }
19
+ //# sourceMappingURL=ScriptEvaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ScriptEvaluator.d.ts","sourceRoot":"","sources":["../../src/evaluators/ScriptEvaluator.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAKH,OAAO,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAC3D,OAAO,KAAK,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAI9D,qBAAa,eAAgB,YAAW,SAAS;IAC/C,QAAQ,CAAC,IAAI,YAAY;IACzB,OAAO,CAAC,WAAW,CAAS;gBAEhB,WAAW,EAAE,MAAM;IAIzB,QAAQ,CAAC,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC;IAsD5F,OAAO,CAAC,SAAS;IAiDjB,OAAO,CAAC,SAAS;CAoBlB"}
@@ -0,0 +1,161 @@
1
+ "use strict";
2
+ /**
3
+ * Script Evaluator - runs external scripts with stdin/stdout contract.
4
+ *
5
+ * Contract:
6
+ * - Receives JSON on stdin: { prompt, response, params, metadata }
7
+ * - Exit code 0 = PASS, 1 = FAIL, other = ERROR
8
+ * - Stdout = reason (optional)
9
+ */
10
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
11
+ if (k2 === undefined) k2 = k;
12
+ var desc = Object.getOwnPropertyDescriptor(m, k);
13
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
14
+ desc = { enumerable: true, get: function() { return m[k]; } };
15
+ }
16
+ Object.defineProperty(o, k2, desc);
17
+ }) : (function(o, m, k, k2) {
18
+ if (k2 === undefined) k2 = k;
19
+ o[k2] = m[k];
20
+ }));
21
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
22
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
23
+ }) : function(o, v) {
24
+ o["default"] = v;
25
+ });
26
+ var __importStar = (this && this.__importStar) || (function () {
27
+ var ownKeys = function(o) {
28
+ ownKeys = Object.getOwnPropertyNames || function (o) {
29
+ var ar = [];
30
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
31
+ return ar;
32
+ };
33
+ return ownKeys(o);
34
+ };
35
+ return function (mod) {
36
+ if (mod && mod.__esModule) return mod;
37
+ var result = {};
38
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
39
+ __setModuleDefault(result, mod);
40
+ return result;
41
+ };
42
+ })();
43
+ Object.defineProperty(exports, "__esModule", { value: true });
44
+ exports.ScriptEvaluator = void 0;
45
+ const child_process_1 = require("child_process");
46
+ const path = __importStar(require("path"));
47
+ const fs = __importStar(require("fs"));
48
+ const SCRIPT_TIMEOUT_MS = 30000;
49
/**
 * Runs an external script as an assertion.
 *
 * The script receives the evaluation context as JSON on stdin. Exit code 0 is
 * a pass, 1 is a fail, and any other code is reported as an error. Trimmed
 * stdout, when non-empty, becomes the result's reason.
 */
class ScriptEvaluator {
    /**
     * @param testFileDir Directory of the test file; script paths are resolved
     *                    against it and must stay inside it.
     */
    constructor(testFileDir) {
        this.type = 'script';
        this.testFileDir = testFileDir;
    }
    /**
     * Run the script named by `assertion.run`.
     *
     * @param assertion Must carry `run`, a path relative to the test directory.
     * @param context Evaluation context forwarded to the script on stdin.
     * @returns 'pass', 'fail' or 'error'; never rejects.
     */
    async evaluate(assertion, context) {
        const start = Date.now();
        const errorResult = (reason) => ({
            evaluator: 'script',
            status: 'error',
            reason,
            duration: Date.now() - start,
        });
        const scriptPath = assertion.run;
        if (!scriptPath) {
            return errorResult('No "run" path specified for script evaluator');
        }
        const resolvedPath = path.resolve(this.testFileDir, scriptPath);
        // Validate containment before touching the filesystem so paths outside
        // the test directory are rejected without probing whether they exist.
        if (!this.isWithinTestDir(resolvedPath)) {
            return errorResult(`Script path escapes test directory: ${scriptPath}`);
        }
        if (!fs.existsSync(resolvedPath)) {
            return errorResult(`Script not found: ${resolvedPath}`);
        }
        try {
            const result = await this.runScript(resolvedPath, context);
            const status = this.statusForExitCode(result.exitCode);
            let fallbackReason;
            if (status === 'pass') {
                fallbackReason = 'Script passed';
            }
            else if (status === 'fail') {
                fallbackReason = 'Script failed';
            }
            else {
                // Unexpected exit code: stderr usually explains what went wrong.
                fallbackReason = result.stderr.trim() || `Script exited with code ${result.exitCode}`;
            }
            return {
                evaluator: 'script',
                status,
                reason: result.stdout.trim() || fallbackReason,
                duration: Date.now() - start,
            };
        }
        catch (err) {
            return errorResult(err instanceof Error ? err.message : String(err));
        }
    }
    /**
     * True when `candidatePath` (absolute) lies inside the test directory.
     * Uses path.relative on the resolved base so a sibling such as
     * "<dir>-other" is not accepted by prefix match, and so a relative
     * `testFileDir` still compares correctly against an absolute path.
     */
    isWithinTestDir(candidatePath) {
        const base = path.resolve(this.testFileDir);
        const rel = path.relative(base, candidatePath);
        const climbsOut = rel === '..' || rel.startsWith('..' + path.sep);
        // An absolute result means there is no relative route at all
        // (e.g. a different drive on Windows).
        return !climbsOut && !path.isAbsolute(rel);
    }
    /** Map an exit code to a status: 0 = 'pass', 1 = 'fail', anything else = 'error'. */
    statusForExitCode(exitCode) {
        if (exitCode === 0)
            return 'pass';
        if (exitCode === 1)
            return 'fail';
        return 'error';
    }
    /**
     * Spawn the script and feed it the context as JSON on stdin.
     *
     * @returns Promise of `{ exitCode, stdout, stderr }`; rejects when the
     *          process cannot be spawned or ends without an exit code
     *          (killed by timeout or signal).
     */
    runScript(scriptPath, context) {
        return new Promise((resolve, reject) => {
            const { command, args } = this.getRunner(scriptPath);
            const child = (0, child_process_1.spawn)(command, args, {
                cwd: this.testFileDir,
                timeout: SCRIPT_TIMEOUT_MS,
                stdio: ['pipe', 'pipe', 'pipe'],
                shell: process.platform === 'win32',
            });
            let stdout = '';
            let stderr = '';
            child.stdout.on('data', (data) => {
                stdout += data.toString();
            });
            child.stderr.on('data', (data) => {
                stderr += data.toString();
            });
            child.on('error', (err) => {
                reject(new Error(`Failed to spawn script: ${err.message}`));
            });
            child.on('close', (code) => {
                if (code === null) {
                    reject(new Error('Script process was killed (timeout or signal)'));
                    return;
                }
                resolve({ exitCode: code, stdout, stderr });
            });
            // A script may exit without reading stdin; without a listener the
            // resulting EPIPE would surface as an unhandled 'error' event. The
            // exit code still decides the outcome, so the error is ignored here.
            child.stdin.on('error', () => { });
            // Send context as JSON on stdin
            const payload = JSON.stringify({
                prompt: context.prompt,
                response: context.response,
                params: context.params,
                metadata: context.metadata,
            });
            child.stdin.write(payload);
            child.stdin.end();
        });
    }
    /**
     * Choose the launcher for a script from its (case-insensitive) extension.
     *
     * @returns `{ command, args }` suitable for spawn().
     */
    getRunner(scriptPath) {
        const ext = path.extname(scriptPath).toLowerCase();
        switch (ext) {
            case '.ts':
                return { command: 'npx', args: ['tsx', scriptPath] };
            case '.js':
            case '.mjs':
                return { command: 'node', args: [scriptPath] };
            case '.py':
                return { command: 'python', args: [scriptPath] };
            case '.sh':
                return { command: 'bash', args: [scriptPath] };
            case '.ps1':
                return { command: 'powershell', args: ['-File', scriptPath] };
            default:
                // For unknown extensions, try running directly (relies on shebang or OS association)
                return { command: scriptPath, args: [] };
        }
    }
}
161
+ exports.ScriptEvaluator = ScriptEvaluator;
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Evaluator interfaces for @prompd/test
3
+ */
4
+ import type { AssertionDef, AssertionResult } from '../types';
5
+ export interface EvaluatorContext { // Data handed to every evaluator for one assertion.
6
+ prompt: string; // Forwarded to evaluator templates and scripts as `prompt`; presumably the prompt that produced the response — confirm against the test runner.
7
+ response: string; // The text that assertions are checked against.
8
+ params: Record<string, unknown>; // Exposed to evaluator templates as JSON and forwarded to scripts unchanged.
9
+ metadata: { // Forwarded to scripts on stdin as `metadata`.
10
+ provider: string; // Presumably the provider that generated the response — TODO confirm.
11
+ model: string; // Presumably the model that generated the response — TODO confirm.
12
+ duration: number; // Presumably the generation time; unit not visible here — TODO confirm.
13
+ };
14
+ }
15
+ export interface Evaluator { // Contract implemented by the nlp, prmd and script evaluators.
16
+ readonly type: string; // Evaluator kind; the implementations in this package use 'nlp', 'prmd' and 'script'.
17
+ evaluate(assertion: AssertionDef, context: EvaluatorContext): Promise<AssertionResult>; // Checks one assertion against the context and resolves with its result.
18
+ }
19
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/evaluators/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAE9D,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,QAAQ,EAAE;QACR,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;CACH;AAED,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CACN,SAAS,EAAE,YAAY,EACvB,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,eAAe,CAAC,CAAC;CAC7B"}
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+ /**
3
+ * Evaluator interfaces for @prompd/test
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });