@prompd/test 0.5.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/dist/EvaluatorEngine.d.ts +32 -0
  2. package/dist/EvaluatorEngine.d.ts.map +1 -0
  3. package/dist/EvaluatorEngine.js +97 -0
  4. package/dist/TestDiscovery.d.ts +28 -0
  5. package/dist/TestDiscovery.d.ts.map +1 -0
  6. package/dist/TestDiscovery.js +137 -0
  7. package/dist/TestParser.d.ts +25 -0
  8. package/dist/TestParser.d.ts.map +1 -0
  9. package/dist/TestParser.js +187 -0
  10. package/dist/TestRunner.d.ts +57 -0
  11. package/dist/TestRunner.d.ts.map +1 -0
  12. package/dist/TestRunner.js +463 -0
  13. package/dist/cli-types.d.ts +62 -0
  14. package/dist/cli-types.d.ts.map +1 -0
  15. package/dist/cli-types.js +6 -0
  16. package/dist/evaluators/NlpEvaluator.d.ts +30 -0
  17. package/dist/evaluators/NlpEvaluator.d.ts.map +1 -0
  18. package/dist/evaluators/NlpEvaluator.js +183 -0
  19. package/dist/evaluators/PrmdEvaluator.d.ts +42 -0
  20. package/dist/evaluators/PrmdEvaluator.d.ts.map +1 -0
  21. package/dist/evaluators/PrmdEvaluator.js +265 -0
  22. package/dist/evaluators/ScriptEvaluator.d.ts +19 -0
  23. package/dist/evaluators/ScriptEvaluator.d.ts.map +1 -0
  24. package/dist/evaluators/ScriptEvaluator.js +163 -0
  25. package/dist/evaluators/types.d.ts +19 -0
  26. package/dist/evaluators/types.d.ts.map +1 -0
  27. package/dist/evaluators/types.js +5 -0
  28. package/dist/index.d.ts +25 -0
  29. package/dist/index.d.ts.map +1 -0
  30. package/dist/index.js +33 -0
  31. package/dist/reporters/ConsoleReporter.d.ts +17 -0
  32. package/dist/reporters/ConsoleReporter.d.ts.map +1 -0
  33. package/dist/reporters/ConsoleReporter.js +85 -0
  34. package/dist/reporters/JsonReporter.d.ts +11 -0
  35. package/dist/reporters/JsonReporter.d.ts.map +1 -0
  36. package/dist/reporters/JsonReporter.js +18 -0
  37. package/dist/reporters/JunitReporter.d.ts +15 -0
  38. package/dist/reporters/JunitReporter.d.ts.map +1 -0
  39. package/dist/reporters/JunitReporter.js +89 -0
  40. package/dist/reporters/types.d.ts +8 -0
  41. package/dist/reporters/types.d.ts.map +1 -0
  42. package/dist/reporters/types.js +5 -0
  43. package/dist/types.d.ts +119 -0
  44. package/dist/types.d.ts.map +1 -0
  45. package/dist/types.js +5 -0
  46. package/package.json +34 -0
  47. package/src/EvaluatorEngine.ts +130 -0
  48. package/src/TestDiscovery.ts +133 -0
  49. package/src/TestParser.ts +235 -0
  50. package/src/TestRunner.ts +516 -0
  51. package/src/cli-types.ts +92 -0
  52. package/src/evaluators/NlpEvaluator.ts +240 -0
  53. package/src/evaluators/PrmdEvaluator.ts +284 -0
  54. package/src/evaluators/ScriptEvaluator.ts +152 -0
  55. package/src/evaluators/types.ts +24 -0
  56. package/src/index.ts +76 -0
  57. package/src/reporters/ConsoleReporter.ts +100 -0
  58. package/src/reporters/JsonReporter.ts +21 -0
  59. package/src/reporters/JunitReporter.ts +113 -0
  60. package/src/reporters/types.ts +9 -0
  61. package/src/types.ts +140 -0
  62. package/tsconfig.json +20 -0
@@ -0,0 +1,240 @@
1
+ /**
2
+ * NLP Evaluator - local, fast, free, deterministic assertions.
3
+ *
4
+ * Checks: contains, not_contains, matches, max_tokens, min_tokens, starts_with, ends_with
5
+ */
6
+
7
+ import type { Evaluator, EvaluatorContext } from './types';
8
+ import type { AssertionDef, AssertionResult, NlpCheck, EvaluateTarget } from '../types';
9
+
10
+ export class NlpEvaluator implements Evaluator {
11
+ readonly type = 'nlp';
12
+
13
+ async evaluate(assertion: AssertionDef, context: EvaluatorContext): Promise<AssertionResult> {
14
+ const start = Date.now();
15
+ const check = assertion.check as NlpCheck;
16
+ const target: EvaluateTarget = assertion.evaluate || 'response';
17
+
18
+ try {
19
+ const text = this.resolveTarget(target, context);
20
+ const targetLabel = target === 'both' ? 'Prompt+Response' : target === 'prompt' ? 'Prompt' : 'Output';
21
+ const result = this.runCheck(check, assertion.value, text, targetLabel);
22
+ return {
23
+ evaluator: 'nlp',
24
+ check,
25
+ status: result.pass ? 'pass' : 'fail',
26
+ reason: result.reason,
27
+ duration: Date.now() - start,
28
+ };
29
+ } catch (err) {
30
+ return {
31
+ evaluator: 'nlp',
32
+ check,
33
+ status: 'error',
34
+ reason: err instanceof Error ? err.message : String(err),
35
+ duration: Date.now() - start,
36
+ };
37
+ }
38
+ }
39
+
40
+ private resolveTarget(target: EvaluateTarget, context: EvaluatorContext): string {
41
+ switch (target) {
42
+ case 'prompt': return context.prompt;
43
+ case 'both': return `${context.prompt}\n\n${context.response}`;
44
+ case 'response':
45
+ default: return context.response;
46
+ }
47
+ }
48
+
49
+ private runCheck(
50
+ check: NlpCheck,
51
+ value: string | string[] | number | undefined,
52
+ output: string,
53
+ label: string = 'Output'
54
+ ): { pass: boolean; reason: string } {
55
+ switch (check) {
56
+ case 'contains':
57
+ return this.checkContains(value, output, label);
58
+ case 'not_contains':
59
+ return this.checkNotContains(value, output, label);
60
+ case 'matches':
61
+ return this.checkMatches(value, output, label);
62
+ case 'max_tokens':
63
+ return this.checkMaxTokens(value, output);
64
+ case 'min_tokens':
65
+ return this.checkMinTokens(value, output);
66
+ case 'max_words':
67
+ return this.checkMaxWords(value, output);
68
+ case 'min_words':
69
+ return this.checkMinWords(value, output);
70
+ case 'starts_with':
71
+ return this.checkStartsWith(value, output, label);
72
+ case 'ends_with':
73
+ return this.checkEndsWith(value, output, label);
74
+ default:
75
+ return { pass: false, reason: `Unknown NLP check: ${check}` };
76
+ }
77
+ }
78
+
79
+ private checkContains(
80
+ value: string | string[] | number | undefined,
81
+ output: string,
82
+ label: string
83
+ ): { pass: boolean; reason: string } {
84
+ const values = this.toStringArray(value);
85
+ const lower = output.toLowerCase();
86
+ const missing = values.filter(v => !lower.includes(v.toLowerCase()));
87
+
88
+ if (missing.length === 0) {
89
+ return { pass: true, reason: `${label} contains all expected values` };
90
+ }
91
+ return {
92
+ pass: false,
93
+ reason: `${label} missing: ${missing.map(v => `"${v}"`).join(', ')}`,
94
+ };
95
+ }
96
+
97
+ private checkNotContains(
98
+ value: string | string[] | number | undefined,
99
+ output: string,
100
+ label: string
101
+ ): { pass: boolean; reason: string } {
102
+ const values = this.toStringArray(value);
103
+ const lower = output.toLowerCase();
104
+ const found = values.filter(v => lower.includes(v.toLowerCase()));
105
+
106
+ if (found.length === 0) {
107
+ return { pass: true, reason: `${label} does not contain any excluded values` };
108
+ }
109
+ return {
110
+ pass: false,
111
+ reason: `${label} contains excluded values: ${found.map(v => `"${v}"`).join(', ')}`,
112
+ };
113
+ }
114
+
115
+ private checkMatches(
116
+ value: string | string[] | number | undefined,
117
+ output: string,
118
+ label: string
119
+ ): { pass: boolean; reason: string } {
120
+ if (typeof value !== 'string') {
121
+ return { pass: false, reason: '"matches" check requires a string regex pattern' };
122
+ }
123
+
124
+ const regex = new RegExp(value);
125
+ if (regex.test(output)) {
126
+ return { pass: true, reason: `${label} matches pattern /${value}/` };
127
+ }
128
+ return { pass: false, reason: `${label} does not match pattern /${value}/` };
129
+ }
130
+
131
+ private checkMaxTokens(
132
+ value: string | string[] | number | undefined,
133
+ output: string
134
+ ): { pass: boolean; reason: string } {
135
+ if (typeof value !== 'number') {
136
+ return { pass: false, reason: '"max_tokens" check requires a numeric value' };
137
+ }
138
+
139
+ const tokenCount = this.estimateTokens(output);
140
+ if (tokenCount <= value) {
141
+ return { pass: true, reason: `Token count ${tokenCount} <= ${value}` };
142
+ }
143
+ return { pass: false, reason: `Token count ${tokenCount} exceeds max ${value}` };
144
+ }
145
+
146
+ private checkMinTokens(
147
+ value: string | string[] | number | undefined,
148
+ output: string
149
+ ): { pass: boolean; reason: string } {
150
+ if (typeof value !== 'number') {
151
+ return { pass: false, reason: '"min_tokens" check requires a numeric value' };
152
+ }
153
+
154
+ const tokenCount = this.estimateTokens(output);
155
+ if (tokenCount >= value) {
156
+ return { pass: true, reason: `Token count ${tokenCount} >= ${value}` };
157
+ }
158
+ return { pass: false, reason: `Token count ${tokenCount} below min ${value}` };
159
+ }
160
+
161
+ private checkStartsWith(
162
+ value: string | string[] | number | undefined,
163
+ output: string,
164
+ label: string
165
+ ): { pass: boolean; reason: string } {
166
+ if (typeof value !== 'string') {
167
+ return { pass: false, reason: '"starts_with" check requires a string value' };
168
+ }
169
+
170
+ const trimmed = output.trimStart();
171
+ if (trimmed.toLowerCase().startsWith(value.toLowerCase())) {
172
+ return { pass: true, reason: `${label} starts with "${value}"` };
173
+ }
174
+ return { pass: false, reason: `${label} does not start with "${value}"` };
175
+ }
176
+
177
+ private checkEndsWith(
178
+ value: string | string[] | number | undefined,
179
+ output: string,
180
+ label: string
181
+ ): { pass: boolean; reason: string } {
182
+ if (typeof value !== 'string') {
183
+ return { pass: false, reason: '"ends_with" check requires a string value' };
184
+ }
185
+
186
+ const trimmed = output.trimEnd();
187
+ if (trimmed.toLowerCase().endsWith(value.toLowerCase())) {
188
+ return { pass: true, reason: `${label} ends with "${value}"` };
189
+ }
190
+ return { pass: false, reason: `${label} does not end with "${value}"` };
191
+ }
192
+
193
+ private checkMaxWords(
194
+ value: string | string[] | number | undefined,
195
+ output: string
196
+ ): { pass: boolean; reason: string } {
197
+ if (typeof value !== 'number') {
198
+ return { pass: false, reason: '"max_words" check requires a numeric value' };
199
+ }
200
+
201
+ const wordCount = this.countWords(output);
202
+ if (wordCount <= value) {
203
+ return { pass: true, reason: `Word count ${wordCount} <= ${value}` };
204
+ }
205
+ return { pass: false, reason: `Word count ${wordCount} exceeds max ${value}` };
206
+ }
207
+
208
+ private checkMinWords(
209
+ value: string | string[] | number | undefined,
210
+ output: string
211
+ ): { pass: boolean; reason: string } {
212
+ if (typeof value !== 'number') {
213
+ return { pass: false, reason: '"min_words" check requires a numeric value' };
214
+ }
215
+
216
+ const wordCount = this.countWords(output);
217
+ if (wordCount >= value) {
218
+ return { pass: true, reason: `Word count ${wordCount} >= ${value}` };
219
+ }
220
+ return { pass: false, reason: `Word count ${wordCount} below min ${value}` };
221
+ }
222
+
223
+ private countWords(text: string): number {
224
+ return text.trim().split(/\s+/).filter(w => w.length > 0).length;
225
+ }
226
+
227
+ /**
228
+ * Rough token estimation: ~4 characters per token (GPT-family average).
229
+ * This is intentionally approximate — for precise counting, use a tokenizer.
230
+ */
231
+ private estimateTokens(text: string): number {
232
+ return Math.ceil(text.length / 4);
233
+ }
234
+
235
+ private toStringArray(value: string | string[] | number | undefined): string[] {
236
+ if (value === undefined || value === null) return [];
237
+ if (Array.isArray(value)) return value.map(String);
238
+ return [String(value)];
239
+ }
240
+ }
@@ -0,0 +1,284 @@
1
+ /**
2
+ * Prmd Evaluator - LLM-based evaluation via @prompd/cli.
3
+ *
4
+ * Modes:
5
+ * - prompt: "@scope/pkg@version" -> uses a registry package as the evaluator
6
+ * - prompt: "./path" -> uses a local .prmd file as the evaluator
7
+ * - (no prompt field) -> uses the content block of the .test.prmd
8
+ *
9
+ * The evaluator prompt receives {{input}}, {{output}}, and {{params}} variables.
10
+ * Response must start with PASS or FAIL.
11
+ */
12
+
13
+ import * as path from 'path';
14
+ import * as fs from 'fs';
15
+ import type { Evaluator, EvaluatorContext } from './types';
16
+ import type { AssertionDef, AssertionResult } from '../types';
17
+ import type { CompilerModule } from '../cli-types';
18
+
19
+ const PASS_FAIL_REGEX = /^(PASS|FAIL)[:\s]*(.*)/i;
20
+
21
+ export interface PrmdEvaluatorOptions {
22
+ testFileDir: string;
23
+ evaluatorPrompt?: string;
24
+ workspaceRoot?: string;
25
+ registryUrl?: string;
26
+ cliModule?: CompilerModule;
27
+ provider?: string;
28
+ model?: string;
29
+ }
30
+
31
+ export class PrmdEvaluator implements Evaluator {
32
+ readonly type = 'prmd';
33
+ private options: PrmdEvaluatorOptions;
34
+ private cliModule: CompilerModule | null = null;
35
+
36
+ constructor(options: PrmdEvaluatorOptions) {
37
+ this.options = options;
38
+ if (options.cliModule) {
39
+ this.cliModule = options.cliModule;
40
+ }
41
+ }
42
+
43
+ async evaluate(assertion: AssertionDef, context: EvaluatorContext): Promise<AssertionResult> {
44
+ const start = Date.now();
45
+
46
+ try {
47
+ const evaluatorContent = await this.resolveEvaluatorContent(assertion);
48
+ console.log(`[PrmdEvaluator] Resolved evaluator content (${evaluatorContent?.length || 0} chars)`);
49
+ if (evaluatorContent) {
50
+ console.log(`[PrmdEvaluator] source: ${assertion.prompt || 'content block'}`);
51
+ console.log(`[PrmdEvaluator] preview: ${evaluatorContent.substring(0, 150)}`);
52
+ }
53
+
54
+ if (!evaluatorContent) {
55
+ return {
56
+ evaluator: 'prmd',
57
+ status: 'error',
58
+ reason: 'Could not resolve evaluator prompt content',
59
+ duration: Date.now() - start,
60
+ };
61
+ }
62
+
63
+ // Compile the evaluator prompt with context as parameters
64
+ const cli = await this.getCli();
65
+ const compiled = await this.compileEvaluator(cli, evaluatorContent, context);
66
+
67
+ console.log(`[PrmdEvaluator] Compiled evaluator (${compiled?.length || 0} chars): ${compiled?.substring(0, 150) || 'null'}`);
68
+
69
+ if (!compiled) {
70
+ return {
71
+ evaluator: 'prmd',
72
+ status: 'error',
73
+ reason: 'Evaluator prompt compilation failed',
74
+ duration: Date.now() - start,
75
+ };
76
+ }
77
+
78
+ // Execute against LLM using callLLM directly (avoids executeRawText re-compilation)
79
+ const executor = new cli.PrompdExecutor();
80
+
81
+ // Resolve provider/model/apiKey — same logic as TestRunner
82
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
83
+ const configManager = (cli as any).ConfigManager?.getInstance
84
+ ? (cli as any).ConfigManager.getInstance()
85
+ : null;
86
+ const config = configManager?.config || {};
87
+
88
+ // Priority: assertion-level > run options (UI selector) > config defaults
89
+ const provider = assertion.provider || this.options.provider || config.defaultProvider || 'openai';
90
+ const rawModel = assertion.model || this.options.model || config.default_model || config.defaultModel || '';
91
+ const model = rawModel || this.getDefaultModel(provider);
92
+ const apiKey = configManager?.getApiKey?.(provider, config) || '';
93
+
94
+ console.log(`[PrmdEvaluator] Executing: provider=${provider}, model=${model}`);
95
+
96
+ if (!apiKey && provider !== 'ollama') {
97
+ return {
98
+ evaluator: 'prmd',
99
+ status: 'error',
100
+ reason: `No API key configured for provider "${provider}"`,
101
+ duration: Date.now() - start,
102
+ };
103
+ }
104
+
105
+ const execResult = await executor.callLLM(provider, model, compiled, apiKey);
106
+
107
+ if (!execResult.success) {
108
+ return {
109
+ evaluator: 'prmd',
110
+ status: 'error',
111
+ reason: execResult.error || 'Evaluator LLM execution failed',
112
+ duration: Date.now() - start,
113
+ };
114
+ }
115
+
116
+ const response = execResult.response || execResult.content || '';
117
+ if (!response) {
118
+ return {
119
+ evaluator: 'prmd',
120
+ status: 'error',
121
+ reason: 'No response from evaluator',
122
+ duration: Date.now() - start,
123
+ };
124
+ }
125
+
126
+ // Parse PASS/FAIL from response
127
+ return this.parseEvaluatorResponse(response, Date.now() - start);
128
+ } catch (err) {
129
+ return {
130
+ evaluator: 'prmd',
131
+ status: 'error',
132
+ reason: err instanceof Error ? err.message : String(err),
133
+ duration: Date.now() - start,
134
+ };
135
+ }
136
+ }
137
+
138
+ private async resolveEvaluatorContent(assertion: AssertionDef): Promise<string | null> {
139
+ // If prompt: is specified, resolve it (registry ref, local file)
140
+ if (assertion.prompt) {
141
+ return this.resolvePromptTarget(assertion.prompt);
142
+ }
143
+
144
+ // No prompt: field — use the content block of the .test.prmd
145
+ return this.options.evaluatorPrompt || null;
146
+ }
147
+
148
+ private async resolvePromptTarget(prompt: string): Promise<string | null> {
149
+ // Registry reference: @scope/package@version
150
+ if (prompt.startsWith('@')) {
151
+ return this.wrapAsInherits(prompt);
152
+ }
153
+
154
+ // Local file path
155
+ const resolved = path.resolve(this.options.testFileDir, prompt);
156
+ if (!fs.existsSync(resolved)) {
157
+ throw new Error(`Evaluator prompt file not found: ${resolved}`);
158
+ }
159
+
160
+ return fs.readFileSync(resolved, 'utf-8');
161
+ }
162
+
163
+ /**
164
+ * Wrap a registry reference as a minimal .prmd that inherits from the evaluator package.
165
+ * The compiler handles resolution, download, and caching.
166
+ */
167
+ private wrapAsInherits(registryRef: string): string {
168
+ return [
169
+ '---',
170
+ `inherits: "${registryRef}"`,
171
+ 'parameters:',
172
+ ' - name: prompt',
173
+ ' type: string',
174
+ ' - name: response',
175
+ ' type: string',
176
+ ' - name: params',
177
+ ' type: string',
178
+ '---',
179
+ '',
180
+ ].join('\n');
181
+ }
182
+
183
+ private async compileEvaluator(
184
+ cli: CompilerModule,
185
+ content: string,
186
+ context: EvaluatorContext
187
+ ): Promise<string | null> {
188
+ // If content doesn't start with frontmatter, wrap it with minimal frontmatter
189
+ // so the compiler can process it. Content blocks from .test.prmd are raw markdown.
190
+ let prmdContent = content;
191
+ if (!content.trimStart().startsWith('---')) {
192
+ prmdContent = [
193
+ '---',
194
+ 'id: evaluator',
195
+ 'name: "Test Evaluator"',
196
+ 'version: 0.0.1',
197
+ 'parameters:',
198
+ ' - name: prompt',
199
+ ' type: string',
200
+ ' - name: response',
201
+ ' type: string',
202
+ ' - name: params',
203
+ ' type: object',
204
+ '---',
205
+ '',
206
+ content,
207
+ ].join('\n');
208
+ }
209
+
210
+ const memFs = new cli.MemoryFileSystem({ '/evaluator.prmd': prmdContent });
211
+ const compiler = new cli.PrompdCompiler();
212
+
213
+ // Inject evaluation context as template variables
214
+ const parameters: Record<string, string> = {
215
+ prompt: context.prompt,
216
+ response: context.response,
217
+ params: JSON.stringify(context.params, null, 2),
218
+ };
219
+
220
+ // Also expose individual params via dot notation
221
+ for (const [key, value] of Object.entries(context.params)) {
222
+ parameters[`params.${key}`] = String(value);
223
+ }
224
+
225
+ const result = await compiler.compile('/evaluator.prmd', {
226
+ outputFormat: 'markdown',
227
+ parameters,
228
+ fileSystem: memFs,
229
+ workspaceRoot: this.options.workspaceRoot,
230
+ registryUrl: this.options.registryUrl,
231
+ });
232
+
233
+ // CLI compile() may return a string directly or an object
234
+ if (typeof result === 'string') {
235
+ return result || null;
236
+ }
237
+ return result.output || null;
238
+ }
239
+
240
+ private parseEvaluatorResponse(response: string, duration: number): AssertionResult {
241
+ const firstLine = response.trim().split('\n')[0];
242
+ const match = firstLine.match(PASS_FAIL_REGEX);
243
+
244
+ if (!match) {
245
+ return {
246
+ evaluator: 'prmd',
247
+ status: 'error',
248
+ reason: `Evaluator response did not start with PASS or FAIL. Got: "${firstLine.substring(0, 100)}"`,
249
+ duration,
250
+ };
251
+ }
252
+
253
+ const verdict = match[1].toUpperCase();
254
+ const reason = match[2]?.trim() || undefined;
255
+
256
+ return {
257
+ evaluator: 'prmd',
258
+ status: verdict === 'PASS' ? 'pass' : 'fail',
259
+ reason: reason || `Evaluator returned ${verdict}`,
260
+ duration,
261
+ };
262
+ }
263
+
264
+ private getDefaultModel(provider: string): string {
265
+ const defaults: Record<string, string> = {
266
+ openai: 'gpt-4o',
267
+ anthropic: 'claude-sonnet-4-20250514',
268
+ groq: 'llama-3.1-70b-versatile',
269
+ google: 'gemini-2.0-flash',
270
+ mistral: 'mistral-large-latest',
271
+ deepseek: 'deepseek-chat',
272
+ };
273
+ return defaults[provider.toLowerCase()] || 'gpt-4o';
274
+ }
275
+
276
+ private async getCli(): Promise<CompilerModule> {
277
+ if (!this.cliModule) {
278
+ throw new Error(
279
+ '@prompd/cli module not provided. Pass it via PrmdEvaluatorOptions.cliModule'
280
+ );
281
+ }
282
+ return this.cliModule;
283
+ }
284
+ }
@@ -0,0 +1,152 @@
1
+ /**
2
+ * Script Evaluator - runs external scripts with stdin/stdout contract.
3
+ *
4
+ * Contract:
5
+ * - Receives JSON on stdin: { input, output, params, metadata }
6
+ * - Exit code 0 = PASS, 1 = FAIL, other = ERROR
7
+ * - Stdout = reason (optional)
8
+ */
9
+
10
+ import { spawn } from 'child_process';
11
+ import * as path from 'path';
12
+ import * as fs from 'fs';
13
+ import type { Evaluator, EvaluatorContext } from './types';
14
+ import type { AssertionDef, AssertionResult, EvaluateTarget } from '../types';
15
+
16
+ const SCRIPT_TIMEOUT_MS = 30_000;
17
+
18
+ export class ScriptEvaluator implements Evaluator {
19
+ readonly type = 'script';
20
+ private testFileDir: string;
21
+
22
+ constructor(testFileDir: string) {
23
+ this.testFileDir = testFileDir;
24
+ }
25
+
26
+ async evaluate(assertion: AssertionDef, context: EvaluatorContext): Promise<AssertionResult> {
27
+ const start = Date.now();
28
+ const scriptPath = assertion.run;
29
+
30
+ if (!scriptPath) {
31
+ return {
32
+ evaluator: 'script',
33
+ status: 'error',
34
+ reason: 'No "run" path specified for script evaluator',
35
+ duration: Date.now() - start,
36
+ };
37
+ }
38
+
39
+ const resolvedPath = path.resolve(this.testFileDir, scriptPath);
40
+
41
+ if (!fs.existsSync(resolvedPath)) {
42
+ return {
43
+ evaluator: 'script',
44
+ status: 'error',
45
+ reason: `Script not found: ${resolvedPath}`,
46
+ duration: Date.now() - start,
47
+ };
48
+ }
49
+
50
+ // Validate script stays within the test file's directory tree
51
+ const normalizedScript = path.normalize(resolvedPath);
52
+ const normalizedBase = path.normalize(this.testFileDir);
53
+ if (!normalizedScript.startsWith(normalizedBase)) {
54
+ return {
55
+ evaluator: 'script',
56
+ status: 'error',
57
+ reason: `Script path escapes test directory: ${scriptPath}`,
58
+ duration: Date.now() - start,
59
+ };
60
+ }
61
+
62
+ try {
63
+ const result = await this.runScript(resolvedPath, context, assertion);
64
+ return {
65
+ evaluator: 'script',
66
+ status: result.exitCode === 0 ? 'pass' : 'fail',
67
+ reason: result.stdout.trim() || (result.exitCode === 0 ? 'Script passed' : 'Script failed'),
68
+ duration: Date.now() - start,
69
+ };
70
+ } catch (err) {
71
+ return {
72
+ evaluator: 'script',
73
+ status: 'error',
74
+ reason: err instanceof Error ? err.message : String(err),
75
+ duration: Date.now() - start,
76
+ };
77
+ }
78
+ }
79
+
80
+ private runScript(
81
+ scriptPath: string,
82
+ context: EvaluatorContext,
83
+ assertion: AssertionDef
84
+ ): Promise<{ exitCode: number; stdout: string; stderr: string }> {
85
+ return new Promise((resolve, reject) => {
86
+ const { command, args } = this.getRunner(scriptPath);
87
+ const child = spawn(command, args, {
88
+ cwd: this.testFileDir,
89
+ timeout: SCRIPT_TIMEOUT_MS,
90
+ stdio: ['pipe', 'pipe', 'pipe'],
91
+ shell: process.platform === 'win32',
92
+ });
93
+
94
+ let stdout = '';
95
+ let stderr = '';
96
+
97
+ child.stdout.on('data', (data: Buffer) => {
98
+ stdout += data.toString();
99
+ });
100
+
101
+ child.stderr.on('data', (data: Buffer) => {
102
+ stderr += data.toString();
103
+ });
104
+
105
+ child.on('error', (err) => {
106
+ reject(new Error(`Failed to spawn script: ${err.message}`));
107
+ });
108
+
109
+ child.on('close', (code) => {
110
+ if (code === null) {
111
+ reject(new Error('Script process was killed (timeout or signal)'));
112
+ return;
113
+ }
114
+ resolve({ exitCode: code, stdout, stderr });
115
+ });
116
+
117
+ // Send context as JSON on stdin, include target so script knows what to evaluate
118
+ const target: EvaluateTarget = assertion.evaluate || 'response';
119
+ const payload = JSON.stringify({
120
+ target,
121
+ prompt: context.prompt,
122
+ response: context.response,
123
+ params: context.params,
124
+ metadata: context.metadata,
125
+ });
126
+
127
+ child.stdin.write(payload);
128
+ child.stdin.end();
129
+ });
130
+ }
131
+
132
+ private getRunner(scriptPath: string): { command: string; args: string[] } {
133
+ const ext = path.extname(scriptPath).toLowerCase();
134
+
135
+ switch (ext) {
136
+ case '.ts':
137
+ return { command: 'npx', args: ['tsx', scriptPath] };
138
+ case '.js':
139
+ case '.mjs':
140
+ return { command: 'node', args: [scriptPath] };
141
+ case '.py':
142
+ return { command: 'python', args: [scriptPath] };
143
+ case '.sh':
144
+ return { command: 'bash', args: [scriptPath] };
145
+ case '.ps1':
146
+ return { command: 'powershell', args: ['-File', scriptPath] };
147
+ default:
148
+ // For unknown extensions, try running directly (relies on shebang or OS association)
149
+ return { command: scriptPath, args: [] };
150
+ }
151
+ }
152
+ }
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Evaluator interfaces for @prompd/test
3
+ */
4
+
5
+ import type { AssertionDef, AssertionResult } from '../types';
6
+
7
+ export interface EvaluatorContext {
8
+ prompt: string;
9
+ response: string;
10
+ params: Record<string, unknown>;
11
+ metadata: {
12
+ provider: string;
13
+ model: string;
14
+ duration: number;
15
+ };
16
+ }
17
+
18
+ export interface Evaluator {
19
+ readonly type: string;
20
+ evaluate(
21
+ assertion: AssertionDef,
22
+ context: EvaluatorContext
23
+ ): Promise<AssertionResult>;
24
+ }