npm - llm-testrunner-components - Versions diffs - 1.1.0 → 1.2.1 - Mend

llm-testrunner-components 1.1.0 → 1.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Files changed (82) hide show

package/dist/cjs/index.cjs.js CHANGED Viewed

@@ -64,20 +64,6 @@ class RateLimitedFetcher {
     }
 }
-var EvaluationApproach;
-(function (EvaluationApproach) {
-    EvaluationApproach["EXACT"] = "exact";
-    EvaluationApproach["SEMANTIC"] = "semantic";
-    EvaluationApproach["ROUGE_1"] = "rouge-1";
-    EvaluationApproach["ROUGE_L"] = "rouge-L";
-    EvaluationApproach["BLEU"] = "bleu";
-})(EvaluationApproach || (EvaluationApproach = {}));
-// Array of all evaluation approach values for UI components
-const EvaluationApproachValues = Object.values(EvaluationApproach);
-const DEFAULT_ROUGE_PASS_SCORE = 0.7;
-const DEFAULT_SEMANTIC_PASS_SCORE = 0.7;
-const DEFAULT_BLEU_PASS_SCORE = 0.7;
 /**
  * Reads a file asynchronously and returns its content as a string
  * @param file - The File object to read
@@ -120,23 +106,10 @@ function formatTestSuiteAsJson(testCases) {
         id: testCase.id,
         question: testCase.question,
         expectedOutcome: testCase.expectedOutcome,
-        evaluationParameters: testCase.evaluationParameters,
     }));
     return JSON.stringify(exportData, null, 2);
 }
-function serializeExpectedOutcome(expectedOutcome, joinWith = '\n') {
-    return (expectedOutcome || [])
-        .map(field => {
-        if (field.type === 'chips-input') {
-            return field.value.join(', ');
-        }
-        return field.value;
-    })
-        .join(joinWith)
-        .trim();
-}
 /**
  * Escapes a CSV field by wrapping it in quotes if it contains special characters
  * @param field - The field to escape
@@ -155,48 +128,63 @@ function escapeCsvField(field) {
  */
 function exportTestResultsToCsv(testCases) {
     const csvRows = [];
+    const maxFieldCount = testCases.reduce((max, testCase) => Math.max(max, (testCase.expectedOutcome || []).length), 0);
     // Add header row
     const headers = [
         'Question',
-        'Expected Keywords',
-        'Generated Keywords',
-        'Keywords Match',
         'Response Time (s)',
-        'Evaluation Approach',
-        'Evaluation Score',
     ];
+    for (let i = 1; i <= maxFieldCount; i++) {
+        headers.push('Field Name');
+        headers.push('Expected Keywords');
+        headers.push('Generated Keywords');
+        headers.push('Evaluation Strategy');
+        headers.push('Passed Evaluation');
+        headers.push('Keyword Match');
+        headers.push('Score');
+        if (i < maxFieldCount) {
+            headers.push('');
+        }
+    }
     csvRows.push(headers.join(','));
-    // Add data rows
+    // Add data rows (one row per test case)
     testCases.forEach(testCase => {
-        const expectedOutcome = serializeExpectedOutcome(testCase.expectedOutcome || [], ' | ');
-        const evaluationApproach = testCase.evaluationParameters?.approach || '';
-        const score = testCase.evaluationResult?.evaluationApproachResult?.score;
-        const evaluationScore = score !== undefined ? score.toString() : '';
-        let generatedKeywords = '';
-        let keywordsMatch = '';
-        if (testCase.evaluationResult) {
-            const foundKeywords = testCase.evaluationResult.keywordMatches
-                .filter(match => match.found)
-                .map(match => match.keyword);
-            generatedKeywords = foundKeywords.join('; ');
-            // Calculate match percentages
-            const keywordMatchCount = testCase.evaluationResult.keywordMatches.filter(m => m.found).length;
-            const totalKeywords = testCase.evaluationResult.keywordMatches.length;
-            keywordsMatch =
-                totalKeywords > 0 ? `${keywordMatchCount}/${totalKeywords}` : 'N/A';
-        }
         const responseTime = testCase.responseTime
             ? (testCase.responseTime / 1000).toFixed(3)
             : 'N/A';
-        const row = [
-            escapeCsvField(testCase.question),
-            escapeCsvField(expectedOutcome),
-            escapeCsvField(generatedKeywords),
-            keywordsMatch,
-            responseTime,
-            escapeCsvField(evaluationApproach),
-            escapeCsvField(evaluationScore),
-        ];
+        const row = [escapeCsvField(testCase.question), responseTime];
+        for (let i = 0; i < maxFieldCount; i++) {
+            const field = testCase.expectedOutcome?.[i];
+            const fieldResult = testCase.evaluationResult?.fieldResults?.find(result => result.index === i);
+            const expectedKeywords = fieldResult?.expectedValue ??
+                (field
+                    ? field.type === 'chips-input'
+                        ? field.value.join(', ')
+                        : field.value
+                    : '');
+            const generatedKeywords = (fieldResult?.keywordMatches || [])
+                .filter(match => match.found)
+                .map(match => match.keyword)
+                .join('; ');
+            const matchedCount = (fieldResult?.keywordMatches || []).filter(match => match.found).length;
+            const totalMatches = fieldResult?.keywordMatches?.length || 0;
+            const keywordMatch = totalMatches > 0 ? `${matchedCount}/${totalMatches}` : '';
+            const score = fieldResult?.evaluationApproachResult?.score !== undefined
+                ? fieldResult.evaluationApproachResult.score.toFixed(2)
+                : '';
+            row.push(escapeCsvField(field?.label || ''));
+            row.push(escapeCsvField(expectedKeywords || ''));
+            row.push(escapeCsvField(generatedKeywords));
+            row.push(escapeCsvField(fieldResult?.evaluationParameters.approach ||
+                field?.evaluationParameters?.approach ||
+                ''));
+            row.push(fieldResult ? (fieldResult.passed ? 'TRUE' : 'FALSE') : '');
+            row.push(keywordMatch);
+            row.push(score);
+            if (i < maxFieldCount - 1) {
+                row.push('');
+            }
+        }
         csvRows.push(row.join(','));
     });
     return csvRows.join('\n');
@@ -255,6 +243,43 @@ function v4(options, buf, offset) {
   return unsafeStringify(rnds);
 }
+var EvaluationApproach;
+(function (EvaluationApproach) {
+    EvaluationApproach["EXACT"] = "exact";
+    EvaluationApproach["SEMANTIC"] = "semantic";
+    EvaluationApproach["ROUGE_1"] = "rouge-1";
+    EvaluationApproach["ROUGE_L"] = "rouge-L";
+    EvaluationApproach["BLEU"] = "bleu";
+})(EvaluationApproach || (EvaluationApproach = {}));
+// Array of all evaluation approach values for UI components
+const EvaluationApproachValues = Object.values(EvaluationApproach);
+const DEFAULT_ROUGE_PASS_SCORE = 0.7;
+const DEFAULT_SEMANTIC_PASS_SCORE = 0.7;
+const DEFAULT_BLEU_PASS_SCORE = 0.7;
+const SELECT_ONLY_APPROACHES = [EvaluationApproach.EXACT];
+function getAllowedApproachesForFieldType(fieldType) {
+    if (fieldType === 'select') {
+        return SELECT_ONLY_APPROACHES;
+    }
+    return EvaluationApproachValues;
+}
+function isApproachAllowedForFieldType(fieldType, approach) {
+    return getAllowedApproachesForFieldType(fieldType).includes(approach);
+}
+function normalizeEvaluationParametersForField(fieldType, evaluationParameters) {
+    const allowedApproaches = getAllowedApproachesForFieldType(fieldType);
+    const fallbackApproach = allowedApproaches[0];
+    const rawApproach = evaluationParameters?.approach;
+    const approach = rawApproach && allowedApproaches.includes(rawApproach)
+        ? rawApproach
+        : fallbackApproach;
+    return {
+        ...evaluationParameters,
+        approach,
+    };
+}
 const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
     {
         type: 'textarea',
@@ -263,6 +288,12 @@ const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
         rows: 2,
     },
 ];
+function normalizeExpectedOutcomeField(field) {
+    return {
+        ...field,
+        evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
+    };
+}
 /**
  * Creates a new test case with default values
  * @returns A new TestCase object with a unique ID
@@ -272,9 +303,6 @@ function createTestCase(expectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA)
         id: v4(),
         question: '',
         expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),
-        evaluationParameters: {
-            approach: EvaluationApproach.EXACT,
-        },
         isRunning: false,
     };
 }
@@ -284,35 +312,35 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
             return {
                 type: 'text',
                 label: schemaField.label,
-                required: schemaField.required,
                 placeholder: schemaField.placeholder,
                 value: '',
+                evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
             };
         case 'textarea':
             return {
                 type: 'textarea',
                 label: schemaField.label,
-                required: schemaField.required,
                 placeholder: schemaField.placeholder,
                 rows: schemaField.rows,
                 value: '',
+                evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
             };
         case 'chips-input':
             return {
                 type: 'chips-input',
                 label: schemaField.label,
-                required: schemaField.required,
                 placeholder: schemaField.placeholder,
                 value: [],
+                evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
             };
         case 'select':
             return {
                 type: 'select',
                 label: schemaField.label,
-                required: schemaField.required,
                 placeholder: schemaField.placeholder,
                 value: '',
                 options: schemaField.options,
+                evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
             };
         default: {
             const _exhaustiveCheck = schemaField;
@@ -323,32 +351,19 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
 function createExpectedOutcomeFromSchema(expectedOutcomeSchema) {
     return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);
 }
-function migrateLegacyExpectedOutcomeString(value) {
-    return [
-        {
-            type: 'textarea',
-            label: 'Expected Outcome',
-            value,
-        },
-    ];
-}
 /**
  * Creates a runtime test case from validated input data.
- * The input is expected to already satisfy `TestCaseInput` (legacy string or v2 shape),
- * and this function only performs normalization/defaulting (including legacy migration).
+ * The input is expected to already satisfy `TestCaseInput`,
+ * and this function only performs normalization/defaulting.
  *
  * @param data - Validated test case input
  * @returns A normalized TestCase object with runtime defaults applied
  */
 function createTestCaseFromInput(data) {
-    let expectedOutcome;
-    if (typeof data.expectedOutcome === 'string') {
-        expectedOutcome = migrateLegacyExpectedOutcomeString(data.expectedOutcome);
-    }
-    else {
-        expectedOutcome = data.expectedOutcome;
-    }
-    return { ...data, expectedOutcome };
+    return {
+        ...data,
+        expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),
+    };
 }
 /** A special constant with type `never` */
@@ -4938,27 +4953,43 @@ function superRefine(fn) {
 const nonEmptyString = string().trim().min(1);
 const optionalPositiveInt = number().int().positive().optional();
 const optionalString = string().optional();
-const optionalBoolean = boolean().optional();
 const selectOptionsSchema = array(nonEmptyString).min(1);
+const optionalNumber = number().optional();
+const evaluationParametersSchema = object({
+    approach: _enum(EvaluationApproach),
+    threshold: optionalNumber,
+});
+const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine((parameters, ctx) => {
+    if (!isApproachAllowedForFieldType('select', parameters.approach)) {
+        ctx.addIssue({
+            code: 'custom',
+            path: ['approach'],
+            message: `select fields only support "${EvaluationApproach.EXACT}" evaluation approach.`,
+        });
+    }
+});
 const defaultExpectedOutcomeBaseSchema = object({
     label: nonEmptyString,
-    required: optionalBoolean,
     placeholder: optionalString,
 });
 const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
     text: baseSchema.extend({
         type: literal('text'),
+        evaluationParameters: evaluationParametersSchema.optional(),
     }),
     textarea: baseSchema.extend({
         type: literal('textarea'),
         rows: optionalPositiveInt,
+        evaluationParameters: evaluationParametersSchema.optional(),
     }),
     chipsInput: baseSchema.extend({
         type: literal('chips-input'),
+        evaluationParameters: evaluationParametersSchema.optional(),
     }),
     select: baseSchema.extend({
         type: literal('select'),
         options: selectOptionsSchema,
+        evaluationParameters: selectEvaluationParametersSchema.optional(),
     }),
 });
 function hasDuplicateChips(values) {
@@ -5020,33 +5051,16 @@ function validateExpectedOutcomeSchema(schema) {
     }
 }
-const evaluationParametersSchema = object({
-    approach: _enum(EvaluationApproach),
-    threshold: number().optional(),
-});
-const baseTestCaseInputSchema = object({
+const testCaseInputSchema = object({
     id: string(),
     question: string(),
-    evaluationParameters: evaluationParametersSchema.optional(),
-});
-const legacyTestCaseInputSchema = baseTestCaseInputSchema.extend({
-    expectedOutcome: string(),
-});
-const v2TestCaseInputSchema = baseTestCaseInputSchema.extend({
     expectedOutcome: expectedOutcomeArraySchema,
 });
-const testCaseInputSchema = union([
-    legacyTestCaseInputSchema,
-    v2TestCaseInputSchema,
-]);
-const testCaseInputArraySchema = array(testCaseInputSchema).min(1, {
-    message: 'The test suite is empty. Please provide at least one test case.',
-});
+const testCaseInputArraySchema = array(testCaseInputSchema);
 object({
     id: string(),
     question: string(),
     expectedOutcome: expectedOutcomeArraySchema,
-    evaluationParameters: evaluationParametersSchema.optional(),
     output: string().optional(),
     isRunning: boolean().optional(),
     error: string().optional(),
@@ -5097,19 +5111,69 @@ function importTestSuite(jsonContent) {
     }
 }
+function applyExpectedOutcomeChange(testCase, change) {
+    const { index } = change;
+    const expectedOutcome = [...(testCase.expectedOutcome || [])];
+    const target = expectedOutcome[index];
+    if (!target) {
+        return testCase;
+    }
+    switch (change.operation) {
+        case 'set-value': {
+            if (target.type === 'chips-input') {
+                return testCase;
+            }
+            expectedOutcome[index] = {
+                ...target,
+                value: change.value,
+            };
+            return { ...testCase, expectedOutcome };
+        }
+        case 'add-chip': {
+            if (target.type !== 'chips-input') {
+                return testCase;
+            }
+            expectedOutcome[index] = {
+                ...target,
+                value: [...target.value, change.value],
+            };
+            return { ...testCase, expectedOutcome };
+        }
+        case 'remove-chip': {
+            if (target.type !== 'chips-input') {
+                return testCase;
+            }
+            expectedOutcome[index] = {
+                ...target,
+                value: target.value.filter(chip => chip !== change.value),
+            };
+            return { ...testCase, expectedOutcome };
+        }
+        case 'set-evaluation-approach':
+            return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
+    }
+}
 /**
- * Updates the evaluation approach for a test case
- * @param testCase - The test case to update
- * @param approach - The new evaluation approach
- * @returns Updated test case with the new evaluation approach
+ * Updates the evaluation approach for a specific expected outcome field.
+ * Select fields always use exact matching.
  */
-function updateApproach(testCase, approach) {
+function updateExpectedOutcomeFieldApproach(testCase, fieldIndex, approach) {
+    const expectedOutcome = [...(testCase.expectedOutcome || [])];
+    const target = expectedOutcome[fieldIndex];
+    if (!target) {
+        return testCase;
+    }
+    const currentEvaluationParameters = target.evaluationParameters;
+    expectedOutcome[fieldIndex] = {
+        ...target,
+        evaluationParameters: normalizeEvaluationParametersForField(target.type, {
+            ...currentEvaluationParameters,
+            approach,
+        }),
+    };
     return {
         ...testCase,
-        evaluationParameters: {
-            ...testCase.evaluationParameters,
-            approach: approach,
-        },
+        expectedOutcome,
     };
 }
@@ -29555,6 +29619,7 @@ class SemanticEvaluator {
         }
     }
     async performEvaluation(request) {
+        const threshold = request.evaluationParameters?.threshold ?? DEFAULT_SEMANTIC_PASS_SCORE;
         try {
             await this.initialize();
             // Split expectedOutcome by newlines to create keywords array
@@ -29564,7 +29629,7 @@ class SemanticEvaluator {
                     .map(k => k.trim())
                     .filter(k => k.length > 0)
                 : [];
-            const keywordMatches = await evaluateKeywordsSemantically(SemanticEvaluator.extractor, request.actualResponse, expectedKeywords, DEFAULT_SEMANTIC_PASS_SCORE);
+            const keywordMatches = await evaluateKeywordsSemantically(SemanticEvaluator.extractor, request.actualResponse, expectedKeywords, threshold);
             const totalItems = keywordMatches.length;
             // calculate the overall score by averaging the score of the keyword matches
             const keywordScore = keywordMatches.reduce((acc, curr) => acc + curr.evaluationApproachResult.score, 0);
@@ -29572,7 +29637,7 @@ class SemanticEvaluator {
             const passed = keywordMatches.every(match => match.found);
             const evaluationParameters = {
                 approach: EvaluationApproach.SEMANTIC,
-                threshold: DEFAULT_SEMANTIC_PASS_SCORE,
+                threshold,
             };
             return {
                 testCaseId: request.testCaseId,
@@ -29594,7 +29659,7 @@ class SemanticEvaluator {
                 keywordMatches: [],
                 evaluationParameters: {
                     approach: EvaluationApproach.SEMANTIC,
-                    threshold: DEFAULT_SEMANTIC_PASS_SCORE,
+                    threshold,
                 },
                 evaluationApproachResult: {
                     score: 0,
@@ -29861,57 +29926,78 @@ function performBleuEvaluation(request) {
 class LLMEvaluationEngine {
     async evaluateResponse(request, callback) {
-        try {
-            const approach = request.evaluationParameters.approach;
-            switch (approach) {
-                case EvaluationApproach.BLEU: {
-                    const bleuResult = performBleuEvaluation(request);
-                    callback(bleuResult);
-                    break;
-                }
-                case EvaluationApproach.EXACT: {
-                    const exactResult = await performEvaluation(request);
-                    callback(exactResult);
-                    break;
-                }
-                case EvaluationApproach.ROUGE_1: {
-                    const rougeResult = await performRouge1Evaluation(request);
-                    callback(rougeResult);
-                    break;
-                }
-                case EvaluationApproach.ROUGE_L: {
-                    const rougeLResult = await performRougeLEvaluation(request);
-                    callback(rougeLResult);
-                    break;
-                }
-                case EvaluationApproach.SEMANTIC: {
-                    const semanticResult = await performSemanticEvaluation(request);
-                    callback(semanticResult);
-                    break;
-                }
-                default: {
-                    console.warn(`Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`);
-                    const fallbackResult = await performEvaluation(request);
-                    callback(fallbackResult);
-                }
-            }
-        }
-        catch (error) {
-            console.error('Evaluation failed:', error);
-            const errorResult = {
+        const settledResults = await Promise.allSettled(request.fields.map(async (field) => {
+            const fieldRequest = {
                 testCaseId: request.testCaseId,
+                question: request.question,
+                actualResponse: request.actualResponse,
+                expectedOutcome: field.expectedValue,
+                evaluationParameters: field.evaluationParameters,
+            };
+            const result = await this.evaluateField(fieldRequest);
+            const fieldResult = {
+                index: field.index,
+                label: field.label,
+                type: field.type,
+                expectedValue: field.expectedValue,
+                passed: result.passed,
+                keywordMatches: result.keywordMatches,
+                evaluationParameters: result.evaluationParameters,
+                evaluationApproachResult: result.evaluationApproachResult,
+            };
+            return fieldResult;
+        }));
+        const fieldResults = settledResults.map((settledResult, index) => {
+            const field = request.fields[index];
+            if (settledResult.status === 'fulfilled') {
+                return settledResult.value;
+            }
+            return {
+                index: field.index,
+                label: field.label,
+                type: field.type,
+                expectedValue: field.expectedValue,
                 passed: false,
                 keywordMatches: [],
-                timestamp: new Date().toISOString(),
-                evaluationParameters: request.evaluationParameters,
+                evaluationParameters: field.evaluationParameters,
                 evaluationApproachResult: {
                     score: 0,
-                    approachUsed: EvaluationApproach.EXACT,
+                    approachUsed: field.evaluationParameters.approach,
                 },
+                error: this.getSafeErrorMessage(settledResult.reason),
             };
-            callback(errorResult);
+        });
+        const keywordMatches = fieldResults.flatMap(field => field.keywordMatches);
+        const passed = fieldResults.every(field => field.passed && !field.error);
+        callback({
+            testCaseId: request.testCaseId,
+            passed,
+            keywordMatches,
+            fieldResults,
+            timestamp: new Date().toISOString(),
+        });
+    }
+    async evaluateField(request) {
+        const approach = request.evaluationParameters.approach;
+        switch (approach) {
+            case EvaluationApproach.BLEU:
+                return performBleuEvaluation(request);
+            case EvaluationApproach.EXACT:
+                return performEvaluation(request);
+            case EvaluationApproach.ROUGE_1:
+                return performRouge1Evaluation(request);
+            case EvaluationApproach.ROUGE_L:
+                return performRougeLEvaluation(request);
+            case EvaluationApproach.SEMANTIC:
+                return performSemanticEvaluation(request);
+            default:
+                console.warn(`Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`);
+                return performEvaluation(request);
         }
     }
+    getSafeErrorMessage(error) {
+        return error instanceof Error ? error.message : 'Field evaluation failed.';
+    }
 }
 /**
@@ -29932,12 +30018,18 @@ class EvaluationService {
             console.warn('⚠️ No output to evaluate for test case:', testCase.id);
             return;
         }
+        const fields = (testCase.expectedOutcome || []).map((field, index) => ({
+            index,
+            label: field.label,
+            type: field.type,
+            expectedValue: getFieldExpectedValue(field),
+            evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
+        }));
         const evaluationRequest = {
             testCaseId: testCase.id,
             question: testCase.question,
-            expectedOutcome: serializeExpectedOutcome(testCase.expectedOutcome),
             actualResponse: testCase.output,
-            evaluationParameters: testCase.evaluationParameters,
+            fields,
         };
         await this.engine.evaluateResponse(evaluationRequest, (result) => {
             console.log('📊 Evaluation result received:', result);
@@ -29945,6 +30037,12 @@ class EvaluationService {
         });
     }
 }
+function getFieldExpectedValue(field) {
+    if (field.type === 'chips-input') {
+        return field.value.join(', ');
+    }
+    return field.value;
+}
 const Button = (props, children) => {
     const { variant = 'primary', size = 'md', disabled = false, loading = false, onClick, type = 'button', 'class': className = '', icon, 'aria-label': ariaLabel, } = props;
@@ -29966,7 +30064,7 @@ const Button = (props, children) => {
     return (index.h("button", { type: type, class: classes, disabled: disabled || loading, onClick: onClick, "aria-busy": loading, "aria-label": ariaLabel }, icon && index.h("span", { class: "icon" }, icon), children));
 };
-const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isRunningAll, useSave = false, isSaving = false, onImport, onExportSuite, onExportResults, onRunAll, onSave, }) => {
+const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isRunningAll, useSave = false, isSaving = false, usePromptEditor = false, onImport, onExportSuite, onExportResults, onRunAll, onSave, }) => {
     let fileInputRef;
     const handleFileSelect = () => {
         fileInputRef?.click();
@@ -29979,7 +30077,7 @@ const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isR
             onImport(file);
         }
     };
-    return (index.h("header", { class: "test-runner-header" }, index.h("div", { class: "test-runner-header__left" }, index.h("input", { class: "test-runner-header--hidden", type: "file", ref: el => (fileInputRef = el), onChange: handleFileChange, accept: ".json,application/json" }), index.h(Button, { variant: "secondary", size: "md", onClick: handleFileSelect, icon: "\u2191" }, "Import Test Suite"), index.h(Button, { variant: "secondary", size: "md", onClick: onExportSuite, disabled: isExportingTestSuite, loading: isExportingTestSuite, icon: isExportingTestSuite ? '⏳' : '↓' }, isExportingTestSuite ? 'Exporting...' : 'Export Test Suite')), index.h("div", { class: "test-runner-header__right" }, index.h(Button, { variant: "secondary", size: "md", icon: "\u2699\uFE0F" }, "Prompt Editor"), index.h(Button, { variant: "secondary", size: "md", onClick: onExportResults, disabled: isExportingTestResults, loading: isExportingTestResults, icon: isExportingTestResults ? '⏳' : '↓' }, isExportingTestResults ? 'Exporting...' : 'Export Test Results'), useSave && (index.h(Button, { variant: "secondary", size: "md", onClick: onSave, disabled: isSaving, loading: isSaving, icon: isSaving ? '⏳' : '💾' }, isSaving ? 'Saving...' : 'Save')), index.h(Button, { "aria-label": "Run All", variant: "primary", size: "md", onClick: onRunAll, disabled: isRunningAll, loading: isRunningAll }, isRunningAll ? 'Running...' : 'Run All'))));
+    return (index.h("header", { class: "test-runner-header" }, index.h("div", { class: "test-runner-header__left" }, index.h("input", { class: "test-runner-header--hidden", type: "file", ref: el => (fileInputRef = el), onChange: handleFileChange, accept: ".json,application/json" }), index.h(Button, { variant: "secondary", size: "md", onClick: handleFileSelect, icon: "\u2191" }, "Import Test Suite"), index.h(Button, { variant: "secondary", size: "md", onClick: onExportSuite, disabled: isExportingTestSuite, loading: isExportingTestSuite, icon: isExportingTestSuite ? '⏳' : '↓' }, isExportingTestSuite ? 'Exporting...' : 'Export Test Suite')), index.h("div", { class: "test-runner-header__right" }, usePromptEditor && (index.h(Button, { variant: "secondary", size: "md", icon: "\u2699\uFE0F" }, "Prompt Editor")), index.h(Button, { variant: "secondary", size: "md", onClick: onExportResults, disabled: isExportingTestResults, loading: isExportingTestResults, icon: isExportingTestResults ? '⏳' : '↓' }, isExportingTestResults ? 'Exporting...' : 'Export Test Results'), useSave && (index.h(Button, { variant: "secondary", size: "md", onClick: onSave, disabled: isSaving, loading: isSaving, icon: isSaving ? '⏳' : '💾' }, isSaving ? 'Saving...' : 'Save')), index.h(Button, { "aria-label": "Run All", variant: "primary", size: "md", onClick: onRunAll, disabled: isRunningAll, loading: isRunningAll }, isRunningAll ? 'Running...' : 'Run All'))));
 };
 const ResponseOutput = ({ output, isRunning, }) => {
@@ -29987,7 +30085,9 @@ const ResponseOutput = ({ output, isRunning, }) => {
 };
 const EvaluationSummary = ({ result, isRunning, }) => {
-    return (index.h("div", { class: "evaluation-summary" }, result ? (index.h("div", { class: "evaluation-summary__result" }, index.h("div", { class: `evaluation-summary__result-status evaluation-summary__result-status--${result.passed ? 'passed' : 'failed'}` }, result.passed ? '✅ PASSED' : '❌ FAILED'), index.h("div", { class: "evaluation-summary__details" }, "Keywords: ", result.keywordMatches.filter(m => m.found).length, "/", result.keywordMatches.length, " found"))) : (index.h("div", { class: "evaluation-summary__placeholder" }, isRunning ? 'Evaluating...' : ''))));
+    const fieldResults = result?.fieldResults || [];
+    const hasFieldResults = fieldResults.length > 0;
+    return (index.h("div", { class: "evaluation-summary" }, result ? (index.h("div", { class: "evaluation-summary__result" }, hasFieldResults ? (index.h("div", { class: "evaluation-summary__field-results" }, fieldResults.map(fieldResult => (index.h("div", { class: "evaluation-summary__field-result" }, index.h("div", { class: "evaluation-summary__field-header" }, index.h("span", { class: "evaluation-summary__field-label" }, fieldResult.label), index.h("span", { class: "evaluation-summary__field-approach" }, "Strategy: ", fieldResult.evaluationParameters.approach)), index.h("div", { class: "evaluation-summary__field-details" }, index.h("span", { class: `evaluation-summary__field-status evaluation-summary__field-status--${fieldResult.passed ? 'passed' : 'failed'}` }, fieldResult.passed ? 'PASSED' : 'FAILED'), fieldResult.error && (index.h("span", { class: "evaluation-summary__error-message" }, fieldResult.error)), index.h("span", null, "Score: ", fieldResult.evaluationApproachResult.score.toFixed(2)), index.h("span", null, "Matches:", ' ', fieldResult.keywordMatches.filter(match => match.found).length, "/", fieldResult.keywordMatches.length))))))) : null)) : (index.h("div", { class: "evaluation-summary__placeholder" }, isRunning ? 'Evaluating...' : ''))));
 };
 const IconButton = (props, children) => {
@@ -30023,6 +30123,24 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
     const emit = (detail) => onExpectedOutcomeChange({
         detail,
     });
+    const buildEvaluationConfig = (index, optionList) => ({
+        name: `expectedOutcomeEvaluation-${index}`,
+        fieldType: FormFieldType.SELECT,
+        label: 'Evaluation Approach',
+        placeholder: 'Select evaluation approach…',
+        required: true,
+        optionList,
+        defaultValue: EvaluationApproach.EXACT,
+    });
+    const renderEvaluationSelector = (field, index$1) => {
+        const optionList = getAllowedApproachesForFieldType(field.type);
+        return (index.h("app-select", { config: buildEvaluationConfig(index$1, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
+                testCaseId,
+                index: index$1,
+                operation: 'set-evaluation-approach',
+                value: e.detail.value,
+            }) }));
+    };
     return (index.h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index$1) => {
         if (field.type === 'textarea') {
             const config = {
@@ -30030,15 +30148,15 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
                 fieldType: FormFieldType.TEXT_AREA,
                 label: field.label,
                 placeholder: field.placeholder,
-                required: field.required,
+                required: true,
                 rows: field.rows || 2,
             };
-            return (index.h("app-textarea", { config: config, value: field.value, onValueChange: (e) => emit({
+            return (index.h("div", { class: "expected-outcome-renderer__group" }, index.h("app-textarea", { config: config, value: field.value, onValueChange: (e) => emit({
                     testCaseId,
                     index: index$1,
                     operation: 'set-value',
                     value: e.detail.value,
-                }) }));
+                }) }), renderEvaluationSelector(field, index$1)));
         }
         if (field.type === 'chips-input') {
             const config = {
@@ -30046,9 +30164,9 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
                 fieldType: FormFieldType.CHIPS,
                 label: field.label,
                 placeholder: field.placeholder,
-                required: field.required,
+                required: true,
             };
-            return (index.h("app-chips", { config: config, value: field.value, onAddChip: (e) => emit({
+            return (index.h("div", { class: "expected-outcome-renderer__group" }, index.h("app-chips", { config: config, value: field.value, onAddChip: (e) => emit({
                     testCaseId,
                     index: index$1,
                     operation: 'add-chip',
@@ -30058,7 +30176,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
                     index: index$1,
                     operation: 'remove-chip',
                     value: e.detail.value,
-                }) }));
+                }) }), renderEvaluationSelector(field, index$1)));
         }
         if (field.type === 'select') {
             const config = {
@@ -30066,26 +30184,26 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
                 fieldType: FormFieldType.SELECT,
                 label: field.label,
                 placeholder: field.placeholder,
-                required: field.required,
+                required: true,
                 optionList: field.options,
             };
-            return (index.h("app-select", { config: config, value: field.value, onValueChange: (e) => emit({
+            return (index.h("div", { class: "expected-outcome-renderer__group" }, index.h("app-select", { config: config, value: field.value, onValueChange: (e) => emit({
                     testCaseId,
                     index: index$1,
                     operation: 'set-value',
                     value: e.detail.value,
-                }) }));
+                }) }), renderEvaluationSelector(field, index$1)));
         }
-        return (index.h("div", { class: "expected-outcome-renderer__text" }, index.h("label", null, field.label), index.h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
+        return (index.h("div", { class: "expected-outcome-renderer__group" }, index.h("div", { class: "expected-outcome-renderer__text" }, index.h("label", null, field.label), index.h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
                 testCaseId,
                 index: index$1,
                 operation: 'set-value',
                 value: e.target.value,
-            }) })));
+            }) })), renderEvaluationSelector(field, index$1)));
     })));
 };
-const LLMTestCaseRow = ({ testCase, onRun, onDelete, onUpdateApproach, handleTestCaseChange, onExpectedOutcomeChange, }) => {
+const LLMTestCaseRow = ({ testCase, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, }) => {
     const questionConfig = {
         name: 'question',
         fieldType: FormFieldType.TEXT_AREA,
@@ -30095,26 +30213,17 @@ const LLMTestCaseRow = ({ testCase, onRun, onDelete, onUpdateApproach, handleTes
         required: true,
         rows: 3,
     };
-    const evaluationConfig = {
-        name: 'EvaluationApproach',
-        fieldType: FormFieldType.SELECT,
-        label: 'Evaluation',
-        placeholder: 'Select evaluation approach…',
-        required: true,
-        optionList: EvaluationApproachValues,
-        defaultValue: EvaluationApproach.EXACT,
-    };
     return (index.h("div", { class: "test-case-row", key: testCase.id }, index.h("div", { class: "test-case-row__input-column" }, index.h("app-textarea", { config: questionConfig, value: testCase.question, onValueChange: (e) => handleTestCaseChange({
             detail: {
                 testCaseId: testCase.id,
                 key: 'question',
                 value: e.detail.value,
             },
-        }) }), index.h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], onExpectedOutcomeChange: onExpectedOutcomeChange }), index.h("app-select", { config: evaluationConfig, value: testCase.evaluationParameters?.approach, onValueChange: (e) => onUpdateApproach(testCase, e.detail.value) })), index.h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), index.h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), index.h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
+        }) }), index.h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], onExpectedOutcomeChange: onExpectedOutcomeChange })), index.h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), index.h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), index.h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
 };
-const LLMTestCases = ({ testCases, onRun, onDelete, onUpdateApproach, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
-    return (index.h("div", { class: "test-cases" }, index.h("div", { class: "test-cases__column-headers" }, index.h("div", { class: "test-cases__column-header" }, "Input"), index.h("div", { class: "test-cases__column-header" }, "Output"), index.h("div", { class: "test-cases__column-header" }, "Evaluation"), index.h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (index.h(LLMTestCaseRow, { testCase: testCase, onRun: onRun, onDelete: onDelete, onUpdateApproach: onUpdateApproach, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), index.h("div", { class: "test-cases__add-section" }, index.h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
+const LLMTestCases = ({ testCases, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
+    return (index.h("div", { class: "test-cases" }, index.h("div", { class: "test-cases__column-headers" }, index.h("div", { class: "test-cases__column-header" }, "Input"), index.h("div", { class: "test-cases__column-header" }, "Output"), index.h("div", { class: "test-cases__column-header" }, "Evaluation"), index.h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (index.h(LLMTestCaseRow, { testCase: testCase, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), index.h("div", { class: "test-cases__add-section" }, index.h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
 };
 const tokensCss = () => `:host{--spacing:0.25rem;--spacing-1:calc(var(--spacing) * 1);--spacing-2:calc(var(--spacing) * 2);--spacing-3:calc(var(--spacing) * 3);--spacing-4:calc(var(--spacing) * 4);--spacing-5:calc(var(--spacing) * 5);--spacing-6:calc(var(--spacing) * 6);--spacing-8:calc(var(--spacing) * 8);--spacing-10:calc(var(--spacing) * 10);--spacing-12:calc(var(--spacing) * 12);--spacing-16:calc(var(--spacing) * 16);--spacing-20:calc(var(--spacing) * 20);--spacing-24:calc(var(--spacing) * 24);--radius-none:0;--radius-sm:0.125rem;--radius-md:0.375rem;--radius-lg:0.5rem;--radius-xl:0.75rem;--radius-2xl:1rem;--radius-3xl:1.5rem;--radius-full:9999px;--radius:var(--radius-lg);--font-size-xs:0.75rem;--font-size-sm:0.875rem;--font-size-base:1rem;--font-size-lg:1.125rem;--font-size-xl:1.25rem;--font-size-2xl:1.5rem;--font-size-3xl:1.875rem;--font-size-4xl:2.25rem;--font-weight-normal:400;--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--line-height-none:1;--line-height-tight:1.25;--line-height-snug:1.375;--line-height-normal:1.5;--line-height-relaxed:1.625;--line-height-loose:2;--letter-spacing-tight:-0.025em;--letter-spacing-normal:0;--letter-spacing-wide:0.05em;--shadow-sm:0 1px 2px 0 rgba(0, 0, 0, 0.05);--shadow-md:0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);--shadow-lg:0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);--shadow-xl:0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);--shadow-2xl:0 25px 50px -12px rgba(0, 0, 0, 
0.25);--border-width:1px;--z-base:0;--z-dropdown:1000;--z-sticky:1100;--z-modal:1200;--z-popover:1300;--z-tooltip:1400;--opacity-disabled:0.5;--opacity-hover:0.8;--opacity-muted:0.6;--max-w-sm:24rem;--max-w-md:28rem;--max-w-lg:32rem;--max-w-xl:42rem;--max-w-2xl:48rem;--max-w-full:100%;--breakpoint-sm:640px;--breakpoint-md:768px;--breakpoint-lg:1024px;--breakpoint-xl:1280px;--breakpoint-2xl:1536px;--background:#ffffff;--foreground:#0a0a0a;--card:#ffffff;--card-foreground:#0a0a0a;--popover:#ffffff;--popover-foreground:#0a0a0a;--primary:#0a0a0a;--primary-foreground:#fafafa;--secondary:#f4f4f5;--secondary-foreground:#0a0a0a;--muted:#f4f4f5;--muted-foreground:#71717a;--accent:#f4f4f5;--accent-foreground:#0a0a0a;--destructive:#ef4444;--destructive-foreground:#fafafa;--border:#e4e4e7;--input:#e4e4e7;--ring:#3b82f6;--success:#10b981;--success-foreground:#fafafa;--warning:#f59e0b;--warning-foreground:#fafafa;--info:#3b82f6;--info-foreground:#fafafa}:host([data-theme='dark']){--background:#0a0a0a;--foreground:#fafafa;--card:#171717;--card-foreground:#fafafa;--popover:#171717;--popover-foreground:#fafafa;--primary:#fafafa;--primary-foreground:#0a0a0a;--secondary:#27272a;--secondary-foreground:#fafafa;--muted:#27272a;--muted-foreground:#a1a1aa;--accent:#27272a;--accent-foreground:#fafafa;--destructive:#dc2626;--destructive-foreground:#fafafa;--border:#27272a;--input:#27272a;--ring:#3b82f6;--success:#059669;--success-foreground:#fafafa;--warning:#d97706;--warning-foreground:#fafafa;--info:#2563eb;--info-foreground:#fafafa}`;
@@ -30125,11 +30234,11 @@ const llmTestRunnerHeaderCss = () => `.test-runner-header{display:flex;justify-c
 const llmTestCasesCss = () => `.test-cases{background:var(--background)}.test-cases__column-headers{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);background:var(--border);border-bottom:2px solid var(--border)}.test-cases__column-header{background:var(--muted);padding:var(--spacing-4) var(--spacing-5);font-weight:var(--font-weight-semibold);color:var(--foreground);font-size:var(--font-size-sm);text-transform:uppercase;letter-spacing:var(--letter-spacing-wide)}.test-cases__add-section{padding:var(--spacing-6);text-align:center;background:var(--muted);border-top:var(--border-width) solid var(--border)}@media (max-width: 1200px){.test-cases__column-headers{display:none}}`;
-const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
+const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
 const rowActionsCss = () => `.row-actions{height:100%;padding:var(--spacing-5);background:var(--background);display:flex;flex-direction:column;gap:var(--spacing-3);align-items:center;justify-content:flex-start;align-self:flex-start}@media (max-width: 1200px){.row-actions{border-right:none;border-bottom:var(--border-width) solid var(--border);flex-direction:row;justify-content:center}}@media (max-width: 768px){.row-actions{padding:var(--spacing-4)}}`;
-const evaluationSummaryCss = () => `.evaluation-summary{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border);display:flex;flex-direction:column}.evaluation-summary__details{display:flex;flex-direction:column;gap:var(--spacing-2)}.evaluation-summary__placeholder{display:flex;align-items:center;justify-content:center;color:var(--muted-foreground);font-style:italic;flex:1;background:var(--muted);border:2px dashed var(--border);border-radius:var(--radius)}.evaluation-summary__result{display:flex;flex-direction:column;gap:var(--spacing-2)}.evaluation-summary__result-status{font-weight:var(--font-weight-semibold);font-size:var(--font-size-sm);padding:var(--spacing-2) var(--spacing-3);border-radius:var(--radius-md);text-align:center}.evaluation-summary__result-status--passed{background:var(--success);color:var(--success-foreground);border:var(--border-width) solid var(--success)}.evaluation-summary__result-status--failed{background:var(--destructive);color:var(--destructive-foreground);border:var(--border-width) solid var(--destructive)}@media (max-width: 1200px){.evaluation-summary{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.evaluation-summary{padding:var(--spacing-4)}}`;
+const evaluationSummaryCss = () => `.evaluation-summary{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border);display:flex;flex-direction:column}.evaluation-summary__field-results{display:flex;flex-direction:column;gap:var(--spacing-2);margin-top:var(--spacing-2)}.evaluation-summary__field-result{border:var(--border-width) solid var(--border);border-radius:var(--radius-md);padding:var(--spacing-2);display:flex;flex-direction:column;gap:var(--spacing-1)}.evaluation-summary__field-header{display:flex;flex-direction:column;gap:var(--spacing-1)}.evaluation-summary__field-label{font-weight:var(--font-weight-semibold);font-size:var(--font-size-xs)}.evaluation-summary__field-approach{color:var(--muted-foreground);font-size:11px}.evaluation-summary__field-details{display:flex;flex-direction:column;gap:var(--spacing-1);font-size:var(--font-size-xs)}.evaluation-summary__field-status{width:fit-content;padding:2px var(--spacing-2);border-radius:var(--radius-sm);font-size:11px;font-weight:var(--font-weight-semibold);border:var(--border-width) solid transparent}.evaluation-summary__field-status--passed{background:var(--success);color:var(--success-foreground);border-color:var(--success)}.evaluation-summary__field-status--failed{background:var(--destructive);color:var(--destructive-foreground);border-color:var(--destructive)}.evaluation-summary__error-message{color:var(--destructive);font-size:var(--font-size-xs)}.evaluation-summary__placeholder{display:flex;align-items:center;justify-content:center;color:var(--muted-foreground);font-style:italic;flex:1;background:var(--muted);border:2px dashed var(--border);border-radius:var(--radius)}.evaluation-summary__result{display:flex;flex-direction:column;gap:var(--spacing-2)}@media (max-width: 1200px){.evaluation-summary{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.evaluation-summary{padding:var(--spacing-4)}}`;
 const responseOutputCss = () => `.response-output{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border);display:flex;flex-direction:column}.response-output__content{background:var(--muted);border:var(--border-width) solid var(--border);border-radius:var(--radius);padding:var(--spacing-4);font-size:var(--font-size-sm);line-height:var(--line-height-relaxed);color:var(--foreground);white-space:pre-wrap;word-wrap:break-word;flex:1;overflow-y:auto;max-height:250px;overflow-x:scroll}.response-output__placeholder{display:flex;align-items:center;justify-content:center;color:var(--muted-foreground);font-style:italic;flex:1;background:var(--muted);border:2px dashed var(--border);border-radius:var(--radius)}@media (max-width: 1200px){.response-output{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.response-output{padding:var(--spacing-4)}}`;
@@ -30149,6 +30258,7 @@ const LLMTestRunner = class {
     save;
     delayMs = 500;
     useSave = false;
+    usePromptEditor = false;
     initialTestCases;
     defaultExpectedOutcomeSchema;
     testCases = [
@@ -30162,9 +30272,6 @@ const LLMTestRunner = class {
                     value: '',
                 },
             ],
-            evaluationParameters: {
-                approach: EvaluationApproach.EXACT,
-            },
             isRunning: false,
         },
     ];
@@ -30271,52 +30378,13 @@ const LLMTestRunner = class {
     deleteTestCase(id) {
         this.testCases = this.testCases.filter(tc => tc.id !== id);
     }
-    updateApproach(testCase, approach) {
-        if (testCase) {
-            const updated = updateApproach(testCase, approach);
-            this.updateTestCase(testCase.id, {
-                evaluationParameters: updated.evaluationParameters,
-            });
-        }
-    }
     handleExpectedOutcomeChange = (event) => {
-        const { testCaseId, index, operation, value } = event.detail;
+        const { testCaseId, ...change } = event.detail;
         this.testCases = this.testCases.map(tc => {
-            if (tc.id !== testCaseId)
-                return tc;
-            const expectedOutcome = [...(tc.expectedOutcome || [])];
-            const target = expectedOutcome[index];
-            if (!target)
+            if (tc.id !== testCaseId) {
                 return tc;
-            if (operation === 'set-value') {
-                if (target.type === 'chips-input') {
-                    return tc;
-                }
-                expectedOutcome[index] = { ...target, value: value || '' };
-                return { ...tc, expectedOutcome };
-            }
-            if (operation === 'add-chip') {
-                if (target.type !== 'chips-input' || !value) {
-                    return tc;
-                }
-                expectedOutcome[index] = {
-                    ...target,
-                    value: [...target.value, value],
-                };
-                return { ...tc, expectedOutcome };
-            }
-            if (operation === 'remove-chip') {
-                if (target.type !== 'chips-input' ||
-                    !value) {
-                    return tc;
-                }
-                expectedOutcome[index] = {
-                    ...target,
-                    value: target.value.filter(chip => chip !== value),
-                };
-                return { ...tc, expectedOutcome };
             }
-            return tc;
+            return applyExpectedOutcomeChange(tc, change);
         });
     };
     async evaluateResponse(testCase) {
@@ -30416,7 +30484,7 @@ const LLMTestRunner = class {
         }
     }
     render() {
-        return (index.h("div", { key: '5cbdc388678929c271fd2a040aca8118344024c3', class: "test-runner-container" }, index.h(LLMTestRunnerHeader, { key: '92533803732fc5ec28da802ac9d367f9fbbffe72', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), index.h(ErrorMessage, { key: 'c16a0334b1a71d676a128de18a83991c2625a075', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: 'e757f49052a9516c12af858b46b32a957707524c', class: "test-runner-container__content" }, index.h(LLMTestCases, { key: 'e9a9f6553a3ce97aeb80924b116e1b73c2397b15', testCases: this.testCases, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onUpdateApproach: (testCase, approach) => this.updateApproach(testCase, approach), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange }))));
+        return (index.h("div", { key: '323b5e140740bb72d4767c0763c382a6b125caa2', class: "test-runner-container" }, index.h(LLMTestRunnerHeader, { key: 'e1e2efdf6cfe5f406de7e26e745b5775f307d294', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), index.h(ErrorMessage, { key: 'c6a34b81f66c6cd835eb8bc253f7a28d68c49874', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: '674daad8a2754afc8144463e9a173690a3d1d589', class: "test-runner-container__content" }, index.h(LLMTestCases, { key: '96c1aeae37f56378b7a9b5d54be73c5df48ae448', testCases: this.testCases, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange }))));
     }
 };
 LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));