npm - llm-testrunner-components - Versions diffs - 1.2.4 → 1.3.0 - Mend

llm-testrunner-components 1.2.4 → 1.3.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (78) hide show

package/dist/cjs/index.cjs.js CHANGED Viewed

@@ -295,6 +295,7 @@ const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
 function normalizeExpectedOutcomeField(field) {
     return {
         ...field,
+        evaluationSource: field.evaluationSource || { type: 'text' },
         evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
     };
 }
@@ -318,6 +319,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
                 type: 'text',
                 label: schemaField.label,
                 placeholder: schemaField.placeholder,
+                evaluationSource: schemaField.evaluationSource || { type: 'text' },
                 value: '',
                 evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
             };
@@ -326,6 +328,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
                 type: 'textarea',
                 label: schemaField.label,
                 placeholder: schemaField.placeholder,
+                evaluationSource: schemaField.evaluationSource || { type: 'text' },
                 rows: schemaField.rows,
                 value: '',
                 evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -335,6 +338,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
                 type: 'chips-input',
                 label: schemaField.label,
                 placeholder: schemaField.placeholder,
+                evaluationSource: schemaField.evaluationSource || { type: 'text' },
                 value: [],
                 evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
             };
@@ -343,6 +347,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
                 type: 'select',
                 label: schemaField.label,
                 placeholder: schemaField.placeholder,
+                evaluationSource: schemaField.evaluationSource || { type: 'text' },
                 value: schemaField.options[0],
                 options: schemaField.options,
                 evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -2572,6 +2577,122 @@ function handleIntersectionResults(result, left, right) {
     result.value = merged.data;
     return result;
 }
+const $ZodRecord = /*@__PURE__*/ $constructor("$ZodRecord", (inst, def) => {
+    $ZodType.init(inst, def);
+    inst._zod.parse = (payload, ctx) => {
+        const input = payload.value;
+        if (!isPlainObject(input)) {
+            payload.issues.push({
+                expected: "record",
+                code: "invalid_type",
+                input,
+                inst,
+            });
+            return payload;
+        }
+        const proms = [];
+        const values = def.keyType._zod.values;
+        if (values) {
+            payload.value = {};
+            const recordKeys = new Set();
+            for (const key of values) {
+                if (typeof key === "string" || typeof key === "number" || typeof key === "symbol") {
+                    recordKeys.add(typeof key === "number" ? key.toString() : key);
+                    const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
+                    if (result instanceof Promise) {
+                        proms.push(result.then((result) => {
+                            if (result.issues.length) {
+                                payload.issues.push(...prefixIssues(key, result.issues));
+                            }
+                            payload.value[key] = result.value;
+                        }));
+                    }
+                    else {
+                        if (result.issues.length) {
+                            payload.issues.push(...prefixIssues(key, result.issues));
+                        }
+                        payload.value[key] = result.value;
+                    }
+                }
+            }
+            let unrecognized;
+            for (const key in input) {
+                if (!recordKeys.has(key)) {
+                    unrecognized = unrecognized ?? [];
+                    unrecognized.push(key);
+                }
+            }
+            if (unrecognized && unrecognized.length > 0) {
+                payload.issues.push({
+                    code: "unrecognized_keys",
+                    input,
+                    inst,
+                    keys: unrecognized,
+                });
+            }
+        }
+        else {
+            payload.value = {};
+            for (const key of Reflect.ownKeys(input)) {
+                if (key === "__proto__")
+                    continue;
+                let keyResult = def.keyType._zod.run({ value: key, issues: [] }, ctx);
+                if (keyResult instanceof Promise) {
+                    throw new Error("Async schemas not supported in object keys currently");
+                }
+                // Numeric string fallback: if key is a numeric string and failed, retry with Number(key)
+                // This handles z.number(), z.literal([1, 2, 3]), and unions containing numeric literals
+                const checkNumericKey = typeof key === "string" && number$1.test(key) && keyResult.issues.length;
+                if (checkNumericKey) {
+                    const retryResult = def.keyType._zod.run({ value: Number(key), issues: [] }, ctx);
+                    if (retryResult instanceof Promise) {
+                        throw new Error("Async schemas not supported in object keys currently");
+                    }
+                    if (retryResult.issues.length === 0) {
+                        keyResult = retryResult;
+                    }
+                }
+                if (keyResult.issues.length) {
+                    if (def.mode === "loose") {
+                        // Pass through unchanged
+                        payload.value[key] = input[key];
+                    }
+                    else {
+                        // Default "strict" behavior: error on invalid key
+                        payload.issues.push({
+                            code: "invalid_key",
+                            origin: "record",
+                            issues: keyResult.issues.map((iss) => finalizeIssue(iss, ctx, config())),
+                            input: key,
+                            path: [key],
+                            inst,
+                        });
+                    }
+                    continue;
+                }
+                const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
+                if (result instanceof Promise) {
+                    proms.push(result.then((result) => {
+                        if (result.issues.length) {
+                            payload.issues.push(...prefixIssues(key, result.issues));
+                        }
+                        payload.value[keyResult.value] = result.value;
+                    }));
+                }
+                else {
+                    if (result.issues.length) {
+                        payload.issues.push(...prefixIssues(key, result.issues));
+                    }
+                    payload.value[keyResult.value] = result.value;
+                }
+            }
+        }
+        if (proms.length) {
+            return Promise.all(proms).then(() => payload);
+        }
+        return payload;
+    };
+});
 const $ZodEnum = /*@__PURE__*/ $constructor("$ZodEnum", (inst, def) => {
     $ZodType.init(inst, def);
     const values = getEnumValues(def.entries);
@@ -4155,6 +4276,49 @@ const intersectionProcessor = (schema, ctx, json, params) => {
     ];
     json.allOf = allOf;
 };
+const recordProcessor = (schema, ctx, _json, params) => {
+    const json = _json;
+    const def = schema._zod.def;
+    json.type = "object";
+    // For looseRecord with regex patterns, use patternProperties
+    // This correctly represents "only validate keys matching the pattern" semantics
+    // and composes well with allOf (intersections)
+    const keyType = def.keyType;
+    const keyBag = keyType._zod.bag;
+    const patterns = keyBag?.patterns;
+    if (def.mode === "loose" && patterns && patterns.size > 0) {
+        // Use patternProperties for looseRecord with regex patterns
+        const valueSchema = process$1(def.valueType, ctx, {
+            ...params,
+            path: [...params.path, "patternProperties", "*"],
+        });
+        json.patternProperties = {};
+        for (const pattern of patterns) {
+            json.patternProperties[pattern.source] = valueSchema;
+        }
+    }
+    else {
+        // Default behavior: use propertyNames + additionalProperties
+        if (ctx.target === "draft-07" || ctx.target === "draft-2020-12") {
+            json.propertyNames = process$1(def.keyType, ctx, {
+                ...params,
+                path: [...params.path, "propertyNames"],
+            });
+        }
+        json.additionalProperties = process$1(def.valueType, ctx, {
+            ...params,
+            path: [...params.path, "additionalProperties"],
+        });
+    }
+    // Add required for keys with discrete values (enum, literal, etc.)
+    const keyValues = keyType._zod.values;
+    if (keyValues) {
+        const validKeyValues = [...keyValues].filter((v) => typeof v === "string" || typeof v === "number");
+        if (validKeyValues.length > 0) {
+            json.required = validKeyValues;
+        }
+    }
+};
 const nullableProcessor = (schema, ctx, json, params) => {
     const def = schema._zod.def;
     const inner = process$1(def.innerType, ctx, params);
@@ -4709,6 +4873,21 @@ function intersection(left, right) {
         right: right,
     });
 }
+const ZodRecord = /*@__PURE__*/ $constructor("ZodRecord", (inst, def) => {
+    $ZodRecord.init(inst, def);
+    ZodType.init(inst, def);
+    inst._zod.processJSONSchema = (ctx, json, params) => recordProcessor(inst, ctx, json, params);
+    inst.keyType = def.keyType;
+    inst.valueType = def.valueType;
+});
+function record(keyType, valueType, params) {
+    return new ZodRecord({
+        type: "record",
+        keyType,
+        valueType: valueType,
+        ...normalizeParams(params),
+    });
+}
 const ZodEnum = /*@__PURE__*/ $constructor("ZodEnum", (inst, def) => {
     $ZodEnum.init(inst, def);
     ZodType.init(inst, def);
@@ -4946,7 +5125,7 @@ const ZodCustom = /*@__PURE__*/ $constructor("ZodCustom", (inst, def) => {
     inst._zod.processJSONSchema = (ctx, json, params) => customProcessor(inst, ctx);
 });
 function custom(fn, _params) {
-    return _custom(ZodCustom, (() => true), _params);
+    return _custom(ZodCustom, fn ?? (() => true), _params);
 }
 function refine(fn, _params = {}) {
     return _refine(ZodCustom, fn, _params);
@@ -4961,6 +5140,19 @@ const optionalPositiveInt = number().int().positive().optional();
 const optionalString = string().optional();
 const selectOptionsSchema = array(nonEmptyString).min(1);
 const optionalNumber = number().optional();
+const textEvaluationSourceSchema = object({
+    type: literal('text'),
+});
+const customEvaluationSourceSchema = object({
+    type: literal('custom'),
+    extractorId: nonEmptyString,
+});
+const evaluationSourceExtractorSchema = custom(value => typeof value === 'function', 'Extractor must be a function.');
+record(string().min(1), evaluationSourceExtractorSchema);
+const evaluationSourceSchema = discriminatedUnion('type', [
+    textEvaluationSourceSchema,
+    customEvaluationSourceSchema,
+]);
 const expectedOutcomeModeSchema = _enum(['static', 'dynamic']);
 const evaluationParametersSchema = object({
     approach: _enum(EvaluationApproach),
@@ -4978,6 +5170,7 @@ const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine(
 const defaultExpectedOutcomeBaseSchema = object({
     label: nonEmptyString,
     placeholder: optionalString,
+    evaluationSource: evaluationSourceSchema.optional(),
 });
 const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
     text: baseSchema.extend({
@@ -5070,6 +5263,37 @@ function validateExpectedOutcomeSchema(schema) {
         throw new Error(`Invalid expectedOutcomeSchema: ${parsed.error.issues[0].message}`);
     }
 }
+function validateExpectedOutcomeArrayWithExtractors(expectedOutcome, allowedExtractorIds) {
+    const allowed = new Set(allowedExtractorIds);
+    const schema = expectedOutcomeArraySchema.superRefine((fields, ctx) => {
+        fields.forEach((field, index) => {
+            if (field.evaluationSource?.type !== 'custom') {
+                return;
+            }
+            if (allowed.has(field.evaluationSource.extractorId)) {
+                return;
+            }
+            ctx.addIssue({
+                code: 'custom',
+                path: [index, 'evaluationSource', 'extractorId'],
+                message: `Invalid expectedOutcome: Extractor "${field.evaluationSource.extractorId}" is not registered.`,
+            });
+        });
+    });
+    const parsed = schema.safeParse(expectedOutcome);
+    if (!parsed.success) {
+        throw new Error(parsed.error.issues[0].message);
+    }
+}
+function getExtractorIds(extractors) {
+    return Object.keys(extractors || {});
+}
+const modelResponseMetadataSchema = record(string(), unknown());
+const modelResponsePayloadSchema = object({
+    text: string().optional(),
+    metadata: modelResponseMetadataSchema.optional(),
+});
 const testCaseChatHistorySchema = object({
     enabled: boolean(),
@@ -5086,8 +5310,8 @@ object({
     id: string(),
     question: string(),
     expectedOutcome: expectedOutcomeArraySchema,
+    output: modelResponsePayloadSchema.optional(),
     chatHistory: testCaseChatHistorySchema,
-    output: string().optional(),
     isRunning: boolean().optional(),
     error: string().optional(),
     evaluationResult: custom().optional(),
@@ -5109,10 +5333,15 @@ function validateTestCaseInputArray(data) {
  * @param jsonContent - The JSON string to parse and validate
  * @returns Validation result with test cases or error message
  */
-function importTestSuite(jsonContent) {
+function importTestSuite(jsonContent, allowedExtractorIds = []) {
     try {
         const parsed = JSON.parse(jsonContent);
         validateTestCaseInputArray(parsed);
+        if (allowedExtractorIds.length > 0) {
+            parsed.forEach((testCase) => {
+                validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, allowedExtractorIds);
+            });
+        }
         const testCases = parsed.map((item, index) => {
             try {
                 return createTestCaseFromInput(item);
@@ -5138,7 +5367,7 @@ function importTestSuite(jsonContent) {
 }
 const MISSING_RESOLVER_MESSAGE = 'resolveExpectedOutcome is required when a test case has dynamic expected outcomes.';
-function isDynamicTextareaField(field) {
+function isDynamicTextareaField$1(field) {
     return field.type === 'textarea' && field.outcomeMode === 'dynamic';
 }
 function applyResolvedDynamicValues(testCase, resolvedValues) {
@@ -5148,7 +5377,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
     const expectedOutcome = [...(testCase.expectedOutcome || [])];
     for (const resolved of resolvedValues) {
         const field = expectedOutcome[resolved.index];
-        if (!field || !isDynamicTextareaField(field)) {
+        if (!field || !isDynamicTextareaField$1(field)) {
             continue;
         }
         expectedOutcome[resolved.index] = {
@@ -5163,7 +5392,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
 }
 async function resolveDynamicExpectedOutcomes(testCase, resolver) {
     const dynamicFields = (testCase.expectedOutcome || []).flatMap((field, index) => {
-        if (!isDynamicTextareaField(field)) {
+        if (!isDynamicTextareaField$1(field)) {
             return [];
         }
         return [{ field, index }];
@@ -5181,6 +5410,15 @@ async function resolveDynamicExpectedOutcomes(testCase, resolver) {
     return applyResolvedDynamicValues(testCase, resolvedValues);
 }
+function isChipsInputField(field) {
+    return field.type === 'chips-input';
+}
+function isTextareaField(field) {
+    return field.type === 'textarea';
+}
+function isDynamicTextareaField(field) {
+    return isTextareaField(field) && field.outcomeMode === 'dynamic';
+}
 function applyExpectedOutcomeChange(testCase, change) {
     const { index } = change;
     const expectedOutcome = [...(testCase.expectedOutcome || [])];
@@ -5188,73 +5426,99 @@ function applyExpectedOutcomeChange(testCase, change) {
     if (!target) {
         return testCase;
     }
+    const commit = (updatedField) => {
+        expectedOutcome[index] = updatedField;
+        return { ...testCase, expectedOutcome };
+    };
     switch (change.operation) {
         case 'set-value': {
-            if (target.type === 'chips-input') {
+            if (isChipsInputField(target)) {
                 return testCase;
             }
-            if (target.type === 'textarea' && target.outcomeMode === 'dynamic') {
+            if (isDynamicTextareaField(target)) {
                 return testCase;
             }
-            expectedOutcome[index] = {
+            return commit({
                 ...target,
                 value: change.value,
-            };
-            return { ...testCase, expectedOutcome };
+            });
         }
         case 'add-chip': {
-            if (target.type !== 'chips-input') {
+            if (!isChipsInputField(target)) {
                 return testCase;
             }
-            expectedOutcome[index] = {
+            return commit({
                 ...target,
                 value: [...target.value, change.value],
-            };
-            return { ...testCase, expectedOutcome };
+            });
         }
         case 'remove-chip': {
-            if (target.type !== 'chips-input') {
+            if (!isChipsInputField(target)) {
                 return testCase;
             }
-            expectedOutcome[index] = {
+            return commit({
                 ...target,
                 value: target.value.filter(chip => chip !== change.value),
-            };
-            return { ...testCase, expectedOutcome };
+            });
         }
         case 'set-evaluation-approach':
             return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
         case 'set-outcome-mode': {
-            if (target.type !== 'textarea') {
+            if (!isTextareaField(target)) {
                 return testCase;
             }
             const mode = change.value;
             if (mode === 'static') {
                 const { resolutionQuery: _, ...rest } = target;
-                expectedOutcome[index] = {
+                return commit({
                     ...rest,
                     outcomeMode: 'static',
                     value: '',
-                };
+                });
             }
             else {
-                expectedOutcome[index] = {
+                return commit({
                     ...target,
                     outcomeMode: 'dynamic',
                     value: '',
-                };
+                });
             }
-            return { ...testCase, expectedOutcome };
         }
         case 'set-resolution-query': {
-            if (target.type !== 'textarea' || target.outcomeMode !== 'dynamic') {
+            if (!isDynamicTextareaField(target)) {
                 return testCase;
             }
-            expectedOutcome[index] = {
+            return commit({
                 ...target,
                 resolutionQuery: change.value,
-            };
-            return { ...testCase, expectedOutcome };
+            });
+        }
+        case 'set-evaluation-source-type': {
+            if (change.value === 'text') {
+                return commit({
+                    ...target,
+                    evaluationSource: { type: 'text' },
+                });
+            }
+            const extractorId = target.evaluationSource?.type === 'custom'
+                ? target.evaluationSource.extractorId
+                : (change.fallbackExtractorId ?? '');
+            return commit({
+                ...target,
+                evaluationSource: {
+                    type: 'custom',
+                    extractorId,
+                },
+            });
+        }
+        case 'set-evaluation-source-extractor': {
+            return commit({
+                ...target,
+                evaluationSource: {
+                    type: 'custom',
+                    extractorId: change.value,
+                },
+            });
         }
     }
 }
@@ -30035,7 +30299,7 @@ class LLMEvaluationEngine {
             const fieldRequest = {
                 testCaseId: request.testCaseId,
                 question: request.question,
-                actualResponse: request.actualResponse,
+                actualResponse: field.actualResponse,
                 expectedOutcome: field.expectedValue,
                 evaluationParameters: field.evaluationParameters,
             };
@@ -30105,6 +30369,58 @@ class LLMEvaluationEngine {
     }
 }
+function toTextSource() {
+    return { type: 'text' };
+}
+async function resolveActualValue(field, output, extractors) {
+    const source = field.evaluationSource || toTextSource();
+    if (source.type === 'text') {
+        const text = output?.text?.trim();
+        if (!text) {
+            return {
+                success: false,
+                error: 'Model response text is empty.',
+            };
+        }
+        return { success: true, value: text };
+    }
+    const extractor = extractors?.[source.extractorId];
+    if (!extractor) {
+        return {
+            success: false,
+            error: `Extractor "${source.extractorId}" is not registered.`,
+        };
+    }
+    try {
+        const extractedRaw = await extractor(output || {});
+        if (typeof extractedRaw !== 'string') {
+            return {
+                success: false,
+                error: `Extractor "${source.extractorId}" must return a string.`,
+            };
+        }
+        const extracted = extractedRaw.trim();
+        if (!extracted) {
+            return {
+                success: false,
+                error: `Extractor "${source.extractorId}" returned an empty value.`,
+            };
+        }
+        return {
+            success: true,
+            value: extracted,
+        };
+    }
+    catch (error) {
+        return {
+            success: false,
+            error: error instanceof Error
+                ? error.message
+                : `Extractor "${source.extractorId}" failed.`,
+        };
+    }
+}
 /**
  * Service for evaluating test case responses
  */
@@ -30118,34 +30434,71 @@ class EvaluationService {
      * @param testCase - The test case to evaluate
      * @param onResult - Callback to handle the evaluation result
      */
-    async evaluateTestCase(testCase, onResult) {
-        if (!testCase.output) {
-            console.warn('⚠️ No output to evaluate for test case:', testCase.id);
-            return;
-        }
-        const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
+    async evaluateTestCase(testCase, onResult, extractors) {
+        const fields = [];
+        const failedFields = [];
+        for (const [index, field] of (testCase.expectedOutcome || []).entries()) {
             if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
-                return [];
+                continue;
             }
-            return [
-                {
+            const evaluationParameters = normalizeEvaluationParametersForField(field.type, field.evaluationParameters);
+            const expectedValue = getFieldExpectedValue(field);
+            const resolvedActualValue = await resolveActualValue(field, testCase.output, extractors);
+            if (resolvedActualValue.success) {
+                fields.push({
                     index,
                     label: field.label,
                     type: field.type,
-                    expectedValue: getFieldExpectedValue(field),
-                    evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
-                },
-            ];
-        });
+                    expectedValue,
+                    actualResponse: resolvedActualValue.value,
+                    evaluationParameters,
+                });
+            }
+            else {
+                failedFields.push({
+                    index,
+                    label: field.label,
+                    type: field.type,
+                    expectedValue,
+                    passed: false,
+                    keywordMatches: [],
+                    evaluationParameters,
+                    evaluationApproachResult: {
+                        score: 0,
+                        approachUsed: evaluationParameters.approach,
+                    },
+                    error: 'error' in resolvedActualValue
+                        ? resolvedActualValue.error
+                        : 'Failed to resolve actual value.',
+                });
+            }
+        }
+        if (fields.length === 0) {
+            if (failedFields.length === 0) {
+                console.warn('⚠️ No evaluable fields for test case:', testCase.id);
+                return;
+            }
+            onResult({
+                testCaseId: testCase.id,
+                passed: false,
+                keywordMatches: [],
+                fieldResults: failedFields,
+                timestamp: new Date().toISOString(),
+            });
+            return;
+        }
         const evaluationRequest = {
             testCaseId: testCase.id,
             question: testCase.question,
-            actualResponse: testCase.output,
             fields,
         };
         await this.engine.evaluateResponse(evaluationRequest, (result) => {
-            console.log('📊 Evaluation result received:', result);
-            onResult(result);
+            const combinedResults = [...(result.fieldResults || []), ...failedFields].sort((a, b) => a.index - b.index);
+            onResult({
+                ...result,
+                passed: combinedResults.every(field => field.passed && !field.error),
+                fieldResults: combinedResults,
+            });
         });
     }
 }
@@ -30193,7 +30546,7 @@ const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isR
 };
 const ResponseOutput = ({ output, isRunning, }) => {
-    return (index.h("div", { class: "response-output" }, output ? (index.h("div", { class: "response-output__content" }, output)) : (index.h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : ''))));
+    return (index.h("div", { class: "response-output" }, output?.text ? (index.h("div", { class: "response-output__content" }, output.text)) : (index.h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : ''))));
 };
 const EvaluationSummary = ({ result, isRunning, }) => {
@@ -30231,7 +30584,9 @@ var FormFieldType;
     FormFieldType["SELECT"] = "select";
 })(FormFieldType || (FormFieldType = {}));
-const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, onExpectedOutcomeChange, }) => {
+const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, extractorIds = [], onExpectedOutcomeChange, }) => {
+    const hasExtractorOptions = extractorIds.length > 0;
+    const firstExtractorId = extractorIds[0];
     const emit = (detail) => onExpectedOutcomeChange({
         detail,
     });
@@ -30261,6 +30616,23 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
         required: false,
         rows: 2,
     });
+    const buildEvaluationSourceConfig = (index) => ({
+        name: `expectedOutcomeEvaluationSource-${index}`,
+        fieldType: FormFieldType.SELECT,
+        label: 'Evaluation Source',
+        placeholder: 'Select evaluation source',
+        required: true,
+        optionList: ['text', 'custom'],
+        defaultValue: 'text',
+    });
+    const buildExtractorConfig = (index) => ({
+        name: `expectedOutcomeEvaluationSourceExtractor-${index}`,
+        fieldType: FormFieldType.SELECT,
+        label: 'Extractor',
+        placeholder: 'Select extractor',
+        required: true,
+        optionList: extractorIds,
+    });
     const renderEvaluationSelector = (field, index$1) => {
         const optionList = getAllowedApproachesForFieldType(field.type);
         return (index.h("app-select", { config: buildEvaluationConfig(index$1, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
@@ -30270,6 +30642,27 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
                 value: e.detail.value,
             }) }));
     };
+    const renderEvaluationSourceSelector = (field, index$1) => {
+        if (!hasExtractorOptions) {
+            return null;
+        }
+        const sourceType = field.evaluationSource?.type || 'text';
+        return (index.h("div", null, index.h("app-select", { config: buildEvaluationSourceConfig(index$1), value: sourceType, onValueChange: (e) => emit({
+                testCaseId,
+                index: index$1,
+                operation: 'set-evaluation-source-type',
+                value: e.detail.value,
+                fallbackExtractorId: firstExtractorId,
+            }) }), sourceType === 'custom' && (index.h("app-select", { config: buildExtractorConfig(index$1), value: field.evaluationSource?.type === 'custom'
+                ? field.evaluationSource.extractorId
+                : '', onValueChange: (e) => emit({
+                testCaseId,
+                index: index$1,
+                operation: 'set-evaluation-source-extractor',
+                value: e.detail.value,
+            }) }))));
+    };
+    const renderEvaluationOptions = (field, index$1) => (index.h("details", { class: "expected-outcome-renderer__options" }, index.h("summary", { class: "expected-outcome-renderer__options-summary" }, "More options"), index.h("div", { class: "expected-outcome-renderer__options-content" }, renderEvaluationSelector(field, index$1), renderEvaluationSourceSelector(field, index$1))));
     return (index.h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index$1) => {
         if (field.type === 'textarea') {
             const isDynamic = dynamicResolutionSupported && field.outcomeMode === 'dynamic';
@@ -30301,7 +30694,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
                     index: index$1,
                     operation: 'set-resolution-query',
                     value: e.detail.value,
-                }) })), !isDynamic && renderEvaluationSelector(field, index$1)));
+                }) })), !isDynamic && renderEvaluationOptions(field, index$1)));
         }
         if (field.type === 'chips-input') {
             const config = {
@@ -30321,7 +30714,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
                     index: index$1,
                     operation: 'remove-chip',
                     value: e.detail.value,
-                }) }), renderEvaluationSelector(field, index$1)));
+                }) }), renderEvaluationOptions(field, index$1)));
         }
         if (field.type === 'select') {
             const config = {
@@ -30337,18 +30730,18 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
                     index: index$1,
                     operation: 'set-value',
                     value: e.detail.value,
-                }) }), renderEvaluationSelector(field, index$1)));
+                }) }), renderEvaluationOptions(field, index$1)));
         }
         return (index.h("div", { class: "expected-outcome-renderer__group" }, index.h("div", { class: "expected-outcome-renderer__text" }, index.h("label", null, field.label), index.h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
                 testCaseId,
                 index: index$1,
                 operation: 'set-value',
                 value: e.target.value,
-            }) })), renderEvaluationSelector(field, index$1)));
+            }) })), renderEvaluationOptions(field, index$1)));
     })));
 };
-const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
+const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
     const questionConfig = {
         name: 'question',
         fieldType: FormFieldType.TEXT_AREA,
@@ -30374,11 +30767,11 @@ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, o
                     value,
                 },
             });
-        } }), index.h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, onExpectedOutcomeChange: onExpectedOutcomeChange })), index.h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), index.h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), index.h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
+        } }), index.h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onExpectedOutcomeChange: onExpectedOutcomeChange })), index.h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), index.h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), index.h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
 };
-const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
-    return (index.h("div", { class: "test-cases" }, index.h("div", { class: "test-cases__column-headers" }, index.h("div", { class: "test-cases__column-header" }, "Input"), index.h("div", { class: "test-cases__column-header" }, "Output"), index.h("div", { class: "test-cases__column-header" }, "Evaluation"), index.h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (index.h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange, onChatHistoryChange: onChatHistoryChange }))), index.h("div", { class: "test-cases__add-section" }, index.h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
+const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
+    return (index.h("div", { class: "test-cases" }, index.h("div", { class: "test-cases__column-headers" }, index.h("div", { class: "test-cases__column-header" }, "Input"), index.h("div", { class: "test-cases__column-header" }, "Output"), index.h("div", { class: "test-cases__column-header" }, "Evaluation"), index.h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (index.h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange, onChatHistoryChange: onChatHistoryChange }))), index.h("div", { class: "test-cases__add-section" }, index.h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
 };
 const tokensCss = () => `:host{--spacing:0.25rem;--spacing-1:calc(var(--spacing) * 1);--spacing-2:calc(var(--spacing) * 2);--spacing-3:calc(var(--spacing) * 3);--spacing-4:calc(var(--spacing) * 4);--spacing-5:calc(var(--spacing) * 5);--spacing-6:calc(var(--spacing) * 6);--spacing-8:calc(var(--spacing) * 8);--spacing-10:calc(var(--spacing) * 10);--spacing-12:calc(var(--spacing) * 12);--spacing-16:calc(var(--spacing) * 16);--spacing-20:calc(var(--spacing) * 20);--spacing-24:calc(var(--spacing) * 24);--radius-none:0;--radius-sm:0.125rem;--radius-md:0.375rem;--radius-lg:0.5rem;--radius-xl:0.75rem;--radius-2xl:1rem;--radius-3xl:1.5rem;--radius-full:9999px;--radius:var(--radius-lg);--font-size-xs:0.75rem;--font-size-sm:0.875rem;--font-size-base:1rem;--font-size-lg:1.125rem;--font-size-xl:1.25rem;--font-size-2xl:1.5rem;--font-size-3xl:1.875rem;--font-size-4xl:2.25rem;--font-weight-normal:400;--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--line-height-none:1;--line-height-tight:1.25;--line-height-snug:1.375;--line-height-normal:1.5;--line-height-relaxed:1.625;--line-height-loose:2;--letter-spacing-tight:-0.025em;--letter-spacing-normal:0;--letter-spacing-wide:0.05em;--shadow-sm:0 1px 2px 0 rgba(0, 0, 0, 0.05);--shadow-md:0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);--shadow-lg:0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);--shadow-xl:0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);--shadow-2xl:0 25px 50px -12px rgba(0, 0, 0, 
0.25);--border-width:1px;--z-base:0;--z-dropdown:1000;--z-sticky:1100;--z-modal:1200;--z-popover:1300;--z-tooltip:1400;--opacity-disabled:0.5;--opacity-hover:0.8;--opacity-muted:0.6;--max-w-sm:24rem;--max-w-md:28rem;--max-w-lg:32rem;--max-w-xl:42rem;--max-w-2xl:48rem;--max-w-full:100%;--breakpoint-sm:640px;--breakpoint-md:768px;--breakpoint-lg:1024px;--breakpoint-xl:1280px;--breakpoint-2xl:1536px;--background:#ffffff;--foreground:#0a0a0a;--card:#ffffff;--card-foreground:#0a0a0a;--popover:#ffffff;--popover-foreground:#0a0a0a;--primary:#0a0a0a;--primary-foreground:#fafafa;--secondary:#f4f4f5;--secondary-foreground:#0a0a0a;--muted:#f4f4f5;--muted-foreground:#71717a;--accent:#f4f4f5;--accent-foreground:#0a0a0a;--destructive:#ef4444;--destructive-foreground:#fafafa;--border:#e4e4e7;--input:#e4e4e7;--ring:#3b82f6;--success:#10b981;--success-foreground:#fafafa;--warning:#f59e0b;--warning-foreground:#fafafa;--info:#3b82f6;--info-foreground:#fafafa}:host([data-theme='dark']){--background:#0a0a0a;--foreground:#fafafa;--card:#171717;--card-foreground:#fafafa;--popover:#171717;--popover-foreground:#fafafa;--primary:#fafafa;--primary-foreground:#0a0a0a;--secondary:#27272a;--secondary-foreground:#fafafa;--muted:#27272a;--muted-foreground:#a1a1aa;--accent:#27272a;--accent-foreground:#fafafa;--destructive:#dc2626;--destructive-foreground:#fafafa;--border:#27272a;--input:#27272a;--ring:#3b82f6;--success:#059669;--success-foreground:#fafafa;--warning:#d97706;--warning-foreground:#fafafa;--info:#2563eb;--info-foreground:#fafafa}`;
@@ -30389,7 +30782,7 @@ const llmTestRunnerHeaderCss = () => `.test-runner-header{display:flex;justify-c
 const llmTestCasesCss = () => `.test-cases{background:var(--background)}.test-cases__column-headers{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);background:var(--border);border-bottom:2px solid var(--border)}.test-cases__column-header{background:var(--muted);padding:var(--spacing-4) var(--spacing-5);font-weight:var(--font-weight-semibold);color:var(--foreground);font-size:var(--font-size-sm);text-transform:uppercase;letter-spacing:var(--letter-spacing-wide)}.test-cases__add-section{padding:var(--spacing-6);text-align:center;background:var(--muted);border-top:var(--border-width) solid var(--border)}@media (max-width: 1200px){.test-cases__column-headers{display:none}}`;
-const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
+const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}.expected-outcome-renderer__options{border:var(--border-width) solid var(--border);border-radius:var(--radius-sm);background:var(--muted)}.expected-outcome-renderer__options-summary{cursor:pointer;font-size:var(--font-size-sm);color:var(--foreground);padding:var(--spacing-2) var(--spacing-3);user-select:none}.expected-outcome-renderer__options-content{display:flex;flex-direction:column;gap:var(--spacing-2);padding:0 var(--spacing-3) var(--spacing-3)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
 const rowActionsCss = () => `.row-actions{height:100%;padding:var(--spacing-5);background:var(--background);display:flex;flex-direction:column;gap:var(--spacing-3);align-items:center;justify-content:flex-start;align-self:flex-start}@media (max-width: 1200px){.row-actions{border-right:none;border-bottom:var(--border-width) solid var(--border);flex-direction:row;justify-content:center}}@media (max-width: 768px){.row-actions{padding:var(--spacing-4)}}`;
@@ -30415,6 +30808,7 @@ const LLMTestRunner = class {
     useSave = false;
     usePromptEditor = false;
     resolveExpectedOutcome;
+    evaluationSourceExtractors;
     initialTestCases;
     defaultExpectedOutcomeSchema;
     testCases = [
@@ -30451,6 +30845,12 @@ const LLMTestRunner = class {
             // Initialize testCases from prop if provided
             if (this.initialTestCases !== undefined) {
                 validateTestCaseInputArray(this.initialTestCases);
+                const extractorIds = getExtractorIds(this.evaluationSourceExtractors);
+                if (extractorIds.length > 0) {
+                    this.initialTestCases.forEach(testCase => {
+                        validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, extractorIds);
+                    });
+                }
                 this.testCases = this.initialTestCases.map((rawTestCase, index) => {
                     try {
                         return createTestCaseFromInput(rawTestCase);
@@ -30474,8 +30874,6 @@ const LLMTestRunner = class {
             this.testCases = [];
         }
     }
-    componentDidLoad() { }
-    disconnectedCallback() { }
     async resetSavingState() {
         this.isSaving = false;
     }
@@ -30508,7 +30906,7 @@ const LLMTestRunner = class {
     updateTestCase(id, updates) {
         this.testCases = this.testCases.map(tc => tc.id === id ? { ...tc, ...updates } : tc);
     }
-    requestLlmText(testCase) {
+    requestLlmResponse(testCase) {
         return new Promise((resolve, reject) => {
             const payload = {
                 prompt: testCase.question,
@@ -30531,14 +30929,14 @@ const LLMTestRunner = class {
         const startTime = Date.now();
         this.updateTestCase(testCase.id, { isRunning: true });
         const [llmSettled, resolutionSettled] = await Promise.allSettled([
-            this.requestLlmText(testCase),
+            this.requestLlmResponse(testCase),
             resolveDynamicExpectedOutcomes(testCase, this.resolveExpectedOutcome),
         ]);
         const responseTime = Date.now() - startTime;
         if (llmSettled.status === 'rejected') {
             this.updateTestCase(testCase.id, {
                 isRunning: false,
-                output: null,
+                output: undefined,
                 error: this.addErrorMessage(llmSettled.reason, 'Unknown error'),
                 responseTime,
             });
@@ -30586,7 +30984,7 @@ const LLMTestRunner = class {
             this.updateTestCase(testCase.id, {
                 evaluationResult: result,
             });
-        });
+        }, this.evaluationSourceExtractors);
     }
     async runAllTests() {
         this.isRunningAll = true;
@@ -30617,7 +31015,7 @@ const LLMTestRunner = class {
         this.error = '';
         try {
             const content = await readFileAsync(file);
-            const result = importTestSuite(content);
+            const result = importTestSuite(content, getExtractorIds(this.evaluationSourceExtractors));
             if (!result.success) {
                 this.error = result.error || 'Unknown error occurred during import.';
                 return;
@@ -30678,7 +31076,7 @@ const LLMTestRunner = class {
         }
     }
     render() {
-        return (index.h("div", { key: 'cc808096f929b2e1c570c53144aab195d177c187', class: "test-runner-container" }, index.h(LLMTestRunnerHeader, { key: 'b91cf3df7df0e95bfd4908a2f91c7310b5b7a09a', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), index.h(ErrorMessage, { key: 'c7991497173fa9843e7aa42f5283d0897ddff2e2', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: '2b57132564442b8047d8672c6adcba62cdc9ae87', class: "test-runner-container__content" }, index.h(LLMTestCases, { key: '146e9d8c76a34980a2a274dd856887c22e1ed0e9', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
+        return (index.h("div", { key: '7433beaa1d60d48f65600c43e11b302b892a7bca', class: "test-runner-container" }, index.h(LLMTestRunnerHeader, { key: '8083cc39376e7a710bd3f52efb184b959e885a87', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), index.h(ErrorMessage, { key: 'ddced98c13cd595c4cfb6eef11b27cb173769518', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: '8d6f65c4d68d34869b644709eacb97fec93683c6', class: "test-runner-container__content" }, index.h(LLMTestCases, { key: '5ccb186132b23af6209209b0a14086e03cf790af', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, extractorIds: getExtractorIds(this.evaluationSourceExtractors), onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
     }
 };
 LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));