npm - llm-testrunner-components - Versions diffs - 1.1.0 → 1.2.1 - Mend

llm-testrunner-components 1.1.0 → 1.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.

Files changed (82)

package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.css CHANGED Viewed

@@ -7,49 +7,88 @@
   flex-direction: column;
 }
-.evaluation-summary__details {
+.evaluation-summary__field-results {
   display: flex;
   flex-direction: column;
   gap: var(--spacing-2);
+  margin-top: var(--spacing-2);
 }
-.evaluation-summary__placeholder {
+.evaluation-summary__field-result {
+  border: var(--border-width) solid var(--border);
+  border-radius: var(--radius-md);
+  padding: var(--spacing-2);
   display: flex;
-  align-items: center;
-  justify-content: center;
+  flex-direction: column;
+  gap: var(--spacing-1);
+}
+.evaluation-summary__field-header {
+  display: flex;
+  flex-direction: column;
+  gap: var(--spacing-1);
+}
+.evaluation-summary__field-label {
+  font-weight: var(--font-weight-semibold);
+  font-size: var(--font-size-xs);
+}
+.evaluation-summary__field-approach {
   color: var(--muted-foreground);
-  font-style: italic;
-  flex: 1;
-  background: var(--muted);
-  border: 2px dashed var(--border);
-  border-radius: var(--radius);
+  font-size: 11px;
 }
-/* Evaluation Result Element */
-.evaluation-summary__result {
+.evaluation-summary__field-details {
   display: flex;
   flex-direction: column;
-  gap: var(--spacing-2);
+  gap: var(--spacing-1);
+  font-size: var(--font-size-xs);
 }
-.evaluation-summary__result-status {
+.evaluation-summary__field-status {
+  width: fit-content;
+  padding: 2px var(--spacing-2);
+  border-radius: var(--radius-sm);
+  font-size: 11px;
   font-weight: var(--font-weight-semibold);
-  font-size: var(--font-size-sm);
-  padding: var(--spacing-2) var(--spacing-3);
-  border-radius: var(--radius-md);
-  text-align: center;
+  border: var(--border-width) solid transparent;
 }
-.evaluation-summary__result-status--passed {
+.evaluation-summary__field-status--passed {
   background: var(--success);
   color: var(--success-foreground);
-  border: var(--border-width) solid var(--success);
+  border-color: var(--success);
 }
-.evaluation-summary__result-status--failed {
+.evaluation-summary__field-status--failed {
   background: var(--destructive);
   color: var(--destructive-foreground);
-  border: var(--border-width) solid var(--destructive);
+  border-color: var(--destructive);
+}
+.evaluation-summary__error-message {
+  color: var(--destructive);
+  font-size: var(--font-size-xs);
+}
+.evaluation-summary__placeholder {
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  color: var(--muted-foreground);
+  font-style: italic;
+  flex: 1;
+  background: var(--muted);
+  border: 2px dashed var(--border);
+  border-radius: var(--radius);
+}
+/* Evaluation Result Element */
+.evaluation-summary__result {
+  display: flex;
+  flex-direction: column;
+  gap: var(--spacing-2);
 }
 /* Responsive Design */

package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js CHANGED Viewed

@@ -1,5 +1,7 @@
 import { h } from "@stencil/core";
 export const EvaluationSummary = ({ result, isRunning, }) => {
-    return (h("div", { class: "evaluation-summary" }, result ? (h("div", { class: "evaluation-summary__result" }, h("div", { class: `evaluation-summary__result-status evaluation-summary__result-status--${result.passed ? 'passed' : 'failed'}` }, result.passed ? '✅ PASSED' : '❌ FAILED'), h("div", { class: "evaluation-summary__details" }, "Keywords: ", result.keywordMatches.filter(m => m.found).length, "/", result.keywordMatches.length, " found"))) : (h("div", { class: "evaluation-summary__placeholder" }, isRunning ? 'Evaluating...' : ''))));
+    const fieldResults = result?.fieldResults || [];
+    const hasFieldResults = fieldResults.length > 0;
+    return (h("div", { class: "evaluation-summary" }, result ? (h("div", { class: "evaluation-summary__result" }, hasFieldResults ? (h("div", { class: "evaluation-summary__field-results" }, fieldResults.map(fieldResult => (h("div", { class: "evaluation-summary__field-result" }, h("div", { class: "evaluation-summary__field-header" }, h("span", { class: "evaluation-summary__field-label" }, fieldResult.label), h("span", { class: "evaluation-summary__field-approach" }, "Strategy: ", fieldResult.evaluationParameters.approach)), h("div", { class: "evaluation-summary__field-details" }, h("span", { class: `evaluation-summary__field-status evaluation-summary__field-status--${fieldResult.passed ? 'passed' : 'failed'}` }, fieldResult.passed ? 'PASSED' : 'FAILED'), fieldResult.error && (h("span", { class: "evaluation-summary__error-message" }, fieldResult.error)), h("span", null, "Score: ", fieldResult.evaluationApproachResult.score.toFixed(2)), h("span", null, "Matches:", ' ', fieldResult.keywordMatches.filter(match => match.found).length, "/", fieldResult.keywordMatches.length))))))) : null)) : (h("div", { class: "evaluation-summary__placeholder" }, isRunning ? 'Evaluating...' : ''))));
 };
 //# sourceMappingURL=evaluation-summary.js.map

package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"evaluation-summary.js","sourceRoot":"","sources":["../../../../../src/components/llm-test-runner/test-cases/evaluation/evaluation-summary.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAQvD,MAAM,CAAC,MAAM,iBAAiB,GAAgD,CAAC,EAC7E,MAAM,EACN,SAAS,GACV,EAAE,EAAE;IACH,OAAO,CACL,WAAK,KAAK,EAAC,oBAAoB,IAC5B,MAAM,CAAC,CAAC,CAAC,CACR,WAAK,KAAK,EAAC,4BAA4B;~~QACrC~~,~~WACE~~,KAAK,EAAE,~~wEAAwE~~,~~MAAM~~,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,EAAE,~~IAEnH~~,~~MAAM~~,CAAC,MAAM,CAAC,CAAC,CAAC,~~UAAU~~,CAAC,CAAC,CAAC,~~UAAU~~,CACpC;~~QACN~~,~~WAAK~~,KAAK,EAAC,~~6BAA6B~~;;~~YAC3B~~,~~MAAM~~,CAAC,cAAc,CAAC,MAAM,CAAC,~~CAAC~~,CAAC,EAAE,CAAC,~~CAAC~~,CAAC,KAAK,CAAC,CAAC,MAAM;;~~YAC3D~~,~~MAAM~~,CAAC,cAAc,CAAC,MAAM~~;qBACzB~~,CACF,CACP,CAAC,CAAC,CAAC,CACF,WAAK,KAAK,EAAC,iCAAiC,IACzC,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,CAC7B,CACP,CACG,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { EvaluationResult } from '../../../../lib/evaluation/types';\n\nexport interface EvaluationSummaryProps {\n result?: EvaluationResult;\n isRunning: boolean;\n}\n\nexport const EvaluationSummary: FunctionalComponent<EvaluationSummaryProps> = ({\n result,\n isRunning,\n}) => {\n return (\n <div class=\"evaluation-summary\">\n {result ? (\n <div class=\"evaluation-summary__result\">\n <div\n class={`evaluation-~~summary__result~~-status evaluation-~~summary__result~~-status--${~~result~~.passed ? 'passed' : 'failed'}`}\n >\n {~~result~~.passed ? '✅ PASSED' : '❌ FAILED'}\n </~~div~~>\n <~~div~~ class=\"evaluation-~~summary__details~~\">\n ~~Keywords~~: {~~result~~.keywordMatches.filter(m => m.found).length}/\n {~~result~~.keywordMatches.length} ~~found~~\n </div>\n </div>\n ) : (\n <div class=\"evaluation-summary__placeholder\">\n {isRunning ? 'Evaluating...' : ''}\n </div>\n )}\n </div>\n );\n};\n"]}
1	+ {"version":3,"file":"evaluation-summary.js","sourceRoot":"","sources":["../../../../../src/components/llm-test-runner/test-cases/evaluation/evaluation-summary.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAQvD,MAAM,CAAC,MAAM,iBAAiB,GAAgD,CAAC,EAC7E,MAAM,EACN,SAAS,GACV,EAAE,EAAE;IACH,MAAM,YAAY,GAAG,MAAM,EAAE,YAAY,IAAI,EAAE,CAAC;IAChD,MAAM,eAAe,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;IAEhD,OAAO,CACL,WAAK,KAAK,EAAC,oBAAoB,IAC5B,MAAM,CAAC,CAAC,CAAC,CACR,WAAK,KAAK,EAAC,4BAA4B,IACpC,eAAe,CAAC,CAAC,CAAC,CACjB,WAAK,KAAK,EAAC,mCAAmC,IAC3C,YAAY,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC,CAC/B,WAAK,KAAK,EAAC,kCAAkC;QAC3C,WAAK,KAAK,EAAC,kCAAkC;YAC3C,YAAM,KAAK,EAAC,iCAAiC,IAC1C,WAAW,CAAC,KAAK,CACb;YACP,YAAM,KAAK,EAAC,oCAAoC;;gBACnC,WAAW,CAAC,oBAAoB,CAAC,QAAQ,CAC/C,CACH;QACN,WAAK,KAAK,EAAC,mCAAmC;YAC5C,YACE,KAAK,EAAE,sEAAsE,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,EAAE,IAEtH,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CACpC;YACN,WAAW,CAAC,KAAK,IAAI,CACpB,YAAM,KAAK,EAAC,mCAAmC,IAC5C,WAAW,CAAC,KAAK,CACb,CACR;YACD;;gBACU,WAAW,CAAC,wBAAwB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CACxD;YACP;;gBACW,GAAG;gBACX,WAAW,CAAC,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;;gBAC9D,WAAW,CAAC,cAAc,CAAC,MAAM,CAC7B,CACH,CACF,CACP,CAAC,CACE,CACP,CAAC,CAAC,CAAC,IAAI,CACJ,CACP,CAAC,CAAC,CAAC,CACF,WAAK,KAAK,EAAC,iCAAiC,IACzC,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,CAC7B,CACP,CACG,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { EvaluationResult } from '../../../../lib/evaluation/types';\n\nexport interface EvaluationSummaryProps {\n result?: EvaluationResult;\n isRunning: boolean;\n}\n\nexport const EvaluationSummary: FunctionalComponent<EvaluationSummaryProps> = ({\n result,\n isRunning,\n}) => {\n const fieldResults = result?.fieldResults \|\| [];\n const hasFieldResults = fieldResults.length > 0;\n\n return (\n <div class=\"evaluation-summary\">\n {result ? 
(\n <div class=\"evaluation-summary__result\">\n {hasFieldResults ? (\n <div class=\"evaluation-summary__field-results\">\n {fieldResults.map(fieldResult => (\n <div class=\"evaluation-summary__field-result\">\n <div class=\"evaluation-summary__field-header\">\n <span class=\"evaluation-summary__field-label\">\n {fieldResult.label}\n </span>\n <span class=\"evaluation-summary__field-approach\">\n Strategy: {fieldResult.evaluationParameters.approach}\n </span>\n </div>\n <div class=\"evaluation-summary__field-details\">\n <span\n class={`evaluation-summary__field-status evaluation-summary__field-status--${fieldResult.passed ? 'passed' : 'failed'}`}\n >\n {fieldResult.passed ? 'PASSED' : 'FAILED'}\n </span>\n {fieldResult.error && (\n <span class=\"evaluation-summary__error-message\">\n {fieldResult.error}\n </span>\n )}\n <span>\n Score: {fieldResult.evaluationApproachResult.score.toFixed(2)}\n </span>\n <span>\n Matches:{' '}\n {fieldResult.keywordMatches.filter(match => match.found).length}/\n {fieldResult.keywordMatches.length}\n </span>\n </div>\n </div>\n ))}\n </div>\n ) : null}\n </div>\n ) : (\n <div class=\"evaluation-summary__placeholder\">\n {isRunning ? 'Evaluating...' : ''}\n </div>\n )}\n </div>\n );\n};\n"]}

package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js CHANGED Viewed

@@ -1,9 +1,29 @@
 import { h } from "@stencil/core";
 import { FormFieldType } from "../../../lib/form/schema";
+import { EvaluationApproach, } from "../../../lib/evaluation/constants";
+import { getAllowedApproachesForFieldType } from "../../../lib/evaluation/field-evaluation-approach";
 export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange, }) => {
     const emit = (detail) => onExpectedOutcomeChange({
         detail,
     });
+    const buildEvaluationConfig = (index, optionList) => ({
+        name: `expectedOutcomeEvaluation-${index}`,
+        fieldType: FormFieldType.SELECT,
+        label: 'Evaluation Approach',
+        placeholder: 'Select evaluation approach…',
+        required: true,
+        optionList,
+        defaultValue: EvaluationApproach.EXACT,
+    });
+    const renderEvaluationSelector = (field, index) => {
+        const optionList = getAllowedApproachesForFieldType(field.type);
+        return (h("app-select", { config: buildEvaluationConfig(index, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
+                testCaseId,
+                index,
+                operation: 'set-evaluation-approach',
+                value: e.detail.value,
+            }) }));
+    };
     return (h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index) => {
         if (field.type === 'textarea') {
             const config = {
@@ -11,15 +31,15 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
                 fieldType: FormFieldType.TEXT_AREA,
                 label: field.label,
                 placeholder: field.placeholder,
-                required: field.required,
+                required: true,
                 rows: field.rows || 2,
             };
-            return (h("app-textarea", { config: config, value: field.value, onValueChange: (e) => emit({
+            return (h("div", { class: "expected-outcome-renderer__group" }, h("app-textarea", { config: config, value: field.value, onValueChange: (e) => emit({
                     testCaseId,
                     index,
                     operation: 'set-value',
                     value: e.detail.value,
-                }) }));
+                }) }), renderEvaluationSelector(field, index)));
         }
         if (field.type === 'chips-input') {
             const config = {
@@ -27,9 +47,9 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
                 fieldType: FormFieldType.CHIPS,
                 label: field.label,
                 placeholder: field.placeholder,
-                required: field.required,
+                required: true,
             };
-            return (h("app-chips", { config: config, value: field.value, onAddChip: (e) => emit({
+            return (h("div", { class: "expected-outcome-renderer__group" }, h("app-chips", { config: config, value: field.value, onAddChip: (e) => emit({
                     testCaseId,
                     index,
                     operation: 'add-chip',
@@ -39,7 +59,7 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
                     index,
                     operation: 'remove-chip',
                     value: e.detail.value,
-                }) }));
+                }) }), renderEvaluationSelector(field, index)));
         }
         if (field.type === 'select') {
             const config = {
@@ -47,22 +67,22 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
                 fieldType: FormFieldType.SELECT,
                 label: field.label,
                 placeholder: field.placeholder,
-                required: field.required,
+                required: true,
                 optionList: field.options,
             };
-            return (h("app-select", { config: config, value: field.value, onValueChange: (e) => emit({
+            return (h("div", { class: "expected-outcome-renderer__group" }, h("app-select", { config: config, value: field.value, onValueChange: (e) => emit({
                     testCaseId,
                     index,
                     operation: 'set-value',
                     value: e.detail.value,
-                }) }));
+                }) }), renderEvaluationSelector(field, index)));
         }
-        return (h("div", { class: "expected-outcome-renderer__text" }, h("label", null, field.label), h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
+        return (h("div", { class: "expected-outcome-renderer__group" }, h("div", { class: "expected-outcome-renderer__text" }, h("label", null, field.label), h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
                 testCaseId,
                 index,
                 operation: 'set-value',
                 value: e.target.value,
-            }) })));
+            }) })), renderEvaluationSelector(field, index)));
     })));
 };
 //# sourceMappingURL=expected-outcome-renderer.js.map

package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"expected-outcome-renderer.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/expected-outcome-renderer.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAIvD,OAAO,EAAe,aAAa,EAAgC,MAAM,0BAA0B,CAAC;~~AAsBpG~~,MAAM,CAAC,MAAM,uBAAuB,GAAsD,CAAC,EACzF,UAAU,EACV,MAAM,EACN,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,IAAI,GAAG,CAAC,MAAmC,EAAE,EAAE,CACnD,uBAAuB,CAAC;QACtB,MAAM;KACqC,CAAC,CAAC;IAEjD,OAAO,CACL,WAAK,KAAK,EAAC,2BAA2B,IACnC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QACnC,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,MAAM,GAAmB;gBAC7B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,SAAS;gBAClC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,~~KAAK,CAAC,QAAQ~~;~~gBACxB~~,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC;aACtB,CAAC;YACF,OAAO,CACL,oBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;~~oBACH~~,UAAU;~~oBACV~~,KAAK;~~oBACL~~,SAAS,EAAE,WAAW;~~oBACtB~~,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;~~iBACtB~~,CAAC,GAEJ,~~CACH~~,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YACjC,MAAM,MAAM,GAAgB;gBAC1B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,KAAK;gBAC9B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,~~KAAK,CAAC,QAAQ~~;~~aACzB~~,CAAC;YAEF,OAAO,CACL,iBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CACf,IAAI,CAAC;~~oBACH~~,UAAU;~~oBACV~~,KAAK;~~oBACL~~,SAAS,EAAE,UAAU;~~oBACrB~~,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;~~iBACtB~~,CAAC,EAEJ,YAAY,EAAE,CAAC,CAAC,EAAE,EAAE,CAClB,IAAI,CAAC;~~oBACH~~,UAAU;~~oBACV~~,KAAK;~~oBACL~~,SAAS,EAAE,aAAa;~~oBACxB~~,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;~~iBACtB~~,CAAC,GAEJ,~~CACH~~,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAiB;gBAC3B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,MAAM;gBAC/B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW
;gBAC9B,QAAQ,EAAE,~~KAAK,CAAC,QAAQ~~;~~gBACxB~~,UAAU,EAAE,KAAK,CAAC,OAAO;aAC1B,CAAC;YAEF,OAAO,CACL,kBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;~~oBACH~~,UAAU;~~oBACV~~,KAAK;~~oBACL~~,SAAS,EAAE,WAAW;~~oBACtB~~,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;~~iBACtB~~,CAAC,GAEJ,~~CACH~~,CAAC;QACJ,CAAC;QAED,OAAO,CACL,WAAK,KAAK,EAAC,iCAAiC;~~YAC1C~~,iBAAQ,KAAK,CAAC,KAAK,CAAS;~~YAC5B~~,aACE,IAAI,EAAC,MAAM,EACX,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,WAAW,EAAE,KAAK,CAAC,WAAW,EAC9B,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CACb,IAAI,CAAC;~~oBACH~~,UAAU;~~oBACV~~,KAAK;~~oBACL~~,SAAS,EAAE,WAAW;~~oBACtB~~,KAAK,EAAG,CAAC,CAAC,MAA2B,CAAC,KAAK;~~iBAC5C~~,CAAC,GAEJ,CACE,CACP,CAAC;IACJ,CAAC,CAAC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport {\n ExpectedOutcomeField,\n} from '../../../types/llm-test-runner';\nimport { ChipsConfig, FormFieldType, SelectConfig, TextAreaConfig } from '../../../lib/form/schema';\n\~~nexport type ExpectedOutcomeOperation =\~~n \| '~~set-value~~'~~\n \|~~ '~~add~~-~~chip~~'~~\n \|~~ '~~remove~~-~~chip~~';\n\nexport ~~interface~~ ExpectedOutcomeChangeDetail {\n testCaseId: string;\n ~~index:~~ ~~number;\n operation:~~ ~~ExpectedOutcomeOperation~~;\n ~~value?: string;\n}~~\n\ninterface ExpectedOutcomeRendererProps {\n testCaseId: string;\n fields: ExpectedOutcomeField[];\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const ExpectedOutcomeRenderer: FunctionalComponent<ExpectedOutcomeRendererProps> = ({\n testCaseId,\n fields,\n onExpectedOutcomeChange,\n}) => {\n const emit = (detail: ExpectedOutcomeChangeDetail) =>\n onExpectedOutcomeChange({\n detail,\n } as CustomEvent<ExpectedOutcomeChangeDetail>);\n\n return (\n <div class=\"expected-outcome-renderer\">\n {(fields \|\| []).map((field, index) => {\n if (field.type === 'textarea') {\n const config: TextAreaConfig = {\n name: `expectedOutcome-${index}`,\n 
fieldType: FormFieldType.TEXT_AREA,\n label: field.label,\n placeholder: field.placeholder,\n required: ~~field.required~~,\n rows: field.rows \|\| 2,\n };\n return (\n <app-textarea\n config={config}\n value={field.value}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n ~~/>\~~n );\n }\n\n if (field.type === 'chips-input') {\n const config: ChipsConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.CHIPS,\n label: field.label,\n placeholder: field.placeholder,\n required: ~~field.required~~,\n };\n\n return (\n <app-chips\n config={config}\n value={field.value}\n onAddChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'add-chip',\n value: e.detail.value,\n })\n }\n onRemoveChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'remove-chip',\n value: e.detail.value,\n })\n }\n ~~/>\~~n );\n }\n\n if (field.type === 'select') {\n const config: SelectConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.SELECT,\n label: field.label,\n placeholder: field.placeholder,\n required: ~~field.required~~,\n optionList: field.options,\n };\n\n return (\n <app-select\n config={config}\n value={field.value}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n ~~/>\~~n );\n }\n\n return (\n <div class=\"expected-outcome-~~renderer__text~~\">\n <label>{field.label}</label>\n <input\n type=\"text\"\n value={field.value}\n placeholder={field.placeholder}\n onInput={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: (e.target as HTMLInputElement).value,\n })\n }\n />\n </div>\n );\n })}\n </div>\n );\n};\n"]}
1	+ {"version":3,"file":"expected-outcome-renderer.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/expected-outcome-renderer.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAIvD,OAAO,EAAe,aAAa,EAAgC,MAAM,0BAA0B,CAAC;AACpG,OAAO,EACL,kBAAkB,GACnB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,gCAAgC,EAAE,MAAM,mDAAmD,CAAC;AAerG,MAAM,CAAC,MAAM,uBAAuB,GAAsD,CAAC,EACzF,UAAU,EACV,MAAM,EACN,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,IAAI,GAAG,CAAC,MAAmC,EAAE,EAAE,CACnD,uBAAuB,CAAC;QACtB,MAAM;KACqC,CAAC,CAAC;IAEjD,MAAM,qBAAqB,GAAG,CAC5B,KAAa,EACb,UAAoB,EACN,EAAE,CAAC,CAAC;QAClB,IAAI,EAAE,6BAA6B,KAAK,EAAE;QAC1C,SAAS,EAAE,aAAa,CAAC,MAAM;QAC/B,KAAK,EAAE,qBAAqB;QAC5B,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,UAAU;QACV,YAAY,EAAE,kBAAkB,CAAC,KAAK;KACvC,CAAC,CAAC;IAEH,MAAM,wBAAwB,GAAG,CAC/B,KAA2B,EAC3B,KAAa,EACb,EAAE;QACF,MAAM,UAAU,GAAG,gCAAgC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEhE,OAAO,CACL,kBACE,MAAM,EAAE,qBAAqB,CAAC,KAAK,EAAE,UAAU,CAAC,EAChD,KAAK,EAAE,KAAK,CAAC,oBAAoB,EAAE,QAAQ,EAC3C,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;gBACH,UAAU;gBACV,KAAK;gBACL,SAAS,EAAE,yBAAyB;gBACpC,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAA2B;aAC5C,CAAC,GAEJ,CACH,CAAC;IACJ,CAAC,CAAC;IAEF,OAAO,CACL,WAAK,KAAK,EAAC,2BAA2B,IACnC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QACnC,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,MAAM,GAAmB;gBAC7B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,SAAS;gBAClC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,IAAI;gBACd,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC;aACtB,CAAC;YACF,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;gBAC3C,oBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,WAAW;wBACtB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,GAEJ;gBACD,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YACjC,MAAM,MAAM,GAAgB;gBAC1B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBACh
C,SAAS,EAAE,aAAa,CAAC,KAAK;gBAC9B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,IAAI;aACf,CAAC;YAEF,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;gBAC3C,iBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CACf,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,UAAU;wBACrB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,EAEJ,YAAY,EAAE,CAAC,CAAC,EAAE,EAAE,CAClB,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,aAAa;wBACxB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,GAEJ;gBACD,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAiB;gBAC3B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,MAAM;gBAC/B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,IAAI;gBACd,UAAU,EAAE,KAAK,CAAC,OAAO;aAC1B,CAAC;YAEF,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;gBAC3C,kBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,WAAW;wBACtB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,GAEJ;gBACD,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;QACJ,CAAC;QAED,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;YAC3C,WAAK,KAAK,EAAC,iCAAiC;gBAC1C,iBAAQ,KAAK,CAAC,KAAK,CAAS;gBAC5B,aACE,IAAI,EAAC,MAAM,EACX,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,WAAW,EAAE,KAAK,CAAC,WAAW,EAC9B,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CACb,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,WAAW;wBACtB,KAAK,EAAG,CAAC,CAAC,MAA2B,CAAC,KAAK;qBAC5C,CAAC,GAEJ,CACE;YACL,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;IACJ,CAAC,CAAC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport {\n ExpectedOutcomeField,\n} from '../../../types/llm-test-runner';\nimport { ChipsConfig, FormFieldType, SelectConfig, TextAreaConfig } from '../../../lib/form/schema';\nimport {\n EvaluationApproach,\n} from '../../../lib/evaluation/constants';\nimport { getAllowedApproachesForFieldType } from 
'../../../lib/evaluation/field-evaluation-approach';\nimport { ExpectedOutcomeChange } from '../../../lib/test-cases/test-case-mutations';\n\nexport type ExpectedOutcomeChangeDetail = {\n testCaseId: string;\n} & ExpectedOutcomeChange;\n\ninterface ExpectedOutcomeRendererProps {\n testCaseId: string;\n fields: ExpectedOutcomeField[];\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const ExpectedOutcomeRenderer: FunctionalComponent<ExpectedOutcomeRendererProps> = ({\n testCaseId,\n fields,\n onExpectedOutcomeChange,\n}) => {\n const emit = (detail: ExpectedOutcomeChangeDetail) =>\n onExpectedOutcomeChange({\n detail,\n } as CustomEvent<ExpectedOutcomeChangeDetail>);\n\n const buildEvaluationConfig = (\n index: number,\n optionList: string[],\n ): SelectConfig => ({\n name: `expectedOutcomeEvaluation-${index}`,\n fieldType: FormFieldType.SELECT,\n label: 'Evaluation Approach',\n placeholder: 'Select evaluation approach…',\n required: true,\n optionList,\n defaultValue: EvaluationApproach.EXACT,\n });\n\n const renderEvaluationSelector = (\n field: ExpectedOutcomeField,\n index: number,\n ) => {\n const optionList = getAllowedApproachesForFieldType(field.type);\n\n return (\n <app-select\n config={buildEvaluationConfig(index, optionList)}\n value={field.evaluationParameters?.approach}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-evaluation-approach',\n value: e.detail.value as EvaluationApproach,\n })\n }\n />\n );\n };\n\n return (\n <div class=\"expected-outcome-renderer\">\n {(fields \|\| []).map((field, index) => {\n if (field.type === 'textarea') {\n const config: TextAreaConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.TEXT_AREA,\n label: field.label,\n placeholder: field.placeholder,\n required: true,\n rows: field.rows \|\| 2,\n };\n return (\n <div class=\"expected-outcome-renderer__group\">\n <app-textarea\n config={config}\n value={field.value}\n 
onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n />\n {renderEvaluationSelector(field, index)}\n </div>\n );\n }\n\n if (field.type === 'chips-input') {\n const config: ChipsConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.CHIPS,\n label: field.label,\n placeholder: field.placeholder,\n required: true,\n };\n\n return (\n <div class=\"expected-outcome-renderer__group\">\n <app-chips\n config={config}\n value={field.value}\n onAddChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'add-chip',\n value: e.detail.value,\n })\n }\n onRemoveChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'remove-chip',\n value: e.detail.value,\n })\n }\n />\n {renderEvaluationSelector(field, index)}\n </div>\n );\n }\n\n if (field.type === 'select') {\n const config: SelectConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.SELECT,\n label: field.label,\n placeholder: field.placeholder,\n required: true,\n optionList: field.options,\n };\n\n return (\n <div class=\"expected-outcome-renderer__group\">\n <app-select\n config={config}\n value={field.value}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n />\n {renderEvaluationSelector(field, index)}\n </div>\n );\n }\n\n return (\n <div class=\"expected-outcome-renderer__group\">\n <div class=\"expected-outcome-renderer__text\">\n <label>{field.label}</label>\n <input\n type=\"text\"\n value={field.value}\n placeholder={field.placeholder}\n onInput={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: (e.target as HTMLInputElement).value,\n })\n }\n />\n </div>\n {renderEvaluationSelector(field, index)}\n </div>\n );\n })}\n </div>\n );\n};\n"]}

package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css CHANGED Viewed

@@ -18,6 +18,23 @@
   border-right: var(--border-width) solid var(--border);
 }
+.expected-outcome-renderer {
+  display: flex;
+  flex-direction: column;
+  gap: var(--spacing-4);
+  margin-top: var(--spacing-4);
+}
+.expected-outcome-renderer__group {
+  display: flex;
+  flex-direction: column;
+  gap: var(--spacing-2);
+  padding: var(--spacing-3);
+  border: var(--border-width) solid var(--border);
+  border-radius: var(--radius-md);
+  background: var(--background);
+}
 /* Responsive Design */
 @media (max-width: 1200px) {
   .test-case-row {

package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js CHANGED Viewed

@@ -1,11 +1,10 @@
 import { h } from "@stencil/core";
-import { EvaluationApproach, EvaluationApproachValues, } from "../../../lib/evaluation/constants";
 import { ResponseOutput } from "./output/response-output";
 import { EvaluationSummary } from "./evaluation/evaluation-summary";
 import { RowActions } from "./actions/row-actions";
 import { FormFieldType } from "../../../lib/form/schema";
 import { ExpectedOutcomeRenderer, } from "./expected-outcome-renderer";
-export const LLMTestCaseRow = ({ testCase, onRun, onDelete, onUpdateApproach, handleTestCaseChange, onExpectedOutcomeChange, }) => {
+export const LLMTestCaseRow = ({ testCase, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, }) => {
     const questionConfig = {
         name: 'question',
         fieldType: FormFieldType.TEXT_AREA,
@@ -15,21 +14,12 @@ export const LLMTestCaseRow = ({ testCase, onRun, onDelete, onUpdateApproach, ha
         required: true,
         rows: 3,
     };
-    const evaluationConfig = {
-        name: 'EvaluationApproach',
-        fieldType: FormFieldType.SELECT,
-        label: 'Evaluation',
-        placeholder: 'Select evaluation approach…',
-        required: true,
-        optionList: EvaluationApproachValues,
-        defaultValue: EvaluationApproach.EXACT,
-    };
     return (h("div", { class: "test-case-row", key: testCase.id }, h("div", { class: "test-case-row__input-column" }, h("app-textarea", { config: questionConfig, value: testCase.question, onValueChange: (e) => handleTestCaseChange({
             detail: {
                 testCaseId: testCase.id,
                 key: 'question',
                 value: e.detail.value,
             },
-        }) }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], onExpectedOutcomeChange: onExpectedOutcomeChange }), h("app-select", { config: evaluationConfig, value: testCase.evaluationParameters?.approach, onValueChange: (e) => onUpdateApproach(testCase, e.detail.value) })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
+        }) }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
 };
 //# sourceMappingURL=llm-test-case-row.js.map

package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"llm-test-case-row.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-case-row.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,~~EACL,kBAAkB,EAClB,wBAAwB,GACzB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,~~EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,aAAa,~~EAAgC~~,MAAM,0BAA0B,CAAC;~~AACvF~~,OAAO,EAEL,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;~~AAerC~~,MAAM,CAAC,MAAM,cAAc,GAA6C,CAAC,EACvE,QAAQ,EACR,KAAK,EACL,QAAQ,EACR,~~gBAAgB,EAChB,~~oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,cAAc,GAAmB;QACrC,IAAI,EAAE,UAAU;QAChB,SAAS,EAAE,aAAa,CAAC,SAAS;QAClC,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,UAAU;QACjB,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,IAAI,EAAE,CAAC;KACR,CAAC;IACF,MAAM,gBAAgB,GAAiB;QACrC,IAAI,EAAE,oBAAoB;QAC1B,SAAS,EAAE,aAAa,CAAC,MAAM;QAC/B,KAAK,EAAE,YAAY;QACnB,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,UAAU,EAAE,wBAAwB;QACpC,YAAY,EAAE,kBAAkB,CAAC,KAAK;KACvC,CAAC;IAEF,OAAO,CACL,WAAK,KAAK,EAAC,eAAe,EAAC,GAAG,EAAE,QAAQ,CAAC,EAAE;QACzC,WAAK,KAAK,EAAC,6BAA6B;YACtC,oBACE,MAAM,EAAE,cAAc,EACtB,KAAK,EAAE,QAAQ,CAAC,QAAQ,EACxB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,oBAAoB,CAAC;oBACnB,MAAM,EAAE;wBACN,UAAU,EAAE,QAAQ,CAAC,EAAE;wBACvB,GAAG,EAAE,UAAU;wBACf,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB;iBACiE,CAAC,GAEvE;YACF,EAAC,uBAAuB,IACtB,UAAU,EAAE,QAAQ,CAAC,EAAE,EACvB,MAAM,EAAE,QAAQ,CAAC,eAAe,IAAI,EAAE,EACtC,uBAAuB,EAAE,uBAAuB,GAChD~~;YACF~~,~~kBACE,MAAM,EAAE,gBAAgB,EACxB,KAAK,EAAE,QAAQ,CAAC,oBAAoB,EAAE,QAAQ,EAC9C,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,gBAAgB,CAAC,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,KAA2B,CAAC,GAElE,~~CACE;QAEN,EAAC,cAAc,IAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,CAAC,SAAS,GAAI;QAE1E,EAAC,iBAAiB,IAChB,MAAM,EAAE,QAAQ,CAAC,gBAAgB,EACjC,SAAS,EAAE,QAAQ,CAAC,SAAS,GAC7B;QAEF,EAAC,UAAU,IACT,SAAS,EAAE,QAAQ,CAAC,SAAS,EAC7B,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAClC,KAAK,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC
,EAC5B,QAAQ,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,GACrC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport {~~\n EvaluationApproach,\n EvaluationApproachValues,\n}~~ ~~from '../../../lib/evaluation/constants';\nimport {~~ ResponseOutput } from './output/response-output';\nimport { EvaluationSummary } from './evaluation/evaluation-summary';\nimport { RowActions } from './actions/row-actions';\nimport { FormFieldType, ~~SelectConfig,~~ TextAreaConfig } from '../../../lib/form/schema';\nimport {\n ExpectedOutcomeChangeDetail,\n ExpectedOutcomeRenderer,\n} from './expected-outcome-renderer';\n\nexport interface LLMTestCaseRowProps {\n testCase: TestCase;\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n ~~onUpdateApproach: (testCase: TestCase, approach: EvaluationApproach) => void;\n~~ handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCaseRow: FunctionalComponent<LLMTestCaseRowProps> = ({\n testCase,\n onRun,\n onDelete,\n ~~onUpdateApproach,\n~~ handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n const questionConfig: TextAreaConfig = {\n name: 'question',\n fieldType: FormFieldType.TEXT_AREA,\n type: 'text',\n label: 'Question',\n placeholder: 'Enter your question here...',\n required: true,\n rows: 3,\n };\n const evaluationConfig: SelectConfig = {\n name: 'EvaluationApproach',\n fieldType: FormFieldType.SELECT,\n label: 'Evaluation',\n placeholder: 'Select evaluation approach…',\n required: true,\n optionList: EvaluationApproachValues,\n defaultValue: EvaluationApproach.EXACT,\n };\n\n return (\n <div class=\"test-case-row\" key={testCase.id}>\n <div class=\"test-case-row__input-column\">\n <app-textarea\n config={questionConfig}\n 
value={testCase.question}\n onValueChange={(e) =>\n handleTestCaseChange({\n detail: {\n testCaseId: testCase.id,\n key: 'question',\n value: e.detail.value,\n },\n } as CustomEvent<{ testCaseId: string; key: string; value: string }>)\n }\n />\n <ExpectedOutcomeRenderer\n testCaseId={testCase.id}\n fields={testCase.expectedOutcome \|\| []}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n <app-select\n config={evaluationConfig}\n value={testCase.evaluationParameters?.approach}\n onValueChange={(e) =>\n onUpdateApproach(testCase, e.detail.value as EvaluationApproach)\n }\n />\n </div>\n\n <ResponseOutput output={testCase.output} isRunning={testCase.isRunning} />\n\n <EvaluationSummary\n result={testCase.evaluationResult}\n isRunning={testCase.isRunning}\n />\n\n <RowActions\n isRunning={testCase.isRunning}\n canRun={!!testCase.question.trim()}\n onRun={() => onRun(testCase)}\n onDelete={() => onDelete(testCase.id)}\n />\n </div>\n );\n};\n"]}
1	+ {"version":3,"file":"llm-test-case-row.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-case-row.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAkB,MAAM,0BAA0B,CAAC;AACzE,OAAO,EAEL,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;AAcrC,MAAM,CAAC,MAAM,cAAc,GAA6C,CAAC,EACvE,QAAQ,EACR,KAAK,EACL,QAAQ,EACR,oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,cAAc,GAAmB;QACrC,IAAI,EAAE,UAAU;QAChB,SAAS,EAAE,aAAa,CAAC,SAAS;QAClC,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,UAAU;QACjB,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,IAAI,EAAE,CAAC;KACR,CAAC;IACF,OAAO,CACL,WAAK,KAAK,EAAC,eAAe,EAAC,GAAG,EAAE,QAAQ,CAAC,EAAE;QACzC,WAAK,KAAK,EAAC,6BAA6B;YACtC,oBACE,MAAM,EAAE,cAAc,EACtB,KAAK,EAAE,QAAQ,CAAC,QAAQ,EACxB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,oBAAoB,CAAC;oBACnB,MAAM,EAAE;wBACN,UAAU,EAAE,QAAQ,CAAC,EAAE;wBACvB,GAAG,EAAE,UAAU;wBACf,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB;iBACiE,CAAC,GAEvE;YACF,EAAC,uBAAuB,IACtB,UAAU,EAAE,QAAQ,CAAC,EAAE,EACvB,MAAM,EAAE,QAAQ,CAAC,eAAe,IAAI,EAAE,EACtC,uBAAuB,EAAE,uBAAuB,GAChD,CACE;QAEN,EAAC,cAAc,IAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,CAAC,SAAS,GAAI;QAE1E,EAAC,iBAAiB,IAChB,MAAM,EAAE,QAAQ,CAAC,gBAAgB,EACjC,SAAS,EAAE,QAAQ,CAAC,SAAS,GAC7B;QAEF,EAAC,UAAU,IACT,SAAS,EAAE,QAAQ,CAAC,SAAS,EAC7B,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAClC,KAAK,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,EAC5B,QAAQ,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,GACrC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport { ResponseOutput } from './output/response-output';\nimport { EvaluationSummary } from './evaluation/evaluation-summary';\nimport { RowActions } from './actions/row-actions';\nimport { FormFieldType, TextAreaConfig } from 
'../../../lib/form/schema';\nimport {\n ExpectedOutcomeChangeDetail,\n ExpectedOutcomeRenderer,\n} from './expected-outcome-renderer';\n\nexport interface LLMTestCaseRowProps {\n testCase: TestCase;\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCaseRow: FunctionalComponent<LLMTestCaseRowProps> = ({\n testCase,\n onRun,\n onDelete,\n handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n const questionConfig: TextAreaConfig = {\n name: 'question',\n fieldType: FormFieldType.TEXT_AREA,\n type: 'text',\n label: 'Question',\n placeholder: 'Enter your question here...',\n required: true,\n rows: 3,\n };\n return (\n <div class=\"test-case-row\" key={testCase.id}>\n <div class=\"test-case-row__input-column\">\n <app-textarea\n config={questionConfig}\n value={testCase.question}\n onValueChange={(e) =>\n handleTestCaseChange({\n detail: {\n testCaseId: testCase.id,\n key: 'question',\n value: e.detail.value,\n },\n } as CustomEvent<{ testCaseId: string; key: string; value: string }>)\n }\n />\n <ExpectedOutcomeRenderer\n testCaseId={testCase.id}\n fields={testCase.expectedOutcome \|\| []}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n </div>\n\n <ResponseOutput output={testCase.output} isRunning={testCase.isRunning} />\n\n <EvaluationSummary\n result={testCase.evaluationResult}\n isRunning={testCase.isRunning}\n />\n\n <RowActions\n isRunning={testCase.isRunning}\n canRun={!!testCase.question.trim()}\n onRun={() => onRun(testCase)}\n onDelete={() => onDelete(testCase.id)}\n />\n </div>\n );\n};\n"]}

package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { h } from "@stencil/core";
 import { LLMTestCaseRow } from "./llm-test-case-row";
 import { Button } from "../../../lib/ui/button/index";
-export const LLMTestCases = ({ testCases, onRun, onDelete, onUpdateApproach, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
-    return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, onRun: onRun, onDelete: onDelete, onUpdateApproach: onUpdateApproach, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
+export const LLMTestCases = ({ testCases, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
+    return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
 };
 //# sourceMappingURL=llm-test-cases.js.map

package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"llm-test-cases.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-cases.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;~~AAGvD~~,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,8BAA8B,CAAC;~~AAiBtD~~,MAAM,CAAC,MAAM,YAAY,GAA2C,CAAC,EACnE,SAAS,EACT,KAAK,EACL,QAAQ,EACR,~~gBAAgB,EAChB,~~aAAa,EACb,oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,OAAO,CACL,WAAK,KAAK,EAAC,YAAY;QACrB,WAAK,KAAK,EAAC,4BAA4B;YACrC,WAAK,KAAK,EAAC,2BAA2B,YAAY;YAClD,WAAK,KAAK,EAAC,2BAA2B,aAAa;YACnD,WAAK,KAAK,EAAC,2BAA2B,iBAAiB;YACvD,WAAK,KAAK,EAAC,2BAA2B,cAAc,CAChD;QAEL,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CACzB,EAAC,cAAc,IACb,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,KAAK,EACZ,QAAQ,EAAE,QAAQ,EAClB,~~gBAAgB,EAAE,gBAAgB,EAClC,~~oBAAoB,EAAE,oBAAoB,EAC1C,uBAAuB,EAAE,uBAAuB,GAChD,CACH,CAAC;QAEF,WAAK,KAAK,EAAC,yBAAyB;YAClC,EAAC,MAAM,IAAC,OAAO,EAAC,SAAS,EAAC,IAAI,EAAC,IAAI,EAAC,OAAO,EAAE,aAAa,qBAEjD,CACL,CACF,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport { ~~EvaluationApproach } from '../../../lib/evaluation/constants';\nimport {~~ LLMTestCaseRow } from './llm-test-case-row';\nimport { Button } from '../../../lib/ui/button/index';\nimport { ExpectedOutcomeChangeDetail } from './expected-outcome-renderer';\n\nexport interface LLMTestCasesProps {\n testCases: TestCase[];\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n ~~onUpdateApproach: (testCase: TestCase, approach: EvaluationApproach) => void;\n~~ onAddTestCase: () => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCases: FunctionalComponent<LLMTestCasesProps> = ({\n testCases,\n onRun,\n onDelete,\n ~~onUpdateApproach,\n~~ 
onAddTestCase,\n handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n return (\n <div class=\"test-cases\">\n <div class=\"test-cases__column-headers\">\n <div class=\"test-cases__column-header\">Input</div>\n <div class=\"test-cases__column-header\">Output</div>\n <div class=\"test-cases__column-header\">Evaluation</div>\n <div class=\"test-cases__column-header\">Actions</div>\n </div>\n\n {testCases.map(testCase => (\n <LLMTestCaseRow\n testCase={testCase}\n onRun={onRun}\n onDelete={onDelete}\n ~~onUpdateApproach={onUpdateApproach}\n~~ handleTestCaseChange={handleTestCaseChange}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n ))}\n\n <div class=\"test-cases__add-section\">\n <Button variant=\"outline\" size=\"md\" onClick={onAddTestCase}>\n + Add Question\n </Button>\n </div>\n </div>\n );\n};\n"]}
1	+ {"version":3,"file":"llm-test-cases.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-cases.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,8BAA8B,CAAC;AAgBtD,MAAM,CAAC,MAAM,YAAY,GAA2C,CAAC,EACnE,SAAS,EACT,KAAK,EACL,QAAQ,EACR,aAAa,EACb,oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,OAAO,CACL,WAAK,KAAK,EAAC,YAAY;QACrB,WAAK,KAAK,EAAC,4BAA4B;YACrC,WAAK,KAAK,EAAC,2BAA2B,YAAY;YAClD,WAAK,KAAK,EAAC,2BAA2B,aAAa;YACnD,WAAK,KAAK,EAAC,2BAA2B,iBAAiB;YACvD,WAAK,KAAK,EAAC,2BAA2B,cAAc,CAChD;QAEL,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CACzB,EAAC,cAAc,IACb,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,KAAK,EACZ,QAAQ,EAAE,QAAQ,EAClB,oBAAoB,EAAE,oBAAoB,EAC1C,uBAAuB,EAAE,uBAAuB,GAChD,CACH,CAAC;QAEF,WAAK,KAAK,EAAC,yBAAyB;YAClC,EAAC,MAAM,IAAC,OAAO,EAAC,SAAS,EAAC,IAAI,EAAC,IAAI,EAAC,OAAO,EAAE,aAAa,qBAEjD,CACL,CACF,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport { LLMTestCaseRow } from './llm-test-case-row';\nimport { Button } from '../../../lib/ui/button/index';\nimport { ExpectedOutcomeChangeDetail } from './expected-outcome-renderer';\n\nexport interface LLMTestCasesProps {\n testCases: TestCase[];\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n onAddTestCase: () => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCases: FunctionalComponent<LLMTestCasesProps> = ({\n testCases,\n onRun,\n onDelete,\n onAddTestCase,\n handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n return (\n <div class=\"test-cases\">\n <div class=\"test-cases__column-headers\">\n <div class=\"test-cases__column-header\">Input</div>\n <div 
class=\"test-cases__column-header\">Output</div>\n <div class=\"test-cases__column-header\">Evaluation</div>\n <div class=\"test-cases__column-header\">Actions</div>\n </div>\n\n {testCases.map(testCase => (\n <LLMTestCaseRow\n testCase={testCase}\n onRun={onRun}\n onDelete={onDelete}\n handleTestCaseChange={handleTestCaseChange}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n ))}\n\n <div class=\"test-cases__add-section\">\n <Button variant=\"outline\" size=\"md\" onClick={onAddTestCase}>\n + Add Question\n </Button>\n </div>\n </div>\n );\n};\n"]}

package/dist/collection/lib/evaluation/evaluation-engine.js CHANGED Viewed

@@ -6,56 +6,77 @@ import { performRougeLEvaluation } from "./evaluators/rougeL-evaluator";
 import { performBleuEvaluation } from "./evaluators/bleu/bleu-evaluator";
 export class LLMEvaluationEngine {
     async evaluateResponse(request, callback) {
-        try {
-            const approach = request.evaluationParameters.approach;
-            switch (approach) {
-                case EvaluationApproach.BLEU: {
-                    const bleuResult = performBleuEvaluation(request);
-                    callback(bleuResult);
-                    break;
-                }
-                case EvaluationApproach.EXACT: {
-                    const exactResult = await performEvaluation(request);
-                    callback(exactResult);
-                    break;
-                }
-                case EvaluationApproach.ROUGE_1: {
-                    const rougeResult = await performRouge1Evaluation(request);
-                    callback(rougeResult);
-                    break;
-                }
-                case EvaluationApproach.ROUGE_L: {
-                    const rougeLResult = await performRougeLEvaluation(request);
-                    callback(rougeLResult);
-                    break;
-                }
-                case EvaluationApproach.SEMANTIC: {
-                    const semanticResult = await performSemanticEvaluation(request);
-                    callback(semanticResult);
-                    break;
-                }
-                default: {
-                    console.warn(`Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`);
-                    const fallbackResult = await performEvaluation(request);
-                    callback(fallbackResult);
-                }
-            }
-        }
-        catch (error) {
-            console.error('Evaluation failed:', error);
-            const errorResult = {
+        const settledResults = await Promise.allSettled(request.fields.map(async (field) => {
+            const fieldRequest = {
                 testCaseId: request.testCaseId,
+                question: request.question,
+                actualResponse: request.actualResponse,
+                expectedOutcome: field.expectedValue,
+                evaluationParameters: field.evaluationParameters,
+            };
+            const result = await this.evaluateField(fieldRequest);
+            const fieldResult = {
+                index: field.index,
+                label: field.label,
+                type: field.type,
+                expectedValue: field.expectedValue,
+                passed: result.passed,
+                keywordMatches: result.keywordMatches,
+                evaluationParameters: result.evaluationParameters,
+                evaluationApproachResult: result.evaluationApproachResult,
+            };
+            return fieldResult;
+        }));
+        const fieldResults = settledResults.map((settledResult, index) => {
+            const field = request.fields[index];
+            if (settledResult.status === 'fulfilled') {
+                return settledResult.value;
+            }
+            return {
+                index: field.index,
+                label: field.label,
+                type: field.type,
+                expectedValue: field.expectedValue,
                 passed: false,
                 keywordMatches: [],
-                timestamp: new Date().toISOString(),
-                evaluationParameters: request.evaluationParameters,
+                evaluationParameters: field.evaluationParameters,
                 evaluationApproachResult: {
                     score: 0,
-                    approachUsed: EvaluationApproach.EXACT,
+                    approachUsed: field.evaluationParameters.approach,
                 },
+                error: this.getSafeErrorMessage(settledResult.reason),
             };
-            callback(errorResult);
+        });
+        const keywordMatches = fieldResults.flatMap(field => field.keywordMatches);
+        const passed = fieldResults.every(field => field.passed && !field.error);
+        callback({
+            testCaseId: request.testCaseId,
+            passed,
+            keywordMatches,
+            fieldResults,
+            timestamp: new Date().toISOString(),
+        });
+    }
+    async evaluateField(request) {
+        const approach = request.evaluationParameters.approach;
+        switch (approach) {
+            case EvaluationApproach.BLEU:
+                return performBleuEvaluation(request);
+            case EvaluationApproach.EXACT:
+                return performEvaluation(request);
+            case EvaluationApproach.ROUGE_1:
+                return performRouge1Evaluation(request);
+            case EvaluationApproach.ROUGE_L:
+                return performRougeLEvaluation(request);
+            case EvaluationApproach.SEMANTIC:
+                return performSemanticEvaluation(request);
+            default:
+                console.warn(`Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`);
+                return performEvaluation(request);
         }
     }
+    getSafeErrorMessage(error) {
+        return error instanceof Error ? error.message : 'Field evaluation failed.';
+    }
 }
 //# sourceMappingURL=evaluation-engine.js.map

package/dist/collection/lib/evaluation/evaluation-engine.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"evaluation-engine.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-engine.ts"],"names":[],"mappings":"~~AAKA~~,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,OAAO,mBAAmB;IAC9B,KAAK,CAAC,gBAAgB,CACpB,~~OAA0B~~,~~EAC1B~~,QAA4B;QAE5B,~~IAAI~~,~~CAAC;YACH~~,MAAM,~~QAAQ~~,~~GACZ~~,OAAO,CAAC,~~oBAAoB~~,CAAC,~~QAAQ~~,CAAC;~~YACxC~~,~~QAAQ~~,QAAQ,EAAE,CAAC;~~gBACjB~~,~~KAAK~~,~~kBAAkB~~,CAAC,~~IAAI~~,CAAC,CAAC,CAAC;~~oBAC7B~~,MAAM,~~UAAU~~,GAAG,~~qBAAqB~~,~~CAAC~~,~~OAAO,~~CAAC,~~CAAC;oBAClD~~,~~QAAQ,~~CAAC,~~UAAU~~,CAAC,CAAC;~~oBACrB~~,MAAM;~~gBACR~~,CAAC;~~gBAED~~,KAAK,~~kBAAkB~~,CAAC,KAAK,CAAC,CAAC,~~CAAC~~;~~oBAC9B~~,MAAM,~~WAAW~~,~~GAAG~~,MAAM,~~iBAAiB~~,~~CAAC~~,~~OAAO~~,CAAC,~~CAAC~~;~~oBACrD~~,~~QAAQ~~,~~CAAC~~,~~WAAW~~,CAAC,~~CAAC~~;~~oBACtB~~,MAAM;~~gBACR~~,CAAC;~~gBAED~~,~~KAAK~~,~~kBAAkB~~,CAAC,~~OAAO,~~CAAC,CAAC,CAAC;~~oBAChC~~,MAAM,~~WAAW~~,GAAG,MAAM,~~uBAAuB~~,~~CAAC~~,OAAO,CAAC,CAAC;~~oBAC3D~~,~~QAAQ~~,CAAC,WAAW,CAAC,CAAC;~~oBACtB~~,~~MAAM~~;~~gBACR~~,CAAC;~~gBAED~~,KAAK,~~kBAAkB~~,CAAC,~~OAAO~~,CAAC,CAAC,~~CAAC~~;~~oBAChC~~,MAAM,~~YAAY~~,~~GAAG~~,~~MAAM~~,~~uBAAuB~~,~~CAAC~~,~~OAAO~~,CAAC,~~CAAC~~;~~oBAC5D~~,~~QAAQ~~,CAAC,YAAY,~~CAAC~~,CAAC~~;oBACvB~~,~~MAAM;gBACR~~,CAAC;~~gBAED~~,KAAK,~~kBAAkB~~,CAAC,~~QAAQ~~,CAAC,CAAC,CAAC;~~oBACjC~~,MAAM,cAAc,GAAG,~~MAAM~~,~~yBAAyB,~~CAAC,OAAO,CAAC,CAAC~~;oBAChE~~,~~QAAQ~~,CAAC,cAAc,CAAC,CAAC;~~oBACzB~~,MAAM~~;gBACR~~,CAAC~~;gBAED~~,~~OAAO~~,CAAC,CAAC,CAAC~~;oBACR~~,~~OAAO~~,CAAC,IAAI,~~CACV~~,~~8BAA8B~~,~~OAAO,~~CAAC,~~oBAAoB~~,CAAC,QAAQ,~~kCAAkC~~,~~CACtG~~,CAAC;~~oBACF~~,MAAM,cAAc,~~GAAG~~,~~MAAM~~,~~iBAAiB~~,CAAC,~~OAAO~~,CAAC,CAAC;~~oBACxD~~,QAAQ,CAAC,~~cAAc~~,CAAC,CAAC;~~gBAC3B~~,CAAC;~~YACH~~,CAAC;~~QACH~~,CAAC~~;QAAC~~,OAAO,KAAK,~~EAAE~~,CAAC;~~
YACf~~,OAAO,CAAC,KAAK,CAAC,~~oBAAoB~~,~~EAAE~~,~~KAAK~~,CAAC,CAAC;~~YAE3C~~,~~MAAM~~,~~WAAW~~,~~GAAqB~~;~~gBACpC~~,~~UAAU~~,~~EAAE~~,OAAO,CAAC,~~UAAU~~;~~gBAC9B~~,~~MAAM~~,~~EAAE~~,~~KAAK~~;~~gBACb~~,~~cAAc~~,~~EAAE~~,~~EAAE~~;~~gBAClB~~,~~SAAS~~,~~EAAE~~,IAAI,~~IAAI~~,~~EAAE~~,CAAC,~~WAAW~~,~~EAAE~~;~~gBACnC~~,~~oBAAoB~~,~~EAAE~~,OAAO,CAAC,~~oBAAoB~~;~~gBAClD~~,~~wBAAwB~~,~~EAAE~~;~~oBACxB~~,~~KAAK~~,~~EAAE~~,~~CAAC~~;~~oBACR~~,YAAY,~~EAAE~~,~~kBAAkB~~,CAAC,KAAK~~;iBACvC;aACF~~,CAAC~~;YAEF~~,~~QAAQ~~,CAAC,~~WAAW,~~CAAC,CAAC~~;QACxB~~,CAAC;~~IACH~~,CAAC;CACF","sourcesContent":["import {\n EvaluationRequest,\n EvaluationResult,\n EvaluationCallback,\n} from './types';\nimport { performEvaluation } from './evaluators/exact/exact';\nimport { EvaluationApproach } from './constants';\nimport { performRouge1Evaluation } from './evaluators/rouge1-evaluator';\nimport { performSemanticEvaluation } from './evaluators/semantic/index';\nimport { performRougeLEvaluation } from './evaluators/rougeL-evaluator';\nimport { performBleuEvaluation } from './evaluators/bleu/bleu-evaluator';\n\nexport class LLMEvaluationEngine {\n async evaluateResponse(\n request: ~~EvaluationRequest~~,\n callback: EvaluationCallback,\n ): Promise<void> {\n ~~try~~ {\n const ~~approach~~: ~~EvaluationApproach~~ =\n request.evaluationParameters.~~approach~~;\n ~~switch~~ (~~approach~~) {\n ~~case~~ ~~EvaluationApproach.BLEU~~: {\n ~~const~~ ~~bleuResult~~ = ~~performBleuEvaluation(request);\~~n ~~callback(bleuResult);\~~n ~~break~~;\n }\n\n ~~case~~ ~~EvaluationApproach.EXACT~~: {\n const ~~exactResult~~ = ~~await performEvaluation(~~request);\n ~~callback~~(~~exactResult~~);\n ~~break~~;\n }\n\n ~~case~~ ~~EvaluationApproach~~.~~ROUGE_1~~: {\n ~~const~~ ~~rougeResult~~ = ~~await~~ ~~performRouge1Evaluation(request);\~~n ~~callback~~(~~rougeResult~~);\n ~~break~~;\n }\n\n ~~case~~ ~~EvaluationApproach~~.~~ROUGE_L:~~ {\n const ~~rougeLResult~~ = ~~await~~ ~~performRougeLEvaluation(request~~);\n 
callback(~~rougeLResult~~);\n ~~break~~;\n }\n\n ~~case~~ ~~EvaluationApproach.SEMANTIC~~: {\n const ~~semanticResult~~ = ~~await~~ ~~performSemanticEvaluation~~(request);\n ~~callback~~(~~semanticResult~~);\n ~~break~~;\n }\n\n ~~default:~~ {\n console.warn(\n `Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`,\n );\n ~~const~~ ~~fallbackResult = await~~ performEvaluation(request);\n ~~callback(fallbackResult);\n~~ }\n }\n } ~~catch~~ (error~~) {\n console.error('Evaluation failed~~:', ~~error~~)~~;\n\n const errorResult~~: ~~EvaluationResult~~ = {\n ~~testCaseId:~~ ~~request.testCaseId,\n passed:~~ ~~false,\n keywordMatches:~~ ~~[],\n timestamp:~~ ~~new~~ ~~Date()~~.~~toISOString(),\n evaluationParameters:~~ ~~request.evaluationParameters,\n evaluationApproachResult~~: ~~{\n score:~~ ~~0,\n approachUsed:~~ ~~EvaluationApproach~~.~~EXACT,\n },\n }~~;\n~~\n callback(errorResult);\n }\n~~ }\n}\n"]}
1	+ {"version":3,"file":"evaluation-engine.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-engine.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,OAAO,mBAAmB;IAC9B,KAAK,CAAC,gBAAgB,CACpB,OAA4B,EAC5B,QAA4B;QAE5B,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,UAAU,CAC7C,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAC,KAAK,EAAC,EAAE;YAC/B,MAAM,YAAY,GAAsB;gBACtC,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,cAAc,EAAE,OAAO,CAAC,cAAc;gBACtC,eAAe,EAAE,KAAK,CAAC,aAAa;gBACpC,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;aACjD,CAAC;YACF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YAEtD,MAAM,WAAW,GAA0B;gBACzC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,oBAAoB,EAAE,MAAM,CAAC,oBAAqB;gBAClD,wBAAwB,EAAE,MAAM,CAAC,wBAAwB;aAC1D,CAAC;YACF,OAAO,WAAW,CAAC;QACrB,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,YAAY,GAA4B,cAAc,CAAC,GAAG,CAC9D,CAAC,aAAa,EAAE,KAAK,EAAE,EAAE;YACvB,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,aAAa,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBACzC,OAAO,aAAa,CAAC,KAAK,CAAC;YAC7B,CAAC;YAED,OAAO;gBACL,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;gBAChD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,KAAK,CAAC,oBAAoB,CAAC,QAAQ;iBAClD;gBACD,KAAK,EAAE,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,MAAM,CAAC;aACtD,CAAC;QACJ,CAAC,CACF,CAAC;QAEF,MAAM,cAAc,GAAG,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC3E,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAE
zE,QAAQ,CAAC;YACP,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,MAAM;YACN,cAAc;YACd,YAAY;YACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,OAA0B;QACpD,MAAM,QAAQ,GAAuB,OAAO,CAAC,oBAAoB,CAAC,QAAQ,CAAC;QAC3E,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,kBAAkB,CAAC,IAAI;gBAC1B,OAAO,qBAAqB,CAAC,OAAO,CAAC,CAAC;YACxC,KAAK,kBAAkB,CAAC,KAAK;gBAC3B,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;YACpC,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,QAAQ;gBAC9B,OAAO,yBAAyB,CAAC,OAAO,CAAC,CAAC;YAC5C;gBACE,OAAO,CAAC,IAAI,CACV,8BAA8B,OAAO,CAAC,oBAAoB,CAAC,QAAQ,kCAAkC,CACtG,CAAC;gBACF,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,mBAAmB,CAAC,KAAc;QACxC,OAAO,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,0BAA0B,CAAC;IAC7E,CAAC;CACF","sourcesContent":["import {\n EvaluationRequest,\n EvaluationResult,\n EvaluationCallback,\n FieldEvaluationResult,\n EvaluationRequestV2,\n} from './types';\nimport { performEvaluation } from './evaluators/exact/exact';\nimport { EvaluationApproach } from './constants';\nimport { performRouge1Evaluation } from './evaluators/rouge1-evaluator';\nimport { performSemanticEvaluation } from './evaluators/semantic/index';\nimport { performRougeLEvaluation } from './evaluators/rougeL-evaluator';\nimport { performBleuEvaluation } from './evaluators/bleu/bleu-evaluator';\n\nexport class LLMEvaluationEngine {\n async evaluateResponse(\n request: EvaluationRequestV2,\n callback: EvaluationCallback,\n ): Promise<void> {\n const settledResults = await Promise.allSettled(\n request.fields.map(async field => {\n const fieldRequest: EvaluationRequest = {\n testCaseId: request.testCaseId,\n question: request.question,\n actualResponse: request.actualResponse,\n expectedOutcome: field.expectedValue,\n evaluationParameters: field.evaluationParameters,\n };\n const result = await this.evaluateField(fieldRequest);\n\n const fieldResult: 
FieldEvaluationResult = {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: result.passed,\n keywordMatches: result.keywordMatches,\n evaluationParameters: result.evaluationParameters!,\n evaluationApproachResult: result.evaluationApproachResult,\n };\n return fieldResult;\n }),\n );\n\n const fieldResults: FieldEvaluationResult[] = settledResults.map(\n (settledResult, index) => {\n const field = request.fields[index];\n if (settledResult.status === 'fulfilled') {\n return settledResult.value;\n }\n\n return {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: false,\n keywordMatches: [],\n evaluationParameters: field.evaluationParameters,\n evaluationApproachResult: {\n score: 0,\n approachUsed: field.evaluationParameters.approach,\n },\n error: this.getSafeErrorMessage(settledResult.reason),\n };\n },\n );\n\n const keywordMatches = fieldResults.flatMap(field => field.keywordMatches);\n const passed = fieldResults.every(field => field.passed && !field.error);\n\n callback({\n testCaseId: request.testCaseId,\n passed,\n keywordMatches,\n fieldResults,\n timestamp: new Date().toISOString(),\n });\n }\n\n private async evaluateField(request: EvaluationRequest): Promise<EvaluationResult> {\n const approach: EvaluationApproach = request.evaluationParameters.approach;\n switch (approach) {\n case EvaluationApproach.BLEU:\n return performBleuEvaluation(request);\n case EvaluationApproach.EXACT:\n return performEvaluation(request);\n case EvaluationApproach.ROUGE_1:\n return performRouge1Evaluation(request);\n case EvaluationApproach.ROUGE_L:\n return performRougeLEvaluation(request);\n case EvaluationApproach.SEMANTIC:\n return performSemanticEvaluation(request);\n default:\n console.warn(\n `Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`,\n );\n return performEvaluation(request);\n }\n }\n\n 
private getSafeErrorMessage(error: unknown): string {\n return error instanceof Error ? error.message : 'Field evaluation failed.';\n }\n}\n"]}

package/dist/collection/lib/evaluation/evaluation-service.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { LLMEvaluationEngine } from "./evaluation-engine";
-import { serializeExpectedOutcome } from "../expected-outcome-serializer";
+import { normalizeEvaluationParametersForField } from "./field-evaluation-approach";
 /**
  * Service for evaluating test case responses
  */
@@ -18,12 +18,18 @@ export class EvaluationService {
             console.warn('⚠️ No output to evaluate for test case:', testCase.id);
             return;
         }
+        const fields = (testCase.expectedOutcome || []).map((field, index) => ({
+            index,
+            label: field.label,
+            type: field.type,
+            expectedValue: getFieldExpectedValue(field),
+            evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
+        }));
         const evaluationRequest = {
             testCaseId: testCase.id,
             question: testCase.question,
-            expectedOutcome: serializeExpectedOutcome(testCase.expectedOutcome),
             actualResponse: testCase.output,
-            evaluationParameters: testCase.evaluationParameters,
+            fields,
         };
         await this.engine.evaluateResponse(evaluationRequest, (result) => {
             console.log('📊 Evaluation result received:', result);
@@ -31,4 +37,10 @@ export class EvaluationService {
         });
     }
 }
+function getFieldExpectedValue(field) {
+    if (field.type === 'chips-input') {
+        return field.value.join(', ');
+    }
+    return field.value;
+}
 //# sourceMappingURL=evaluation-service.js.map

package/dist/collection/lib/evaluation/evaluation-service.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"evaluation-service.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;~~AAG1D~~,OAAO,EAAE,~~wBAAwB~~,EAAE,MAAM,~~gCAAgC~~,CAAC;~~AAE1E~~;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAsB;IAEpC;QACE,IAAI,CAAC,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAC1C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CACpB,QAAkB,EAClB,QAA4C;QAE5C,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YACrB,OAAO,CAAC,IAAI,CAAC,yCAAyC,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;YACrE,OAAO;QACT,CAAC;QAED,MAAM,~~iBAAiB~~,~~GAAsB~~;~~YAC3C~~,~~UAAU~~,EAAE,~~QAAQ~~,CAAC,EAAE;~~YACvB~~,~~QAAQ~~,EAAE,~~QAAQ~~,CAAC,~~QAAQ~~;~~YAC3B~~,~~eAAe~~,EAAE,~~wBAAwB~~,CAAC,~~QAAQ~~,CAAC,~~eAAe~~,CAAC;~~YACnE~~,~~cAAc~~,EAAE,QAAQ,CAAC,~~MAAM~~;~~YAC/B~~,~~oBAAoB~~,EAAE,QAAQ,CAAC,~~oBAAoB~~;~~SACpD~~,CAAC;QAEF,MAAM,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAChC,iBAAiB,EACjB,CAAC,MAAwB,EAAE,EAAE;YAC3B,OAAO,CAAC,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;YACtD,QAAQ,CAAC,MAAM,CAAC,CAAC;QACnB,CAAC,CACF,CAAC;IACJ,CAAC;CACF","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport { ~~EvaluationRequest,~~ EvaluationResult } from './types';\nimport { TestCase } from '../../types/llm-test-runner';\nimport { ~~serializeExpectedOutcome~~ } from '~~../expected~~-~~outcome~~-~~serializer~~';\n\n/*\n Service for evaluating test case responses\n /\nexport class EvaluationService {\n private engine: LLMEvaluationEngine;\n\n constructor() {\n this.engine = new LLMEvaluationEngine();\n }\n\n /\n Evaluates a test case response\n * @param testCase - The test case to evaluate\n * @param onResult - Callback to handle the evaluation result\n */\n async evaluateTestCase(\n testCase: TestCase,\n onResult: (result: EvaluationResult) => void,\n ): Promise<void> {\n if (!testCase.output) {\n console.warn('⚠️ No output to evaluate for test case:', testCase.id);\n return;\n }\n\n const ~~evaluationRequest~~: ~~EvaluationRequest~~ = {\n ~~testCaseId~~: 
~~testCase~~.id,\n ~~question~~: ~~testCase~~.~~question~~,\n ~~expectedOutcome~~: ~~serializeExpectedOutcome~~(~~testCase~~.~~expectedOutcome~~),\n ~~actualResponse~~: testCase.~~output~~,\n ~~evaluationParameters~~: testCase.~~evaluationParameters~~,\n };\n\n await this.engine.evaluateResponse(\n evaluationRequest,\n (result: EvaluationResult) => {\n console.log('📊 Evaluation result received:', result);\n onResult(result);\n },\n );\n }\n}\n"]}
1	+ {"version":3,"file":"evaluation-service.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAO1D,OAAO,EAAE,qCAAqC,EAAE,MAAM,6BAA6B,CAAC;AAEpF;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAsB;IAEpC;QACE,IAAI,CAAC,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAC1C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CACpB,QAAkB,EAClB,QAA4C;QAE5C,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YACrB,OAAO,CAAC,IAAI,CAAC,yCAAyC,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;YACrE,OAAO;QACT,CAAC;QAED,MAAM,MAAM,GAA2B,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,GAAG,CACzE,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;YACjB,KAAK;YACL,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,aAAa,EAAE,qBAAqB,CAAC,KAAK,CAAC;YAC3C,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;SACF,CAAC,CACH,CAAC;QAEF,MAAM,iBAAiB,GAAwB;YAC7C,UAAU,EAAE,QAAQ,CAAC,EAAE;YACvB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;YAC3B,cAAc,EAAE,QAAQ,CAAC,MAAM;YAC/B,MAAM;SACP,CAAC;QAEF,MAAM,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAChC,iBAAiB,EACjB,CAAC,MAAwB,EAAE,EAAE;YAC3B,OAAO,CAAC,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;YACtD,QAAQ,CAAC,MAAM,CAAC,CAAC;QACnB,CAAC,CACF,CAAC;IACJ,CAAC;CACF;AAED,SAAS,qBAAqB,CAAC,KAA2B;IACxD,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IACD,OAAO,KAAK,CAAC,KAAK,CAAC;AACrB,CAAC","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport {\n EvaluationResult,\n FieldEvaluationInput,\n EvaluationRequestV2,\n} from './types';\nimport { TestCase, ExpectedOutcomeField } from '../../types/llm-test-runner';\nimport { normalizeEvaluationParametersForField } from './field-evaluation-approach';\n\n/*\n Service for evaluating test case responses\n /\nexport class EvaluationService {\n private engine: LLMEvaluationEngine;\n\n constructor() {\n this.engine = new LLMEvaluationEngine();\n }\n\n /\n Evaluates a test case response\n * @param testCase - The test case to evaluate\n * 
@param onResult - Callback to handle the evaluation result\n */\n async evaluateTestCase(\n testCase: TestCase,\n onResult: (result: EvaluationResult) => void,\n ): Promise<void> {\n if (!testCase.output) {\n console.warn('⚠️ No output to evaluate for test case:', testCase.id);\n return;\n }\n\n const fields: FieldEvaluationInput[] = (testCase.expectedOutcome \|\| []).map(\n (field, index) => ({\n index,\n label: field.label,\n type: field.type,\n expectedValue: getFieldExpectedValue(field),\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n }),\n );\n\n const evaluationRequest: EvaluationRequestV2 = {\n testCaseId: testCase.id,\n question: testCase.question,\n actualResponse: testCase.output,\n fields,\n };\n\n await this.engine.evaluateResponse(\n evaluationRequest,\n (result: EvaluationResult) => {\n console.log('📊 Evaluation result received:', result);\n onResult(result);\n },\n );\n }\n}\n\nfunction getFieldExpectedValue(field: ExpectedOutcomeField): string {\n if (field.type === 'chips-input') {\n return field.value.join(', ');\n }\n return field.value;\n}\n"]}