npm - llm-testrunner-components - Versions diffs - 1.1.0 → 1.2.0 - Mend

llm-testrunner-components 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js CHANGED Viewed

@@ -1,9 +1,29 @@
 import { h } from "@stencil/core";
 import { FormFieldType } from "../../../lib/form/schema";
+import { EvaluationApproach, } from "../../../lib/evaluation/constants";
+import { getAllowedApproachesForFieldType } from "../../../lib/evaluation/field-evaluation-approach";
 export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange, }) => {
     const emit = (detail) => onExpectedOutcomeChange({
         detail,
     });
+    const buildEvaluationConfig = (index, optionList) => ({
+        name: `expectedOutcomeEvaluation-${index}`,
+        fieldType: FormFieldType.SELECT,
+        label: 'Evaluation Approach',
+        placeholder: 'Select evaluation approach…',
+        required: true,
+        optionList,
+        defaultValue: EvaluationApproach.EXACT,
+    });
+    const renderEvaluationSelector = (field, index) => {
+        const optionList = getAllowedApproachesForFieldType(field.type);
+        return (h("app-select", { config: buildEvaluationConfig(index, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
+                testCaseId,
+                index,
+                operation: 'set-evaluation-approach',
+                value: e.detail.value,
+            }) }));
+    };
     return (h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index) => {
         if (field.type === 'textarea') {
             const config = {
@@ -11,15 +31,15 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
                 fieldType: FormFieldType.TEXT_AREA,
                 label: field.label,
                 placeholder: field.placeholder,
-                required: field.required,
+                required: true,
                 rows: field.rows || 2,
             };
-            return (h("app-textarea", { config: config, value: field.value, onValueChange: (e) => emit({
+            return (h("div", { class: "expected-outcome-renderer__group" }, h("app-textarea", { config: config, value: field.value, onValueChange: (e) => emit({
                     testCaseId,
                     index,
                     operation: 'set-value',
                     value: e.detail.value,
-                }) }));
+                }) }), renderEvaluationSelector(field, index)));
         }
         if (field.type === 'chips-input') {
             const config = {
@@ -27,9 +47,9 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
                 fieldType: FormFieldType.CHIPS,
                 label: field.label,
                 placeholder: field.placeholder,
-                required: field.required,
+                required: true,
             };
-            return (h("app-chips", { config: config, value: field.value, onAddChip: (e) => emit({
+            return (h("div", { class: "expected-outcome-renderer__group" }, h("app-chips", { config: config, value: field.value, onAddChip: (e) => emit({
                     testCaseId,
                     index,
                     operation: 'add-chip',
@@ -39,7 +59,7 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
                     index,
                     operation: 'remove-chip',
                     value: e.detail.value,
-                }) }));
+                }) }), renderEvaluationSelector(field, index)));
         }
         if (field.type === 'select') {
             const config = {
@@ -47,22 +67,22 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
                 fieldType: FormFieldType.SELECT,
                 label: field.label,
                 placeholder: field.placeholder,
-                required: field.required,
+                required: true,
                 optionList: field.options,
             };
-            return (h("app-select", { config: config, value: field.value, onValueChange: (e) => emit({
+            return (h("div", { class: "expected-outcome-renderer__group" }, h("app-select", { config: config, value: field.value, onValueChange: (e) => emit({
                     testCaseId,
                     index,
                     operation: 'set-value',
                     value: e.detail.value,
-                }) }));
+                }) }), renderEvaluationSelector(field, index)));
         }
-        return (h("div", { class: "expected-outcome-renderer__text" }, h("label", null, field.label), h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
+        return (h("div", { class: "expected-outcome-renderer__group" }, h("div", { class: "expected-outcome-renderer__text" }, h("label", null, field.label), h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
                 testCaseId,
                 index,
                 operation: 'set-value',
                 value: e.target.value,
-            }) })));
+            }) })), renderEvaluationSelector(field, index)));
     })));
 };
 //# sourceMappingURL=expected-outcome-renderer.js.map

package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"expected-outcome-renderer.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/expected-outcome-renderer.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAIvD,OAAO,EAAe,aAAa,EAAgC,MAAM,0BAA0B,CAAC;~~AAsBpG~~,MAAM,CAAC,MAAM,uBAAuB,GAAsD,CAAC,EACzF,UAAU,EACV,MAAM,EACN,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,IAAI,GAAG,CAAC,MAAmC,EAAE,EAAE,CACnD,uBAAuB,CAAC;QACtB,MAAM;KACqC,CAAC,CAAC;IAEjD,OAAO,CACL,WAAK,KAAK,EAAC,2BAA2B,IACnC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QACnC,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,MAAM,GAAmB;gBAC7B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,SAAS;gBAClC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,~~KAAK,CAAC,QAAQ~~;~~gBACxB~~,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC;aACtB,CAAC;YACF,OAAO,CACL,oBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;~~oBACH~~,UAAU;~~oBACV~~,KAAK;~~oBACL~~,SAAS,EAAE,WAAW;~~oBACtB~~,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;~~iBACtB~~,CAAC,GAEJ,~~CACH~~,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YACjC,MAAM,MAAM,GAAgB;gBAC1B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,KAAK;gBAC9B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,~~KAAK,CAAC,QAAQ~~;~~aACzB~~,CAAC;YAEF,OAAO,CACL,iBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CACf,IAAI,CAAC;~~oBACH~~,UAAU;~~oBACV~~,KAAK;~~oBACL~~,SAAS,EAAE,UAAU;~~oBACrB~~,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;~~iBACtB~~,CAAC,EAEJ,YAAY,EAAE,CAAC,CAAC,EAAE,EAAE,CAClB,IAAI,CAAC;~~oBACH~~,UAAU;~~oBACV~~,KAAK;~~oBACL~~,SAAS,EAAE,aAAa;~~oBACxB~~,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;~~iBACtB~~,CAAC,GAEJ,~~CACH~~,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAiB;gBAC3B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,MAAM;gBAC/B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,~~KAAK,CAAC,QAAQ~~;~~gBACxB~~,UAAU,EAAE,KAAK,CAAC,OAAO;aAC1B,CAAC;YAEF,OAAO,CACL,kBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;~~oBACH~~,UAAU;~~oBACV~~,KAAK;~~oBACL~~,SAAS,EAAE,WAAW;~~oBACtB~~,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;~~iBACtB~~,CAAC,GAEJ,~~CACH~~,CAAC;QACJ,CAAC;QAED,OAAO,CACL,WAAK,KAAK,EAAC,iCAAiC;~~YAC1C~~,iBAAQ,KAAK,CAAC,KAAK,CAAS;~~YAC5B~~,aACE,IAAI,EAAC,MAAM,EACX,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,WAAW,EAAE,KAAK,CAAC,WAAW,EAC9B,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CACb,IAAI,CAAC;~~oBACH~~,UAAU;~~oBACV~~,KAAK;~~oBACL~~,SAAS,EAAE,WAAW;~~oBACtB~~,KAAK,EAAG,CAAC,CAAC,MAA2B,CAAC,KAAK;~~iBAC5C~~,CAAC,GAEJ,CACE,CACP,CAAC;IACJ,CAAC,CAAC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport {\n ExpectedOutcomeField,\n} from '../../../types/llm-test-runner';\nimport { ChipsConfig, FormFieldType, SelectConfig, TextAreaConfig } from '../../../lib/form/schema';\n\~~nexport type ExpectedOutcomeOperation =\~~n \| '~~set-value~~'~~\n \|~~ '~~add~~-~~chip~~'~~\n \|~~ '~~remove~~-~~chip~~';\n\nexport ~~interface~~ ExpectedOutcomeChangeDetail {\n testCaseId: string;\n ~~index:~~ ~~number;\n operation:~~ ~~ExpectedOutcomeOperation~~;\n ~~value?: string;\n}~~\n\ninterface ExpectedOutcomeRendererProps {\n testCaseId: string;\n fields: ExpectedOutcomeField[];\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const ExpectedOutcomeRenderer: FunctionalComponent<ExpectedOutcomeRendererProps> = ({\n testCaseId,\n fields,\n onExpectedOutcomeChange,\n}) => {\n const emit = (detail: ExpectedOutcomeChangeDetail) =>\n onExpectedOutcomeChange({\n detail,\n } as CustomEvent<ExpectedOutcomeChangeDetail>);\n\n return (\n <div class=\"expected-outcome-renderer\">\n {(fields \|\| []).map((field, index) => {\n if (field.type === 'textarea') {\n const config: TextAreaConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.TEXT_AREA,\n label: field.label,\n placeholder: field.placeholder,\n required: ~~field.required~~,\n rows: field.rows \|\| 2,\n };\n return (\n <app-textarea\n config={config}\n value={field.value}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n ~~/>\~~n );\n }\n\n if (field.type === 'chips-input') {\n const config: ChipsConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.CHIPS,\n label: field.label,\n placeholder: field.placeholder,\n required: ~~field.required~~,\n };\n\n return (\n <app-chips\n config={config}\n value={field.value}\n onAddChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'add-chip',\n value: e.detail.value,\n })\n }\n onRemoveChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'remove-chip',\n value: e.detail.value,\n })\n }\n ~~/>\~~n );\n }\n\n if (field.type === 'select') {\n const config: SelectConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.SELECT,\n label: field.label,\n placeholder: field.placeholder,\n required: ~~field.required~~,\n optionList: field.options,\n };\n\n return (\n <app-select\n config={config}\n value={field.value}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n ~~/>\~~n );\n }\n\n return (\n <div class=\"expected-outcome-~~renderer__text~~\">\n <label>{field.label}</label>\n <input\n type=\"text\"\n value={field.value}\n placeholder={field.placeholder}\n onInput={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: (e.target as HTMLInputElement).value,\n })\n }\n />\n </div>\n );\n })}\n </div>\n );\n};\n"]}
1	+ {"version":3,"file":"expected-outcome-renderer.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/expected-outcome-renderer.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAIvD,OAAO,EAAe,aAAa,EAAgC,MAAM,0BAA0B,CAAC;AACpG,OAAO,EACL,kBAAkB,GACnB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,gCAAgC,EAAE,MAAM,mDAAmD,CAAC;AAerG,MAAM,CAAC,MAAM,uBAAuB,GAAsD,CAAC,EACzF,UAAU,EACV,MAAM,EACN,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,IAAI,GAAG,CAAC,MAAmC,EAAE,EAAE,CACnD,uBAAuB,CAAC;QACtB,MAAM;KACqC,CAAC,CAAC;IAEjD,MAAM,qBAAqB,GAAG,CAC5B,KAAa,EACb,UAAoB,EACN,EAAE,CAAC,CAAC;QAClB,IAAI,EAAE,6BAA6B,KAAK,EAAE;QAC1C,SAAS,EAAE,aAAa,CAAC,MAAM;QAC/B,KAAK,EAAE,qBAAqB;QAC5B,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,UAAU;QACV,YAAY,EAAE,kBAAkB,CAAC,KAAK;KACvC,CAAC,CAAC;IAEH,MAAM,wBAAwB,GAAG,CAC/B,KAA2B,EAC3B,KAAa,EACb,EAAE;QACF,MAAM,UAAU,GAAG,gCAAgC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEhE,OAAO,CACL,kBACE,MAAM,EAAE,qBAAqB,CAAC,KAAK,EAAE,UAAU,CAAC,EAChD,KAAK,EAAE,KAAK,CAAC,oBAAoB,EAAE,QAAQ,EAC3C,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;gBACH,UAAU;gBACV,KAAK;gBACL,SAAS,EAAE,yBAAyB;gBACpC,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAA2B;aAC5C,CAAC,GAEJ,CACH,CAAC;IACJ,CAAC,CAAC;IAEF,OAAO,CACL,WAAK,KAAK,EAAC,2BAA2B,IACnC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QACnC,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,MAAM,GAAmB;gBAC7B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,SAAS;gBAClC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,IAAI;gBACd,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC;aACtB,CAAC;YACF,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;gBAC3C,oBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,WAAW;wBACtB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,GAEJ;gBACD,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YACjC,MAAM,MAAM,GAAgB;gBAC1B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,KAAK;gBAC9B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,IAAI;aACf,CAAC;YAEF,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;gBAC3C,iBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CACf,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,UAAU;wBACrB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,EAEJ,YAAY,EAAE,CAAC,CAAC,EAAE,EAAE,CAClB,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,aAAa;wBACxB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,GAEJ;gBACD,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAiB;gBAC3B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,MAAM;gBAC/B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,IAAI;gBACd,UAAU,EAAE,KAAK,CAAC,OAAO;aAC1B,CAAC;YAEF,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;gBAC3C,kBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,WAAW;wBACtB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,GAEJ;gBACD,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;QACJ,CAAC;QAED,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;YAC3C,WAAK,KAAK,EAAC,iCAAiC;gBAC1C,iBAAQ,KAAK,CAAC,KAAK,CAAS;gBAC5B,aACE,IAAI,EAAC,MAAM,EACX,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,WAAW,EAAE,KAAK,CAAC,WAAW,EAC9B,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CACb,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,WAAW;wBACtB,KAAK,EAAG,CAAC,CAAC,MAA2B,CAAC,KAAK;qBAC5C,CAAC,GAEJ,CACE;YACL,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;IACJ,CAAC,CAAC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport {\n ExpectedOutcomeField,\n} from '../../../types/llm-test-runner';\nimport { ChipsConfig, FormFieldType, SelectConfig, TextAreaConfig } from '../../../lib/form/schema';\nimport {\n EvaluationApproach,\n} from '../../../lib/evaluation/constants';\nimport { getAllowedApproachesForFieldType } from '../../../lib/evaluation/field-evaluation-approach';\nimport { ExpectedOutcomeChange } from '../../../lib/test-cases/test-case-mutations';\n\nexport type ExpectedOutcomeChangeDetail = {\n testCaseId: string;\n} & ExpectedOutcomeChange;\n\ninterface ExpectedOutcomeRendererProps {\n testCaseId: string;\n fields: ExpectedOutcomeField[];\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const ExpectedOutcomeRenderer: FunctionalComponent<ExpectedOutcomeRendererProps> = ({\n testCaseId,\n fields,\n onExpectedOutcomeChange,\n}) => {\n const emit = (detail: ExpectedOutcomeChangeDetail) =>\n onExpectedOutcomeChange({\n detail,\n } as CustomEvent<ExpectedOutcomeChangeDetail>);\n\n const buildEvaluationConfig = (\n index: number,\n optionList: string[],\n ): SelectConfig => ({\n name: `expectedOutcomeEvaluation-${index}`,\n fieldType: FormFieldType.SELECT,\n label: 'Evaluation Approach',\n placeholder: 'Select evaluation approach…',\n required: true,\n optionList,\n defaultValue: EvaluationApproach.EXACT,\n });\n\n const renderEvaluationSelector = (\n field: ExpectedOutcomeField,\n index: number,\n ) => {\n const optionList = getAllowedApproachesForFieldType(field.type);\n\n return (\n <app-select\n config={buildEvaluationConfig(index, optionList)}\n value={field.evaluationParameters?.approach}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-evaluation-approach',\n value: e.detail.value as EvaluationApproach,\n })\n }\n />\n );\n };\n\n return (\n <div class=\"expected-outcome-renderer\">\n {(fields \|\| []).map((field, index) => {\n if (field.type === 'textarea') {\n const config: TextAreaConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.TEXT_AREA,\n label: field.label,\n placeholder: field.placeholder,\n required: true,\n rows: field.rows \|\| 2,\n };\n return (\n <div class=\"expected-outcome-renderer__group\">\n <app-textarea\n config={config}\n value={field.value}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n />\n {renderEvaluationSelector(field, index)}\n </div>\n );\n }\n\n if (field.type === 'chips-input') {\n const config: ChipsConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.CHIPS,\n label: field.label,\n placeholder: field.placeholder,\n required: true,\n };\n\n return (\n <div class=\"expected-outcome-renderer__group\">\n <app-chips\n config={config}\n value={field.value}\n onAddChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'add-chip',\n value: e.detail.value,\n })\n }\n onRemoveChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'remove-chip',\n value: e.detail.value,\n })\n }\n />\n {renderEvaluationSelector(field, index)}\n </div>\n );\n }\n\n if (field.type === 'select') {\n const config: SelectConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.SELECT,\n label: field.label,\n placeholder: field.placeholder,\n required: true,\n optionList: field.options,\n };\n\n return (\n <div class=\"expected-outcome-renderer__group\">\n <app-select\n config={config}\n value={field.value}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n />\n {renderEvaluationSelector(field, index)}\n </div>\n );\n }\n\n return (\n <div class=\"expected-outcome-renderer__group\">\n <div class=\"expected-outcome-renderer__text\">\n <label>{field.label}</label>\n <input\n type=\"text\"\n value={field.value}\n placeholder={field.placeholder}\n onInput={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: (e.target as HTMLInputElement).value,\n })\n }\n />\n </div>\n {renderEvaluationSelector(field, index)}\n </div>\n );\n })}\n </div>\n );\n};\n"]}

package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css CHANGED Viewed

@@ -18,6 +18,23 @@
   border-right: var(--border-width) solid var(--border);
 }
+.expected-outcome-renderer {
+  display: flex;
+  flex-direction: column;
+  gap: var(--spacing-4);
+  margin-top: var(--spacing-4);
+}
+.expected-outcome-renderer__group {
+  display: flex;
+  flex-direction: column;
+  gap: var(--spacing-2);
+  padding: var(--spacing-3);
+  border: var(--border-width) solid var(--border);
+  border-radius: var(--radius-md);
+  background: var(--background);
+}
 /* Responsive Design */
 @media (max-width: 1200px) {
   .test-case-row {

package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js CHANGED Viewed

@@ -1,11 +1,10 @@
 import { h } from "@stencil/core";
-import { EvaluationApproach, EvaluationApproachValues, } from "../../../lib/evaluation/constants";
 import { ResponseOutput } from "./output/response-output";
 import { EvaluationSummary } from "./evaluation/evaluation-summary";
 import { RowActions } from "./actions/row-actions";
 import { FormFieldType } from "../../../lib/form/schema";
 import { ExpectedOutcomeRenderer, } from "./expected-outcome-renderer";
-export const LLMTestCaseRow = ({ testCase, onRun, onDelete, onUpdateApproach, handleTestCaseChange, onExpectedOutcomeChange, }) => {
+export const LLMTestCaseRow = ({ testCase, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, }) => {
     const questionConfig = {
         name: 'question',
         fieldType: FormFieldType.TEXT_AREA,
@@ -15,21 +14,12 @@ export const LLMTestCaseRow = ({ testCase, onRun, onDelete, onUpdateApproach, ha
         required: true,
         rows: 3,
     };
-    const evaluationConfig = {
-        name: 'EvaluationApproach',
-        fieldType: FormFieldType.SELECT,
-        label: 'Evaluation',
-        placeholder: 'Select evaluation approach…',
-        required: true,
-        optionList: EvaluationApproachValues,
-        defaultValue: EvaluationApproach.EXACT,
-    };
     return (h("div", { class: "test-case-row", key: testCase.id }, h("div", { class: "test-case-row__input-column" }, h("app-textarea", { config: questionConfig, value: testCase.question, onValueChange: (e) => handleTestCaseChange({
             detail: {
                 testCaseId: testCase.id,
                 key: 'question',
                 value: e.detail.value,
             },
-        }) }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], onExpectedOutcomeChange: onExpectedOutcomeChange }), h("app-select", { config: evaluationConfig, value: testCase.evaluationParameters?.approach, onValueChange: (e) => onUpdateApproach(testCase, e.detail.value) })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
+        }) }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
 };
 //# sourceMappingURL=llm-test-case-row.js.map

package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"llm-test-case-row.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-case-row.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,~~EACL,kBAAkB,EAClB,wBAAwB,GACzB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,~~EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,aAAa,~~EAAgC~~,MAAM,0BAA0B,CAAC;~~AACvF~~,OAAO,EAEL,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;~~AAerC~~,MAAM,CAAC,MAAM,cAAc,GAA6C,CAAC,EACvE,QAAQ,EACR,KAAK,EACL,QAAQ,EACR,~~gBAAgB,EAChB,~~oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,cAAc,GAAmB;QACrC,IAAI,EAAE,UAAU;QAChB,SAAS,EAAE,aAAa,CAAC,SAAS;QAClC,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,UAAU;QACjB,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,IAAI,EAAE,CAAC;KACR,CAAC;IACF,MAAM,gBAAgB,GAAiB;QACrC,IAAI,EAAE,oBAAoB;QAC1B,SAAS,EAAE,aAAa,CAAC,MAAM;QAC/B,KAAK,EAAE,YAAY;QACnB,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,UAAU,EAAE,wBAAwB;QACpC,YAAY,EAAE,kBAAkB,CAAC,KAAK;KACvC,CAAC;IAEF,OAAO,CACL,WAAK,KAAK,EAAC,eAAe,EAAC,GAAG,EAAE,QAAQ,CAAC,EAAE;QACzC,WAAK,KAAK,EAAC,6BAA6B;YACtC,oBACE,MAAM,EAAE,cAAc,EACtB,KAAK,EAAE,QAAQ,CAAC,QAAQ,EACxB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,oBAAoB,CAAC;oBACnB,MAAM,EAAE;wBACN,UAAU,EAAE,QAAQ,CAAC,EAAE;wBACvB,GAAG,EAAE,UAAU;wBACf,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB;iBACiE,CAAC,GAEvE;YACF,EAAC,uBAAuB,IACtB,UAAU,EAAE,QAAQ,CAAC,EAAE,EACvB,MAAM,EAAE,QAAQ,CAAC,eAAe,IAAI,EAAE,EACtC,uBAAuB,EAAE,uBAAuB,GAChD~~;YACF~~,~~kBACE,MAAM,EAAE,gBAAgB,EACxB,KAAK,EAAE,QAAQ,CAAC,oBAAoB,EAAE,QAAQ,EAC9C,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,gBAAgB,CAAC,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,KAA2B,CAAC,GAElE,~~CACE;QAEN,EAAC,cAAc,IAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,CAAC,SAAS,GAAI;QAE1E,EAAC,iBAAiB,IAChB,MAAM,EAAE,QAAQ,CAAC,gBAAgB,EACjC,SAAS,EAAE,QAAQ,CAAC,SAAS,GAC7B;QAEF,EAAC,UAAU,IACT,SAAS,EAAE,QAAQ,CAAC,SAAS,EAC7B,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAClC,KAAK,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,EAC5B,QAAQ,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,GACrC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport {~~\n EvaluationApproach,\n EvaluationApproachValues,\n}~~ ~~from '../../../lib/evaluation/constants';\nimport {~~ ResponseOutput } from './output/response-output';\nimport { EvaluationSummary } from './evaluation/evaluation-summary';\nimport { RowActions } from './actions/row-actions';\nimport { FormFieldType, ~~SelectConfig,~~ TextAreaConfig } from '../../../lib/form/schema';\nimport {\n ExpectedOutcomeChangeDetail,\n ExpectedOutcomeRenderer,\n} from './expected-outcome-renderer';\n\nexport interface LLMTestCaseRowProps {\n testCase: TestCase;\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n ~~onUpdateApproach: (testCase: TestCase, approach: EvaluationApproach) => void;\n~~ handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCaseRow: FunctionalComponent<LLMTestCaseRowProps> = ({\n testCase,\n onRun,\n onDelete,\n ~~onUpdateApproach,\n~~ handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n const questionConfig: TextAreaConfig = {\n name: 'question',\n fieldType: FormFieldType.TEXT_AREA,\n type: 'text',\n label: 'Question',\n placeholder: 'Enter your question here...',\n required: true,\n rows: 3,\n };\n const evaluationConfig: SelectConfig = {\n name: 'EvaluationApproach',\n fieldType: FormFieldType.SELECT,\n label: 'Evaluation',\n placeholder: 'Select evaluation approach…',\n required: true,\n optionList: EvaluationApproachValues,\n defaultValue: EvaluationApproach.EXACT,\n };\n\n return (\n <div class=\"test-case-row\" key={testCase.id}>\n <div class=\"test-case-row__input-column\">\n <app-textarea\n config={questionConfig}\n value={testCase.question}\n onValueChange={(e) =>\n handleTestCaseChange({\n detail: {\n testCaseId: testCase.id,\n key: 'question',\n value: e.detail.value,\n },\n } as CustomEvent<{ testCaseId: string; key: string; value: string }>)\n }\n />\n <ExpectedOutcomeRenderer\n testCaseId={testCase.id}\n fields={testCase.expectedOutcome \|\| []}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n <app-select\n config={evaluationConfig}\n value={testCase.evaluationParameters?.approach}\n onValueChange={(e) =>\n onUpdateApproach(testCase, e.detail.value as EvaluationApproach)\n }\n />\n </div>\n\n <ResponseOutput output={testCase.output} isRunning={testCase.isRunning} />\n\n <EvaluationSummary\n result={testCase.evaluationResult}\n isRunning={testCase.isRunning}\n />\n\n <RowActions\n isRunning={testCase.isRunning}\n canRun={!!testCase.question.trim()}\n onRun={() => onRun(testCase)}\n onDelete={() => onDelete(testCase.id)}\n />\n </div>\n );\n};\n"]}
1	+ {"version":3,"file":"llm-test-case-row.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-case-row.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAkB,MAAM,0BAA0B,CAAC;AACzE,OAAO,EAEL,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;AAcrC,MAAM,CAAC,MAAM,cAAc,GAA6C,CAAC,EACvE,QAAQ,EACR,KAAK,EACL,QAAQ,EACR,oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,cAAc,GAAmB;QACrC,IAAI,EAAE,UAAU;QAChB,SAAS,EAAE,aAAa,CAAC,SAAS;QAClC,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,UAAU;QACjB,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,IAAI,EAAE,CAAC;KACR,CAAC;IACF,OAAO,CACL,WAAK,KAAK,EAAC,eAAe,EAAC,GAAG,EAAE,QAAQ,CAAC,EAAE;QACzC,WAAK,KAAK,EAAC,6BAA6B;YACtC,oBACE,MAAM,EAAE,cAAc,EACtB,KAAK,EAAE,QAAQ,CAAC,QAAQ,EACxB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,oBAAoB,CAAC;oBACnB,MAAM,EAAE;wBACN,UAAU,EAAE,QAAQ,CAAC,EAAE;wBACvB,GAAG,EAAE,UAAU;wBACf,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB;iBACiE,CAAC,GAEvE;YACF,EAAC,uBAAuB,IACtB,UAAU,EAAE,QAAQ,CAAC,EAAE,EACvB,MAAM,EAAE,QAAQ,CAAC,eAAe,IAAI,EAAE,EACtC,uBAAuB,EAAE,uBAAuB,GAChD,CACE;QAEN,EAAC,cAAc,IAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,CAAC,SAAS,GAAI;QAE1E,EAAC,iBAAiB,IAChB,MAAM,EAAE,QAAQ,CAAC,gBAAgB,EACjC,SAAS,EAAE,QAAQ,CAAC,SAAS,GAC7B;QAEF,EAAC,UAAU,IACT,SAAS,EAAE,QAAQ,CAAC,SAAS,EAC7B,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAClC,KAAK,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,EAC5B,QAAQ,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,GACrC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport { ResponseOutput } from './output/response-output';\nimport { EvaluationSummary } from './evaluation/evaluation-summary';\nimport { RowActions } from './actions/row-actions';\nimport { FormFieldType, TextAreaConfig } from '../../../lib/form/schema';\nimport {\n ExpectedOutcomeChangeDetail,\n ExpectedOutcomeRenderer,\n} from './expected-outcome-renderer';\n\nexport interface LLMTestCaseRowProps {\n testCase: TestCase;\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCaseRow: FunctionalComponent<LLMTestCaseRowProps> = ({\n testCase,\n onRun,\n onDelete,\n handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n const questionConfig: TextAreaConfig = {\n name: 'question',\n fieldType: FormFieldType.TEXT_AREA,\n type: 'text',\n label: 'Question',\n placeholder: 'Enter your question here...',\n required: true,\n rows: 3,\n };\n return (\n <div class=\"test-case-row\" key={testCase.id}>\n <div class=\"test-case-row__input-column\">\n <app-textarea\n config={questionConfig}\n value={testCase.question}\n onValueChange={(e) =>\n handleTestCaseChange({\n detail: {\n testCaseId: testCase.id,\n key: 'question',\n value: e.detail.value,\n },\n } as CustomEvent<{ testCaseId: string; key: string; value: string }>)\n }\n />\n <ExpectedOutcomeRenderer\n testCaseId={testCase.id}\n fields={testCase.expectedOutcome \|\| []}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n </div>\n\n <ResponseOutput output={testCase.output} isRunning={testCase.isRunning} />\n\n <EvaluationSummary\n result={testCase.evaluationResult}\n isRunning={testCase.isRunning}\n />\n\n <RowActions\n isRunning={testCase.isRunning}\n canRun={!!testCase.question.trim()}\n onRun={() => onRun(testCase)}\n onDelete={() => onDelete(testCase.id)}\n />\n </div>\n );\n};\n"]}

package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { h } from "@stencil/core";
 import { LLMTestCaseRow } from "./llm-test-case-row";
 import { Button } from "../../../lib/ui/button/index";
-export const LLMTestCases = ({ testCases, onRun, onDelete, onUpdateApproach, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
-    return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, onRun: onRun, onDelete: onDelete, onUpdateApproach: onUpdateApproach, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
+export const LLMTestCases = ({ testCases, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
+    return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
 };
 //# sourceMappingURL=llm-test-cases.js.map

package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"llm-test-cases.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-cases.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;~~AAGvD~~,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,8BAA8B,CAAC;~~AAiBtD~~,MAAM,CAAC,MAAM,YAAY,GAA2C,CAAC,EACnE,SAAS,EACT,KAAK,EACL,QAAQ,EACR,~~gBAAgB,EAChB,~~aAAa,EACb,oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,OAAO,CACL,WAAK,KAAK,EAAC,YAAY;QACrB,WAAK,KAAK,EAAC,4BAA4B;YACrC,WAAK,KAAK,EAAC,2BAA2B,YAAY;YAClD,WAAK,KAAK,EAAC,2BAA2B,aAAa;YACnD,WAAK,KAAK,EAAC,2BAA2B,iBAAiB;YACvD,WAAK,KAAK,EAAC,2BAA2B,cAAc,CAChD;QAEL,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CACzB,EAAC,cAAc,IACb,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,KAAK,EACZ,QAAQ,EAAE,QAAQ,EAClB,~~gBAAgB,EAAE,gBAAgB,EAClC,~~oBAAoB,EAAE,oBAAoB,EAC1C,uBAAuB,EAAE,uBAAuB,GAChD,CACH,CAAC;QAEF,WAAK,KAAK,EAAC,yBAAyB;YAClC,EAAC,MAAM,IAAC,OAAO,EAAC,SAAS,EAAC,IAAI,EAAC,IAAI,EAAC,OAAO,EAAE,aAAa,qBAEjD,CACL,CACF,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport { ~~EvaluationApproach } from '../../../lib/evaluation/constants';\nimport {~~ LLMTestCaseRow } from './llm-test-case-row';\nimport { Button } from '../../../lib/ui/button/index';\nimport { ExpectedOutcomeChangeDetail } from './expected-outcome-renderer';\n\nexport interface LLMTestCasesProps {\n testCases: TestCase[];\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n ~~onUpdateApproach: (testCase: TestCase, approach: EvaluationApproach) => void;\n~~ onAddTestCase: () => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCases: FunctionalComponent<LLMTestCasesProps> = ({\n testCases,\n onRun,\n onDelete,\n ~~onUpdateApproach,\n~~ onAddTestCase,\n handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n return (\n <div class=\"test-cases\">\n <div class=\"test-cases__column-headers\">\n <div class=\"test-cases__column-header\">Input</div>\n <div class=\"test-cases__column-header\">Output</div>\n <div class=\"test-cases__column-header\">Evaluation</div>\n <div class=\"test-cases__column-header\">Actions</div>\n </div>\n\n {testCases.map(testCase => (\n <LLMTestCaseRow\n testCase={testCase}\n onRun={onRun}\n onDelete={onDelete}\n ~~onUpdateApproach={onUpdateApproach}\n~~ handleTestCaseChange={handleTestCaseChange}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n ))}\n\n <div class=\"test-cases__add-section\">\n <Button variant=\"outline\" size=\"md\" onClick={onAddTestCase}>\n + Add Question\n </Button>\n </div>\n </div>\n );\n};\n"]}
1	+ {"version":3,"file":"llm-test-cases.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-cases.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,8BAA8B,CAAC;AAgBtD,MAAM,CAAC,MAAM,YAAY,GAA2C,CAAC,EACnE,SAAS,EACT,KAAK,EACL,QAAQ,EACR,aAAa,EACb,oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,OAAO,CACL,WAAK,KAAK,EAAC,YAAY;QACrB,WAAK,KAAK,EAAC,4BAA4B;YACrC,WAAK,KAAK,EAAC,2BAA2B,YAAY;YAClD,WAAK,KAAK,EAAC,2BAA2B,aAAa;YACnD,WAAK,KAAK,EAAC,2BAA2B,iBAAiB;YACvD,WAAK,KAAK,EAAC,2BAA2B,cAAc,CAChD;QAEL,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CACzB,EAAC,cAAc,IACb,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,KAAK,EACZ,QAAQ,EAAE,QAAQ,EAClB,oBAAoB,EAAE,oBAAoB,EAC1C,uBAAuB,EAAE,uBAAuB,GAChD,CACH,CAAC;QAEF,WAAK,KAAK,EAAC,yBAAyB;YAClC,EAAC,MAAM,IAAC,OAAO,EAAC,SAAS,EAAC,IAAI,EAAC,IAAI,EAAC,OAAO,EAAE,aAAa,qBAEjD,CACL,CACF,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport { LLMTestCaseRow } from './llm-test-case-row';\nimport { Button } from '../../../lib/ui/button/index';\nimport { ExpectedOutcomeChangeDetail } from './expected-outcome-renderer';\n\nexport interface LLMTestCasesProps {\n testCases: TestCase[];\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n onAddTestCase: () => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCases: FunctionalComponent<LLMTestCasesProps> = ({\n testCases,\n onRun,\n onDelete,\n onAddTestCase,\n handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n return (\n <div class=\"test-cases\">\n <div class=\"test-cases__column-headers\">\n <div class=\"test-cases__column-header\">Input</div>\n <div class=\"test-cases__column-header\">Output</div>\n <div class=\"test-cases__column-header\">Evaluation</div>\n <div class=\"test-cases__column-header\">Actions</div>\n </div>\n\n {testCases.map(testCase => (\n <LLMTestCaseRow\n testCase={testCase}\n onRun={onRun}\n onDelete={onDelete}\n handleTestCaseChange={handleTestCaseChange}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n ))}\n\n <div class=\"test-cases__add-section\">\n <Button variant=\"outline\" size=\"md\" onClick={onAddTestCase}>\n + Add Question\n </Button>\n </div>\n </div>\n );\n};\n"]}

package/dist/collection/lib/evaluation/evaluation-engine.js CHANGED Viewed

@@ -6,56 +6,77 @@ import { performRougeLEvaluation } from "./evaluators/rougeL-evaluator";
 import { performBleuEvaluation } from "./evaluators/bleu/bleu-evaluator";
 export class LLMEvaluationEngine {
     async evaluateResponse(request, callback) {
-        try {
-            const approach = request.evaluationParameters.approach;
-            switch (approach) {
-                case EvaluationApproach.BLEU: {
-                    const bleuResult = performBleuEvaluation(request);
-                    callback(bleuResult);
-                    break;
-                }
-                case EvaluationApproach.EXACT: {
-                    const exactResult = await performEvaluation(request);
-                    callback(exactResult);
-                    break;
-                }
-                case EvaluationApproach.ROUGE_1: {
-                    const rougeResult = await performRouge1Evaluation(request);
-                    callback(rougeResult);
-                    break;
-                }
-                case EvaluationApproach.ROUGE_L: {
-                    const rougeLResult = await performRougeLEvaluation(request);
-                    callback(rougeLResult);
-                    break;
-                }
-                case EvaluationApproach.SEMANTIC: {
-                    const semanticResult = await performSemanticEvaluation(request);
-                    callback(semanticResult);
-                    break;
-                }
-                default: {
-                    console.warn(`Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`);
-                    const fallbackResult = await performEvaluation(request);
-                    callback(fallbackResult);
-                }
-            }
-        }
-        catch (error) {
-            console.error('Evaluation failed:', error);
-            const errorResult = {
+        const settledResults = await Promise.allSettled(request.fields.map(async (field) => {
+            const fieldRequest = {
                 testCaseId: request.testCaseId,
+                question: request.question,
+                actualResponse: request.actualResponse,
+                expectedOutcome: field.expectedValue,
+                evaluationParameters: field.evaluationParameters,
+            };
+            const result = await this.evaluateField(fieldRequest);
+            const fieldResult = {
+                index: field.index,
+                label: field.label,
+                type: field.type,
+                expectedValue: field.expectedValue,
+                passed: result.passed,
+                keywordMatches: result.keywordMatches,
+                evaluationParameters: result.evaluationParameters,
+                evaluationApproachResult: result.evaluationApproachResult,
+            };
+            return fieldResult;
+        }));
+        const fieldResults = settledResults.map((settledResult, index) => {
+            const field = request.fields[index];
+            if (settledResult.status === 'fulfilled') {
+                return settledResult.value;
+            }
+            return {
+                index: field.index,
+                label: field.label,
+                type: field.type,
+                expectedValue: field.expectedValue,
                 passed: false,
                 keywordMatches: [],
-                timestamp: new Date().toISOString(),
-                evaluationParameters: request.evaluationParameters,
+                evaluationParameters: field.evaluationParameters,
                 evaluationApproachResult: {
                     score: 0,
-                    approachUsed: EvaluationApproach.EXACT,
+                    approachUsed: field.evaluationParameters.approach,
                 },
+                error: this.getSafeErrorMessage(settledResult.reason),
             };
-            callback(errorResult);
+        });
+        const keywordMatches = fieldResults.flatMap(field => field.keywordMatches);
+        const passed = fieldResults.every(field => field.passed && !field.error);
+        callback({
+            testCaseId: request.testCaseId,
+            passed,
+            keywordMatches,
+            fieldResults,
+            timestamp: new Date().toISOString(),
+        });
+    }
+    async evaluateField(request) {
+        const approach = request.evaluationParameters.approach;
+        switch (approach) {
+            case EvaluationApproach.BLEU:
+                return performBleuEvaluation(request);
+            case EvaluationApproach.EXACT:
+                return performEvaluation(request);
+            case EvaluationApproach.ROUGE_1:
+                return performRouge1Evaluation(request);
+            case EvaluationApproach.ROUGE_L:
+                return performRougeLEvaluation(request);
+            case EvaluationApproach.SEMANTIC:
+                return performSemanticEvaluation(request);
+            default:
+                console.warn(`Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`);
+                return performEvaluation(request);
         }
     }
+    getSafeErrorMessage(error) {
+        return error instanceof Error ? error.message : 'Field evaluation failed.';
+    }
 }
 //# sourceMappingURL=evaluation-engine.js.map

package/dist/collection/lib/evaluation/evaluation-engine.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"evaluation-engine.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-engine.ts"],"names":[],"mappings":"~~AAKA~~,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,OAAO,mBAAmB;IAC9B,KAAK,CAAC,gBAAgB,CACpB,~~OAA0B~~,~~EAC1B~~,QAA4B;QAE5B,~~IAAI~~,~~CAAC;YACH~~,MAAM,~~QAAQ~~,~~GACZ~~,OAAO,CAAC,~~oBAAoB~~,CAAC,~~QAAQ~~,CAAC;~~YACxC~~,~~QAAQ~~,QAAQ,EAAE,CAAC;~~gBACjB~~,~~KAAK~~,~~kBAAkB~~,CAAC,~~IAAI~~,CAAC,CAAC,CAAC;~~oBAC7B~~,MAAM,~~UAAU~~,GAAG,~~qBAAqB~~,~~CAAC~~,~~OAAO,~~CAAC,~~CAAC;oBAClD~~,~~QAAQ,~~CAAC,~~UAAU~~,CAAC,CAAC;~~oBACrB~~,MAAM;~~gBACR~~,CAAC;~~gBAED~~,KAAK,~~kBAAkB~~,CAAC,KAAK,CAAC,CAAC,~~CAAC~~;~~oBAC9B~~,MAAM,~~WAAW~~,~~GAAG~~,MAAM,~~iBAAiB~~,~~CAAC~~,~~OAAO~~,CAAC,~~CAAC~~;~~oBACrD~~,~~QAAQ~~,~~CAAC~~,~~WAAW~~,CAAC,~~CAAC~~;~~oBACtB~~,MAAM;~~gBACR~~,CAAC;~~gBAED~~,~~KAAK~~,~~kBAAkB~~,CAAC,~~OAAO,~~CAAC,CAAC,CAAC;~~oBAChC~~,MAAM,~~WAAW~~,GAAG,MAAM,~~uBAAuB~~,~~CAAC~~,OAAO,CAAC,CAAC;~~oBAC3D~~,~~QAAQ~~,CAAC,WAAW,CAAC,CAAC;~~oBACtB~~,~~MAAM~~;~~gBACR~~,CAAC;~~gBAED~~,KAAK,~~kBAAkB~~,CAAC,~~OAAO~~,CAAC,CAAC,~~CAAC~~;~~oBAChC~~,MAAM,~~YAAY~~,~~GAAG~~,~~MAAM~~,~~uBAAuB~~,~~CAAC~~,~~OAAO~~,CAAC,~~CAAC~~;~~oBAC5D~~,~~QAAQ~~,CAAC,YAAY,~~CAAC~~,CAAC~~;oBACvB~~,~~MAAM;gBACR~~,CAAC;~~gBAED~~,KAAK,~~kBAAkB~~,CAAC,~~QAAQ~~,CAAC,CAAC,CAAC;~~oBACjC~~,MAAM,cAAc,GAAG,~~MAAM~~,~~yBAAyB,~~CAAC,OAAO,CAAC,CAAC~~;oBAChE~~,~~QAAQ~~,CAAC,cAAc,CAAC,CAAC;~~oBACzB~~,MAAM~~;gBACR~~,CAAC~~;gBAED~~,~~OAAO~~,CAAC,CAAC,CAAC~~;oBACR~~,~~OAAO~~,CAAC,IAAI,~~CACV~~,~~8BAA8B~~,~~OAAO,~~CAAC,~~oBAAoB~~,CAAC,QAAQ,~~kCAAkC~~,~~CACtG~~,CAAC;~~oBACF~~,MAAM,cAAc,~~GAAG~~,~~MAAM~~,~~iBAAiB~~,CAAC,~~OAAO~~,CAAC,CAAC;~~oBACxD~~,QAAQ,CAAC,~~cAAc~~,CAAC,CAAC;~~gBAC3B~~,CAAC;~~YACH~~,CAAC;~~QACH~~,CAAC~~;QAAC~~,OAAO,KAAK,~~EAAE~~,CAAC;~~YACf~~,OAAO,CAAC,KAAK,CAAC,~~oBAAoB~~,~~EAAE~~,~~KAAK~~,CAAC,CAAC;~~YAE3C~~,~~MAAM~~,~~WAAW~~,~~GAAqB~~;~~gBACpC~~,~~UAAU~~,~~EAAE~~,OAAO,CAAC,~~UAAU~~;~~gBAC9B~~,~~MAAM~~,~~EAAE~~,~~KAAK~~;~~gBACb~~,~~cAAc~~,~~EAAE~~,~~EAAE~~;~~gBAClB~~,~~SAAS~~,~~EAAE~~,IAAI,~~IAAI~~,~~EAAE~~,CAAC,~~WAAW~~,~~EAAE~~;~~gBACnC~~,~~oBAAoB~~,~~EAAE~~,OAAO,CAAC,~~oBAAoB~~;~~gBAClD~~,~~wBAAwB~~,~~EAAE~~;~~oBACxB~~,~~KAAK~~,~~EAAE~~,~~CAAC~~;~~oBACR~~,YAAY,~~EAAE~~,~~kBAAkB~~,CAAC,KAAK~~;iBACvC;aACF~~,CAAC~~;YAEF~~,~~QAAQ~~,CAAC,~~WAAW,~~CAAC,CAAC~~;QACxB~~,CAAC;~~IACH~~,CAAC;CACF","sourcesContent":["import {\n EvaluationRequest,\n EvaluationResult,\n EvaluationCallback,\n} from './types';\nimport { performEvaluation } from './evaluators/exact/exact';\nimport { EvaluationApproach } from './constants';\nimport { performRouge1Evaluation } from './evaluators/rouge1-evaluator';\nimport { performSemanticEvaluation } from './evaluators/semantic/index';\nimport { performRougeLEvaluation } from './evaluators/rougeL-evaluator';\nimport { performBleuEvaluation } from './evaluators/bleu/bleu-evaluator';\n\nexport class LLMEvaluationEngine {\n async evaluateResponse(\n request: ~~EvaluationRequest~~,\n callback: EvaluationCallback,\n ): Promise<void> {\n ~~try~~ {\n const ~~approach~~: ~~EvaluationApproach~~ =\n request.evaluationParameters.~~approach~~;\n ~~switch~~ (~~approach~~) {\n ~~case~~ ~~EvaluationApproach.BLEU~~: {\n ~~const~~ ~~bleuResult~~ = ~~performBleuEvaluation(request);\~~n ~~callback(bleuResult);\~~n ~~break~~;\n }\n\n ~~case~~ ~~EvaluationApproach.EXACT~~: {\n const ~~exactResult~~ = ~~await performEvaluation(~~request);\n ~~callback~~(~~exactResult~~);\n ~~break~~;\n }\n\n ~~case~~ ~~EvaluationApproach~~.~~ROUGE_1~~: {\n ~~const~~ ~~rougeResult~~ = ~~await~~ ~~performRouge1Evaluation(request);\~~n ~~callback~~(~~rougeResult~~);\n ~~break~~;\n }\n\n ~~case~~ ~~EvaluationApproach~~.~~ROUGE_L:~~ {\n const ~~rougeLResult~~ = ~~await~~ ~~performRougeLEvaluation(request~~);\n callback(~~rougeLResult~~);\n ~~break~~;\n }\n\n ~~case~~ ~~EvaluationApproach.SEMANTIC~~: {\n const ~~semanticResult~~ = ~~await~~ ~~performSemanticEvaluation~~(request);\n ~~callback~~(~~semanticResult~~);\n ~~break~~;\n }\n\n ~~default:~~ {\n console.warn(\n `Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`,\n );\n ~~const~~ ~~fallbackResult = await~~ performEvaluation(request);\n ~~callback(fallbackResult);\n~~ }\n }\n } ~~catch~~ (error~~) {\n console.error('Evaluation failed~~:', ~~error~~)~~;\n\n const errorResult~~: ~~EvaluationResult~~ = {\n ~~testCaseId:~~ ~~request.testCaseId,\n passed:~~ ~~false,\n keywordMatches:~~ ~~[],\n timestamp:~~ ~~new~~ ~~Date()~~.~~toISOString(),\n evaluationParameters:~~ ~~request.evaluationParameters,\n evaluationApproachResult~~: ~~{\n score:~~ ~~0,\n approachUsed:~~ ~~EvaluationApproach~~.~~EXACT,\n },\n }~~;\n~~\n callback(errorResult);\n }\n~~ }\n}\n"]}
1	+ {"version":3,"file":"evaluation-engine.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-engine.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,OAAO,mBAAmB;IAC9B,KAAK,CAAC,gBAAgB,CACpB,OAA4B,EAC5B,QAA4B;QAE5B,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,UAAU,CAC7C,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAC,KAAK,EAAC,EAAE;YAC/B,MAAM,YAAY,GAAsB;gBACtC,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,cAAc,EAAE,OAAO,CAAC,cAAc;gBACtC,eAAe,EAAE,KAAK,CAAC,aAAa;gBACpC,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;aACjD,CAAC;YACF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YAEtD,MAAM,WAAW,GAA0B;gBACzC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,oBAAoB,EAAE,MAAM,CAAC,oBAAqB;gBAClD,wBAAwB,EAAE,MAAM,CAAC,wBAAwB;aAC1D,CAAC;YACF,OAAO,WAAW,CAAC;QACrB,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,YAAY,GAA4B,cAAc,CAAC,GAAG,CAC9D,CAAC,aAAa,EAAE,KAAK,EAAE,EAAE;YACvB,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,aAAa,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBACzC,OAAO,aAAa,CAAC,KAAK,CAAC;YAC7B,CAAC;YAED,OAAO;gBACL,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;gBAChD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,KAAK,CAAC,oBAAoB,CAAC,QAAQ;iBAClD;gBACD,KAAK,EAAE,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,MAAM,CAAC;aACtD,CAAC;QACJ,CAAC,CACF,CAAC;QAEF,MAAM,cAAc,GAAG,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC3E,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEzE,QAAQ,CAAC;YACP,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,MAAM;YACN,cAAc;YACd,YAAY;YACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,OAA0B;QACpD,MAAM,QAAQ,GAAuB,OAAO,CAAC,oBAAoB,CAAC,QAAQ,CAAC;QAC3E,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,kBAAkB,CAAC,IAAI;gBAC1B,OAAO,qBAAqB,CAAC,OAAO,CAAC,CAAC;YACxC,KAAK,kBAAkB,CAAC,KAAK;gBAC3B,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;YACpC,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,QAAQ;gBAC9B,OAAO,yBAAyB,CAAC,OAAO,CAAC,CAAC;YAC5C;gBACE,OAAO,CAAC,IAAI,CACV,8BAA8B,OAAO,CAAC,oBAAoB,CAAC,QAAQ,kCAAkC,CACtG,CAAC;gBACF,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,mBAAmB,CAAC,KAAc;QACxC,OAAO,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,0BAA0B,CAAC;IAC7E,CAAC;CACF","sourcesContent":["import {\n EvaluationRequest,\n EvaluationResult,\n EvaluationCallback,\n FieldEvaluationResult,\n EvaluationRequestV2,\n} from './types';\nimport { performEvaluation } from './evaluators/exact/exact';\nimport { EvaluationApproach } from './constants';\nimport { performRouge1Evaluation } from './evaluators/rouge1-evaluator';\nimport { performSemanticEvaluation } from './evaluators/semantic/index';\nimport { performRougeLEvaluation } from './evaluators/rougeL-evaluator';\nimport { performBleuEvaluation } from './evaluators/bleu/bleu-evaluator';\n\nexport class LLMEvaluationEngine {\n async evaluateResponse(\n request: EvaluationRequestV2,\n callback: EvaluationCallback,\n ): Promise<void> {\n const settledResults = await Promise.allSettled(\n request.fields.map(async field => {\n const fieldRequest: EvaluationRequest = {\n testCaseId: request.testCaseId,\n question: request.question,\n actualResponse: request.actualResponse,\n expectedOutcome: field.expectedValue,\n evaluationParameters: field.evaluationParameters,\n };\n const result = await this.evaluateField(fieldRequest);\n\n const fieldResult: FieldEvaluationResult = {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: result.passed,\n keywordMatches: result.keywordMatches,\n evaluationParameters: result.evaluationParameters!,\n evaluationApproachResult: result.evaluationApproachResult,\n };\n return fieldResult;\n }),\n );\n\n const fieldResults: FieldEvaluationResult[] = settledResults.map(\n (settledResult, index) => {\n const field = request.fields[index];\n if (settledResult.status === 'fulfilled') {\n return settledResult.value;\n }\n\n return {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: false,\n keywordMatches: [],\n evaluationParameters: field.evaluationParameters,\n evaluationApproachResult: {\n score: 0,\n approachUsed: field.evaluationParameters.approach,\n },\n error: this.getSafeErrorMessage(settledResult.reason),\n };\n },\n );\n\n const keywordMatches = fieldResults.flatMap(field => field.keywordMatches);\n const passed = fieldResults.every(field => field.passed && !field.error);\n\n callback({\n testCaseId: request.testCaseId,\n passed,\n keywordMatches,\n fieldResults,\n timestamp: new Date().toISOString(),\n });\n }\n\n private async evaluateField(request: EvaluationRequest): Promise<EvaluationResult> {\n const approach: EvaluationApproach = request.evaluationParameters.approach;\n switch (approach) {\n case EvaluationApproach.BLEU:\n return performBleuEvaluation(request);\n case EvaluationApproach.EXACT:\n return performEvaluation(request);\n case EvaluationApproach.ROUGE_1:\n return performRouge1Evaluation(request);\n case EvaluationApproach.ROUGE_L:\n return performRougeLEvaluation(request);\n case EvaluationApproach.SEMANTIC:\n return performSemanticEvaluation(request);\n default:\n console.warn(\n `Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`,\n );\n return performEvaluation(request);\n }\n }\n\n private getSafeErrorMessage(error: unknown): string {\n return error instanceof Error ? error.message : 'Field evaluation failed.';\n }\n}\n"]}

package/dist/collection/lib/evaluation/evaluation-service.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { LLMEvaluationEngine } from "./evaluation-engine";
-import { serializeExpectedOutcome } from "../expected-outcome-serializer";
+import { normalizeEvaluationParametersForField } from "./field-evaluation-approach";
 /**
  * Service for evaluating test case responses
  */
@@ -18,12 +18,18 @@ export class EvaluationService {
             console.warn('⚠️ No output to evaluate for test case:', testCase.id);
             return;
         }
+        const fields = (testCase.expectedOutcome || []).map((field, index) => ({
+            index,
+            label: field.label,
+            type: field.type,
+            expectedValue: getFieldExpectedValue(field),
+            evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
+        }));
         const evaluationRequest = {
             testCaseId: testCase.id,
             question: testCase.question,
-            expectedOutcome: serializeExpectedOutcome(testCase.expectedOutcome),
             actualResponse: testCase.output,
-            evaluationParameters: testCase.evaluationParameters,
+            fields,
         };
         await this.engine.evaluateResponse(evaluationRequest, (result) => {
             console.log('📊 Evaluation result received:', result);
@@ -31,4 +37,10 @@ export class EvaluationService {
         });
     }
 }
+function getFieldExpectedValue(field) {
+    if (field.type === 'chips-input') {
+        return field.value.join(', ');
+    }
+    return field.value;
+}
 //# sourceMappingURL=evaluation-service.js.map

package/dist/collection/lib/evaluation/evaluation-service.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"evaluation-service.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;~~AAG1D~~,OAAO,EAAE,~~wBAAwB~~,EAAE,MAAM,~~gCAAgC~~,CAAC;~~AAE1E~~;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAsB;IAEpC;QACE,IAAI,CAAC,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAC1C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CACpB,QAAkB,EAClB,QAA4C;QAE5C,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YACrB,OAAO,CAAC,IAAI,CAAC,yCAAyC,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;YACrE,OAAO;QACT,CAAC;QAED,MAAM,~~iBAAiB~~,~~GAAsB~~;~~YAC3C~~,~~UAAU~~,EAAE,~~QAAQ~~,CAAC,EAAE;~~YACvB~~,~~QAAQ~~,EAAE,~~QAAQ~~,CAAC,~~QAAQ~~;~~YAC3B~~,~~eAAe~~,EAAE,~~wBAAwB~~,CAAC,~~QAAQ~~,CAAC,~~eAAe~~,CAAC;~~YACnE~~,~~cAAc~~,EAAE,QAAQ,CAAC,~~MAAM~~;~~YAC/B~~,~~oBAAoB~~,EAAE,QAAQ,CAAC,~~oBAAoB~~;~~SACpD~~,CAAC;QAEF,MAAM,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAChC,iBAAiB,EACjB,CAAC,MAAwB,EAAE,EAAE;YAC3B,OAAO,CAAC,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;YACtD,QAAQ,CAAC,MAAM,CAAC,CAAC;QACnB,CAAC,CACF,CAAC;IACJ,CAAC;CACF","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport { ~~EvaluationRequest,~~ EvaluationResult } from './types';\nimport { TestCase } from '../../types/llm-test-runner';\nimport { ~~serializeExpectedOutcome~~ } from '~~../expected~~-~~outcome~~-~~serializer~~';\n\n/*\n Service for evaluating test case responses\n /\nexport class EvaluationService {\n private engine: LLMEvaluationEngine;\n\n constructor() {\n this.engine = new LLMEvaluationEngine();\n }\n\n /\n Evaluates a test case response\n * @param testCase - The test case to evaluate\n * @param onResult - Callback to handle the evaluation result\n */\n async evaluateTestCase(\n testCase: TestCase,\n onResult: (result: EvaluationResult) => void,\n ): Promise<void> {\n if (!testCase.output) {\n console.warn('⚠️ No output to evaluate for test case:', testCase.id);\n return;\n }\n\n const ~~evaluationRequest~~: ~~EvaluationRequest~~ = {\n ~~testCaseId~~: ~~testCase~~.id,\n ~~question~~: ~~testCase~~.~~question~~,\n ~~expectedOutcome~~: ~~serializeExpectedOutcome~~(~~testCase~~.~~expectedOutcome~~),\n ~~actualResponse~~: testCase.~~output~~,\n ~~evaluationParameters~~: testCase.~~evaluationParameters~~,\n };\n\n await this.engine.evaluateResponse(\n evaluationRequest,\n (result: EvaluationResult) => {\n console.log('📊 Evaluation result received:', result);\n onResult(result);\n },\n );\n }\n}\n"]}
1	+ {"version":3,"file":"evaluation-service.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAO1D,OAAO,EAAE,qCAAqC,EAAE,MAAM,6BAA6B,CAAC;AAEpF;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAsB;IAEpC;QACE,IAAI,CAAC,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAC1C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CACpB,QAAkB,EAClB,QAA4C;QAE5C,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YACrB,OAAO,CAAC,IAAI,CAAC,yCAAyC,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;YACrE,OAAO;QACT,CAAC;QAED,MAAM,MAAM,GAA2B,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,GAAG,CACzE,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;YACjB,KAAK;YACL,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,aAAa,EAAE,qBAAqB,CAAC,KAAK,CAAC;YAC3C,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;SACF,CAAC,CACH,CAAC;QAEF,MAAM,iBAAiB,GAAwB;YAC7C,UAAU,EAAE,QAAQ,CAAC,EAAE;YACvB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;YAC3B,cAAc,EAAE,QAAQ,CAAC,MAAM;YAC/B,MAAM;SACP,CAAC;QAEF,MAAM,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAChC,iBAAiB,EACjB,CAAC,MAAwB,EAAE,EAAE;YAC3B,OAAO,CAAC,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;YACtD,QAAQ,CAAC,MAAM,CAAC,CAAC;QACnB,CAAC,CACF,CAAC;IACJ,CAAC;CACF;AAED,SAAS,qBAAqB,CAAC,KAA2B;IACxD,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IACD,OAAO,KAAK,CAAC,KAAK,CAAC;AACrB,CAAC","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport {\n EvaluationResult,\n FieldEvaluationInput,\n EvaluationRequestV2,\n} from './types';\nimport { TestCase, ExpectedOutcomeField } from '../../types/llm-test-runner';\nimport { normalizeEvaluationParametersForField } from './field-evaluation-approach';\n\n/*\n Service for evaluating test case responses\n /\nexport class EvaluationService {\n private engine: LLMEvaluationEngine;\n\n constructor() {\n this.engine = new LLMEvaluationEngine();\n }\n\n /\n Evaluates a test case response\n * @param testCase - The test case to evaluate\n * @param onResult - Callback to handle the evaluation result\n */\n async evaluateTestCase(\n testCase: TestCase,\n onResult: (result: EvaluationResult) => void,\n ): Promise<void> {\n if (!testCase.output) {\n console.warn('⚠️ No output to evaluate for test case:', testCase.id);\n return;\n }\n\n const fields: FieldEvaluationInput[] = (testCase.expectedOutcome \|\| []).map(\n (field, index) => ({\n index,\n label: field.label,\n type: field.type,\n expectedValue: getFieldExpectedValue(field),\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n }),\n );\n\n const evaluationRequest: EvaluationRequestV2 = {\n testCaseId: testCase.id,\n question: testCase.question,\n actualResponse: testCase.output,\n fields,\n };\n\n await this.engine.evaluateResponse(\n evaluationRequest,\n (result: EvaluationResult) => {\n console.log('📊 Evaluation result received:', result);\n onResult(result);\n },\n );\n }\n}\n\nfunction getFieldExpectedValue(field: ExpectedOutcomeField): string {\n if (field.type === 'chips-input') {\n return field.value.join(', ');\n }\n return field.value;\n}\n"]}

package/dist/collection/lib/evaluation/{rouge1-evaluator.test.js → evaluators/rouge1-evaluator.test.js} RENAMED Viewed

@@ -1,8 +1,8 @@
 import { describe, it, expect } from "@jest/globals";
-import { DEFAULT_ROUGE_PASS_SCORE, EvaluationApproach } from "./constants";
+import { DEFAULT_ROUGE_PASS_SCORE, EvaluationApproach } from "../constants";
 // Using integration tests with actual js-rouge library (no mocks).
 // This approach tests the real ROUGE-1 scoring behavior rather than just orchestration logic.
-import { performRouge1Evaluation } from "./evaluators/rouge1-evaluator";
+import { performRouge1Evaluation } from "./rouge1-evaluator";
 const mockRequest = {
     testCaseId: 'test-000',
     question: 'What is your name?',

package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.test.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"rouge1-evaluator.test.js","sourceRoot":"","sources":["../../../../src/lib/evaluation/evaluators/rouge1-evaluator.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAErD,OAAO,EAAE,wBAAwB,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAC5E,mEAAmE;AACnE,8FAA8F;AAC9F,OAAO,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAE7D,MAAM,WAAW,GAAsB;IACrC,UAAU,EAAE,UAAU;IACtB,QAAQ,EAAE,oBAAoB;IAC9B,cAAc,EAAE,6BAA6B;IAC7C,eAAe,EAAE,iBAAiB;IAClC,oBAAoB,EAAE;QACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;QACpC,SAAS,EAAE,GAAG;KACf;CACF,CAAC;AAEF,MAAM,sBAAsB,GAAsB;IAChD,GAAG,WAAW;IACd,oBAAoB,EAAE;QACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;QACpC,SAAS,EAAE,SAAS;KACrB;CACF,CAAC;AAEF,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACnC,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;YACxE,MAAM,OAAO,GAAsB;gBACjC,GAAG,WAAW;gBACd,cAAc,EAAE,iCAAiC;gBACjD,eAAe,EAAE,iBAAiB;aACnC,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC7C,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;YACvB,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QACzB,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;YACtE,MAAM,OAAO,GAAsB;gBACjC,GAAG,WAAW;gBACd,cAAc,EAAE,oDAAoD;gBACpE,eAAe,EAAE,2CAA2C;aAC7D,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;YACpB,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;YAC5E,MAAM,OAAO,GAAsB;gBACjC,GAAG,WAAW;gBACd,cAAc,EAAE,iCAAiC;gBACjD,eAAe,EAAE,oCAAoC;aACtD,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC;YAC9B,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC9D,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,sBAAsB,CAAC,CAAC;YAErE,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAChD,wBAAwB,CACzB,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;YAC3D,MAAM,OAAO,GAAsB;gBACjC,GAAG,WAAW;gBACd,cAAc,EAAE,yCAAyC;gBACzD,eAAe,EAAE,8BAA8B;gBAC/C,oBAAoB,EAAE;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;oBACpC,SAAS,EAAE,GAAG;iBACf;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7D,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;YAC1E,MAAM,OAAO,GAAsB;gBACjC,GAAG,WAAW;gBACd,cAAc,EAAE,iCAAiC;gBACjD,eAAe,EAAE,kBAAkB;gBACnC,oBAAoB,EAAE;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;oBACpC,SAAS,EAAE,GAAG;iBACf;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACxD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YAClD,MAAM,OAAO,GAAG,EAAE,GAAG,WAAW,EAAE,cAAc,EAAE,EAAE,EAAE,CAAC;YAEvD,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACxE,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC1E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;YAC1D,MAAM,OAAO,GAAG,EAAE,GAAG,WAAW,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;YAExD,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC","sourcesContent":["import { describe, it, expect } from '@jest/globals';\nimport { EvaluationRequest } from '../types';\nimport { DEFAULT_ROUGE_PASS_SCORE, EvaluationApproach } from '../constants';\n// Using integration tests with actual js-rouge library (no mocks).\n// This approach tests the real ROUGE-1 scoring behavior rather than just orchestration logic.\nimport { performRouge1Evaluation } from './rouge1-evaluator';\n\nconst mockRequest: EvaluationRequest = {\n testCaseId: 'test-000',\n question: 'What is your name?',\n actualResponse: 'I am a large language model',\n expectedOutcome: 'model\\nlanguage',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_1,\n threshold: 0.5,\n },\n};\n\nconst mockRequestNoThreshold: EvaluationRequest = {\n ...mockRequest,\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_1,\n threshold: undefined,\n },\n};\n\ndescribe('performRouge1Evaluation', () => {\n describe('Basic functionality', () => {\n it('should pass when response contains exact keyword matches', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'This is a language model system',\n expectedOutcome: 'language\\nmodel',\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(true);\n expect(result.keywordMatches.length).toBe(2);\n expect(result.keywordMatches[0].found).toBe(true);\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeGreaterThan(0.5);\n expect(result.keywordMatches[1].found).toBe(true);\n expect(\n result.keywordMatches[1].evaluationApproachResult.score,\n ).toBeGreaterThan(0.5);\n });\n\n it('should fail when keywords are not sufficiently present', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'This is completely unrelated content about cooking',\n expectedOutcome: 'machine learning\\nartificial intelligence',\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(false);\n expect(result.keywordMatches[0].found).toBe(false);\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeLessThan(0.5);\n expect(result.keywordMatches[1].found).toBe(false);\n expect(\n result.keywordMatches[1].evaluationApproachResult.score,\n ).toBeLessThan(0.5);\n });\n\n it('should partially pass when only some keywords meet threshold', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'Machine learning is fascinating',\n expectedOutcome: 'machine learning\\ndatabase systems',\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(false);\n expect(result.keywordMatches[0].found).toBe(true);\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeGreaterThanOrEqual(0.5);\n expect(result.keywordMatches[1].found).toBe(false);\n expect(\n result.keywordMatches[1].evaluationApproachResult.score,\n ).toBeLessThan(0.5);\n });\n });\n\n describe('Threshold handling', () => {\n it('should use default threshold when not provided', async () => {\n const result = await performRouge1Evaluation(mockRequestNoThreshold);\n\n expect(result.evaluationParameters.threshold).toBe(\n DEFAULT_ROUGE_PASS_SCORE,\n );\n });\n\n it('should pass all keywords with threshold 0.0', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'completely unrelated text about cooking',\n expectedOutcome: 'quantum physics\\nmathematics',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_1,\n threshold: 0.0,\n },\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(true);\n expect(result.keywordMatches.every(m => m.found)).toBe(true);\n expect(result.evaluationParameters.threshold).toBe(0.0);\n });\n\n it('should fail when threshold is 1.0 and match is not perfect', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'This is about learning concepts',\n expectedOutcome: 'machine learning',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_1,\n threshold: 1.0,\n },\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.evaluationParameters.threshold).toBe(1.0);\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeLessThan(1.0);\n });\n });\n\n describe('Edge cases', () => {\n it('should handle empty actualResponse', async () => {\n const request = { ...mockRequest, actualResponse: '' };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(false);\n expect(result.keywordMatches[0].evaluationApproachResult.score).toBe(0);\n expect(result.keywordMatches[1].evaluationApproachResult.score).toBe(0);\n });\n\n it('should handle empty expectedOutcome string', async () => {\n const request = { ...mockRequest, expectedOutcome: '' };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(true);\n expect(result.keywordMatches.length).toBe(0);\n });\n });\n});\n"]}

package/dist/collection/lib/evaluation/field-evaluation-approach.js ADDED Viewed

@@ -0,0 +1,24 @@
+import { EvaluationApproach, EvaluationApproachValues } from "./constants";
+const SELECT_ONLY_APPROACHES = [EvaluationApproach.EXACT];
+export function getAllowedApproachesForFieldType(fieldType) {
+    if (fieldType === 'select') {
+        return SELECT_ONLY_APPROACHES;
+    }
+    return EvaluationApproachValues;
+}
+export function isApproachAllowedForFieldType(fieldType, approach) {
+    return getAllowedApproachesForFieldType(fieldType).includes(approach);
+}
+export function normalizeEvaluationParametersForField(fieldType, evaluationParameters) {
+    const allowedApproaches = getAllowedApproachesForFieldType(fieldType);
+    const fallbackApproach = allowedApproaches[0];
+    const rawApproach = evaluationParameters?.approach;
+    const approach = rawApproach && allowedApproaches.includes(rawApproach)
+        ? rawApproach
+        : fallbackApproach;
+    return {
+        ...evaluationParameters,
+        approach,
+    };
+}
+//# sourceMappingURL=field-evaluation-approach.js.map

package/dist/collection/lib/evaluation/field-evaluation-approach.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"field-evaluation-approach.js","sourceRoot":"","sources":["../../../src/lib/evaluation/field-evaluation-approach.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAC;AAK3E,MAAM,sBAAsB,GAAyB,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC;AAEhF,MAAM,UAAU,gCAAgC,CAC9C,SAA8B;IAE9B,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC3B,OAAO,sBAAsB,CAAC;IAChC,CAAC;IACD,OAAO,wBAAwB,CAAC;AAClC,CAAC;AAED,MAAM,UAAU,6BAA6B,CAC3C,SAA8B,EAC9B,QAA4B;IAE5B,OAAO,gCAAgC,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;AACxE,CAAC;AAED,MAAM,UAAU,qCAAqC,CACnD,SAA8B,EAC9B,oBAA2C;IAE3C,MAAM,iBAAiB,GAAG,gCAAgC,CAAC,SAAS,CAAC,CAAC;IACtE,MAAM,gBAAgB,GAAG,iBAAiB,CAAC,CAAC,CAAC,CAAC;IAC9C,MAAM,WAAW,GAAG,oBAAoB,EAAE,QAAQ,CAAC;IACnD,MAAM,QAAQ,GACZ,WAAW,IAAI,iBAAiB,CAAC,QAAQ,CAAC,WAAW,CAAC;QACpD,CAAC,CAAC,WAAW;QACb,CAAC,CAAC,gBAAgB,CAAC;IAEvB,OAAO;QACL,GAAG,oBAAoB;QACvB,QAAQ;KACT,CAAC;AACJ,CAAC","sourcesContent":["import { EvaluationApproach, EvaluationApproachValues } from './constants';\nimport type { EvaluationParameters } from '../../types/evaluation';\n\nexport type EvaluationFieldType = 'text' | 'textarea' | 'chips-input' | 'select';\n\nconst SELECT_ONLY_APPROACHES: EvaluationApproach[] = [EvaluationApproach.EXACT];\n\nexport function getAllowedApproachesForFieldType(\n fieldType: EvaluationFieldType,\n): EvaluationApproach[] {\n if (fieldType === 'select') {\n return SELECT_ONLY_APPROACHES;\n }\n return EvaluationApproachValues;\n}\n\nexport function isApproachAllowedForFieldType(\n fieldType: EvaluationFieldType,\n approach: EvaluationApproach,\n): boolean {\n return getAllowedApproachesForFieldType(fieldType).includes(approach);\n}\n\nexport function normalizeEvaluationParametersForField(\n fieldType: EvaluationFieldType,\n evaluationParameters?: EvaluationParameters,\n): EvaluationParameters {\n const allowedApproaches = getAllowedApproachesForFieldType(fieldType);\n const fallbackApproach = allowedApproaches[0];\n const rawApproach = evaluationParameters?.approach;\n const approach =\n rawApproach && allowedApproaches.includes(rawApproach)\n ? rawApproach\n : fallbackApproach;\n\n return {\n ...evaluationParameters,\n approach,\n };\n}\n\n"]}

package/dist/collection/lib/evaluation/index.js CHANGED Viewed

@@ -1,7 +1,3 @@
 import { LLMEvaluationEngine } from "./evaluation-engine";
 export { LLMEvaluationEngine };
-export async function evaluateLLMResponse(request, callback) {
-    const engine = new LLMEvaluationEngine();
-    await engine.evaluateResponse(request, callback);
-}
 //# sourceMappingURL=index.js.map

package/dist/collection/lib/evaluation/index.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/evaluation/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAQ1D,OAAO,EAAE,mBAAmB,EAAE,CAAC~~;AAQ/B,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,OAA0B,EAC1B,QAA4B;IAE5B,MAAM,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IACzC,MAAM,MAAM,CAAC,gBAAgB,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;AACnD,CAAC~~","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport type {\n EvaluationRequest,\n EvaluationResult,\n KeywordMatch,\n EvaluationCallback,\n} from './types';\n\nexport { LLMEvaluationEngine };\nexport type {\n EvaluationRequest,\n EvaluationResult,\n KeywordMatch,\n EvaluationCallback,\n};\n\nexport async function evaluateLLMResponse(\n request: EvaluationRequest,\n callback: EvaluationCallback,\n): Promise<void> {\n const engine = new LLMEvaluationEngine();\n await engine.evaluateResponse(request, callback);\n}\n"]}
1	+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/evaluation/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAQ1D,OAAO,EAAE,mBAAmB,EAAE,CAAC","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport type {\n EvaluationRequest,\n EvaluationResult,\n KeywordMatch,\n EvaluationCallback,\n} from './types';\n\nexport { LLMEvaluationEngine };\nexport type {\n EvaluationRequest,\n EvaluationResult,\n KeywordMatch,\n EvaluationCallback,\n};"]}

package/dist/collection/lib/evaluation/types.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/evaluation/types.ts"],"names":[],"mappings":"","sourcesContent":["import {\n EvaluationParameters,\n EvaluationApproachResult,\n} from '../../types/evaluation';\n\nexport interface EvaluationRequest {\n testCaseId: string;\n question: string;\n expectedOutcome: string;\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface EvaluationResult {\n testCaseId: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n timestamp?: string;\n evaluationParameters: EvaluationParameters;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport interface KeywordMatch {\n keyword: string;\n found: boolean;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport type EvaluationCallback = (result: EvaluationResult) => void;\n\nexport interface RougeKeywordDetails {\n rouge1: number;\n rougeL: number;\n scoreUsed: string;\n approach: string;\n}\n\nexport interface Rouge1OverallDetails {\n keywordsPassed: number;\n totalKeywords: number;\n passRate: string;\n thresholdUsed: number;\n approach: string;\n}\n"]}
1	+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/evaluation/types.ts"],"names":[],"mappings":"","sourcesContent":["import {\n EvaluationParameters,\n EvaluationApproachResult,\n} from '../../types/evaluation';\nimport type { ExpectedOutcomeFieldType } from '../../types/llm-test-runner';\n\nexport interface EvaluationRequest {\n testCaseId: string;\n question: string;\n expectedOutcome: string;\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface FieldEvaluationInput {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface EvaluationRequestV2 {\n testCaseId: string;\n question: string;\n actualResponse: string;\n fields: FieldEvaluationInput[];\n}\n\nexport interface EvaluationResult {\n testCaseId: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n fieldResults?: FieldEvaluationResult[];\n timestamp?: string;\n evaluationParameters?: EvaluationParameters;\n evaluationApproachResult?: EvaluationApproachResult;\n}\n\nexport interface FieldEvaluationResult {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n evaluationParameters: EvaluationParameters;\n evaluationApproachResult: EvaluationApproachResult;\n error?: string;\n}\n\nexport interface KeywordMatch {\n keyword: string;\n found: boolean;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport type EvaluationCallback = (result: EvaluationResult) => void;\n\nexport interface RougeKeywordDetails {\n rouge1: number;\n rougeL: number;\n scoreUsed: string;\n approach: string;\n}\n\nexport interface Rouge1OverallDetails {\n keywordsPassed: number;\n totalKeywords: number;\n passRate: string;\n thresholdUsed: number;\n approach: string;\n}\n"]}