llm-testrunner-components 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/dist/cjs/app-chips_5.cjs.entry.js +1 -1
- package/dist/cjs/app-chips_5.cjs.entry.js.map +1 -1
- package/dist/cjs/index.cjs.js +464 -66
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/cjs/llm-testrunner.cjs.js +1 -1
- package/dist/cjs/loader.cjs.js +1 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +46 -13
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/chat-history.css +5 -5
- package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +45 -5
- package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +21 -0
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +2 -2
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js.map +1 -1
- package/dist/collection/lib/evaluation/actual-value-resolver.js +52 -0
- package/dist/collection/lib/evaluation/actual-value-resolver.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluation-engine.js +1 -1
- package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluation-service.js +55 -17
- package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
- package/dist/collection/lib/evaluation/types.js.map +1 -1
- package/dist/collection/lib/import-export/test-suite-importer.js +7 -1
- package/dist/collection/lib/import-export/test-suite-importer.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-factory.js +5 -0
- package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-mutations.js +58 -23
- package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
- package/dist/collection/schemas/expected-outcome.js +39 -0
- package/dist/collection/schemas/expected-outcome.js.map +1 -1
- package/dist/collection/schemas/model-response.js +7 -0
- package/dist/collection/schemas/model-response.js.map +1 -0
- package/dist/collection/schemas/test-case.js +2 -1
- package/dist/collection/schemas/test-case.js.map +1 -1
- package/dist/collection/types/expected-outcome.js.map +1 -1
- package/dist/collection/types/llm-test-runner.js.map +1 -1
- package/dist/components/chat-history.js +1 -1
- package/dist/components/index.js +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/p-kmtfMXcQ.js +2 -0
- package/dist/components/p-kmtfMXcQ.js.map +1 -0
- package/dist/components/{p-B87Lt3z4.js → p-wzA48RFK.js} +3 -3
- package/dist/components/p-wzA48RFK.js.map +1 -0
- package/dist/esm/app-chips_5.entry.js +1 -1
- package/dist/esm/app-chips_5.entry.js.map +1 -1
- package/dist/esm/index.js +464 -66
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm-testrunner.js +1 -1
- package/dist/esm/loader.js +1 -1
- package/dist/llm-testrunner/index.esm.js +2 -2
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
- package/dist/llm-testrunner/{p-21202f12.entry.js → p-5bf1fc78.entry.js} +2 -2
- package/dist/llm-testrunner/{p-21202f12.entry.js.map → p-5bf1fc78.entry.js.map} +1 -1
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +3 -4
- package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +1 -0
- package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +1 -0
- package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +1 -0
- package/dist/types/components/llm-test-runner/test-cases/output/response-output.d.ts +2 -1
- package/dist/types/components.d.ts +4 -2
- package/dist/types/lib/evaluation/actual-value-resolver.d.ts +9 -0
- package/dist/types/lib/evaluation/evaluation-service.d.ts +2 -2
- package/dist/types/lib/evaluation/types.d.ts +1 -1
- package/dist/types/lib/import-export/test-suite-importer.d.ts +1 -1
- package/dist/types/lib/test-cases/test-case-mutations.d.ts +10 -1
- package/dist/types/schemas/expected-outcome.d.ts +116 -0
- package/dist/types/schemas/model-response.d.ts +7 -0
- package/dist/types/schemas/test-case.d.ts +76 -1
- package/dist/types/types/expected-outcome.d.ts +1 -1
- package/dist/types/types/llm-test-runner.d.ts +4 -2
- package/package.json +1 -1
- package/dist/components/p-B87Lt3z4.js.map +0 -1
- package/dist/components/p-Bx2jqguC.js +0 -2
- package/dist/components/p-Bx2jqguC.js.map +0 -1
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { h } from "@stencil/core";
|
|
2
2
|
import { LLMTestCaseRow } from "./llm-test-case-row";
|
|
3
3
|
import { Button } from "../../../lib/ui/button/index";
|
|
4
|
-
export const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
|
|
5
|
-
return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange, onChatHistoryChange: onChatHistoryChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
|
|
4
|
+
/**
 * Functional component that renders the test-case table: a row of four
 * column headers, one LLMTestCaseRow per entry of `testCases`, and an
 * "+ Add Question" button wired to `onAddTestCase`.
 *
 * `dynamicResolutionSupported` (defaults to false) and `extractorIds`
 * (defaults to an empty array) are forwarded unchanged to every row, together
 * with the run, delete and change handlers.
 */
export const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
    // Header cells are built first, then spread as individual children so the
    // resulting vnode tree matches a hand-written list of four <div>s.
    const columnTitles = ["Input", "Output", "Evaluation", "Actions"];
    const headerCells = columnTitles.map(title => h("div", { class: "test-cases__column-header" }, title));
    const headerRow = h("div", { class: "test-cases__column-headers" }, ...headerCells);

    // One row per test case; every row receives the same shared props.
    const rows = testCases.map(testCase => h(LLMTestCaseRow, {
        testCase,
        dynamicResolutionSupported,
        extractorIds,
        onRun,
        onDelete,
        handleTestCaseChange,
        onExpectedOutcomeChange,
        onChatHistoryChange,
    }));

    const addButton = h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question");
    const addSection = h("div", { class: "test-cases__add-section" }, addButton);

    return h("div", { class: "test-cases" }, headerRow, rows, addSection);
};
|
|
7
7
|
//# sourceMappingURL=llm-test-cases.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm-test-cases.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-cases.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,EAAE,cAAc,EAA8B,MAAM,qBAAqB,CAAC;AACjF,OAAO,EAAE,MAAM,EAAE,MAAM,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"llm-test-cases.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-cases.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,EAAE,cAAc,EAA8B,MAAM,qBAAqB,CAAC;AACjF,OAAO,EAAE,MAAM,EAAE,MAAM,8BAA8B,CAAC;AAmBtD,MAAM,CAAC,MAAM,YAAY,GAA2C,CAAC,EACnE,SAAS,EACT,0BAA0B,GAAG,KAAK,EAClC,YAAY,GAAG,EAAE,EACjB,KAAK,EACL,QAAQ,EACR,aAAa,EACb,oBAAoB,EACpB,uBAAuB,EACvB,mBAAmB,GACpB,EAAE,EAAE;IACH,OAAO,CACL,WAAK,KAAK,EAAC,YAAY;QACrB,WAAK,KAAK,EAAC,4BAA4B;YACrC,WAAK,KAAK,EAAC,2BAA2B,YAAY;YAClD,WAAK,KAAK,EAAC,2BAA2B,aAAa;YACnD,WAAK,KAAK,EAAC,2BAA2B,iBAAiB;YACvD,WAAK,KAAK,EAAC,2BAA2B,cAAc,CAChD;QAEL,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CACzB,EAAC,cAAc,IACb,QAAQ,EAAE,QAAQ,EAClB,0BAA0B,EAAE,0BAA0B,EACtD,YAAY,EAAE,YAAY,EAC1B,KAAK,EAAE,KAAK,EACZ,QAAQ,EAAE,QAAQ,EAClB,oBAAoB,EAAE,oBAAoB,EAC1C,uBAAuB,EAAE,uBAAuB,EAChD,mBAAmB,EAAE,mBAAmB,GACxC,CACH,CAAC;QAEF,WAAK,KAAK,EAAC,yBAAyB;YAClC,EAAC,MAAM,IAAC,OAAO,EAAC,SAAS,EAAC,IAAI,EAAC,IAAI,EAAC,OAAO,EAAE,aAAa,qBAEjD,CACL,CACF,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport { LLMTestCaseRow, ChatHistoryRowChangeDetail } from './llm-test-case-row';\nimport { Button } from '../../../lib/ui/button/index';\nimport { ExpectedOutcomeChangeDetail } from './expected-outcome-renderer';\n\nexport interface LLMTestCasesProps {\n testCases: TestCase[];\n dynamicResolutionSupported?: boolean;\n extractorIds?: string[];\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n onAddTestCase: () => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n onChatHistoryChange: (e: CustomEvent<ChatHistoryRowChangeDetail>) => void;\n}\n\nexport const LLMTestCases: 
FunctionalComponent<LLMTestCasesProps> = ({\n testCases,\n dynamicResolutionSupported = false,\n extractorIds = [],\n onRun,\n onDelete,\n onAddTestCase,\n handleTestCaseChange,\n onExpectedOutcomeChange,\n onChatHistoryChange,\n}) => {\n return (\n <div class=\"test-cases\">\n <div class=\"test-cases__column-headers\">\n <div class=\"test-cases__column-header\">Input</div>\n <div class=\"test-cases__column-header\">Output</div>\n <div class=\"test-cases__column-header\">Evaluation</div>\n <div class=\"test-cases__column-header\">Actions</div>\n </div>\n\n {testCases.map(testCase => (\n <LLMTestCaseRow\n testCase={testCase}\n dynamicResolutionSupported={dynamicResolutionSupported}\n extractorIds={extractorIds}\n onRun={onRun}\n onDelete={onDelete}\n handleTestCaseChange={handleTestCaseChange}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n onChatHistoryChange={onChatHistoryChange}\n />\n ))}\n\n <div class=\"test-cases__add-section\">\n <Button variant=\"outline\" size=\"md\" onClick={onAddTestCase}>\n + Add Question\n </Button>\n </div>\n </div>\n );\n};\n"]}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { h } from "@stencil/core";
|
|
2
2
|
/**
 * Functional component that shows a model response.
 *
 * When `output.text` is truthy it is rendered inside the content element.
 * Otherwise a placeholder element is rendered, containing 'Running...' while
 * `isRunning` is truthy and an empty string when it is not.
 */
export const ResponseOutput = ({ output, isRunning, }) => {
    let body;
    if (output?.text) {
        body = h("div", { class: "response-output__content" }, output.text);
    }
    else {
        const placeholderText = isRunning ? 'Running...' : '';
        body = h("div", { class: "response-output__placeholder" }, placeholderText);
    }
    return h("div", { class: "response-output" }, body);
};
|
|
5
5
|
//# sourceMappingURL=response-output.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"response-output.js","sourceRoot":"","sources":["../../../../../src/components/llm-test-runner/test-cases/output/response-output.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"response-output.js","sourceRoot":"","sources":["../../../../../src/components/llm-test-runner/test-cases/output/response-output.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAQvD,MAAM,CAAC,MAAM,cAAc,GAA6C,CAAC,EACvE,MAAM,EACN,SAAS,GACV,EAAE,EAAE;IACH,OAAO,CACL,WAAK,KAAK,EAAC,iBAAiB,IACzB,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,CACd,WAAK,KAAK,EAAC,0BAA0B,IAAE,MAAM,CAAC,IAAI,CAAO,CAC1D,CAAC,CAAC,CAAC,CACF,WAAK,KAAK,EAAC,8BAA8B,IAAE,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAO,CAChF,CACG,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport type { ModelResponsePayload } from '../../../../types/llm-test-runner';\n\nexport interface ResponseOutputProps {\n output?: ModelResponsePayload;\n isRunning: boolean;\n}\n\nexport const ResponseOutput: FunctionalComponent<ResponseOutputProps> = ({\n output,\n isRunning,\n}) => {\n return (\n <div class=\"response-output\">\n {output?.text ? (\n <div class=\"response-output__content\">{output.text}</div>\n ) : (\n <div class=\"response-output__placeholder\">{isRunning ? 'Running...' : ''}</div>\n )}\n </div>\n );\n};\n\n"]}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
function toTextSource() {
|
|
2
|
+
return { type: 'text' };
|
|
3
|
+
}
|
|
4
|
+
export async function resolveActualValue(field, output, extractors) {
|
|
5
|
+
const source = field.evaluationSource || toTextSource();
|
|
6
|
+
if (source.type === 'text') {
|
|
7
|
+
const text = output?.text?.trim();
|
|
8
|
+
if (!text) {
|
|
9
|
+
return {
|
|
10
|
+
success: false,
|
|
11
|
+
error: 'Model response text is empty.',
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
return { success: true, value: text };
|
|
15
|
+
}
|
|
16
|
+
const extractor = extractors?.[source.extractorId];
|
|
17
|
+
if (!extractor) {
|
|
18
|
+
return {
|
|
19
|
+
success: false,
|
|
20
|
+
error: `Extractor "${source.extractorId}" is not registered.`,
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
try {
|
|
24
|
+
const extractedRaw = await extractor(output || {});
|
|
25
|
+
if (typeof extractedRaw !== 'string') {
|
|
26
|
+
return {
|
|
27
|
+
success: false,
|
|
28
|
+
error: `Extractor "${source.extractorId}" must return a string.`,
|
|
29
|
+
};
|
|
30
|
+
}
|
|
31
|
+
const extracted = extractedRaw.trim();
|
|
32
|
+
if (!extracted) {
|
|
33
|
+
return {
|
|
34
|
+
success: false,
|
|
35
|
+
error: `Extractor "${source.extractorId}" returned an empty value.`,
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
return {
|
|
39
|
+
success: true,
|
|
40
|
+
value: extracted,
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
catch (error) {
|
|
44
|
+
return {
|
|
45
|
+
success: false,
|
|
46
|
+
error: error instanceof Error
|
|
47
|
+
? error.message
|
|
48
|
+
: `Extractor "${source.extractorId}" failed.`,
|
|
49
|
+
};
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
//# sourceMappingURL=actual-value-resolver.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"actual-value-resolver.js","sourceRoot":"","sources":["../../../src/lib/evaluation/actual-value-resolver.ts"],"names":[],"mappings":"AAUA,SAAS,YAAY;IACnB,OAAO,EAAE,IAAI,EAAE,MAAM,EAAW,CAAC;AACnC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,KAA2B,EAC3B,MAA6B,EAC7B,UAAuC;IAEvC,MAAM,MAAM,GAAG,KAAK,CAAC,gBAAgB,IAAI,YAAY,EAAE,CAAC;IAExD,IAAI,MAAM,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAClC,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,+BAA+B;aACvC,CAAC;QACJ,CAAC;QACD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IACxC,CAAC;IAED,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;IACnD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,cAAc,MAAM,CAAC,WAAW,sBAAsB;SAC9D,CAAC;IACJ,CAAC;IAED,IAAI,CAAC;QACH,MAAM,YAAY,GAAG,MAAM,SAAS,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC;QACnD,IAAI,OAAO,YAAY,KAAK,QAAQ,EAAE,CAAC;YACrC,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,cAAc,MAAM,CAAC,WAAW,yBAAyB;aACjE,CAAC;QACJ,CAAC;QAED,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,cAAc,MAAM,CAAC,WAAW,4BAA4B;aACpE,CAAC;QACJ,CAAC;QAED,OAAO;YACL,OAAO,EAAE,IAAI;YACb,KAAK,EAAE,SAAS;SACjB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EACH,KAAK,YAAY,KAAK;gBACpB,CAAC,CAAC,KAAK,CAAC,OAAO;gBACf,CAAC,CAAC,cAAc,MAAM,CAAC,WAAW,WAAW;SAClD,CAAC;IACJ,CAAC;AACH,CAAC","sourcesContent":["import type {\n EvaluationSourceExtractors,\n ExpectedOutcomeField,\n ModelResponsePayload,\n} from '../../types/llm-test-runner';\n\nexport type ResolvedActualValue =\n | { success: true; value: string }\n | { success: false; error: string };\n\nfunction toTextSource() {\n return { type: 'text' } as const;\n}\n\nexport async function resolveActualValue(\n field: ExpectedOutcomeField,\n output?: ModelResponsePayload,\n extractors?: EvaluationSourceExtractors,\n): Promise<ResolvedActualValue> {\n const source = 
field.evaluationSource || toTextSource();\n\n if (source.type === 'text') {\n const text = output?.text?.trim();\n if (!text) {\n return {\n success: false,\n error: 'Model response text is empty.',\n };\n }\n return { success: true, value: text };\n }\n\n const extractor = extractors?.[source.extractorId];\n if (!extractor) {\n return {\n success: false,\n error: `Extractor \"${source.extractorId}\" is not registered.`,\n };\n }\n\n try {\n const extractedRaw = await extractor(output || {});\n if (typeof extractedRaw !== 'string') {\n return {\n success: false,\n error: `Extractor \"${source.extractorId}\" must return a string.`,\n };\n }\n\n const extracted = extractedRaw.trim();\n if (!extracted) {\n return {\n success: false,\n error: `Extractor \"${source.extractorId}\" returned an empty value.`,\n };\n }\n\n return {\n success: true,\n value: extracted,\n };\n } catch (error) {\n return {\n success: false,\n error:\n error instanceof Error\n ? error.message\n : `Extractor \"${source.extractorId}\" failed.`,\n };\n }\n}\n"]}
|
|
@@ -10,7 +10,7 @@ export class LLMEvaluationEngine {
|
|
|
10
10
|
const fieldRequest = {
|
|
11
11
|
testCaseId: request.testCaseId,
|
|
12
12
|
question: request.question,
|
|
13
|
-
actualResponse:
|
|
13
|
+
actualResponse: field.actualResponse,
|
|
14
14
|
expectedOutcome: field.expectedValue,
|
|
15
15
|
evaluationParameters: field.evaluationParameters,
|
|
16
16
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluation-engine.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-engine.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,OAAO,mBAAmB;IAC9B,KAAK,CAAC,gBAAgB,CACpB,OAA4B,EAC5B,QAA4B;QAE5B,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,UAAU,CAC7C,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAC,KAAK,EAAC,EAAE;YAC/B,MAAM,YAAY,GAAsB;gBACtC,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,cAAc,EAAE,
|
|
1
|
+
{"version":3,"file":"evaluation-engine.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-engine.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,OAAO,mBAAmB;IAC9B,KAAK,CAAC,gBAAgB,CACpB,OAA4B,EAC5B,QAA4B;QAE5B,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,UAAU,CAC7C,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAC,KAAK,EAAC,EAAE;YAC/B,MAAM,YAAY,GAAsB;gBACtC,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,cAAc,EAAE,KAAK,CAAC,cAAc;gBACpC,eAAe,EAAE,KAAK,CAAC,aAAa;gBACpC,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;aACjD,CAAC;YACF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YAEtD,MAAM,WAAW,GAA0B;gBACzC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,oBAAoB,EAAE,MAAM,CAAC,oBAAqB;gBAClD,wBAAwB,EAAE,MAAM,CAAC,wBAAwB;aAC1D,CAAC;YACF,OAAO,WAAW,CAAC;QACrB,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,YAAY,GAA4B,cAAc,CAAC,GAAG,CAC9D,CAAC,aAAa,EAAE,KAAK,EAAE,EAAE;YACvB,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,aAAa,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBACzC,OAAO,aAAa,CAAC,KAAK,CAAC;YAC7B,CAAC;YAED,OAAO;gBACL,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;gBAChD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,KAAK,CAAC,oBAAoB,CAAC,QAAQ;iBAClD;gBACD,KAAK,EAAE,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,MAAM,CAAC;aACtD,CAAC;QACJ,CAAC,CACF,CAAC;QAEF,MAAM,cAAc,GAAG,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC3E,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEzE,Q
AAQ,CAAC;YACP,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,MAAM;YACN,cAAc;YACd,YAAY;YACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,OAA0B;QACpD,MAAM,QAAQ,GAAuB,OAAO,CAAC,oBAAoB,CAAC,QAAQ,CAAC;QAC3E,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,kBAAkB,CAAC,IAAI;gBAC1B,OAAO,qBAAqB,CAAC,OAAO,CAAC,CAAC;YACxC,KAAK,kBAAkB,CAAC,KAAK;gBAC3B,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;YACpC,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,QAAQ;gBAC9B,OAAO,yBAAyB,CAAC,OAAO,CAAC,CAAC;YAC5C;gBACE,OAAO,CAAC,IAAI,CACV,8BAA8B,OAAO,CAAC,oBAAoB,CAAC,QAAQ,kCAAkC,CACtG,CAAC;gBACF,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,mBAAmB,CAAC,KAAc;QACxC,OAAO,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,0BAA0B,CAAC;IAC7E,CAAC;CACF","sourcesContent":["import {\n EvaluationRequest,\n EvaluationResult,\n EvaluationCallback,\n FieldEvaluationResult,\n EvaluationRequestV2,\n} from './types';\nimport { performEvaluation } from './evaluators/exact/exact';\nimport { EvaluationApproach } from './constants';\nimport { performRouge1Evaluation } from './evaluators/rouge1-evaluator';\nimport { performSemanticEvaluation } from './evaluators/semantic/index';\nimport { performRougeLEvaluation } from './evaluators/rougeL-evaluator';\nimport { performBleuEvaluation } from './evaluators/bleu/bleu-evaluator';\n\nexport class LLMEvaluationEngine {\n async evaluateResponse(\n request: EvaluationRequestV2,\n callback: EvaluationCallback,\n ): Promise<void> {\n const settledResults = await Promise.allSettled(\n request.fields.map(async field => {\n const fieldRequest: EvaluationRequest = {\n testCaseId: request.testCaseId,\n question: request.question,\n actualResponse: field.actualResponse,\n expectedOutcome: field.expectedValue,\n evaluationParameters: field.evaluationParameters,\n };\n const result = await this.evaluateField(fieldRequest);\n\n const fieldResult: 
FieldEvaluationResult = {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: result.passed,\n keywordMatches: result.keywordMatches,\n evaluationParameters: result.evaluationParameters!,\n evaluationApproachResult: result.evaluationApproachResult,\n };\n return fieldResult;\n }),\n );\n\n const fieldResults: FieldEvaluationResult[] = settledResults.map(\n (settledResult, index) => {\n const field = request.fields[index];\n if (settledResult.status === 'fulfilled') {\n return settledResult.value;\n }\n\n return {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: false,\n keywordMatches: [],\n evaluationParameters: field.evaluationParameters,\n evaluationApproachResult: {\n score: 0,\n approachUsed: field.evaluationParameters.approach,\n },\n error: this.getSafeErrorMessage(settledResult.reason),\n };\n },\n );\n\n const keywordMatches = fieldResults.flatMap(field => field.keywordMatches);\n const passed = fieldResults.every(field => field.passed && !field.error);\n\n callback({\n testCaseId: request.testCaseId,\n passed,\n keywordMatches,\n fieldResults,\n timestamp: new Date().toISOString(),\n });\n }\n\n private async evaluateField(request: EvaluationRequest): Promise<EvaluationResult> {\n const approach: EvaluationApproach = request.evaluationParameters.approach;\n switch (approach) {\n case EvaluationApproach.BLEU:\n return performBleuEvaluation(request);\n case EvaluationApproach.EXACT:\n return performEvaluation(request);\n case EvaluationApproach.ROUGE_1:\n return performRouge1Evaluation(request);\n case EvaluationApproach.ROUGE_L:\n return performRougeLEvaluation(request);\n case EvaluationApproach.SEMANTIC:\n return performSemanticEvaluation(request);\n default:\n console.warn(\n `Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`,\n );\n return performEvaluation(request);\n }\n }\n\n 
private getSafeErrorMessage(error: unknown): string {\n return error instanceof Error ? error.message : 'Field evaluation failed.';\n }\n}\n"]}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { LLMEvaluationEngine } from "./evaluation-engine";
|
|
2
2
|
import { normalizeEvaluationParametersForField } from "./field-evaluation-approach";
|
|
3
|
+
import { resolveActualValue } from "./actual-value-resolver";
|
|
3
4
|
/**
|
|
4
5
|
* Service for evaluating test case responses
|
|
5
6
|
*/
|
|
@@ -13,34 +14,71 @@ export class EvaluationService {
|
|
|
13
14
|
* @param testCase - The test case to evaluate
|
|
14
15
|
* @param onResult - Callback to handle the evaluation result
|
|
15
16
|
*/
|
|
16
|
-
async evaluateTestCase(testCase, onResult) {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
}
|
|
21
|
-
const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
|
|
17
|
+
async evaluateTestCase(testCase, onResult, extractors) {
|
|
18
|
+
const fields = [];
|
|
19
|
+
const failedFields = [];
|
|
20
|
+
for (const [index, field] of (testCase.expectedOutcome || []).entries()) {
|
|
22
21
|
if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
|
|
23
|
-
|
|
22
|
+
continue;
|
|
24
23
|
}
|
|
25
|
-
|
|
26
|
-
|
|
24
|
+
const evaluationParameters = normalizeEvaluationParametersForField(field.type, field.evaluationParameters);
|
|
25
|
+
const expectedValue = getFieldExpectedValue(field);
|
|
26
|
+
const resolvedActualValue = await resolveActualValue(field, testCase.output, extractors);
|
|
27
|
+
if (resolvedActualValue.success) {
|
|
28
|
+
fields.push({
|
|
27
29
|
index,
|
|
28
30
|
label: field.label,
|
|
29
31
|
type: field.type,
|
|
30
|
-
expectedValue
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
expectedValue,
|
|
33
|
+
actualResponse: resolvedActualValue.value,
|
|
34
|
+
evaluationParameters,
|
|
35
|
+
});
|
|
36
|
+
}
|
|
37
|
+
else {
|
|
38
|
+
failedFields.push({
|
|
39
|
+
index,
|
|
40
|
+
label: field.label,
|
|
41
|
+
type: field.type,
|
|
42
|
+
expectedValue,
|
|
43
|
+
passed: false,
|
|
44
|
+
keywordMatches: [],
|
|
45
|
+
evaluationParameters,
|
|
46
|
+
evaluationApproachResult: {
|
|
47
|
+
score: 0,
|
|
48
|
+
approachUsed: evaluationParameters.approach,
|
|
49
|
+
},
|
|
50
|
+
error: 'error' in resolvedActualValue
|
|
51
|
+
? resolvedActualValue.error
|
|
52
|
+
: 'Failed to resolve actual value.',
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
if (fields.length === 0) {
|
|
57
|
+
if (failedFields.length === 0) {
|
|
58
|
+
console.warn('⚠️ No evaluable fields for test case:', testCase.id);
|
|
59
|
+
return;
|
|
60
|
+
}
|
|
61
|
+
onResult({
|
|
62
|
+
testCaseId: testCase.id,
|
|
63
|
+
passed: false,
|
|
64
|
+
keywordMatches: [],
|
|
65
|
+
fieldResults: failedFields,
|
|
66
|
+
timestamp: new Date().toISOString(),
|
|
67
|
+
});
|
|
68
|
+
return;
|
|
69
|
+
}
|
|
35
70
|
const evaluationRequest = {
|
|
36
71
|
testCaseId: testCase.id,
|
|
37
72
|
question: testCase.question,
|
|
38
|
-
actualResponse: testCase.output,
|
|
39
73
|
fields,
|
|
40
74
|
};
|
|
41
75
|
await this.engine.evaluateResponse(evaluationRequest, (result) => {
|
|
42
|
-
|
|
43
|
-
onResult(
|
|
76
|
+
const combinedResults = [...(result.fieldResults || []), ...failedFields].sort((a, b) => a.index - b.index);
|
|
77
|
+
onResult({
|
|
78
|
+
...result,
|
|
79
|
+
passed: combinedResults.every(field => field.passed && !field.error),
|
|
80
|
+
fieldResults: combinedResults,
|
|
81
|
+
});
|
|
44
82
|
});
|
|
45
83
|
}
|
|
46
84
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluation-service.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"evaluation-service.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAY1D,OAAO,EAAE,qCAAqC,EAAE,MAAM,6BAA6B,CAAC;AACpF,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAE7D;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAsB;IAEpC;QACE,IAAI,CAAC,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAC1C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CACpB,QAAkB,EAClB,QAA4C,EAC5C,UAAuC;QAEvC,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,MAAM,YAAY,GAA4B,EAAE,CAAC;QAEjD,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC;YACxE,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,IAAI,KAAK,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;gBACjE,SAAS;YACX,CAAC;YAED,MAAM,oBAAoB,GAAG,qCAAqC,CAChE,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B,CAAC;YACF,MAAM,aAAa,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,mBAAmB,GAAG,MAAM,kBAAkB,CAClD,KAAK,EACL,QAAQ,CAAC,MAAM,EACf,UAAU,CACX,CAAC;YAEF,IAAI,mBAAmB,CAAC,OAAO,EAAE,CAAC;gBAChC,MAAM,CAAC,IAAI,CAAC;oBACV,KAAK;oBACL,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,aAAa;oBACb,cAAc,EAAE,mBAAmB,CAAC,KAAK;oBACzC,oBAAoB;iBACrB,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,YAAY,CAAC,IAAI,CAAC;oBAChB,KAAK;oBACL,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,aAAa;oBACb,MAAM,EAAE,KAAK;oBACb,cAAc,EAAE,EAAE;oBAClB,oBAAoB;oBACpB,wBAAwB,EAAE;wBACxB,KAAK,EAAE,CAAC;wBACR,YAAY,EAAE,oBAAoB,CAAC,QAAQ;qBAC5C;oBACD,KAAK,EACH,OAAO,IAAI,mBAAmB;wBAC5B,CAAC,CAAC,mBAAmB,CAAC,KAAK;wBAC3B,CAAC,CAAC,iCAAiC;iBACxC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC9B,OAAO,CAAC,IAAI,CAAC,uCAAuC,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;gBACnE,OAAO;YACT,CAAC;YAED,QAAQ,CAAC;gBACP,UAAU,EAAE,QAAQ,CAAC,EAAE;gBACvB,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,YAAY,EAAE,YAAY;gBAC1B,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC,CAAC;YACH,OAAO;QACT,CAAC;QAED,MAAM,iBAAiB,GAAwB;YAC7C,UAAU,EAAE,QAAQ,CAAC,EAAE;YACvB,QAAQ,
EAAE,QAAQ,CAAC,QAAQ;YAC3B,MAAM;SACP,CAAC;QAEF,MAAM,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC,iBAAiB,EAAE,CAAC,MAAwB,EAAE,EAAE;YACjF,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,YAAY,IAAI,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,CAAC,IAAI,CAC5E,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAC5B,CAAC;YACF,QAAQ,CAAC;gBACP,GAAG,MAAM;gBACT,MAAM,EAAE,eAAe,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC;gBACpE,YAAY,EAAE,eAAe;aAC9B,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AAED,SAAS,qBAAqB,CAAC,KAA2B;IACxD,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IACD,OAAO,KAAK,CAAC,KAAK,CAAC;AACrB,CAAC","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport {\n EvaluationResult,\n FieldEvaluationInput,\n EvaluationRequestV2,\n FieldEvaluationResult,\n} from './types';\nimport {\n TestCase,\n ExpectedOutcomeField,\n EvaluationSourceExtractors,\n} from '../../types/llm-test-runner';\nimport { normalizeEvaluationParametersForField } from './field-evaluation-approach';\nimport { resolveActualValue } from './actual-value-resolver';\n\n/**\n * Service for evaluating test case responses\n */\nexport class EvaluationService {\n private engine: LLMEvaluationEngine;\n\n constructor() {\n this.engine = new LLMEvaluationEngine();\n }\n\n /**\n * Evaluates a test case response\n * @param testCase - The test case to evaluate\n * @param onResult - Callback to handle the evaluation result\n */\n async evaluateTestCase(\n testCase: TestCase,\n onResult: (result: EvaluationResult) => void,\n extractors?: EvaluationSourceExtractors,\n ): Promise<void> {\n const fields: FieldEvaluationInput[] = [];\n const failedFields: FieldEvaluationResult[] = [];\n\n for (const [index, field] of (testCase.expectedOutcome || []).entries()) {\n if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {\n continue;\n }\n\n const evaluationParameters = 
normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n );\n const expectedValue = getFieldExpectedValue(field);\n const resolvedActualValue = await resolveActualValue(\n field,\n testCase.output,\n extractors,\n );\n\n if (resolvedActualValue.success) {\n fields.push({\n index,\n label: field.label,\n type: field.type,\n expectedValue,\n actualResponse: resolvedActualValue.value,\n evaluationParameters,\n });\n } else {\n failedFields.push({\n index,\n label: field.label,\n type: field.type,\n expectedValue,\n passed: false,\n keywordMatches: [],\n evaluationParameters,\n evaluationApproachResult: {\n score: 0,\n approachUsed: evaluationParameters.approach,\n },\n error:\n 'error' in resolvedActualValue\n ? resolvedActualValue.error\n : 'Failed to resolve actual value.',\n });\n }\n }\n\n if (fields.length === 0) {\n if (failedFields.length === 0) {\n console.warn('⚠️ No evaluable fields for test case:', testCase.id);\n return;\n }\n\n onResult({\n testCaseId: testCase.id,\n passed: false,\n keywordMatches: [],\n fieldResults: failedFields,\n timestamp: new Date().toISOString(),\n });\n return;\n }\n\n const evaluationRequest: EvaluationRequestV2 = {\n testCaseId: testCase.id,\n question: testCase.question,\n fields,\n };\n\n await this.engine.evaluateResponse(evaluationRequest, (result: EvaluationResult) => {\n const combinedResults = [...(result.fieldResults || []), ...failedFields].sort(\n (a, b) => a.index - b.index,\n );\n onResult({\n ...result,\n passed: combinedResults.every(field => field.passed && !field.error),\n fieldResults: combinedResults,\n });\n });\n }\n}\n\nfunction getFieldExpectedValue(field: ExpectedOutcomeField): string {\n if (field.type === 'chips-input') {\n return field.value.join(', ');\n }\n return field.value;\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/evaluation/types.ts"],"names":[],"mappings":"","sourcesContent":["import {\n EvaluationParameters,\n EvaluationApproachResult,\n} from '../../types/evaluation';\nimport type { ExpectedOutcomeFieldType } from '../../types/llm-test-runner';\n\nexport interface EvaluationRequest {\n testCaseId: string;\n question: string;\n expectedOutcome: string;\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface FieldEvaluationInput {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface EvaluationRequestV2 {\n testCaseId: string;\n question: string;\n
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/evaluation/types.ts"],"names":[],"mappings":"","sourcesContent":["import {\n EvaluationParameters,\n EvaluationApproachResult,\n} from '../../types/evaluation';\nimport type { ExpectedOutcomeFieldType } from '../../types/llm-test-runner';\n\nexport interface EvaluationRequest {\n testCaseId: string;\n question: string;\n expectedOutcome: string;\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface FieldEvaluationInput {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface EvaluationRequestV2 {\n testCaseId: string;\n question: string;\n fields: FieldEvaluationInput[];\n}\n\nexport interface EvaluationResult {\n testCaseId: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n fieldResults?: FieldEvaluationResult[];\n timestamp?: string;\n evaluationParameters?: EvaluationParameters;\n evaluationApproachResult?: EvaluationApproachResult;\n}\n\nexport interface FieldEvaluationResult {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n evaluationParameters: EvaluationParameters;\n evaluationApproachResult: EvaluationApproachResult;\n error?: string;\n}\n\nexport interface KeywordMatch {\n keyword: string;\n found: boolean;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport type EvaluationCallback = (result: EvaluationResult) => void;\n\nexport interface RougeKeywordDetails {\n rouge1: number;\n rougeL: number;\n scoreUsed: string;\n approach: string;\n}\n\nexport interface Rouge1OverallDetails {\n keywordsPassed: number;\n totalKeywords: number;\n passRate: string;\n thresholdUsed: number;\n approach: string;\n}\n"]}
|
|
@@ -1,14 +1,20 @@
|
|
|
1
1
|
import { createTestCaseFromInput } from "../test-cases/test-case-factory";
|
|
2
2
|
import { validateTestCaseInputArray } from "../../schemas/test-case";
|
|
3
|
+
import { validateExpectedOutcomeArrayWithExtractors } from "../../schemas/expected-outcome";
|
|
3
4
|
/**
|
|
4
5
|
* Validates and imports test cases from JSON content
|
|
5
6
|
* @param jsonContent - The JSON string to parse and validate
|
|
6
7
|
* @returns Validation result with test cases or error message
|
|
7
8
|
*/
|
|
8
|
-
export function importTestSuite(jsonContent) {
|
|
9
|
+
export function importTestSuite(jsonContent, allowedExtractorIds = []) {
|
|
9
10
|
try {
|
|
10
11
|
const parsed = JSON.parse(jsonContent);
|
|
11
12
|
validateTestCaseInputArray(parsed);
|
|
13
|
+
if (allowedExtractorIds.length > 0) {
|
|
14
|
+
parsed.forEach((testCase) => {
|
|
15
|
+
validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, allowedExtractorIds);
|
|
16
|
+
});
|
|
17
|
+
}
|
|
12
18
|
const testCases = parsed.map((item, index) => {
|
|
13
19
|
try {
|
|
14
20
|
return createTestCaseFromInput(item);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-suite-importer.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-suite-importer.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,EAAE,0BAA0B,EAAE,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"test-suite-importer.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-suite-importer.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,EAAE,0BAA0B,EAAE,MAAM,yBAAyB,CAAC;AACrE,OAAO,EAAE,0CAA0C,EAAE,MAAM,gCAAgC,CAAC;AAQ5F;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAC7B,WAAmB,EACnB,sBAAgC,EAAE;IAElC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QACvC,0BAA0B,CAAC,MAAM,CAAC,CAAC;QACnC,IAAI,mBAAmB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;gBAC1B,0CAA0C,CACxC,QAAQ,CAAC,eAAe,EACxB,mBAAmB,CACpB,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;YAC3C,IAAI,CAAC;gBACH,OAAO,uBAAuB,CAAC,IAAI,CAAC,CAAC;YACvC,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;gBACrE,MAAM,IAAI,KAAK,CAAC,8BAA8B,KAAK,KAAK,OAAO,EAAE,CAAC,CAAC;YACrE,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO;YACL,OAAO,EAAE,IAAI;YACb,SAAS;SACV,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EACH,GAAG,YAAY,KAAK;gBAClB,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,gEAAgE;SACvE,CAAC;IACJ,CAAC;AACH,CAAC","sourcesContent":["import type { TestCase } from '../../types/llm-test-runner';\nimport { createTestCaseFromInput } from '../test-cases/test-case-factory';\nimport { validateTestCaseInputArray } from '../../schemas/test-case';\nimport { validateExpectedOutcomeArrayWithExtractors } from '../../schemas/expected-outcome';\n\nexport interface ImportValidationResult {\n success: boolean;\n testCases?: TestCase[];\n error?: string;\n}\n\n/**\n * Validates and imports test cases from JSON content\n * @param jsonContent - The JSON string to parse and validate\n * @returns Validation result with test cases or error message\n */\nexport function importTestSuite(\n jsonContent: string,\n allowedExtractorIds: string[] = [],\n): ImportValidationResult {\n try {\n const parsed = 
JSON.parse(jsonContent);\n validateTestCaseInputArray(parsed);\n if (allowedExtractorIds.length > 0) {\n parsed.forEach((testCase) => {\n validateExpectedOutcomeArrayWithExtractors(\n testCase.expectedOutcome,\n allowedExtractorIds,\n );\n });\n }\n\n const testCases = parsed.map((item, index) => {\n try {\n return createTestCaseFromInput(item);\n } catch (err) {\n const message = err instanceof Error ? err.message : 'Unknown error';\n throw new Error(`Invalid test case at index ${index}: ${message}`);\n }\n });\n\n return {\n success: true,\n testCases,\n };\n } catch (err) {\n return {\n success: false,\n error:\n err instanceof Error\n ? err.message\n : 'Error processing file. Please ensure it is a valid JSON array.',\n };\n }\n}\n\n"]}
|
|
@@ -11,6 +11,7 @@ export const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
|
|
|
11
11
|
function normalizeExpectedOutcomeField(field) {
|
|
12
12
|
return {
|
|
13
13
|
...field,
|
|
14
|
+
evaluationSource: field.evaluationSource || { type: 'text' },
|
|
14
15
|
evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
|
|
15
16
|
};
|
|
16
17
|
}
|
|
@@ -34,6 +35,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
34
35
|
type: 'text',
|
|
35
36
|
label: schemaField.label,
|
|
36
37
|
placeholder: schemaField.placeholder,
|
|
38
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
37
39
|
value: '',
|
|
38
40
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
39
41
|
};
|
|
@@ -42,6 +44,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
42
44
|
type: 'textarea',
|
|
43
45
|
label: schemaField.label,
|
|
44
46
|
placeholder: schemaField.placeholder,
|
|
47
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
45
48
|
rows: schemaField.rows,
|
|
46
49
|
value: '',
|
|
47
50
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
@@ -51,6 +54,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
51
54
|
type: 'chips-input',
|
|
52
55
|
label: schemaField.label,
|
|
53
56
|
placeholder: schemaField.placeholder,
|
|
57
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
54
58
|
value: [],
|
|
55
59
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
56
60
|
};
|
|
@@ -59,6 +63,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
59
63
|
type: 'select',
|
|
60
64
|
label: schemaField.label,
|
|
61
65
|
placeholder: schemaField.placeholder,
|
|
66
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
62
67
|
value: schemaField.options[0],
|
|
63
68
|
options: schemaField.options,
|
|
64
69
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-case-factory.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AASpC,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAEhG,MAAM,CAAC,MAAM,+BAA+B,GAA0B;IACpE;QACE,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,2BAA2B;QACxC,IAAI,EAAE,CAAC;KACR;CACF,CAAC;AAEF,SAAS,6BAA6B,CACpC,KAA2B;IAE3B,OAAO;QACL,GAAG,KAAK;QACR,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,wBAA+C,+BAA+B;IAE9E,OAAO;QACL,EAAE,EAAE,MAAM,EAAE;QACZ,QAAQ,EAAE,EAAE;QACZ,eAAe,EAAE,+BAA+B,CAAC,qBAAqB,CAAC;QACvE,WAAW,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;QAC1C,SAAS,EAAE,KAAK;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,oCAAoC,CAC3C,WAAuC;IAEvC,QAAQ,WAAW,CAAC,IAAI,EAAE,CAAC;QACzB,KAAK,MAAM;YACT,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,UAAU;YACb,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,IAAI,EAAE,WAAW,CAAC,IAAI;gBACtB,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,aAAa;YAChB,OAAO;gBACL,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,QAAQ;YACX,OAAO;gBACL,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7B,OAAO,EAAE,WAAW,CAAC,OAAO;gBAC5B,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CAC6B;aAChE,CAAC;QAEJ,OAAO,CAAC,CAAC,CAAC;YACR,MAAM,gBAAgB,GAAU,WAAW,CAAC;YAC5C,OAAO,gBAAgB,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,+BAA+B,CAC7C,qBAA4C;IAE5C,OAAO,qBAAqB,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;
AACzE,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,uBAAuB,CAAC,IAAmB;IACzD,OAAO;QACL,GAAG,IAAI;QACP,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;QAC9D,eAAe,EAAE,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,6BAA6B,CAAC;KACzE,CAAC;AACJ,CAAC","sourcesContent":["import { v4 as uuidv4 } from 'uuid';\nimport {\n ExpectedOutcomeField,\n ExpectedOutcomeSchema,\n ExpectedOutcomeSchemaField,\n TestCase,\n TestCaseInput,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nexport const DEFAULT_EXPECTED_OUTCOME_SCHEMA: ExpectedOutcomeSchema = [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n placeholder: 'Enter expected outcome...',\n rows: 2,\n },\n];\n\nfunction normalizeExpectedOutcomeField(\n field: ExpectedOutcomeField,\n): ExpectedOutcomeField {\n return {\n ...field,\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n };\n}\n\n/**\n * Creates a new test case with default values\n * @returns A new TestCase object with a unique ID\n */\nexport function createTestCase(\n expectedOutcomeSchema: ExpectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA,\n): TestCase {\n return {\n id: uuidv4(),\n question: '',\n expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),\n chatHistory: { enabled: false, value: '' },\n isRunning: false,\n };\n}\n\nfunction createExpectedOutcomeFieldFromSchema(\n schemaField: ExpectedOutcomeSchemaField,\n): ExpectedOutcomeField {\n switch (schemaField.type) {\n case 'text':\n return {\n type: 'text',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'textarea':\n return {\n type: 'textarea',\n label: schemaField.label,\n placeholder: 
schemaField.placeholder,\n rows: schemaField.rows,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'chips-input':\n return {\n type: 'chips-input',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: [],\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'select':\n return {\n type: 'select',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: schemaField.options[0],\n options: schemaField.options,\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ) as { approach: EvaluationApproach.EXACT; threshold?: number },\n };\n\n default: {\n const _exhaustiveCheck: never = schemaField;\n return _exhaustiveCheck;\n }\n }\n}\n\nexport function createExpectedOutcomeFromSchema(\n expectedOutcomeSchema: ExpectedOutcomeSchema,\n): ExpectedOutcomeField[] {\n return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);\n}\n\n/**\n * Creates a runtime test case from validated input data.\n * The input is expected to already satisfy `TestCaseInput`,\n * and this function only performs normalization/defaulting.\n *\n * @param data - Validated test case input\n * @returns A normalized TestCase object with runtime defaults applied\n */\nexport function createTestCaseFromInput(data: TestCaseInput): TestCase {\n return {\n ...data,\n chatHistory: data.chatHistory ?? { enabled: false, value: '' },\n expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),\n };\n}\n"]}
|
|
1
|
+
{"version":3,"file":"test-case-factory.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AASpC,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAEhG,MAAM,CAAC,MAAM,+BAA+B,GAA0B;IACpE;QACE,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,2BAA2B;QACxC,IAAI,EAAE,CAAC;KACR;CACF,CAAC;AAEF,SAAS,6BAA6B,CACpC,KAA2B;IAE3B,OAAO;QACL,GAAG,KAAK;QACR,gBAAgB,EAAE,KAAK,CAAC,gBAAgB,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;QAC5D,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,wBAA+C,+BAA+B;IAE9E,OAAO;QACL,EAAE,EAAE,MAAM,EAAE;QACZ,QAAQ,EAAE,EAAE;QACZ,eAAe,EAAE,+BAA+B,CAAC,qBAAqB,CAAC;QACvE,WAAW,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;QAC1C,SAAS,EAAE,KAAK;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,oCAAoC,CAC3C,WAAuC;IAEvC,QAAQ,WAAW,CAAC,IAAI,EAAE,CAAC;QACzB,KAAK,MAAM;YACT,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,gBAAgB,EAAE,WAAW,CAAC,gBAAgB,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;gBAClE,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,UAAU;YACb,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,gBAAgB,EAAE,WAAW,CAAC,gBAAgB,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;gBAClE,IAAI,EAAE,WAAW,CAAC,IAAI;gBACtB,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,aAAa;YAChB,OAAO;gBACL,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,gBAAgB,EAAE,WAAW,CAAC,gBAAgB,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;gBAClE,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,QAAQ;YACX,OAAO;gBACL,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,gBAAgB,EAAE,WAAW,CAAC,gBAAgB,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;gBAClE,KAAK,EAAE,WAAW,CAAC,OAAO,C
AAC,CAAC,CAAC;gBAC7B,OAAO,EAAE,WAAW,CAAC,OAAO;gBAC5B,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CAC6B;aAChE,CAAC;QAEJ,OAAO,CAAC,CAAC,CAAC;YACR,MAAM,gBAAgB,GAAU,WAAW,CAAC;YAC5C,OAAO,gBAAgB,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,+BAA+B,CAC7C,qBAA4C;IAE5C,OAAO,qBAAqB,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;AACzE,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,uBAAuB,CAAC,IAAmB;IACzD,OAAO;QACL,GAAG,IAAI;QACP,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;QAC9D,eAAe,EAAE,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,6BAA6B,CAAC;KACzE,CAAC;AACJ,CAAC","sourcesContent":["import { v4 as uuidv4 } from 'uuid';\nimport {\n ExpectedOutcomeField,\n ExpectedOutcomeSchema,\n ExpectedOutcomeSchemaField,\n TestCase,\n TestCaseInput,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nexport const DEFAULT_EXPECTED_OUTCOME_SCHEMA: ExpectedOutcomeSchema = [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n placeholder: 'Enter expected outcome...',\n rows: 2,\n },\n];\n\nfunction normalizeExpectedOutcomeField(\n field: ExpectedOutcomeField,\n): ExpectedOutcomeField {\n return {\n ...field,\n evaluationSource: field.evaluationSource || { type: 'text' },\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n };\n}\n\n/**\n * Creates a new test case with default values\n * @returns A new TestCase object with a unique ID\n */\nexport function createTestCase(\n expectedOutcomeSchema: ExpectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA,\n): TestCase {\n return {\n id: uuidv4(),\n question: '',\n expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),\n chatHistory: { enabled: false, value: '' },\n isRunning: false,\n };\n}\n\nfunction createExpectedOutcomeFieldFromSchema(\n schemaField: ExpectedOutcomeSchemaField,\n): 
ExpectedOutcomeField {\n switch (schemaField.type) {\n case 'text':\n return {\n type: 'text',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n evaluationSource: schemaField.evaluationSource || { type: 'text' },\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'textarea':\n return {\n type: 'textarea',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n evaluationSource: schemaField.evaluationSource || { type: 'text' },\n rows: schemaField.rows,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'chips-input':\n return {\n type: 'chips-input',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n evaluationSource: schemaField.evaluationSource || { type: 'text' },\n value: [],\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'select':\n return {\n type: 'select',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n evaluationSource: schemaField.evaluationSource || { type: 'text' },\n value: schemaField.options[0],\n options: schemaField.options,\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ) as { approach: EvaluationApproach.EXACT; threshold?: number },\n };\n\n default: {\n const _exhaustiveCheck: never = schemaField;\n return _exhaustiveCheck;\n }\n }\n}\n\nexport function createExpectedOutcomeFromSchema(\n expectedOutcomeSchema: ExpectedOutcomeSchema,\n): ExpectedOutcomeField[] {\n return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);\n}\n\n/**\n * Creates a runtime test case from validated input data.\n * The input is expected to already satisfy `TestCaseInput`,\n * and this function only performs 
normalization/defaulting.\n *\n * @param data - Validated test case input\n * @returns A normalized TestCase object with runtime defaults applied\n */\nexport function createTestCaseFromInput(data: TestCaseInput): TestCase {\n return {\n ...data,\n chatHistory: data.chatHistory ?? { enabled: false, value: '' },\n expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),\n };\n}\n"]}
|
|
@@ -1,4 +1,13 @@
|
|
|
1
1
|
import { normalizeEvaluationParametersForField } from "../evaluation/field-evaluation-approach";
|
|
2
|
+
function isChipsInputField(field) {
|
|
3
|
+
return field.type === 'chips-input';
|
|
4
|
+
}
|
|
5
|
+
function isTextareaField(field) {
|
|
6
|
+
return field.type === 'textarea';
|
|
7
|
+
}
|
|
8
|
+
function isDynamicTextareaField(field) {
|
|
9
|
+
return isTextareaField(field) && field.outcomeMode === 'dynamic';
|
|
10
|
+
}
|
|
2
11
|
export function applyExpectedOutcomeChange(testCase, change) {
|
|
3
12
|
const { index } = change;
|
|
4
13
|
const expectedOutcome = [...(testCase.expectedOutcome || [])];
|
|
@@ -6,73 +15,99 @@ export function applyExpectedOutcomeChange(testCase, change) {
|
|
|
6
15
|
if (!target) {
|
|
7
16
|
return testCase;
|
|
8
17
|
}
|
|
18
|
+
const commit = (updatedField) => {
|
|
19
|
+
expectedOutcome[index] = updatedField;
|
|
20
|
+
return { ...testCase, expectedOutcome };
|
|
21
|
+
};
|
|
9
22
|
switch (change.operation) {
|
|
10
23
|
case 'set-value': {
|
|
11
|
-
if (target
|
|
24
|
+
if (isChipsInputField(target)) {
|
|
12
25
|
return testCase;
|
|
13
26
|
}
|
|
14
|
-
if (target
|
|
27
|
+
if (isDynamicTextareaField(target)) {
|
|
15
28
|
return testCase;
|
|
16
29
|
}
|
|
17
|
-
|
|
30
|
+
return commit({
|
|
18
31
|
...target,
|
|
19
32
|
value: change.value,
|
|
20
|
-
};
|
|
21
|
-
return { ...testCase, expectedOutcome };
|
|
33
|
+
});
|
|
22
34
|
}
|
|
23
35
|
case 'add-chip': {
|
|
24
|
-
if (target
|
|
36
|
+
if (!isChipsInputField(target)) {
|
|
25
37
|
return testCase;
|
|
26
38
|
}
|
|
27
|
-
|
|
39
|
+
return commit({
|
|
28
40
|
...target,
|
|
29
41
|
value: [...target.value, change.value],
|
|
30
|
-
};
|
|
31
|
-
return { ...testCase, expectedOutcome };
|
|
42
|
+
});
|
|
32
43
|
}
|
|
33
44
|
case 'remove-chip': {
|
|
34
|
-
if (target
|
|
45
|
+
if (!isChipsInputField(target)) {
|
|
35
46
|
return testCase;
|
|
36
47
|
}
|
|
37
|
-
|
|
48
|
+
return commit({
|
|
38
49
|
...target,
|
|
39
50
|
value: target.value.filter(chip => chip !== change.value),
|
|
40
|
-
};
|
|
41
|
-
return { ...testCase, expectedOutcome };
|
|
51
|
+
});
|
|
42
52
|
}
|
|
43
53
|
case 'set-evaluation-approach':
|
|
44
54
|
return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
|
|
45
55
|
case 'set-outcome-mode': {
|
|
46
|
-
if (target
|
|
56
|
+
if (!isTextareaField(target)) {
|
|
47
57
|
return testCase;
|
|
48
58
|
}
|
|
49
59
|
const mode = change.value;
|
|
50
60
|
if (mode === 'static') {
|
|
51
61
|
const { resolutionQuery: _, ...rest } = target;
|
|
52
|
-
|
|
62
|
+
return commit({
|
|
53
63
|
...rest,
|
|
54
64
|
outcomeMode: 'static',
|
|
55
65
|
value: '',
|
|
56
|
-
};
|
|
66
|
+
});
|
|
57
67
|
}
|
|
58
68
|
else {
|
|
59
|
-
|
|
69
|
+
return commit({
|
|
60
70
|
...target,
|
|
61
71
|
outcomeMode: 'dynamic',
|
|
62
72
|
value: '',
|
|
63
|
-
};
|
|
73
|
+
});
|
|
64
74
|
}
|
|
65
|
-
return { ...testCase, expectedOutcome };
|
|
66
75
|
}
|
|
67
76
|
case 'set-resolution-query': {
|
|
68
|
-
if (target
|
|
77
|
+
if (!isDynamicTextareaField(target)) {
|
|
69
78
|
return testCase;
|
|
70
79
|
}
|
|
71
|
-
|
|
80
|
+
return commit({
|
|
72
81
|
...target,
|
|
73
82
|
resolutionQuery: change.value,
|
|
74
|
-
};
|
|
75
|
-
|
|
83
|
+
});
|
|
84
|
+
}
|
|
85
|
+
case 'set-evaluation-source-type': {
|
|
86
|
+
if (change.value === 'text') {
|
|
87
|
+
return commit({
|
|
88
|
+
...target,
|
|
89
|
+
evaluationSource: { type: 'text' },
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
const extractorId = target.evaluationSource?.type === 'custom'
|
|
93
|
+
? target.evaluationSource.extractorId
|
|
94
|
+
: (change.fallbackExtractorId ?? '');
|
|
95
|
+
return commit({
|
|
96
|
+
...target,
|
|
97
|
+
evaluationSource: {
|
|
98
|
+
type: 'custom',
|
|
99
|
+
extractorId,
|
|
100
|
+
},
|
|
101
|
+
});
|
|
102
|
+
}
|
|
103
|
+
case 'set-evaluation-source-extractor': {
|
|
104
|
+
return commit({
|
|
105
|
+
...target,
|
|
106
|
+
evaluationSource: {
|
|
107
|
+
type: 'custom',
|
|
108
|
+
extractorId: change.value,
|
|
109
|
+
},
|
|
110
|
+
});
|
|
76
111
|
}
|
|
77
112
|
}
|
|
78
113
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-case-mutations.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-mutations.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"test-case-mutations.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-mutations.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAEhG,SAAS,iBAAiB,CACxB,KAA2B;IAE3B,OAAO,KAAK,CAAC,IAAI,KAAK,aAAa,CAAC;AACtC,CAAC;AAED,SAAS,eAAe,CACtB,KAA2B;IAE3B,OAAO,KAAK,CAAC,IAAI,KAAK,UAAU,CAAC;AACnC,CAAC;AAED,SAAS,sBAAsB,CAC7B,KAA2B;IAE3B,OAAO,eAAe,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,WAAW,KAAK,SAAS,CAAC;AACnE,CAAC;AA8CD,MAAM,UAAU,0BAA0B,CACxC,QAAkB,EAClB,MAA6B;IAE7B,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IACzB,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,YAAkC,EAAY,EAAE;QAC9D,eAAe,CAAC,KAAK,CAAC,GAAG,YAAY,CAAC;QACtC,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;IAC1C,CAAC,CAAC;IAEF,QAAQ,MAAM,CAAC,SAAS,EAAE,CAAC;QACzB,KAAK,WAAW,CAAC,CAAC,CAAC;YACjB,IAAI,iBAAiB,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC9B,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,IAAI,sBAAsB,CAAC,MAAM,CAAC,EAAE,CAAC;gBACnC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC,CAAC;QACL,CAAC;QACD,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC/B,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,KAAK,EAAE,CAAC,GAAG,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC;aACvC,CAAC,CAAC;QACL,CAAC;QACD,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC/B,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,KAAK,MAAM,CAAC,KAAK,CAAC;aAC1D,CAAC,CAAC;QACL,CAAC;QACD,KAAK,yBAAyB;YAC5B,OAAO,kCAAkC,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QAC3E,KAAK,kBAAkB,CAAC,CAAC,CAAC;YACxB,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC7B,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC;YAC1B,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,MAAM,EAAE,eAAe,EAAE,CAAC,E
AAE,GAAG,IAAI,EAAE,GAAG,MAAM,CAAC;gBAC/C,OAAO,MAAM,CAAC;oBACZ,GAAG,IAAI;oBACP,WAAW,EAAE,QAAQ;oBACrB,KAAK,EAAE,EAAE;iBACV,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,OAAO,MAAM,CAAC;oBACZ,GAAG,MAAM;oBACT,WAAW,EAAE,SAAS;oBACtB,KAAK,EAAE,EAAE;iBACV,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QACD,KAAK,sBAAsB,CAAC,CAAC,CAAC;YAC5B,IAAI,CAAC,sBAAsB,CAAC,MAAM,CAAC,EAAE,CAAC;gBACpC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,eAAe,EAAE,MAAM,CAAC,KAAK;aAC9B,CAAC,CAAC;QACL,CAAC;QACD,KAAK,4BAA4B,CAAC,CAAC,CAAC;YAClC,IAAI,MAAM,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;gBAC5B,OAAO,MAAM,CAAC;oBACZ,GAAG,MAAM;oBACT,gBAAgB,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;iBACnC,CAAC,CAAC;YACL,CAAC;YAED,MAAM,WAAW,GACf,MAAM,CAAC,gBAAgB,EAAE,IAAI,KAAK,QAAQ;gBACxC,CAAC,CAAC,MAAM,CAAC,gBAAgB,CAAC,WAAW;gBACrC,CAAC,CAAC,CAAC,MAAM,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC;YACzC,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,gBAAgB,EAAE;oBAChB,IAAI,EAAE,QAAQ;oBACd,WAAW;iBACZ;aACF,CAAC,CAAC;QACL,CAAC;QACD,KAAK,iCAAiC,CAAC,CAAC,CAAC;YACvC,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,gBAAgB,EAAE;oBAChB,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,MAAM,CAAC,KAAK;iBAC1B;aACF,CAAC,CAAC;QACL,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kCAAkC,CAChD,QAAkB,EAClB,UAAkB,EAClB,QAA4B;IAE5B,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;IAE3C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,2BAA2B,GAAG,MAAM,CAAC,oBAAoB,CAAC;IAChE,eAAe,CAAC,UAAU,CAAC,GAAG;QAC5B,GAAG,MAAM;QACT,oBAAoB,EAAE,qCAAqC,CAAC,MAAM,CAAC,IAAI,EAAE;YACvE,GAAG,2BAA2B;YAC9B,QAAQ;SACT,CAAC;KACH,CAAC;IAEF,OAAO;QACL,GAAG,QAAQ;QACX,eAAe;KAChB,CAAC;AACJ,CAAC","sourcesContent":["import {\n TestCase,\n type ExpectedOutcomeField,\n type EvaluationSource,\n type ExpectedOutcomeMode,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nfunction isChipsInputField(\n field: 
ExpectedOutcomeField,\n): field is Extract<ExpectedOutcomeField, { type: 'chips-input' }> {\n return field.type === 'chips-input';\n}\n\nfunction isTextareaField(\n field: ExpectedOutcomeField,\n): field is Extract<ExpectedOutcomeField, { type: 'textarea' }> {\n return field.type === 'textarea';\n}\n\nfunction isDynamicTextareaField(\n field: ExpectedOutcomeField,\n): field is Extract<ExpectedOutcomeField, { type: 'textarea' }> {\n return isTextareaField(field) && field.outcomeMode === 'dynamic';\n}\n\n\nexport type ExpectedOutcomeChange =\n | {\n index: number;\n operation: 'set-value';\n value: string;\n }\n | {\n index: number;\n operation: 'add-chip';\n value: string;\n }\n | {\n index: number;\n operation: 'remove-chip';\n value: string;\n }\n | {\n index: number;\n operation: 'set-evaluation-approach';\n value: EvaluationApproach;\n }\n | {\n index: number;\n operation: 'set-outcome-mode';\n value: ExpectedOutcomeMode;\n }\n | {\n index: number;\n operation: 'set-resolution-query';\n value: string;\n }\n | {\n index: number;\n operation: 'set-evaluation-source-type';\n value: EvaluationSource['type'];\n fallbackExtractorId?: string;\n }\n | {\n index: number;\n operation: 'set-evaluation-source-extractor';\n value: string;\n };\n\nexport function applyExpectedOutcomeChange(\n testCase: TestCase,\n change: ExpectedOutcomeChange,\n): TestCase {\n const { index } = change;\n const expectedOutcome = [...(testCase.expectedOutcome || [])];\n const target = expectedOutcome[index];\n\n if (!target) {\n return testCase;\n }\n\n const commit = (updatedField: ExpectedOutcomeField): TestCase => {\n expectedOutcome[index] = updatedField;\n return { ...testCase, expectedOutcome };\n };\n\n switch (change.operation) {\n case 'set-value': {\n if (isChipsInputField(target)) {\n return testCase;\n }\n if (isDynamicTextareaField(target)) {\n return testCase;\n }\n return commit({\n ...target,\n value: change.value,\n });\n }\n case 'add-chip': {\n if 
(!isChipsInputField(target)) {\n return testCase;\n }\n return commit({\n ...target,\n value: [...target.value, change.value],\n });\n }\n case 'remove-chip': {\n if (!isChipsInputField(target)) {\n return testCase;\n }\n return commit({\n ...target,\n value: target.value.filter(chip => chip !== change.value),\n });\n }\n case 'set-evaluation-approach':\n return updateExpectedOutcomeFieldApproach(testCase, index, change.value);\n case 'set-outcome-mode': {\n if (!isTextareaField(target)) {\n return testCase;\n }\n const mode = change.value;\n if (mode === 'static') {\n const { resolutionQuery: _, ...rest } = target;\n return commit({\n ...rest,\n outcomeMode: 'static',\n value: '',\n });\n } else {\n return commit({\n ...target,\n outcomeMode: 'dynamic',\n value: '',\n });\n }\n }\n case 'set-resolution-query': {\n if (!isDynamicTextareaField(target)) {\n return testCase;\n }\n return commit({\n ...target,\n resolutionQuery: change.value,\n });\n }\n case 'set-evaluation-source-type': {\n if (change.value === 'text') {\n return commit({\n ...target,\n evaluationSource: { type: 'text' },\n });\n }\n\n const extractorId =\n target.evaluationSource?.type === 'custom'\n ? target.evaluationSource.extractorId\n : (change.fallbackExtractorId ?? 
'');\n return commit({\n ...target,\n evaluationSource: {\n type: 'custom',\n extractorId,\n },\n });\n }\n case 'set-evaluation-source-extractor': {\n return commit({\n ...target,\n evaluationSource: {\n type: 'custom',\n extractorId: change.value,\n },\n });\n }\n }\n}\n\n/**\n * Updates the evaluation approach for a specific expected outcome field.\n * Select fields always use exact matching.\n */\nexport function updateExpectedOutcomeFieldApproach(\n testCase: TestCase,\n fieldIndex: number,\n approach: EvaluationApproach,\n): TestCase {\n const expectedOutcome = [...(testCase.expectedOutcome || [])];\n const target = expectedOutcome[fieldIndex];\n\n if (!target) {\n return testCase;\n }\n\n const currentEvaluationParameters = target.evaluationParameters;\n expectedOutcome[fieldIndex] = {\n ...target,\n evaluationParameters: normalizeEvaluationParametersForField(target.type, {\n ...currentEvaluationParameters,\n approach,\n }),\n };\n\n return {\n ...testCase,\n expectedOutcome,\n };\n}\n"]}
|