llm-testrunner-components 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.cjs.js +12 -7
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/cjs/llm-testrunner.cjs.js +1 -1
- package/dist/cjs/loader.cjs.js +1 -1
- package/dist/collection/components/llm-test-runner/header/llm-test-runner-header.js +2 -2
- package/dist/collection/components/llm-test-runner/header/llm-test-runner-header.js.map +1 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +54 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js +4 -3
- package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-factory.js +1 -1
- package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
- package/dist/components/index.js +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/{p-Bb89MYYu.js → p-CJBscebi.js} +3 -3
- package/dist/components/p-CJBscebi.js.map +1 -0
- package/dist/esm/index.js +12 -7
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm-testrunner.js +1 -1
- package/dist/esm/loader.js +1 -1
- package/dist/llm-testrunner/index.esm.js +2 -2
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
- package/dist/types/components/llm-test-runner/header/llm-test-runner-header.d.ts +1 -0
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +2 -0
- package/dist/types/components.d.ts +12 -0
- package/package.json +5 -1
- package/dist/components/p-Bb89MYYu.js.map +0 -1
|
@@ -19,7 +19,7 @@ var patchBrowser = () => {
|
|
|
19
19
|
|
|
20
20
|
patchBrowser().then(async (options) => {
|
|
21
21
|
await appGlobals.globalScripts();
|
|
22
|
-
return index.bootstrapLazy([["app-chips_4.cjs",[[513,"llm-test-runner",{"delayMs":[2,"delay-ms"],"useSave":[4,"use-save"],"initialTestCases":[16],"defaultExpectedOutcomeSchema":[16],"testCases":[32],"isRunningAll":[32],"error":[32],"isExportingTestSuite":[32],"isExportingTestResults":[32],"isSaving":[32],"resetSavingState":[64]}],[513,"app-chips",{"value":[16],"config":[16]}],[513,"app-select",{"value":[1],"config":[16]}],[513,"app-textarea",{"value":[1],"config":[16]}]]]], options);
|
|
22
|
+
return index.bootstrapLazy([["app-chips_4.cjs",[[513,"llm-test-runner",{"delayMs":[2,"delay-ms"],"useSave":[4,"use-save"],"usePromptEditor":[4,"use-prompt-editor"],"initialTestCases":[16],"defaultExpectedOutcomeSchema":[16],"testCases":[32],"isRunningAll":[32],"error":[32],"isExportingTestSuite":[32],"isExportingTestResults":[32],"isSaving":[32],"resetSavingState":[64],"getTestCases":[64]}],[513,"app-chips",{"value":[16],"config":[16]}],[513,"app-select",{"value":[1],"config":[16]}],[513,"app-textarea",{"value":[1],"config":[16]}]]]], options);
|
|
23
23
|
});
|
|
24
24
|
|
|
25
25
|
exports.setNonce = index.setNonce;
|
package/dist/cjs/loader.cjs.js
CHANGED
|
@@ -6,7 +6,7 @@ var appGlobals = require('./app-globals-Chb-oJtg.js');
|
|
|
6
6
|
const defineCustomElements = async (win, options) => {
|
|
7
7
|
if (typeof window === 'undefined') return undefined;
|
|
8
8
|
await appGlobals.globalScripts();
|
|
9
|
-
return index.bootstrapLazy([["app-chips_4.cjs",[[513,"llm-test-runner",{"delayMs":[2,"delay-ms"],"useSave":[4,"use-save"],"initialTestCases":[16],"defaultExpectedOutcomeSchema":[16],"testCases":[32],"isRunningAll":[32],"error":[32],"isExportingTestSuite":[32],"isExportingTestResults":[32],"isSaving":[32],"resetSavingState":[64]}],[513,"app-chips",{"value":[16],"config":[16]}],[513,"app-select",{"value":[1],"config":[16]}],[513,"app-textarea",{"value":[1],"config":[16]}]]]], options);
|
|
9
|
+
return index.bootstrapLazy([["app-chips_4.cjs",[[513,"llm-test-runner",{"delayMs":[2,"delay-ms"],"useSave":[4,"use-save"],"usePromptEditor":[4,"use-prompt-editor"],"initialTestCases":[16],"defaultExpectedOutcomeSchema":[16],"testCases":[32],"isRunningAll":[32],"error":[32],"isExportingTestSuite":[32],"isExportingTestResults":[32],"isSaving":[32],"resetSavingState":[64],"getTestCases":[64]}],[513,"app-chips",{"value":[16],"config":[16]}],[513,"app-select",{"value":[1],"config":[16]}],[513,"app-textarea",{"value":[1],"config":[16]}]]]], options);
|
|
10
10
|
};
|
|
11
11
|
|
|
12
12
|
exports.setNonce = index.setNonce;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { h } from "@stencil/core";
|
|
2
2
|
import { Button } from "../../../lib/ui/button/index";
|
|
3
|
-
export const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isRunningAll, useSave = false, isSaving = false, onImport, onExportSuite, onExportResults, onRunAll, onSave, }) => {
|
|
3
|
+
export const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isRunningAll, useSave = false, isSaving = false, usePromptEditor = false, onImport, onExportSuite, onExportResults, onRunAll, onSave, }) => {
|
|
4
4
|
let fileInputRef;
|
|
5
5
|
const handleFileSelect = () => {
|
|
6
6
|
fileInputRef?.click();
|
|
@@ -13,6 +13,6 @@ export const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResul
|
|
|
13
13
|
onImport(file);
|
|
14
14
|
}
|
|
15
15
|
};
|
|
16
|
-
return (h("header", { class: "test-runner-header" }, h("div", { class: "test-runner-header__left" }, h("input", { class: "test-runner-header--hidden", type: "file", ref: el => (fileInputRef = el), onChange: handleFileChange, accept: ".json,application/json" }), h(Button, { variant: "secondary", size: "md", onClick: handleFileSelect, icon: "\u2191" }, "Import Test Suite"), h(Button, { variant: "secondary", size: "md", onClick: onExportSuite, disabled: isExportingTestSuite, loading: isExportingTestSuite, icon: isExportingTestSuite ? '⏳' : '↓' }, isExportingTestSuite ? 'Exporting...' : 'Export Test Suite')), h("div", { class: "test-runner-header__right" }, h(Button, { variant: "secondary", size: "md", icon: "\u2699\uFE0F" }, "Prompt Editor"), h(Button, { variant: "secondary", size: "md", onClick: onExportResults, disabled: isExportingTestResults, loading: isExportingTestResults, icon: isExportingTestResults ? '⏳' : '↓' }, isExportingTestResults ? 'Exporting...' : 'Export Test Results'), useSave && (h(Button, { variant: "secondary", size: "md", onClick: onSave, disabled: isSaving, loading: isSaving, icon: isSaving ? '⏳' : '💾' }, isSaving ? 'Saving...' : 'Save')), h(Button, { "aria-label": "Run All", variant: "primary", size: "md", onClick: onRunAll, disabled: isRunningAll, loading: isRunningAll }, isRunningAll ? 'Running...' : 'Run All'))));
|
|
16
|
+
return (h("header", { class: "test-runner-header" }, h("div", { class: "test-runner-header__left" }, h("input", { class: "test-runner-header--hidden", type: "file", ref: el => (fileInputRef = el), onChange: handleFileChange, accept: ".json,application/json" }), h(Button, { variant: "secondary", size: "md", onClick: handleFileSelect, icon: "\u2191" }, "Import Test Suite"), h(Button, { variant: "secondary", size: "md", onClick: onExportSuite, disabled: isExportingTestSuite, loading: isExportingTestSuite, icon: isExportingTestSuite ? '⏳' : '↓' }, isExportingTestSuite ? 'Exporting...' : 'Export Test Suite')), h("div", { class: "test-runner-header__right" }, usePromptEditor && (h(Button, { variant: "secondary", size: "md", icon: "\u2699\uFE0F" }, "Prompt Editor")), h(Button, { variant: "secondary", size: "md", onClick: onExportResults, disabled: isExportingTestResults, loading: isExportingTestResults, icon: isExportingTestResults ? '⏳' : '↓' }, isExportingTestResults ? 'Exporting...' : 'Export Test Results'), useSave && (h(Button, { variant: "secondary", size: "md", onClick: onSave, disabled: isSaving, loading: isSaving, icon: isSaving ? '⏳' : '💾' }, isSaving ? 'Saving...' : 'Save')), h(Button, { "aria-label": "Run All", variant: "primary", size: "md", onClick: onRunAll, disabled: isRunningAll, loading: isRunningAll }, isRunningAll ? 'Running...' : 'Run All'))));
|
|
17
17
|
};
|
|
18
18
|
//# sourceMappingURL=llm-test-runner-header.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm-test-runner-header.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/header/llm-test-runner-header.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AACvD,OAAO,EAAE,MAAM,EAAE,MAAM,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"llm-test-runner-header.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/header/llm-test-runner-header.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AACvD,OAAO,EAAE,MAAM,EAAE,MAAM,8BAA8B,CAAC;AAgBtD,MAAM,CAAC,MAAM,mBAAmB,GAE5B,CAAC,EACH,oBAAoB,EACpB,sBAAsB,EACtB,YAAY,EACZ,OAAO,GAAG,KAAK,EACf,QAAQ,GAAG,KAAK,EAChB,eAAe,GAAG,KAAK,EACvB,QAAQ,EACR,aAAa,EACb,eAAe,EACf,QAAQ,EACR,MAAM,GACP,EAAE,EAAE;IACH,IAAI,YAA8B,CAAC;IAEnC,MAAM,gBAAgB,GAAG,GAAG,EAAE;QAC5B,YAAY,EAAE,KAAK,EAAE,CAAC;IACxB,CAAC,CAAC;IAEF,MAAM,gBAAgB,GAAG,CAAC,KAAY,EAAE,EAAE;QACxC,MAAM,MAAM,GAAG,KAAK,CAAC,MAA0B,CAAC;QAChD,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC,sBAAsB;QACzC,IAAI,IAAI,EAAE,CAAC;YACT,QAAQ,CAAC,IAAI,CAAC,CAAC;QACjB,CAAC;IACH,CAAC,CAAC;IAEF,OAAO,CACL,cAAQ,KAAK,EAAC,oBAAoB;QAChC,WAAK,KAAK,EAAC,0BAA0B;YACnC,aACE,KAAK,EAAC,4BAA4B,EAClC,IAAI,EAAC,MAAM,EACX,GAAG,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,YAAY,GAAG,EAAsB,CAAC,EAClD,QAAQ,EAAE,gBAAgB,EAC1B,MAAM,EAAC,wBAAwB,GAC/B;YACF,EAAC,MAAM,IACL,OAAO,EAAC,WAAW,EACnB,IAAI,EAAC,IAAI,EACT,OAAO,EAAE,gBAAgB,EACzB,IAAI,EAAC,QAAG,wBAGD;YACT,EAAC,MAAM,IACL,OAAO,EAAC,WAAW,EACnB,IAAI,EAAC,IAAI,EACT,OAAO,EAAE,aAAa,EACtB,QAAQ,EAAE,oBAAoB,EAC9B,OAAO,EAAE,oBAAoB,EAC7B,IAAI,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAErC,oBAAoB,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,mBAAmB,CACrD,CACL;QAEN,WAAK,KAAK,EAAC,2BAA2B;YACnC,eAAe,IAAI,CAClB,EAAC,MAAM,IAAC,OAAO,EAAC,WAAW,EAAC,IAAI,EAAC,IAAI,EAAC,IAAI,EAAC,cAAI,oBAEtC,CACV;YACD,EAAC,MAAM,IACL,OAAO,EAAC,WAAW,EACnB,IAAI,EAAC,IAAI,EACT,OAAO,EAAE,eAAe,EACxB,QAAQ,EAAE,sBAAsB,EAChC,OAAO,EAAE,sBAAsB,EAC/B,IAAI,EAAE,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAEvC,sBAAsB,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,qBAAqB,CACzD;YACR,OAAO,IAAI,CACV,EAAC,MAAM,IACL,OAAO,EAAC,WAAW,EACnB,IAAI,EAAC,IAAI,EACT,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,QAAQ,EAClB,OAAO,EAAE,QAAQ,EACjB,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,IAE1B,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,MAAM,CACzB,CACV;YACD,EAAC,MAAM,kBACM,SAAS,EACpB,OAAO,EAAC,SAAS,EACjB,IAAI,EAAC,IAAI,EACT,OAAO,EAAE,QAAQ,EACjB,QAAQ,EAAE,YAAY,EACtB,OAAO,EAAE,YAAY,IAEpB,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,SAAS,CACjC,CACL,CACC,CACV,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { Button } from '../../../lib/ui/button/index';\n\nexport interface LLMTestRunnerHeaderProps {\n isExportingTestSuite: boolean;\n isExportingTestResults: boolean;\n isRunningAll: boolean;\n useSave?: boolean;\n isSaving?: boolean;\n usePromptEditor?: boolean;\n onImport: (file: File) => void;\n onExportSuite: () => void;\n onExportResults: () => void;\n onRunAll: () => void;\n onSave?: () => void;\n}\n\nexport const LLMTestRunnerHeader: FunctionalComponent<\n LLMTestRunnerHeaderProps\n> = ({\n isExportingTestSuite,\n isExportingTestResults,\n isRunningAll,\n useSave = false,\n isSaving = false,\n usePromptEditor = false,\n onImport,\n onExportSuite,\n onExportResults,\n onRunAll,\n onSave,\n}) => {\n let fileInputRef: HTMLInputElement;\n\n const handleFileSelect = () => {\n fileInputRef?.click();\n };\n\n const handleFileChange = (event: Event) => {\n const target = event.target as HTMLInputElement;\n const file = target.files?.[0];\n target.value = ''; // Clear for re-upload\n if (file) {\n onImport(file);\n }\n };\n\n return (\n <header class=\"test-runner-header\">\n <div class=\"test-runner-header__left\">\n <input\n class=\"test-runner-header--hidden\"\n type=\"file\"\n ref={el => (fileInputRef = el as HTMLInputElement)}\n onChange={handleFileChange}\n accept=\".json,application/json\"\n />\n <Button\n variant=\"secondary\"\n size=\"md\"\n onClick={handleFileSelect}\n icon=\"↑\"\n >\n Import Test Suite\n </Button>\n <Button\n variant=\"secondary\"\n size=\"md\"\n onClick={onExportSuite}\n disabled={isExportingTestSuite}\n loading={isExportingTestSuite}\n icon={isExportingTestSuite ? '⏳' : '↓'}\n >\n {isExportingTestSuite ? 'Exporting...' : 'Export Test Suite'}\n </Button>\n </div>\n\n <div class=\"test-runner-header__right\">\n {usePromptEditor && (\n <Button variant=\"secondary\" size=\"md\" icon=\"⚙️\">\n Prompt Editor\n </Button>\n )}\n <Button\n variant=\"secondary\"\n size=\"md\"\n onClick={onExportResults}\n disabled={isExportingTestResults}\n loading={isExportingTestResults}\n icon={isExportingTestResults ? '⏳' : '↓'}\n >\n {isExportingTestResults ? 'Exporting...' : 'Export Test Results'}\n </Button>\n {useSave && (\n <Button\n variant=\"secondary\"\n size=\"md\"\n onClick={onSave}\n disabled={isSaving}\n loading={isSaving}\n icon={isSaving ? '⏳' : '💾'}\n >\n {isSaving ? 'Saving...' : 'Save'}\n </Button>\n )}\n <Button\n aria-label=\"Run All\"\n variant=\"primary\"\n size=\"md\"\n onClick={onRunAll}\n disabled={isRunningAll}\n loading={isRunningAll}\n >\n {isRunningAll ? 'Running...' : 'Run All'}\n </Button>\n </div>\n </header>\n );\n};\n"]}
|
|
@@ -18,6 +18,7 @@ export class LLMTestRunner {
|
|
|
18
18
|
save;
|
|
19
19
|
delayMs = 500;
|
|
20
20
|
useSave = false;
|
|
21
|
+
usePromptEditor = false;
|
|
21
22
|
initialTestCases;
|
|
22
23
|
defaultExpectedOutcomeSchema;
|
|
23
24
|
testCases = [
|
|
@@ -81,6 +82,9 @@ export class LLMTestRunner {
|
|
|
81
82
|
async resetSavingState() {
|
|
82
83
|
this.isSaving = false;
|
|
83
84
|
}
|
|
85
|
+
async getTestCases() {
|
|
86
|
+
return this.testCases;
|
|
87
|
+
}
|
|
84
88
|
handleTestCaseChange = (event) => {
|
|
85
89
|
const { testCaseId, key, value } = event.detail;
|
|
86
90
|
this.testCases = this.testCases.map(tc => tc.id === testCaseId ? { ...tc, [key]: value } : tc);
|
|
@@ -243,7 +247,7 @@ export class LLMTestRunner {
|
|
|
243
247
|
}
|
|
244
248
|
}
|
|
245
249
|
render() {
|
|
246
|
-
return (h("div", { key: '
|
|
250
|
+
return (h("div", { key: '29cf8a93402ebad6f6df43e147fa10406577c9aa', class: "test-runner-container" }, h(LLMTestRunnerHeader, { key: 'a07d3d1d823f8d473808752932cd1b2ab72d9e08', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), h(ErrorMessage, { key: 'ec68912728b06fc4a76c330fb1b7d5acde92c3d1', message: this.error, onClear: () => (this.error = '') }), h("div", { key: 'ce308dd4bd5437c94ae6e3e8a28970b799865281', class: "test-runner-container__content" }, h(LLMTestCases, { key: '3368df0bb7de4d099da1fad400f59dfc9a2cfb62', testCases: this.testCases, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange }))));
|
|
247
251
|
}
|
|
248
252
|
static get is() { return "llm-test-runner"; }
|
|
249
253
|
static get encapsulation() { return "shadow"; }
|
|
@@ -299,6 +303,26 @@ export class LLMTestRunner {
|
|
|
299
303
|
"attribute": "use-save",
|
|
300
304
|
"defaultValue": "false"
|
|
301
305
|
},
|
|
306
|
+
"usePromptEditor": {
|
|
307
|
+
"type": "boolean",
|
|
308
|
+
"mutable": false,
|
|
309
|
+
"complexType": {
|
|
310
|
+
"original": "boolean",
|
|
311
|
+
"resolved": "boolean",
|
|
312
|
+
"references": {}
|
|
313
|
+
},
|
|
314
|
+
"required": false,
|
|
315
|
+
"optional": true,
|
|
316
|
+
"docs": {
|
|
317
|
+
"tags": [],
|
|
318
|
+
"text": ""
|
|
319
|
+
},
|
|
320
|
+
"getter": false,
|
|
321
|
+
"setter": false,
|
|
322
|
+
"reflect": false,
|
|
323
|
+
"attribute": "use-prompt-editor",
|
|
324
|
+
"defaultValue": "false"
|
|
325
|
+
},
|
|
302
326
|
"initialTestCases": {
|
|
303
327
|
"type": "unknown",
|
|
304
328
|
"mutable": false,
|
|
@@ -424,6 +448,35 @@ export class LLMTestRunner {
|
|
|
424
448
|
"text": "",
|
|
425
449
|
"tags": []
|
|
426
450
|
}
|
|
451
|
+
},
|
|
452
|
+
"getTestCases": {
|
|
453
|
+
"complexType": {
|
|
454
|
+
"signature": "() => Promise<TestCase[]>",
|
|
455
|
+
"parameters": [],
|
|
456
|
+
"references": {
|
|
457
|
+
"Promise": {
|
|
458
|
+
"location": "global",
|
|
459
|
+
"id": "global::Promise"
|
|
460
|
+
},
|
|
461
|
+
"EvaluationResult": {
|
|
462
|
+
"location": "import",
|
|
463
|
+
"path": "../../lib/evaluation/types",
|
|
464
|
+
"id": "src/lib/evaluation/types.ts::EvaluationResult",
|
|
465
|
+
"referenceLocation": "EvaluationResult"
|
|
466
|
+
},
|
|
467
|
+
"TestCase": {
|
|
468
|
+
"location": "import",
|
|
469
|
+
"path": "../../types/llm-test-runner",
|
|
470
|
+
"id": "src/types/llm-test-runner.ts::TestCase",
|
|
471
|
+
"referenceLocation": "TestCase"
|
|
472
|
+
}
|
|
473
|
+
},
|
|
474
|
+
"return": "Promise<{ id: string; question: string; expectedOutcome: ({ label: string; type: \"select\"; options: string[]; value: string; placeholder?: string; evaluationParameters?: { approach: EvaluationApproach; threshold?: number; }; } | { label: string; type: \"text\"; value: string; placeholder?: string; evaluationParameters?: { approach: EvaluationApproach; threshold?: number; }; } | { label: string; type: \"textarea\"; value: string; placeholder?: string; rows?: number; evaluationParameters?: { approach: EvaluationApproach; threshold?: number; }; } | { label: string; type: \"chips-input\"; value: string[]; placeholder?: string; evaluationParameters?: { approach: EvaluationApproach; threshold?: number; }; })[]; output?: string; isRunning?: boolean; error?: string; evaluationResult?: EvaluationResult; responseTime?: number; }[]>"
|
|
475
|
+
},
|
|
476
|
+
"docs": {
|
|
477
|
+
"text": "",
|
|
478
|
+
"tags": []
|
|
479
|
+
}
|
|
427
480
|
}
|
|
428
481
|
};
|
|
429
482
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm-test-runner.js","sourceRoot":"","sources":["../../../src/components/llm-test-runner/llm-test-runner.tsx"],"names":[],"mappings":"AAAA,OAAO,EACL,SAAS,EACT,KAAK,EACL,IAAI,EACJ,CAAC,EAED,KAAK,EACL,MAAM,GACP,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,gCAAgC,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,MAAM,qDAAqD,CAAC;AAOzF,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,6CAA6C,CAAC;AACpF,OAAO,EAAE,sBAAsB,EAAE,MAAM,0CAA0C,CAAC;AAClF,OAAO,EAAE,eAAe,EAAE,MAAM,6CAA6C,CAAC;AAC9E,OAAO,EACL,cAAc,EACd,uBAAuB,EACvB,+BAA+B,GAChC,MAAM,wCAAwC,CAAC;AAChD,OAAO,KAAK,iBAAiB,MAAM,0CAA0C,CAAC;AAC9E,OAAO,EAAE,iBAAiB,EAAE,MAAM,yCAAyC,CAAC;AAC5E,OAAO,EAAE,0BAA0B,EAAE,MAAM,yBAAyB,CAAC;AACrE,OAAO,EAAE,6BAA6B,EAAE,MAAM,gCAAgC,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAoB3D,MAAM,OAAO,aAAa;IACf,UAAU,CAAkC;IAC5C,IAAI,CAA4B;IACjC,OAAO,GAAY,GAAG,CAAC;IACvB,OAAO,GAAa,KAAK,CAAC;IAC1B,gBAAgB,CAAc;IAC9B,4BAA4B,CAAyB;IACpD,SAAS,GAAe;QAC/B;YACE,EAAE,EAAE,GAAG;YACP,QAAQ,EAAE,EAAE;YACZ,eAAe,EAAE;gBACf;oBACE,IAAI,EAAE,UAAU;oBAChB,KAAK,EAAE,kBAAkB;oBACzB,KAAK,EAAE,EAAE;iBACV;aACF;YACD,SAAS,EAAE,KAAK;SACjB;KACF,CAAC;IACO,YAAY,GAAY,KAAK,CAAC;IAC9B,KAAK,GAAW,EAAE,CAAC;IACnB,oBAAoB,GAAY,KAAK,CAAC;IACtC,sBAAsB,GAAY,KAAK,CAAC;IACxC,QAAQ,GAAY,KAAK,CAAC;IAE3B,iBAAiB,CAAoB;IAErC,gCAAgC;QACtC,IAAI,IAAI,CAAC,4BAA4B,KAAK,SAAS,EAAE,CAAC;YACpD,OAAO,+BAA+B,CAAC;QACzC,CAAC;QAED,6BAA6B,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACjE,OAAO,IAAI,CAAC,4BAA4B,CAAC;IAC3C,CAAC;IAED,iBAAiB;QACf,IAAI,CAAC,iBAAiB,GAAG,IAAI,iBAAiB,EAAE,CAAC;QACjD,IAAI,CAAC;YACH,6CAA6C;YAC7C,IAAI,IAAI,CAAC,gBAAgB,KAAK,SAAS,EAAE,CAAC;gBACxC,0BAA0B,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;gBAClD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,KAAK,EAAE,EAAE;oBAChE,IAAI,CAAC;wBACH,OAAO,uBAAuB,CAAC,WAAW,CAAC,CAAC;oBAC9C,CAAC;oBAAC,OAAO,GAAG,EAAE,CAAC;wBACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;wBACrE,MAAM,IAAI,KAAK,CAAC,sCAAsC,KAAK,KAAK,OAAO,EAAE,CAAC,CAAC;oBAC7E,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,MAAM,MAAM,GAAG,IAAI,CAAC,gCAAgC,EAAE,CAAC;gBACvD,IAAI,CAAC,SAAS,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,KAAK;gBACR,GAAG,YAAY,KAAK;oBAClB,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,gDAAgD,CAAC;YACvD,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC;QACtB,CAAC;IACH,CAAC;IAED,gBAAgB,KAAI,CAAC;IAErB,oBAAoB,KAAI,CAAC;IAGzB,KAAK,CAAC,gBAAgB;QACpB,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;IACxB,CAAC;IAEO,oBAAoB,GAAG,CAC7B,KAAsE,EACtE,EAAE;QACF,MAAM,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC;QAChD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CACvC,EAAE,CAAC,EAAE,KAAK,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CACpD,CAAC;IACJ,CAAC,CAAC;IAEM,cAAc;QACpB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,gCAAgC,EAAE,CAAC;YACvD,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;YAC3C,IAAI,CAAC,SAAS,GAAG,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;QACpD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,KAAK;gBACR,GAAG,YAAY,KAAK;oBAClB,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,gDAAgD,CAAC;QACzD,CAAC;IACH,CAAC;IAEO,cAAc,CAAC,EAAU,EAAE,OAA0B;QAC3D,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CACvC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAC1C,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,QAAkB;QAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACtD,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC3C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;gBACnB,MAAM,EAAE,QAAQ,CAAC,QAAQ;gBACzB,OAAO,EAAE,KAAK,EAAE,UAAkB,EAAE,EAAE;oBACpC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;oBAC3B,MAAM,YAAY,GAAG,OAAO,GAAG,SAAS,CAAC;oBACzC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,EAAE;wBAC/B,SAAS,EAAE,KAAK;wBAChB,MAAM,EAAE,UAAU;wBAClB,KAAK,EAAE,IAAI;wBACX,YAAY,EAAE,YAAY;qBAC3B,CAAC,CAAC;oBAEH,MAAM,IAAI,CAAC,gBAAgB,CAAC;wBAC1B,GAAG,QAAQ;wBACX,MAAM,EAAE,UAAU;wBAClB,YAAY,EAAE,YAAY;qBAC3B,CAAC,CAAC;oBACH,OAAO,EAAE,CAAC;gBACZ,CAAC;gBACD,MAAM,EAAE,CAAC,KAAsB,EAAE,EAAE;oBACjC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,EAAE;wBAC/B,SAAS,EAAE,KAAK;wBAChB,MAAM,EAAE,IAAI;wBACZ,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;qBAChE,CAAC,CAAC;oBACH,MAAM,CAAC,KAAK,CAAC,CAAC;gBAChB,CAAC;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,cAAc,CAAC,EAAU;QAC/B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IAC7D,CAAC;IAEO,2BAA2B,GAAG,CACpC,KAA+C,EAC/C,EAAE;QACF,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC;QAE/C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YACvC,IAAI,EAAE,CAAC,EAAE,KAAK,UAAU,EAAE,CAAC;gBACzB,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,OAAO,iBAAiB,CAAC,0BAA0B,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;IACL,CAAC,CAAC;IAEM,KAAK,CAAC,gBAAgB,CAAC,QAAkB;QAC/C,MAAM,IAAI,CAAC,iBAAiB,CAAC,gBAAgB,CAC3C,QAAQ,EACR,CAAC,MAAwB,EAAE,EAAE;YAC3B,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,EAAE;gBAC/B,gBAAgB,EAAE,MAAM;aACzB,CAAC,CAAC;QACL,CAAC,CACF,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,WAAW;QACvB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QACzB,MAAM,KAAK,GAAG,EAAE,CAAC;QACjB,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACtC,IAAI,CAAC,QAAQ,CAAC,SAAS,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;gBACpD,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CACd,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE;oBACvC,OAAO,CAAC,KAAK,CAAC,gBAAgB,QAAQ,CAAC,EAAE,SAAS,EAAE,GAAG,CAAC,CAAC;gBAC3D,CAAC,CAAC,CACH,CAAC;YACJ,CAAC;QACH,CAAC;QACD,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrD,MAAM,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC9B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;QACpD,CAAC;QACD,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;IAC5B,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,IAAU;QACnC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,KAAK,kBAAkB,CAAC;QACpD,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAElE,IAAI,CAAC,UAAU,IAAI,CAAC,eAAe,EAAE,CAAC;YACpC,IAAI,CAAC,KAAK,GAAG,+CAA+C,CAAC;YAC7D,OAAO;QACT,CAAC;QAED,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC;QAEhB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,CAAC;YAC1C,MAAM,MAAM,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;YAExC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,uCAAuC,CAAC;gBACrE,OAAO;YACT,CAAC;YAED,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,EAAE,CAAC;QAC1C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,KAAK;gBACR,GAAG,YAAY,KAAK;oBAClB,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,gEAAgE,CAAC;YACvE,OAAO,CAAC,KAAK,CAAC,wBAAwB,EAAE,GAAG,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,qBAAqB;QACjC,IAAI,CAAC,oBAAoB,GAAG,IAAI,CAAC;QACjC,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,qBAAqB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAE1D,gDAAgD;YAChD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;YAEvD,YAAY,CAAC,WAAW,EAAE,iBAAiB,EAAE,kBAAkB,CAAC,CAAC;QACnE,CAAC;gBAAS,CAAC;YACT,IAAI,CAAC,oBAAoB,GAAG,KAAK,CAAC;QACpC,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,uBAAuB;QACnC,IAAI,CAAC,sBAAsB,GAAG,IAAI,CAAC;QACnC,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,sBAAsB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAE1D,gDAAgD;YAChD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;YAEvD,YAAY,CAAC,UAAU,EAAE,kBAAkB,EAAE,UAAU,CAAC,CAAC;QAC3D,CAAC;gBAAS,CAAC;YACT,IAAI,CAAC,sBAAsB,GAAG,KAAK,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,UAAU;QACtB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACrB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG;gBACd,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,SAAS,EAAE,IAAI,CAAC,SAAS;aAC1B,CAAC;YACF,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAExB,yEAAyE;YACzE,UAAU,CAAC,GAAG,EAAE;gBACd,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAClB,OAAO,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;oBAC1D,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;gBACxB,CAAC;YACH,CAAC,EAAE,KAAK,CAAC,CAAC;QACZ,CAAC;gBAAS,CAAC;YACT,iEAAiE;YACjE,mEAAmE;QACrE,CAAC;IACH,CAAC;IAED,MAAM;QACJ,OAAO,CACL,4DAAK,KAAK,EAAC,uBAAuB;YAChC,EAAC,mBAAmB,qDAClB,oBAAoB,EAAE,IAAI,CAAC,oBAAoB,EAC/C,sBAAsB,EAAE,IAAI,CAAC,sBAAsB,EACnD,YAAY,EAAE,IAAI,CAAC,YAAY,EAC/B,OAAO,EAAE,IAAI,CAAC,OAAO,EACrB,QAAQ,EAAE,IAAI,CAAC,QAAQ,EACvB,QAAQ,EAAE,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,EACzC,aAAa,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,qBAAqB,EAAE,EACjD,eAAe,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,uBAAuB,EAAE,EACrD,QAAQ,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,EAClC,MAAM,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,GAC/B;YACF,EAAC,YAAY,qDAAC,OAAO,EAAE,IAAI,CAAC,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,GAAI;YACvE,4DAAK,KAAK,EAAC,gCAAgC;gBACzC,EAAC,YAAY,qDACX,SAAS,EAAE,IAAI,CAAC,SAAS,EACzB,KAAK,EAAE,QAAQ,CAAC,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,EAC/D,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC,EACvC,aAAa,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,cAAc,EAAE,EAC1C,oBAAoB,EAAE,IAAI,CAAC,oBAAoB,EAC/C,uBAAuB,EAAE,IAAI,CAAC,2BAA2B,GACzD,CACE,CACF,CACP,CAAC;IACJ,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CACF","sourcesContent":["import {\n Component,\n State,\n Prop,\n h,\n EventEmitter,\n Event,\n Method,\n} from '@stencil/core';\nimport { EvaluationResult } from '../../lib/evaluation/types';\nimport { ErrorMessage } from '../error-message/error-message';\nimport { RateLimitedFetcher } from '../../lib/rate-limited-fetcher/rate-limited-fetcher';\nimport {\n ExpectedOutcomeSchema,\n TestCase,\n LLMRequestPayload,\n SavePayload,\n} from '../../types/llm-test-runner';\nimport { readFileAsync } from '../../lib/file/file-reader';\nimport { downloadFile } from '../../lib/file/file-download';\nimport { formatTestSuiteAsJson } from '../../lib/import-export/test-suite-exporter';\nimport { exportTestResultsToCsv } from '../../lib/import-export/test-results-csv';\nimport { importTestSuite } from '../../lib/import-export/test-suite-importer';\nimport {\n createTestCase,\n createTestCaseFromInput,\n DEFAULT_EXPECTED_OUTCOME_SCHEMA,\n} from '../../lib/test-cases/test-case-factory';\nimport * as TestCaseMutations from '../../lib/test-cases/test-case-mutations';\nimport { EvaluationService } from '../../lib/evaluation/evaluation-service';\nimport { validateTestCaseInputArray } from '../../schemas/test-case';\nimport { validateExpectedOutcomeSchema } from '../../schemas/expected-outcome';\nimport { LLMTestRunnerHeader } from './header/llm-test-runner-header';\nimport { LLMTestCases } from './test-cases/llm-test-cases';\nimport { ExpectedOutcomeChangeDetail } from './test-cases/expected-outcome-renderer';\n\n@Component({\n tag: 'llm-test-runner',\n styleUrls: [\n '../../styles/tokens.css',\n 'llm-test-runner.css',\n 'header/llm-test-runner-header.css',\n 'test-cases/llm-test-cases.css',\n 'test-cases/llm-test-case-row.css',\n 'test-cases/actions/row-actions.css',\n 'test-cases/evaluation/evaluation-summary.css',\n 'test-cases/output/response-output.css',\n '../error-message/error-message.css',\n '../../lib/ui/button/button.css',\n '../../lib/ui/icon-button/icon-button.css',\n ],\n shadow: true,\n})\nexport class LLMTestRunner {\n @Event() llmRequest: EventEmitter<LLMRequestPayload>;\n @Event() save: EventEmitter<SavePayload>;\n @Prop() delayMs?: number = 500;\n @Prop() useSave?: boolean = false;\n @Prop() initialTestCases?: TestCase[];\n @Prop() defaultExpectedOutcomeSchema?: ExpectedOutcomeSchema;\n @State() testCases: TestCase[] = [\n {\n id: '1',\n question: '',\n expectedOutcome: [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n value: '',\n },\n ],\n isRunning: false,\n },\n ];\n @State() isRunningAll: boolean = false;\n @State() error: string = '';\n @State() isExportingTestSuite: boolean = false;\n @State() isExportingTestResults: boolean = false;\n @State() isSaving: boolean = false;\n\n private evaluationService: EvaluationService;\n\n private getResolvedExpectedOutcomeSchema(): ExpectedOutcomeSchema {\n if (this.defaultExpectedOutcomeSchema === undefined) {\n return DEFAULT_EXPECTED_OUTCOME_SCHEMA;\n }\n\n validateExpectedOutcomeSchema(this.defaultExpectedOutcomeSchema);\n return this.defaultExpectedOutcomeSchema;\n }\n\n componentWillLoad() {\n this.evaluationService = new EvaluationService();\n try {\n // Initialize testCases from prop if provided\n if (this.initialTestCases !== undefined) {\n validateTestCaseInputArray(this.initialTestCases);\n this.testCases = this.initialTestCases.map((rawTestCase, index) => {\n try {\n return createTestCaseFromInput(rawTestCase);\n } catch (err) {\n const message = err instanceof Error ? err.message : 'Unknown error';\n throw new Error(`Invalid initial test case at index ${index}: ${message}`);\n }\n });\n } else {\n const schema = this.getResolvedExpectedOutcomeSchema();\n this.testCases = [createTestCase(schema)];\n }\n } catch (err) {\n this.error =\n err instanceof Error\n ? err.message\n : 'Invalid defaultExpectedOutcomeSchema provided.';\n this.testCases = [];\n }\n }\n\n componentDidLoad() {}\n\n disconnectedCallback() {}\n\n @Method()\n async resetSavingState(): Promise<void> {\n this.isSaving = false;\n }\n\n private handleTestCaseChange = (\n event: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => {\n const { testCaseId, key, value } = event.detail;\n this.testCases = this.testCases.map(tc =>\n tc.id === testCaseId ? { ...tc, [key]: value } : tc,\n );\n };\n\n private addNewTestCase() {\n try {\n const schema = this.getResolvedExpectedOutcomeSchema();\n const newTestCase = createTestCase(schema);\n this.testCases = [...this.testCases, newTestCase];\n } catch (err) {\n this.error =\n err instanceof Error\n ? err.message\n : 'Invalid defaultExpectedOutcomeSchema provided.';\n }\n }\n\n private updateTestCase(id: string, updates: Partial<TestCase>) {\n this.testCases = this.testCases.map(tc =>\n tc.id === id ? { ...tc, ...updates } : tc,\n );\n }\n\n private async runSingleTest(testCase: TestCase): Promise<void> {\n const startTime = Date.now();\n this.updateTestCase(testCase.id, { isRunning: true });\n return new Promise<void>((resolve, reject) => {\n this.llmRequest.emit({\n prompt: testCase.question,\n resolve: async (aiResponse: string) => {\n const endTime = Date.now();\n const responseTime = endTime - startTime;\n this.updateTestCase(testCase.id, {\n isRunning: false,\n output: aiResponse,\n error: null,\n responseTime: responseTime,\n });\n\n await this.evaluateResponse({\n ...testCase,\n output: aiResponse,\n responseTime: responseTime,\n });\n resolve();\n },\n reject: (error: Error | unknown) => {\n this.updateTestCase(testCase.id, {\n isRunning: false,\n output: null,\n error: error instanceof Error ? error.message : 'Unknown error',\n });\n reject(error);\n },\n });\n });\n }\n\n private deleteTestCase(id: string) {\n this.testCases = this.testCases.filter(tc => tc.id !== id);\n }\n\n private handleExpectedOutcomeChange = (\n event: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => {\n const { testCaseId, ...change } = event.detail;\n\n this.testCases = this.testCases.map(tc => {\n if (tc.id !== testCaseId) {\n return tc;\n }\n\n return TestCaseMutations.applyExpectedOutcomeChange(tc, change);\n });\n };\n\n private async evaluateResponse(testCase: TestCase): Promise<void> {\n await this.evaluationService.evaluateTestCase(\n testCase,\n (result: EvaluationResult) => {\n this.updateTestCase(testCase.id, {\n evaluationResult: result,\n });\n },\n );\n }\n\n private async runAllTests() {\n this.isRunningAll = true;\n const tasks = [];\n for (const testCase of this.testCases) {\n if (!testCase.isRunning && testCase.question.trim()) {\n tasks.push(() =>\n this.runSingleTest(testCase).catch(err => {\n console.error(`⚠️ Test case ${testCase.id} failed`, err);\n }),\n );\n }\n }\n try {\n const fetcher = new RateLimitedFetcher(this.delayMs);\n await fetcher.runAll(tasks);\n } catch (err) {\n console.error('⚠️ Error running all tests:', err);\n }\n this.isRunningAll = false;\n }\n\n private async handleImport(file: File): Promise<void> {\n const isJsonType = file.type === 'application/json';\n const isJsonExtension = file.name.toLowerCase().endsWith('.json');\n\n if (!isJsonType && !isJsonExtension) {\n this.error = 'Invalid file type. Please select a JSON file.';\n return;\n }\n\n this.error = '';\n\n try {\n const content = await readFileAsync(file);\n const result = importTestSuite(content);\n\n if (!result.success) {\n this.error = result.error || 'Unknown error occurred during import.';\n return;\n }\n\n this.testCases = result.testCases || [];\n } catch (err) {\n this.error =\n err instanceof Error\n ? err.message\n : 'Error processing file. Please ensure it is a valid JSON array.';\n console.error('File Processing Error:', err);\n }\n }\n\n private async handleExportTestSuite() {\n this.isExportingTestSuite = true;\n try {\n const jsonContent = formatTestSuiteAsJson(this.testCases);\n\n // Added a small delay to show the loading state\n await new Promise(resolve => setTimeout(resolve, 500));\n\n downloadFile(jsonContent, 'test-suite.json', 'application/json');\n } finally {\n this.isExportingTestSuite = false;\n }\n }\n\n private async handleExportTestResults() {\n this.isExportingTestResults = true;\n try {\n const csvContent = exportTestResultsToCsv(this.testCases);\n\n // Added a small delay to show the loading state\n await new Promise(resolve => setTimeout(resolve, 500));\n\n downloadFile(csvContent, 'test-results.csv', 'text/csv');\n } finally {\n this.isExportingTestResults = false;\n }\n }\n\n private async handleSave() {\n this.isSaving = true;\n try {\n const testRun = {\n timestamp: new Date().toISOString(),\n testCases: this.testCases,\n };\n this.save.emit(testRun);\n\n // Failsafe: Auto-reset saving state after 10 seconds to prevent stuck UI\n setTimeout(() => {\n if (this.isSaving) {\n console.warn('Save operation timed out, resetting state');\n this.isSaving = false;\n }\n }, 10000);\n } finally {\n // Parent will call resetSavingState() when actual save completes\n // If not called within 10 seconds, failsafe above will reset state\n }\n }\n\n render() {\n return (\n <div class=\"test-runner-container\">\n <LLMTestRunnerHeader\n isExportingTestSuite={this.isExportingTestSuite}\n isExportingTestResults={this.isExportingTestResults}\n isRunningAll={this.isRunningAll}\n useSave={this.useSave}\n isSaving={this.isSaving}\n onImport={file => this.handleImport(file)}\n onExportSuite={() => this.handleExportTestSuite()}\n onExportResults={() => this.handleExportTestResults()}\n onRunAll={() => this.runAllTests()}\n onSave={() => this.handleSave()}\n />\n <ErrorMessage message={this.error} onClear={() => (this.error = '')} />\n <div class=\"test-runner-container__content\">\n <LLMTestCases\n testCases={this.testCases}\n onRun={testCase => this.runSingleTest(testCase).catch(() => {})}\n onDelete={id => this.deleteTestCase(id)}\n onAddTestCase={() => this.addNewTestCase()}\n handleTestCaseChange={this.handleTestCaseChange}\n onExpectedOutcomeChange={this.handleExpectedOutcomeChange}\n />\n </div>\n </div>\n );\n }\n}\n"]}
|
|
1
|
+
{"version":3,"file":"llm-test-runner.js","sourceRoot":"","sources":["../../../src/components/llm-test-runner/llm-test-runner.tsx"],"names":[],"mappings":"AAAA,OAAO,EACL,SAAS,EACT,KAAK,EACL,IAAI,EACJ,CAAC,EAED,KAAK,EACL,MAAM,GACP,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,gCAAgC,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,MAAM,qDAAqD,CAAC;AAOzF,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,6CAA6C,CAAC;AACpF,OAAO,EAAE,sBAAsB,EAAE,MAAM,0CAA0C,CAAC;AAClF,OAAO,EAAE,eAAe,EAAE,MAAM,6CAA6C,CAAC;AAC9E,OAAO,EACL,cAAc,EACd,uBAAuB,EACvB,+BAA+B,GAChC,MAAM,wCAAwC,CAAC;AAChD,OAAO,KAAK,iBAAiB,MAAM,0CAA0C,CAAC;AAC9E,OAAO,EAAE,iBAAiB,EAAE,MAAM,yCAAyC,CAAC;AAC5E,OAAO,EAAE,0BAA0B,EAAE,MAAM,yBAAyB,CAAC;AACrE,OAAO,EAAE,6BAA6B,EAAE,MAAM,gCAAgC,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAoB3D,MAAM,OAAO,aAAa;IACf,UAAU,CAAkC;IAC5C,IAAI,CAA4B;IACjC,OAAO,GAAY,GAAG,CAAC;IACvB,OAAO,GAAa,KAAK,CAAC;IAC1B,eAAe,GAAa,KAAK,CAAC;IAClC,gBAAgB,CAAc;IAC9B,4BAA4B,CAAyB;IACpD,SAAS,GAAe;QAC/B;YACE,EAAE,EAAE,GAAG;YACP,QAAQ,EAAE,EAAE;YACZ,eAAe,EAAE;gBACf;oBACE,IAAI,EAAE,UAAU;oBAChB,KAAK,EAAE,kBAAkB;oBACzB,KAAK,EAAE,EAAE;iBACV;aACF;YACD,SAAS,EAAE,KAAK;SACjB;KACF,CAAC;IACO,YAAY,GAAY,KAAK,CAAC;IAC9B,KAAK,GAAW,EAAE,CAAC;IACnB,oBAAoB,GAAY,KAAK,CAAC;IACtC,sBAAsB,GAAY,KAAK,CAAC;IACxC,QAAQ,GAAY,KAAK,CAAC;IAE3B,iBAAiB,CAAoB;IAErC,gCAAgC;QACtC,IAAI,IAAI,CAAC,4BAA4B,KAAK,SAAS,EAAE,CAAC;YACpD,OAAO,+BAA+B,CAAC;QACzC,CAAC;QAED,6BAA6B,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACjE,OAAO,IAAI,CAAC,4BAA4B,CAAC;IAC3C,CAAC;IAED,iBAAiB;QACf,IAAI,CAAC,iBAAiB,GAAG,IAAI,iBAAiB,EAAE,CAAC;QACjD,IAAI,CAAC;YACH,6CAA6C;YAC7C,IAAI,IAAI,CAAC,gBAAgB,KAAK,SAAS,EAAE,CAAC;gBACxC,0BAA0B,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;gBAClD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,KAAK,EAAE,EAAE;oBAChE,IAAI,CAAC;wBACH,OAAO,uBAAuB,CAAC,WAAW,CAAC,CAAC;oBAC9C,CAAC;oBAAC,OAAO,GAAG,EAAE,CAAC;wBACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;wBACrE,MAAM,IAAI,KAAK,CAAC,sCAAsC,KAAK,KAAK,OAAO,EAAE,CAAC,CAAC;oBAC7E,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,MAAM,MAAM,GAAG,IAAI,CAAC,gCAAgC,EAAE,CAAC;gBACvD,IAAI,CAAC,SAAS,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,KAAK;gBACR,GAAG,YAAY,KAAK;oBAClB,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,gDAAgD,CAAC;YACvD,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC;QACtB,CAAC;IACH,CAAC;IAED,gBAAgB,KAAI,CAAC;IAErB,oBAAoB,KAAI,CAAC;IAGzB,KAAK,CAAC,gBAAgB;QACpB,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;IACxB,CAAC;IAGD,KAAK,CAAC,YAAY;QAChB,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAEO,oBAAoB,GAAG,CAC7B,KAAsE,EACtE,EAAE;QACF,MAAM,EAAE,UAAU,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC;QAChD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CACvC,EAAE,CAAC,EAAE,KAAK,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CACpD,CAAC;IACJ,CAAC,CAAC;IAEM,cAAc;QACpB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,gCAAgC,EAAE,CAAC;YACvD,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;YAC3C,IAAI,CAAC,SAAS,GAAG,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;QACpD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,KAAK;gBACR,GAAG,YAAY,KAAK;oBAClB,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,gDAAgD,CAAC;QACzD,CAAC;IACH,CAAC;IAEO,cAAc,CAAC,EAAU,EAAE,OAA0B;QAC3D,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CACvC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAC1C,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,QAAkB;QAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACtD,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC3C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;gBACnB,MAAM,EAAE,QAAQ,CAAC,QAAQ;gBACzB,OAAO,EAAE,KAAK,EAAE,UAAkB,EAAE,EAAE;oBACpC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;oBAC3B,MAAM,YAAY,GAAG,OAAO,GAAG,SAAS,CAAC;oBACzC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,EAAE;wBAC/B,SAAS,EAAE,KAAK;wBAChB,MAAM,EAAE,UAAU;wBAClB,KAAK,EAAE,IAAI;wBACX,YAAY,EAAE,YAAY;qBAC3B,CAAC,CAAC;oBAEH,MAAM,IAAI,CAAC,gBAAgB,CAAC;wBAC1B,GAAG,QAAQ;wBACX,MAAM,EAAE,UAAU;wBAClB,YAAY,EAAE,YAAY;qBAC3B,CAAC,CAAC;oBACH,OAAO,EAAE,CAAC;gBACZ,CAAC;gBACD,MAAM,EAAE,CAAC,KAAsB,EAAE,EAAE;oBACjC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,EAAE;wBAC/B,SAAS,EAAE,KAAK;wBAChB,MAAM,EAAE,IAAI;wBACZ,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;qBAChE,CAAC,CAAC;oBACH,MAAM,CAAC,KAAK,CAAC,CAAC;gBAChB,CAAC;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,cAAc,CAAC,EAAU;QAC/B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IAC7D,CAAC;IAEO,2BAA2B,GAAG,CACpC,KAA+C,EAC/C,EAAE;QACF,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC;QAE/C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YACvC,IAAI,EAAE,CAAC,EAAE,KAAK,UAAU,EAAE,CAAC;gBACzB,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,OAAO,iBAAiB,CAAC,0BAA0B,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;IACL,CAAC,CAAC;IAEM,KAAK,CAAC,gBAAgB,CAAC,QAAkB;QAC/C,MAAM,IAAI,CAAC,iBAAiB,CAAC,gBAAgB,CAC3C,QAAQ,EACR,CAAC,MAAwB,EAAE,EAAE;YAC3B,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,EAAE;gBAC/B,gBAAgB,EAAE,MAAM;aACzB,CAAC,CAAC;QACL,CAAC,CACF,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,WAAW;QACvB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QACzB,MAAM,KAAK,GAAG,EAAE,CAAC;QACjB,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACtC,IAAI,CAAC,QAAQ,CAAC,SAAS,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;gBACpD,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CACd,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE;oBACvC,OAAO,CAAC,KAAK,CAAC,gBAAgB,QAAQ,CAAC,EAAE,SAAS,EAAE,GAAG,CAAC,CAAC;gBAC3D,CAAC,CAAC,CACH,CAAC;YACJ,CAAC;QACH,CAAC;QACD,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrD,MAAM,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC9B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;QACpD,CAAC;QACD,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;IAC5B,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,IAAU;QACnC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,KAAK,kBAAkB,CAAC;QACpD,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAElE,IAAI,CAAC,UAAU,IAAI,CAAC,eAAe,EAAE,CAAC;YACpC,IAAI,CAAC,KAAK,GAAG,+CAA+C,CAAC;YAC7D,OAAO;QACT,CAAC;QAED,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC;QAEhB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,CAAC;YAC1C,MAAM,MAAM,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;YAExC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,uCAAuC,CAAC;gBACrE,OAAO;YACT,CAAC;YAED,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,EAAE,CAAC;QAC1C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,KAAK;gBACR,GAAG,YAAY,KAAK;oBAClB,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,gEAAgE,CAAC;YACvE,OAAO,CAAC,KAAK,CAAC,wBAAwB,EAAE,GAAG,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,qBAAqB;QACjC,IAAI,CAAC,oBAAoB,GAAG,IAAI,CAAC;QACjC,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,qBAAqB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAE1D,gDAAgD;YAChD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;YAEvD,YAAY,CAAC,WAAW,EAAE,iBAAiB,EAAE,kBAAkB,CAAC,CAAC;QACnE,CAAC;gBAAS,CAAC;YACT,IAAI,CAAC,oBAAoB,GAAG,KAAK,CAAC;QACpC,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,uBAAuB;QACnC,IAAI,CAAC,sBAAsB,GAAG,IAAI,CAAC;QACnC,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,sBAAsB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAE1D,gDAAgD;YAChD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;YAEvD,YAAY,CAAC,UAAU,EAAE,kBAAkB,EAAE,UAAU,CAAC,CAAC;QAC3D,CAAC;gBAAS,CAAC;YACT,IAAI,CAAC,sBAAsB,GAAG,KAAK,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,UAAU;QACtB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACrB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG;gBACd,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,SAAS,EAAE,IAAI,CAAC,SAAS;aAC1B,CAAC;YACF,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAExB,yEAAyE;YACzE,UAAU,CAAC,GAAG,EAAE;gBACd,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAClB,OAAO,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;oBAC1D,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;gBACxB,CAAC;YACH,CAAC,EAAE,KAAK,CAAC,CAAC;QACZ,CAAC;gBAAS,CAAC;YACT,iEAAiE;YACjE,mEAAmE;QACrE,CAAC;IACH,CAAC;IAED,MAAM;QACJ,OAAO,CACL,4DAAK,KAAK,EAAC,uBAAuB;YAChC,EAAC,mBAAmB,qDAClB,oBAAoB,EAAE,IAAI,CAAC,oBAAoB,EAC/C,sBAAsB,EAAE,IAAI,CAAC,sBAAsB,EACnD,YAAY,EAAE,IAAI,CAAC,YAAY,EAC/B,OAAO,EAAE,IAAI,CAAC,OAAO,EACrB,QAAQ,EAAE,IAAI,CAAC,QAAQ,EACvB,eAAe,EAAE,IAAI,CAAC,eAAe,EACrC,QAAQ,EAAE,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,EACzC,aAAa,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,qBAAqB,EAAE,EACjD,eAAe,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,uBAAuB,EAAE,EACrD,QAAQ,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,EAClC,MAAM,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,GAC/B;YACF,EAAC,YAAY,qDAAC,OAAO,EAAE,IAAI,CAAC,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,GAAI;YACvE,4DAAK,KAAK,EAAC,gCAAgC;gBACzC,EAAC,YAAY,qDACX,SAAS,EAAE,IAAI,CAAC,SAAS,EACzB,KAAK,EAAE,QAAQ,CAAC,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,EAC/D,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC,EACvC,aAAa,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,cAAc,EAAE,EAC1C,oBAAoB,EAAE,IAAI,CAAC,oBAAoB,EAC/C,uBAAuB,EAAE,IAAI,CAAC,2BAA2B,GACzD,CACE,CACF,CACP,CAAC;IACJ,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CACF","sourcesContent":["import {\n Component,\n State,\n Prop,\n h,\n EventEmitter,\n Event,\n Method,\n} from '@stencil/core';\nimport { EvaluationResult } from '../../lib/evaluation/types';\nimport { ErrorMessage } from '../error-message/error-message';\nimport { RateLimitedFetcher } from '../../lib/rate-limited-fetcher/rate-limited-fetcher';\nimport {\n ExpectedOutcomeSchema,\n TestCase,\n LLMRequestPayload,\n SavePayload,\n} from '../../types/llm-test-runner';\nimport { readFileAsync } from '../../lib/file/file-reader';\nimport { downloadFile } from '../../lib/file/file-download';\nimport { formatTestSuiteAsJson } from '../../lib/import-export/test-suite-exporter';\nimport { exportTestResultsToCsv } from '../../lib/import-export/test-results-csv';\nimport { importTestSuite } from '../../lib/import-export/test-suite-importer';\nimport {\n createTestCase,\n createTestCaseFromInput,\n DEFAULT_EXPECTED_OUTCOME_SCHEMA,\n} from '../../lib/test-cases/test-case-factory';\nimport * as TestCaseMutations from '../../lib/test-cases/test-case-mutations';\nimport { EvaluationService } from '../../lib/evaluation/evaluation-service';\nimport { validateTestCaseInputArray } from '../../schemas/test-case';\nimport { validateExpectedOutcomeSchema } from '../../schemas/expected-outcome';\nimport { LLMTestRunnerHeader } from './header/llm-test-runner-header';\nimport { LLMTestCases } from './test-cases/llm-test-cases';\nimport { ExpectedOutcomeChangeDetail } from './test-cases/expected-outcome-renderer';\n\n@Component({\n tag: 'llm-test-runner',\n styleUrls: [\n '../../styles/tokens.css',\n 'llm-test-runner.css',\n 'header/llm-test-runner-header.css',\n 'test-cases/llm-test-cases.css',\n 'test-cases/llm-test-case-row.css',\n 'test-cases/actions/row-actions.css',\n 'test-cases/evaluation/evaluation-summary.css',\n 'test-cases/output/response-output.css',\n '../error-message/error-message.css',\n '../../lib/ui/button/button.css',\n '../../lib/ui/icon-button/icon-button.css',\n ],\n shadow: true,\n})\nexport class LLMTestRunner {\n @Event() llmRequest: EventEmitter<LLMRequestPayload>;\n @Event() save: EventEmitter<SavePayload>;\n @Prop() delayMs?: number = 500;\n @Prop() useSave?: boolean = false;\n @Prop() usePromptEditor?: boolean = false;\n @Prop() initialTestCases?: TestCase[];\n @Prop() defaultExpectedOutcomeSchema?: ExpectedOutcomeSchema;\n @State() testCases: TestCase[] = [\n {\n id: '1',\n question: '',\n expectedOutcome: [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n value: '',\n },\n ],\n isRunning: false,\n },\n ];\n @State() isRunningAll: boolean = false;\n @State() error: string = '';\n @State() isExportingTestSuite: boolean = false;\n @State() isExportingTestResults: boolean = false;\n @State() isSaving: boolean = false;\n\n private evaluationService: EvaluationService;\n\n private getResolvedExpectedOutcomeSchema(): ExpectedOutcomeSchema {\n if (this.defaultExpectedOutcomeSchema === undefined) {\n return DEFAULT_EXPECTED_OUTCOME_SCHEMA;\n }\n\n validateExpectedOutcomeSchema(this.defaultExpectedOutcomeSchema);\n return this.defaultExpectedOutcomeSchema;\n }\n\n componentWillLoad() {\n this.evaluationService = new EvaluationService();\n try {\n // Initialize testCases from prop if provided\n if (this.initialTestCases !== undefined) {\n validateTestCaseInputArray(this.initialTestCases);\n this.testCases = this.initialTestCases.map((rawTestCase, index) => {\n try {\n return createTestCaseFromInput(rawTestCase);\n } catch (err) {\n const message = err instanceof Error ? err.message : 'Unknown error';\n throw new Error(`Invalid initial test case at index ${index}: ${message}`);\n }\n });\n } else {\n const schema = this.getResolvedExpectedOutcomeSchema();\n this.testCases = [createTestCase(schema)];\n }\n } catch (err) {\n this.error =\n err instanceof Error\n ? err.message\n : 'Invalid defaultExpectedOutcomeSchema provided.';\n this.testCases = [];\n }\n }\n\n componentDidLoad() {}\n\n disconnectedCallback() {}\n\n @Method()\n async resetSavingState(): Promise<void> {\n this.isSaving = false;\n }\n\n @Method()\n async getTestCases(): Promise<TestCase[]> {\n return this.testCases;\n }\n\n private handleTestCaseChange = (\n event: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => {\n const { testCaseId, key, value } = event.detail;\n this.testCases = this.testCases.map(tc =>\n tc.id === testCaseId ? { ...tc, [key]: value } : tc,\n );\n };\n\n private addNewTestCase() {\n try {\n const schema = this.getResolvedExpectedOutcomeSchema();\n const newTestCase = createTestCase(schema);\n this.testCases = [...this.testCases, newTestCase];\n } catch (err) {\n this.error =\n err instanceof Error\n ? err.message\n : 'Invalid defaultExpectedOutcomeSchema provided.';\n }\n }\n\n private updateTestCase(id: string, updates: Partial<TestCase>) {\n this.testCases = this.testCases.map(tc =>\n tc.id === id ? { ...tc, ...updates } : tc,\n );\n }\n\n private async runSingleTest(testCase: TestCase): Promise<void> {\n const startTime = Date.now();\n this.updateTestCase(testCase.id, { isRunning: true });\n return new Promise<void>((resolve, reject) => {\n this.llmRequest.emit({\n prompt: testCase.question,\n resolve: async (aiResponse: string) => {\n const endTime = Date.now();\n const responseTime = endTime - startTime;\n this.updateTestCase(testCase.id, {\n isRunning: false,\n output: aiResponse,\n error: null,\n responseTime: responseTime,\n });\n\n await this.evaluateResponse({\n ...testCase,\n output: aiResponse,\n responseTime: responseTime,\n });\n resolve();\n },\n reject: (error: Error | unknown) => {\n this.updateTestCase(testCase.id, {\n isRunning: false,\n output: null,\n error: error instanceof Error ? error.message : 'Unknown error',\n });\n reject(error);\n },\n });\n });\n }\n\n private deleteTestCase(id: string) {\n this.testCases = this.testCases.filter(tc => tc.id !== id);\n }\n\n private handleExpectedOutcomeChange = (\n event: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => {\n const { testCaseId, ...change } = event.detail;\n\n this.testCases = this.testCases.map(tc => {\n if (tc.id !== testCaseId) {\n return tc;\n }\n\n return TestCaseMutations.applyExpectedOutcomeChange(tc, change);\n });\n };\n\n private async evaluateResponse(testCase: TestCase): Promise<void> {\n await this.evaluationService.evaluateTestCase(\n testCase,\n (result: EvaluationResult) => {\n this.updateTestCase(testCase.id, {\n evaluationResult: result,\n });\n },\n );\n }\n\n private async runAllTests() {\n this.isRunningAll = true;\n const tasks = [];\n for (const testCase of this.testCases) {\n if (!testCase.isRunning && testCase.question.trim()) {\n tasks.push(() =>\n this.runSingleTest(testCase).catch(err => {\n console.error(`⚠️ Test case ${testCase.id} failed`, err);\n }),\n );\n }\n }\n try {\n const fetcher = new RateLimitedFetcher(this.delayMs);\n await fetcher.runAll(tasks);\n } catch (err) {\n console.error('⚠️ Error running all tests:', err);\n }\n this.isRunningAll = false;\n }\n\n private async handleImport(file: File): Promise<void> {\n const isJsonType = file.type === 'application/json';\n const isJsonExtension = file.name.toLowerCase().endsWith('.json');\n\n if (!isJsonType && !isJsonExtension) {\n this.error = 'Invalid file type. Please select a JSON file.';\n return;\n }\n\n this.error = '';\n\n try {\n const content = await readFileAsync(file);\n const result = importTestSuite(content);\n\n if (!result.success) {\n this.error = result.error || 'Unknown error occurred during import.';\n return;\n }\n\n this.testCases = result.testCases || [];\n } catch (err) {\n this.error =\n err instanceof Error\n ? err.message\n : 'Error processing file. Please ensure it is a valid JSON array.';\n console.error('File Processing Error:', err);\n }\n }\n\n private async handleExportTestSuite() {\n this.isExportingTestSuite = true;\n try {\n const jsonContent = formatTestSuiteAsJson(this.testCases);\n\n // Added a small delay to show the loading state\n await new Promise(resolve => setTimeout(resolve, 500));\n\n downloadFile(jsonContent, 'test-suite.json', 'application/json');\n } finally {\n this.isExportingTestSuite = false;\n }\n }\n\n private async handleExportTestResults() {\n this.isExportingTestResults = true;\n try {\n const csvContent = exportTestResultsToCsv(this.testCases);\n\n // Added a small delay to show the loading state\n await new Promise(resolve => setTimeout(resolve, 500));\n\n downloadFile(csvContent, 'test-results.csv', 'text/csv');\n } finally {\n this.isExportingTestResults = false;\n }\n }\n\n private async handleSave() {\n this.isSaving = true;\n try {\n const testRun = {\n timestamp: new Date().toISOString(),\n testCases: this.testCases,\n };\n this.save.emit(testRun);\n\n // Failsafe: Auto-reset saving state after 10 seconds to prevent stuck UI\n setTimeout(() => {\n if (this.isSaving) {\n console.warn('Save operation timed out, resetting state');\n this.isSaving = false;\n }\n }, 10000);\n } finally {\n // Parent will call resetSavingState() when actual save completes\n // If not called within 10 seconds, failsafe above will reset state\n }\n }\n\n render() {\n return (\n <div class=\"test-runner-container\">\n <LLMTestRunnerHeader\n isExportingTestSuite={this.isExportingTestSuite}\n isExportingTestResults={this.isExportingTestResults}\n isRunningAll={this.isRunningAll}\n useSave={this.useSave}\n isSaving={this.isSaving}\n usePromptEditor={this.usePromptEditor}\n onImport={file => this.handleImport(file)}\n onExportSuite={() => this.handleExportTestSuite()}\n onExportResults={() => this.handleExportTestResults()}\n onRunAll={() => this.runAllTests()}\n onSave={() => this.handleSave()}\n />\n <ErrorMessage message={this.error} onClear={() => (this.error = '')} />\n <div class=\"test-runner-container__content\">\n <LLMTestCases\n testCases={this.testCases}\n onRun={testCase => this.runSingleTest(testCase).catch(() => {})}\n onDelete={id => this.deleteTestCase(id)}\n onAddTestCase={() => this.addNewTestCase()}\n handleTestCaseChange={this.handleTestCaseChange}\n onExpectedOutcomeChange={this.handleExpectedOutcomeChange}\n />\n </div>\n </div>\n );\n }\n}\n"]}
|
|
@@ -16,6 +16,7 @@ export class SemanticEvaluator {
|
|
|
16
16
|
}
|
|
17
17
|
}
|
|
18
18
|
async performEvaluation(request) {
|
|
19
|
+
const threshold = request.evaluationParameters?.threshold ?? DEFAULT_SEMANTIC_PASS_SCORE;
|
|
19
20
|
try {
|
|
20
21
|
await this.initialize();
|
|
21
22
|
// Split expectedOutcome by newlines to create keywords array
|
|
@@ -25,7 +26,7 @@ export class SemanticEvaluator {
|
|
|
25
26
|
.map(k => k.trim())
|
|
26
27
|
.filter(k => k.length > 0)
|
|
27
28
|
: [];
|
|
28
|
-
const keywordMatches = await evaluateKeywordsSemantically(SemanticEvaluator.extractor, request.actualResponse, expectedKeywords,
|
|
29
|
+
const keywordMatches = await evaluateKeywordsSemantically(SemanticEvaluator.extractor, request.actualResponse, expectedKeywords, threshold);
|
|
29
30
|
const totalItems = keywordMatches.length;
|
|
30
31
|
// calculate the overall score by averaging the score of the keyword matches
|
|
31
32
|
const keywordScore = keywordMatches.reduce((acc, curr) => acc + curr.evaluationApproachResult.score, 0);
|
|
@@ -33,7 +34,7 @@ export class SemanticEvaluator {
|
|
|
33
34
|
const passed = keywordMatches.every(match => match.found);
|
|
34
35
|
const evaluationParameters = {
|
|
35
36
|
approach: EvaluationApproach.SEMANTIC,
|
|
36
|
-
threshold
|
|
37
|
+
threshold,
|
|
37
38
|
};
|
|
38
39
|
return {
|
|
39
40
|
testCaseId: request.testCaseId,
|
|
@@ -55,7 +56,7 @@ export class SemanticEvaluator {
|
|
|
55
56
|
keywordMatches: [],
|
|
56
57
|
evaluationParameters: {
|
|
57
58
|
approach: EvaluationApproach.SEMANTIC,
|
|
58
|
-
threshold
|
|
59
|
+
threshold,
|
|
59
60
|
},
|
|
60
61
|
evaluationApproachResult: {
|
|
61
62
|
score: 0,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"SemanticEvaluator.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/SemanticEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,4BAA4B,EAAE,MAAM,qBAAqB,CAAC;AAGnE,OAAO,EACL,2BAA2B,EAC3B,kBAAkB,GACnB,MAAM,iBAAiB,CAAC;AAEzB,MAAM,OAAO,iBAAiB;IAC5B,qEAAqE;IAC7D,MAAM,CAAC,SAAS,GAA8B,IAAI,CAAC;IAE3D,KAAK,CAAC,UAAU;QACd,IAAI,iBAAiB,CAAC,SAAS;YAAE,OAAO;QACxC,IAAI,CAAC;YACH,iBAAiB,CAAC,SAAS,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAC1D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,2CAA2C,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,iBAAiB,CACrB,OAA0B;QAE1B,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;YAExB,6DAA6D;YAC7D,MAAM,gBAAgB,GAAG,OAAO,CAAC,eAAe;gBAC9C,CAAC,CAAC,OAAO,CAAC,eAAe;qBACpB,KAAK,CAAC,QAAQ,CAAC;qBACf,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;qBAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;gBAC9B,CAAC,CAAC,EAAE,CAAC;YAEP,MAAM,cAAc,GAAG,MAAM,4BAA4B,CACvD,iBAAiB,CAAC,SAAS,EAC3B,OAAO,CAAC,cAAc,EACtB,gBAAgB,EAChB,
|
|
1
|
+
{"version":3,"file":"SemanticEvaluator.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/SemanticEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,4BAA4B,EAAE,MAAM,qBAAqB,CAAC;AAGnE,OAAO,EACL,2BAA2B,EAC3B,kBAAkB,GACnB,MAAM,iBAAiB,CAAC;AAEzB,MAAM,OAAO,iBAAiB;IAC5B,qEAAqE;IAC7D,MAAM,CAAC,SAAS,GAA8B,IAAI,CAAC;IAE3D,KAAK,CAAC,UAAU;QACd,IAAI,iBAAiB,CAAC,SAAS;YAAE,OAAO;QACxC,IAAI,CAAC;YACH,iBAAiB,CAAC,SAAS,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAC1D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,2CAA2C,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,iBAAiB,CACrB,OAA0B;QAE1B,MAAM,SAAS,GACb,OAAO,CAAC,oBAAoB,EAAE,SAAS,IAAI,2BAA2B,CAAC;QAEzE,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;YAExB,6DAA6D;YAC7D,MAAM,gBAAgB,GAAG,OAAO,CAAC,eAAe;gBAC9C,CAAC,CAAC,OAAO,CAAC,eAAe;qBACpB,KAAK,CAAC,QAAQ,CAAC;qBACf,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;qBAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;gBAC9B,CAAC,CAAC,EAAE,CAAC;YAEP,MAAM,cAAc,GAAG,MAAM,4BAA4B,CACvD,iBAAiB,CAAC,SAAS,EAC3B,OAAO,CAAC,cAAc,EACtB,gBAAgB,EAChB,SAAS,CACV,CAAC;YAEF,MAAM,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC;YACzC,4EAA4E;YAC5E,MAAM,YAAY,GAAG,cAAc,CAAC,MAAM,CACxC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,wBAAwB,CAAC,KAAK,EACxD,CAAC,CACF,CAAC;YACF,MAAM,YAAY,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,4BAA4B;YACjG,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAE1D,MAAM,oBAAoB,GAAG;gBAC3B,QAAQ,EAAE,kBAAkB,CAAC,QAAQ;gBACrC,SAAS;aACc,CAAC;YAE1B,OAAO;gBACL,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,MAAM;gBACN,cAAc;gBACd,oBAAoB;gBACpB,wBAAwB,EAAE;oBACxB,KAAK,EAAE,YAAY;oBACnB,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;gBACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,wCAAwC,EAAE,KAAK,CAAC,CAAC;YAC/D,OAAO;gBACL,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,oBAAoB,EAAE;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,QAAQ;oBACrC,SAAS;iBACV;gBACD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;gBACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC","sourcesContent":["import { EvaluationResult, EvaluationRequest } from '../../types';\nimport { loadSemanticModel } from './model-loader';\nimport { evaluateKeywordsSemantically } from './evaluate-keywords';\nimport { FeatureExtractionPipeline } from '@xenova/transformers';\nimport { EvaluationParameters } from '../../../../types/evaluation';\nimport {\n DEFAULT_SEMANTIC_PASS_SCORE,\n EvaluationApproach,\n} from '../../constants';\n\nexport class SemanticEvaluator {\n // TODO(LLM-39): Refactor SemanticEvaluator into a singleton pattern.\n private static extractor: FeatureExtractionPipeline = null;\n\n async initialize(): Promise<void> {\n if (SemanticEvaluator.extractor) return;\n try {\n SemanticEvaluator.extractor = await loadSemanticModel();\n } catch (error) {\n console.error('Failed to load semantic evaluation model:', error);\n throw error;\n }\n }\n\n async performEvaluation(\n request: EvaluationRequest,\n ): Promise<EvaluationResult> {\n const threshold =\n request.evaluationParameters?.threshold ?? DEFAULT_SEMANTIC_PASS_SCORE;\n\n try {\n await this.initialize();\n\n // Split expectedOutcome by newlines to create keywords array\n const expectedKeywords = request.expectedOutcome\n ? request.expectedOutcome\n .split(/[\\n,]+/)\n .map(k => k.trim())\n .filter(k => k.length > 0)\n : [];\n\n const keywordMatches = await evaluateKeywordsSemantically(\n SemanticEvaluator.extractor,\n request.actualResponse,\n expectedKeywords,\n threshold,\n );\n\n const totalItems = keywordMatches.length;\n // calculate the overall score by averaging the score of the keyword matches\n const keywordScore = keywordMatches.reduce(\n (acc, curr) => acc + curr.evaluationApproachResult.score,\n 0,\n );\n const overallScore = totalItems > 0 ? keywordScore / totalItems : 0; // to avoid division by zero\n const passed = keywordMatches.every(match => match.found);\n\n const evaluationParameters = {\n approach: EvaluationApproach.SEMANTIC,\n threshold,\n } as EvaluationParameters;\n\n return {\n testCaseId: request.testCaseId,\n passed,\n keywordMatches,\n evaluationParameters,\n evaluationApproachResult: {\n score: overallScore,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n timestamp: new Date().toISOString(),\n };\n } catch (error) {\n console.error('Failed to perform semantic evaluation:', error);\n return {\n testCaseId: request.testCaseId,\n passed: false,\n keywordMatches: [],\n evaluationParameters: {\n approach: EvaluationApproach.SEMANTIC,\n threshold,\n },\n evaluationApproachResult: {\n score: 0,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n timestamp: new Date().toISOString(),\n };\n }\n }\n}\n"]}
|
|
@@ -58,7 +58,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
58
58
|
type: 'select',
|
|
59
59
|
label: schemaField.label,
|
|
60
60
|
placeholder: schemaField.placeholder,
|
|
61
|
-
value:
|
|
61
|
+
value: schemaField.options[0],
|
|
62
62
|
options: schemaField.options,
|
|
63
63
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
64
64
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-case-factory.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AASpC,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAEhG,MAAM,CAAC,MAAM,+BAA+B,GAA0B;IACpE;QACE,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,2BAA2B;QACxC,IAAI,EAAE,CAAC;KACR;CACF,CAAC;AAEF,SAAS,6BAA6B,CACpC,KAA2B;IAE3B,OAAO;QACL,GAAG,KAAK;QACR,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,wBAA+C,+BAA+B;IAE9E,OAAO;QACL,EAAE,EAAE,MAAM,EAAE;QACZ,QAAQ,EAAE,EAAE;QACZ,eAAe,EAAE,+BAA+B,CAAC,qBAAqB,CAAC;QACvE,SAAS,EAAE,KAAK;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,oCAAoC,CAC3C,WAAuC;IAEvC,QAAQ,WAAW,CAAC,IAAI,EAAE,CAAC;QACzB,KAAK,MAAM;YACT,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,UAAU;YACb,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,IAAI,EAAE,WAAW,CAAC,IAAI;gBACtB,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,aAAa;YAChB,OAAO;gBACL,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,QAAQ;YACX,OAAO;gBACL,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"test-case-factory.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AASpC,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAEhG,MAAM,CAAC,MAAM,+BAA+B,GAA0B;IACpE;QACE,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,2BAA2B;QACxC,IAAI,EAAE,CAAC;KACR;CACF,CAAC;AAEF,SAAS,6BAA6B,CACpC,KAA2B;IAE3B,OAAO;QACL,GAAG,KAAK;QACR,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,wBAA+C,+BAA+B;IAE9E,OAAO;QACL,EAAE,EAAE,MAAM,EAAE;QACZ,QAAQ,EAAE,EAAE;QACZ,eAAe,EAAE,+BAA+B,CAAC,qBAAqB,CAAC;QACvE,SAAS,EAAE,KAAK;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,oCAAoC,CAC3C,WAAuC;IAEvC,QAAQ,WAAW,CAAC,IAAI,EAAE,CAAC;QACzB,KAAK,MAAM;YACT,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,UAAU;YACb,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,IAAI,EAAE,WAAW,CAAC,IAAI;gBACtB,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,aAAa;YAChB,OAAO;gBACL,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,QAAQ;YACX,OAAO;gBACL,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7B,OAAO,EAAE,WAAW,CAAC,OAAO;gBAC5B,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CAC6B;aAChE,CAAC;QAEJ,OAAO,CAAC,CAAC,CAAC;YACR,MAAM,gBAAgB,GAAU,WAAW,CAAC;YAC5C,OAAO,gBAAgB,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,+BAA+B,CAC7C,qBAA4C;IAE5C,OAAO,qBAAqB,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;AACzE,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,uBAAuB,CAAC,IAAmB;IACzD,OAAO;QACL,GAAG,IAAI;QACP,eAAe,EAAE,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,6BAA6B,CAAC;KACzE,CAAC;AACJ,CAAC","sourcesContent":["import { v4 as uuidv4 } from 'uuid';\nimport {\n ExpectedOutcomeField,\n ExpectedOutcomeSchema,\n ExpectedOutcomeSchemaField,\n TestCase,\n TestCaseInput,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nexport const DEFAULT_EXPECTED_OUTCOME_SCHEMA: ExpectedOutcomeSchema = [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n placeholder: 'Enter expected outcome...',\n rows: 2,\n },\n];\n\nfunction normalizeExpectedOutcomeField(\n field: ExpectedOutcomeField,\n): ExpectedOutcomeField {\n return {\n ...field,\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n };\n}\n\n/**\n * Creates a new test case with default values\n * @returns A new TestCase object with a unique ID\n */\nexport function createTestCase(\n expectedOutcomeSchema: ExpectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA,\n): TestCase {\n return {\n id: uuidv4(),\n question: '',\n expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),\n isRunning: false,\n };\n}\n\nfunction createExpectedOutcomeFieldFromSchema(\n schemaField: ExpectedOutcomeSchemaField,\n): ExpectedOutcomeField {\n switch (schemaField.type) {\n case 'text':\n return {\n type: 'text',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'textarea':\n return {\n type: 'textarea',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n rows: schemaField.rows,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'chips-input':\n return {\n type: 'chips-input',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: [],\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'select':\n return {\n type: 'select',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: schemaField.options[0],\n options: schemaField.options,\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ) as { approach: EvaluationApproach.EXACT; threshold?: number },\n };\n\n default: {\n const _exhaustiveCheck: never = schemaField;\n return _exhaustiveCheck;\n }\n }\n}\n\nexport function createExpectedOutcomeFromSchema(\n expectedOutcomeSchema: ExpectedOutcomeSchema,\n): ExpectedOutcomeField[] {\n return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);\n}\n\n/**\n * Creates a runtime test case from validated input data.\n * The input is expected to already satisfy `TestCaseInput`,\n * and this function only performs normalization/defaulting.\n *\n * @param data - Validated test case input\n * @returns A normalized TestCase object with runtime defaults applied\n */\nexport function createTestCaseFromInput(data: TestCaseInput): TestCase {\n return {\n ...data,\n expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),\n };\n}\n"]}
|
package/dist/components/index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import{G as t}from"./p-D9BrlHdP.js";export{g as getAssetPath,r as render,s as setAssetPath,a as setNonce,b as setPlatformOptions}from"./p-D9BrlHdP.js";export{L as LLMTestRunner}from"./p-
|
|
1
|
+
import{G as t}from"./p-D9BrlHdP.js";export{g as getAssetPath,r as render,s as setAssetPath,a as setNonce,b as setPlatformOptions}from"./p-D9BrlHdP.js";export{L as LLMTestRunner}from"./p-CJBscebi.js";class e{sdk;constructor(s){this.sdk=new t({apiKey:s})}async invoke(s){const t=await this.sdk.models.generateContent({model:"gemini-3-flash-preview",contents:s});return t.text}}function n(){window.env={API_KEY:""};window.GeminiAdapter=e}const o=n||(()=>{});const i=o;i();
|
|
2
2
|
//# sourceMappingURL=index.js.map
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import{L as
|
|
1
|
+
import{L as s,d as o}from"./p-CJBscebi.js";const t=s;const c=o;export{t as LlmTestRunner,c as defineCustomElement};
|
|
2
2
|
//# sourceMappingURL=llm-test-runner.js.map
|