llm-testrunner-components 1.2.0 → 1.2.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/cjs/index.cjs.js +12 -7
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/cjs/llm-testrunner.cjs.js +1 -1
- package/dist/cjs/loader.cjs.js +1 -1
- package/dist/collection/components/llm-test-runner/header/llm-test-runner-header.js +2 -2
- package/dist/collection/components/llm-test-runner/header/llm-test-runner-header.js.map +1 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +54 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js +4 -3
- package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-factory.js +1 -1
- package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
- package/dist/components/index.js +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/{p-Bb89MYYu.js → p-CJBscebi.js} +3 -3
- package/dist/components/p-CJBscebi.js.map +1 -0
- package/dist/esm/index.js +12 -7
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm-testrunner.js +1 -1
- package/dist/esm/loader.js +1 -1
- package/dist/llm-testrunner/index.esm.js +2 -2
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
- package/dist/types/components/llm-test-runner/header/llm-test-runner-header.d.ts +1 -0
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +2 -0
- package/dist/types/components.d.ts +12 -0
- package/package.json +5 -1
- package/dist/components/p-Bb89MYYu.js.map +0 -1
package/dist/cjs/index.cjs.js
CHANGED
|
@@ -338,7 +338,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
338
338
|
type: 'select',
|
|
339
339
|
label: schemaField.label,
|
|
340
340
|
placeholder: schemaField.placeholder,
|
|
341
|
-
value:
|
|
341
|
+
value: schemaField.options[0],
|
|
342
342
|
options: schemaField.options,
|
|
343
343
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
344
344
|
};
|
|
@@ -29619,6 +29619,7 @@ class SemanticEvaluator {
|
|
|
29619
29619
|
}
|
|
29620
29620
|
}
|
|
29621
29621
|
async performEvaluation(request) {
|
|
29622
|
+
const threshold = request.evaluationParameters?.threshold ?? DEFAULT_SEMANTIC_PASS_SCORE;
|
|
29622
29623
|
try {
|
|
29623
29624
|
await this.initialize();
|
|
29624
29625
|
// Split expectedOutcome by newlines to create keywords array
|
|
@@ -29628,7 +29629,7 @@ class SemanticEvaluator {
|
|
|
29628
29629
|
.map(k => k.trim())
|
|
29629
29630
|
.filter(k => k.length > 0)
|
|
29630
29631
|
: [];
|
|
29631
|
-
const keywordMatches = await evaluateKeywordsSemantically(SemanticEvaluator.extractor, request.actualResponse, expectedKeywords,
|
|
29632
|
+
const keywordMatches = await evaluateKeywordsSemantically(SemanticEvaluator.extractor, request.actualResponse, expectedKeywords, threshold);
|
|
29632
29633
|
const totalItems = keywordMatches.length;
|
|
29633
29634
|
// calculate the overall score by averaging the score of the keyword matches
|
|
29634
29635
|
const keywordScore = keywordMatches.reduce((acc, curr) => acc + curr.evaluationApproachResult.score, 0);
|
|
@@ -29636,7 +29637,7 @@ class SemanticEvaluator {
|
|
|
29636
29637
|
const passed = keywordMatches.every(match => match.found);
|
|
29637
29638
|
const evaluationParameters = {
|
|
29638
29639
|
approach: EvaluationApproach.SEMANTIC,
|
|
29639
|
-
threshold
|
|
29640
|
+
threshold,
|
|
29640
29641
|
};
|
|
29641
29642
|
return {
|
|
29642
29643
|
testCaseId: request.testCaseId,
|
|
@@ -29658,7 +29659,7 @@ class SemanticEvaluator {
|
|
|
29658
29659
|
keywordMatches: [],
|
|
29659
29660
|
evaluationParameters: {
|
|
29660
29661
|
approach: EvaluationApproach.SEMANTIC,
|
|
29661
|
-
threshold
|
|
29662
|
+
threshold,
|
|
29662
29663
|
},
|
|
29663
29664
|
evaluationApproachResult: {
|
|
29664
29665
|
score: 0,
|
|
@@ -30063,7 +30064,7 @@ const Button = (props, children) => {
|
|
|
30063
30064
|
return (index.h("button", { type: type, class: classes, disabled: disabled || loading, onClick: onClick, "aria-busy": loading, "aria-label": ariaLabel }, icon && index.h("span", { class: "icon" }, icon), children));
|
|
30064
30065
|
};
|
|
30065
30066
|
|
|
30066
|
-
const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isRunningAll, useSave = false, isSaving = false, onImport, onExportSuite, onExportResults, onRunAll, onSave, }) => {
|
|
30067
|
+
const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isRunningAll, useSave = false, isSaving = false, usePromptEditor = false, onImport, onExportSuite, onExportResults, onRunAll, onSave, }) => {
|
|
30067
30068
|
let fileInputRef;
|
|
30068
30069
|
const handleFileSelect = () => {
|
|
30069
30070
|
fileInputRef?.click();
|
|
@@ -30076,7 +30077,7 @@ const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isR
|
|
|
30076
30077
|
onImport(file);
|
|
30077
30078
|
}
|
|
30078
30079
|
};
|
|
30079
|
-
return (index.h("header", { class: "test-runner-header" }, index.h("div", { class: "test-runner-header__left" }, index.h("input", { class: "test-runner-header--hidden", type: "file", ref: el => (fileInputRef = el), onChange: handleFileChange, accept: ".json,application/json" }), index.h(Button, { variant: "secondary", size: "md", onClick: handleFileSelect, icon: "\u2191" }, "Import Test Suite"), index.h(Button, { variant: "secondary", size: "md", onClick: onExportSuite, disabled: isExportingTestSuite, loading: isExportingTestSuite, icon: isExportingTestSuite ? '⏳' : '↓' }, isExportingTestSuite ? 'Exporting...' : 'Export Test Suite')), index.h("div", { class: "test-runner-header__right" }, index.h(Button, { variant: "secondary", size: "md", icon: "\u2699\uFE0F" }, "Prompt Editor"), index.h(Button, { variant: "secondary", size: "md", onClick: onExportResults, disabled: isExportingTestResults, loading: isExportingTestResults, icon: isExportingTestResults ? '⏳' : '↓' }, isExportingTestResults ? 'Exporting...' : 'Export Test Results'), useSave && (index.h(Button, { variant: "secondary", size: "md", onClick: onSave, disabled: isSaving, loading: isSaving, icon: isSaving ? '⏳' : '💾' }, isSaving ? 'Saving...' : 'Save')), index.h(Button, { "aria-label": "Run All", variant: "primary", size: "md", onClick: onRunAll, disabled: isRunningAll, loading: isRunningAll }, isRunningAll ? 'Running...' : 'Run All'))));
|
|
30080
|
+
return (index.h("header", { class: "test-runner-header" }, index.h("div", { class: "test-runner-header__left" }, index.h("input", { class: "test-runner-header--hidden", type: "file", ref: el => (fileInputRef = el), onChange: handleFileChange, accept: ".json,application/json" }), index.h(Button, { variant: "secondary", size: "md", onClick: handleFileSelect, icon: "\u2191" }, "Import Test Suite"), index.h(Button, { variant: "secondary", size: "md", onClick: onExportSuite, disabled: isExportingTestSuite, loading: isExportingTestSuite, icon: isExportingTestSuite ? '⏳' : '↓' }, isExportingTestSuite ? 'Exporting...' : 'Export Test Suite')), index.h("div", { class: "test-runner-header__right" }, usePromptEditor && (index.h(Button, { variant: "secondary", size: "md", icon: "\u2699\uFE0F" }, "Prompt Editor")), index.h(Button, { variant: "secondary", size: "md", onClick: onExportResults, disabled: isExportingTestResults, loading: isExportingTestResults, icon: isExportingTestResults ? '⏳' : '↓' }, isExportingTestResults ? 'Exporting...' : 'Export Test Results'), useSave && (index.h(Button, { variant: "secondary", size: "md", onClick: onSave, disabled: isSaving, loading: isSaving, icon: isSaving ? '⏳' : '💾' }, isSaving ? 'Saving...' : 'Save')), index.h(Button, { "aria-label": "Run All", variant: "primary", size: "md", onClick: onRunAll, disabled: isRunningAll, loading: isRunningAll }, isRunningAll ? 'Running...' : 'Run All'))));
|
|
30080
30081
|
};
|
|
30081
30082
|
|
|
30082
30083
|
const ResponseOutput = ({ output, isRunning, }) => {
|
|
@@ -30257,6 +30258,7 @@ const LLMTestRunner = class {
|
|
|
30257
30258
|
save;
|
|
30258
30259
|
delayMs = 500;
|
|
30259
30260
|
useSave = false;
|
|
30261
|
+
usePromptEditor = false;
|
|
30260
30262
|
initialTestCases;
|
|
30261
30263
|
defaultExpectedOutcomeSchema;
|
|
30262
30264
|
testCases = [
|
|
@@ -30320,6 +30322,9 @@ const LLMTestRunner = class {
|
|
|
30320
30322
|
async resetSavingState() {
|
|
30321
30323
|
this.isSaving = false;
|
|
30322
30324
|
}
|
|
30325
|
+
async getTestCases() {
|
|
30326
|
+
return this.testCases;
|
|
30327
|
+
}
|
|
30323
30328
|
handleTestCaseChange = (event) => {
|
|
30324
30329
|
const { testCaseId, key, value } = event.detail;
|
|
30325
30330
|
this.testCases = this.testCases.map(tc => tc.id === testCaseId ? { ...tc, [key]: value } : tc);
|
|
@@ -30482,7 +30487,7 @@ const LLMTestRunner = class {
|
|
|
30482
30487
|
}
|
|
30483
30488
|
}
|
|
30484
30489
|
render() {
|
|
30485
|
-
return (index.h("div", { key: '
|
|
30490
|
+
return (index.h("div", { key: '29cf8a93402ebad6f6df43e147fa10406577c9aa', class: "test-runner-container" }, index.h(LLMTestRunnerHeader, { key: 'a07d3d1d823f8d473808752932cd1b2ab72d9e08', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), index.h(ErrorMessage, { key: 'ec68912728b06fc4a76c330fb1b7d5acde92c3d1', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: 'ce308dd4bd5437c94ae6e3e8a28970b799865281', class: "test-runner-container__content" }, index.h(LLMTestCases, { key: '3368df0bb7de4d099da1fad400f59dfc9a2cfb62', testCases: this.testCases, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange }))));
|
|
30486
30491
|
}
|
|
30487
30492
|
};
|
|
30488
30493
|
LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));
|