llm-testrunner-components 1.2.2 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/{app-chips_4.cjs.entry.js → app-chips_5.cjs.entry.js} +41 -6
- package/dist/cjs/app-chips_5.cjs.entry.js.map +1 -0
- package/dist/cjs/index.cjs.js +235 -44
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/cjs/llm-testrunner.cjs.js +1 -1
- package/dist/cjs/loader.cjs.js +1 -1
- package/dist/collection/collection-manifest.json +1 -0
- package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +22 -12
- package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +91 -30
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/chat-history.css +101 -0
- package/dist/collection/components/llm-test-runner/test-cases/chat-history.js +105 -0
- package/dist/collection/components/llm-test-runner/test-cases/chat-history.js.map +1 -0
- package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +37 -4
- package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +12 -2
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
- package/dist/collection/index.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluation-service.js +14 -7
- package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
- package/dist/collection/lib/form/components/app-chips.js +1 -1
- package/dist/collection/lib/form/components/app-select.js +1 -1
- package/dist/collection/lib/form/components/app-textarea.css +17 -0
- package/dist/collection/lib/form/components/app-textarea.js +4 -1
- package/dist/collection/lib/form/components/app-textarea.js.map +1 -1
- package/dist/collection/lib/import-export/test-suite-exporter.js +4 -0
- package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
- package/dist/collection/lib/test-cases/dynamic-expected-outcome-resolver.js +44 -0
- package/dist/collection/lib/test-cases/dynamic-expected-outcome-resolver.js.map +1 -0
- package/dist/collection/lib/test-cases/test-case-factory.js +2 -0
- package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-mutations.js +35 -0
- package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
- package/dist/collection/schemas/expected-outcome.js +15 -1
- package/dist/collection/schemas/expected-outcome.js.map +1 -1
- package/dist/collection/schemas/test-case.js +6 -0
- package/dist/collection/schemas/test-case.js.map +1 -1
- package/dist/collection/types/expected-outcome.js.map +1 -1
- package/dist/collection/types/llm-test-runner.js.map +1 -1
- package/dist/collection/types/test-case.js.map +1 -1
- package/dist/components/app-chips.js +1 -1
- package/dist/components/app-select.js +1 -1
- package/dist/components/app-textarea.js +1 -1
- package/dist/components/chat-history.d.ts +11 -0
- package/dist/components/chat-history.js +2 -0
- package/dist/components/chat-history.js.map +1 -0
- package/dist/components/index.js +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/{p-CJBscebi.js → p-B87Lt3z4.js} +3 -3
- package/dist/components/p-B87Lt3z4.js.map +1 -0
- package/dist/components/p-Bx2jqguC.js +2 -0
- package/dist/components/p-Bx2jqguC.js.map +1 -0
- package/dist/components/p-D2qDAxFN.js +2 -0
- package/dist/components/p-D2qDAxFN.js.map +1 -0
- package/dist/components/{p-Dv7cB5FU.js → p-D4dHUFN9.js} +2 -2
- package/dist/components/{p-CE5-1jfZ.js → p-eN2dLrsr.js} +2 -2
- package/dist/esm/{app-chips_4.entry.js → app-chips_5.entry.js} +41 -7
- package/dist/esm/app-chips_5.entry.js.map +1 -0
- package/dist/esm/index.js +235 -44
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm-testrunner.js +1 -1
- package/dist/esm/loader.js +1 -1
- package/dist/llm-testrunner/index.esm.js +2 -2
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
- package/dist/llm-testrunner/p-21202f12.entry.js +2 -0
- package/dist/llm-testrunner/p-21202f12.entry.js.map +1 -0
- package/dist/react/components.d.ts +6 -1
- package/dist/react/components.d.ts.map +1 -1
- package/dist/react/components.js +9 -0
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +6 -0
- package/dist/types/components/llm-test-runner/test-cases/chat-history.d.ts +14 -0
- package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +1 -0
- package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +6 -0
- package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +3 -0
- package/dist/types/components.d.ts +55 -0
- package/dist/types/index.d.ts +1 -1
- package/dist/types/lib/import-export/test-suite-exporter.d.ts +4 -0
- package/dist/types/lib/test-cases/dynamic-expected-outcome-resolver.d.ts +7 -0
- package/dist/types/lib/test-cases/test-case-mutations.d.ts +9 -1
- package/dist/types/schemas/expected-outcome.d.ts +16 -1
- package/dist/types/schemas/test-case.d.ts +34 -2
- package/dist/types/types/expected-outcome.d.ts +1 -1
- package/dist/types/types/llm-test-runner.d.ts +3 -2
- package/dist/types/types/test-case.d.ts +1 -1
- package/package.json +1 -1
- package/dist/cjs/app-chips_4.cjs.entry.js.map +0 -1
- package/dist/components/p-BZrzx5jG.js +0 -2
- package/dist/components/p-BZrzx5jG.js.map +0 -1
- package/dist/components/p-CJBscebi.js.map +0 -1
- package/dist/esm/app-chips_4.entry.js.map +0 -1
- package/dist/llm-testrunner/p-2cc09217.entry.js +0 -2
- package/dist/llm-testrunner/p-2cc09217.entry.js.map +0 -1
- /package/dist/components/{p-Dv7cB5FU.js.map → p-D4dHUFN9.js.map} +0 -0
- /package/dist/components/{p-CE5-1jfZ.js.map → p-eN2dLrsr.js.map} +0 -0
package/dist/esm/index.js
CHANGED
|
@@ -103,6 +103,10 @@ function formatTestSuiteAsJson(testCases) {
|
|
|
103
103
|
id: testCase.id,
|
|
104
104
|
question: testCase.question,
|
|
105
105
|
expectedOutcome: testCase.expectedOutcome,
|
|
106
|
+
chatHistory: {
|
|
107
|
+
enabled: testCase.chatHistory.enabled,
|
|
108
|
+
value: testCase.chatHistory.value,
|
|
109
|
+
},
|
|
106
110
|
}));
|
|
107
111
|
return JSON.stringify(exportData, null, 2);
|
|
108
112
|
}
|
|
@@ -300,6 +304,7 @@ function createTestCase(expectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA)
|
|
|
300
304
|
id: v4(),
|
|
301
305
|
question: '',
|
|
302
306
|
expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),
|
|
307
|
+
chatHistory: { enabled: false, value: '' },
|
|
303
308
|
isRunning: false,
|
|
304
309
|
};
|
|
305
310
|
}
|
|
@@ -359,6 +364,7 @@ function createExpectedOutcomeFromSchema(expectedOutcomeSchema) {
|
|
|
359
364
|
/**
 * Builds a test case object from input data.
 *
 * Every property of `data` is copied onto the result. `chatHistory` falls
 * back to a disabled, empty history when `data.chatHistory` is null or
 * undefined, and each entry of `data.expectedOutcome` is passed through
 * `normalizeExpectedOutcomeField`.
 *
 * @param {object} data - Input test case; `data.expectedOutcome` must be an array.
 * @returns {object} A new test case object.
 */
function createTestCaseFromInput(data) {
    const normalizedOutcome = data.expectedOutcome.map(normalizeExpectedOutcomeField);
    const chatHistory = data.chatHistory ?? { enabled: false, value: '' };
    return {
        ...data,
        chatHistory,
        expectedOutcome: normalizedOutcome,
    };
}
|
|
@@ -4952,6 +4958,7 @@ const optionalPositiveInt = number().int().positive().optional();
|
|
|
4952
4958
|
const optionalString = string().optional();
|
|
4953
4959
|
const selectOptionsSchema = array(nonEmptyString).min(1);
|
|
4954
4960
|
const optionalNumber = number().optional();
|
|
4961
|
+
// Schema for an expected-outcome mode: only the literals 'static' and 'dynamic' are accepted.
const expectedOutcomeModeSchema = _enum(['static', 'dynamic']);
|
|
4955
4962
|
const evaluationParametersSchema = object({
|
|
4956
4963
|
approach: _enum(EvaluationApproach),
|
|
4957
4964
|
threshold: optionalNumber,
|
|
@@ -5013,8 +5020,21 @@ const expectedOutcomeFieldSchema = discriminatedUnion('type', [
|
|
|
5013
5020
|
defaultFieldDefinitions.text.extend({
|
|
5014
5021
|
value: string(),
|
|
5015
5022
|
}),
|
|
5016
|
-
defaultFieldDefinitions.textarea
|
|
5023
|
+
defaultFieldDefinitions.textarea
|
|
5024
|
+
.extend({
|
|
5017
5025
|
value: string(),
|
|
5026
|
+
outcomeMode: expectedOutcomeModeSchema.default('static'),
|
|
5027
|
+
resolutionQuery: string().optional(),
|
|
5028
|
+
})
|
|
5029
|
+
.superRefine((field, ctx) => {
|
|
5030
|
+
if (field.outcomeMode === 'dynamic' &&
|
|
5031
|
+
(!field.resolutionQuery || field.resolutionQuery.trim().length === 0)) {
|
|
5032
|
+
ctx.addIssue({
|
|
5033
|
+
code: 'custom',
|
|
5034
|
+
path: ['resolutionQuery'],
|
|
5035
|
+
message: 'resolutionQuery is required when outcomeMode is dynamic.',
|
|
5036
|
+
});
|
|
5037
|
+
}
|
|
5018
5038
|
}),
|
|
5019
5039
|
defaultFieldDefinitions.chipsInput.extend({
|
|
5020
5040
|
value: array(string()).superRefine((values, ctx) => {
|
|
@@ -5048,16 +5068,22 @@ function validateExpectedOutcomeSchema(schema) {
|
|
|
5048
5068
|
}
|
|
5049
5069
|
}
|
|
5050
5070
|
|
|
5071
|
+
/**
 * Schema for a test case's chat history: a boolean `enabled` flag and a
 * string `value`. Both keys are declared without `.optional()`.
 */
const testCaseChatHistorySchema = object({
|
|
5072
|
+
enabled: boolean(),
|
|
5073
|
+
value: string(),
|
|
5074
|
+
});
|
|
5051
5075
|
/**
 * Schema for a single test case as supplied on input: string `id`, string
 * `question`, an `expectedOutcome` validated by `expectedOutcomeArraySchema`,
 * and an optional `chatHistory` (input that lacks it is still accepted).
 */
const testCaseInputSchema = object({
|
|
5052
5076
|
id: string(),
|
|
5053
5077
|
question: string(),
|
|
5054
5078
|
expectedOutcome: expectedOutcomeArraySchema,
|
|
5079
|
+
chatHistory: testCaseChatHistorySchema.optional(),
|
|
5055
5080
|
});
|
|
5056
5081
|
const testCaseInputArraySchema = array(testCaseInputSchema);
|
|
5057
5082
|
object({
|
|
5058
5083
|
id: string(),
|
|
5059
5084
|
question: string(),
|
|
5060
5085
|
expectedOutcome: expectedOutcomeArraySchema,
|
|
5086
|
+
chatHistory: testCaseChatHistorySchema,
|
|
5061
5087
|
output: string().optional(),
|
|
5062
5088
|
isRunning: boolean().optional(),
|
|
5063
5089
|
error: string().optional(),
|
|
@@ -5108,6 +5134,50 @@ function importTestSuite(jsonContent) {
|
|
|
5108
5134
|
}
|
|
5109
5135
|
}
|
|
5110
5136
|
|
|
5137
|
+
/** Error message used when a test case has dynamic fields but no usable resolver was supplied. */
const MISSING_RESOLVER_MESSAGE = 'resolveExpectedOutcome is required when a test case has dynamic expected outcomes.';

/**
 * Tells whether an expected-outcome field is a textarea in dynamic mode.
 *
 * @param {object|null|undefined} field - Expected-outcome field to inspect.
 * @returns {boolean} `true` only for `type === 'textarea'` with
 *   `outcomeMode === 'dynamic'`; a null or undefined field yields `false`
 *   instead of throwing.
 */
function isDynamicTextareaField(field) {
    return field?.type === 'textarea' && field?.outcomeMode === 'dynamic';
}

/**
 * Returns a copy of `testCase` whose dynamic textarea fields carry the
 * resolved values.
 *
 * @param {object} testCase - Test case whose `expectedOutcome` is patched.
 * @param {Array<{index: number, value: *}>} resolvedValues - Resolved values,
 *   each addressed by its position in `testCase.expectedOutcome`.
 * @returns {object} The same `testCase` reference when there is nothing to
 *   apply; otherwise a new object with a new `expectedOutcome` array.
 */
function applyResolvedDynamicValues(testCase, resolvedValues) {
    if (!resolvedValues || resolvedValues.length === 0) {
        return testCase;
    }
    const expectedOutcome = [...(testCase.expectedOutcome || [])];
    for (const resolved of resolvedValues) {
        const field = expectedOutcome[resolved.index];
        // Entries that are missing or are not dynamic textareas are left untouched.
        if (!isDynamicTextareaField(field)) {
            continue;
        }
        expectedOutcome[resolved.index] = {
            ...field,
            value: resolved.value,
        };
    }
    return {
        ...testCase,
        expectedOutcome,
    };
}

/**
 * Resolves the value of every dynamic textarea field of a test case.
 *
 * The resolver is called once per dynamic field, all calls running
 * concurrently, as `resolver(query, { testCase, fieldIndex })` where `query`
 * is the field's `resolutionQuery` (or `''` when it is unset).
 *
 * @param {object} testCase - Test case to resolve; it is not mutated.
 * @param {Function} [resolver] - Callback producing the value for one field.
 * @returns {Promise<object>} The same `testCase` when it has no dynamic
 *   fields; otherwise a copy with the resolved values applied.
 * @throws {Error} With `MISSING_RESOLVER_MESSAGE` when dynamic fields exist
 *   and `resolver` is not a function. Rejections from the resolver propagate.
 */
async function resolveDynamicExpectedOutcomes(testCase, resolver) {
    const dynamicFields = (testCase.expectedOutcome || []).flatMap((field, index) => (isDynamicTextareaField(field) ? [{ field, index }] : []));
    if (dynamicFields.length === 0) {
        return testCase;
    }
    // A truthy non-function would otherwise fail below with an opaque
    // "resolver is not a function" TypeError.
    if (typeof resolver !== 'function') {
        throw new Error(MISSING_RESOLVER_MESSAGE);
    }
    const resolvedValues = await Promise.all(dynamicFields.map(async ({ field, index }) => ({
        index,
        value: await resolver(field.resolutionQuery || '', { testCase, fieldIndex: index }),
    })));
    return applyResolvedDynamicValues(testCase, resolvedValues);
}
|
|
5180
|
+
|
|
5111
5181
|
function applyExpectedOutcomeChange(testCase, change) {
|
|
5112
5182
|
const { index } = change;
|
|
5113
5183
|
const expectedOutcome = [...(testCase.expectedOutcome || [])];
|
|
@@ -5120,6 +5190,9 @@ function applyExpectedOutcomeChange(testCase, change) {
|
|
|
5120
5190
|
if (target.type === 'chips-input') {
|
|
5121
5191
|
return testCase;
|
|
5122
5192
|
}
|
|
5193
|
+
if (target.type === 'textarea' && target.outcomeMode === 'dynamic') {
|
|
5194
|
+
return testCase;
|
|
5195
|
+
}
|
|
5123
5196
|
expectedOutcome[index] = {
|
|
5124
5197
|
...target,
|
|
5125
5198
|
value: change.value,
|
|
@@ -5148,6 +5221,38 @@ function applyExpectedOutcomeChange(testCase, change) {
|
|
|
5148
5221
|
}
|
|
5149
5222
|
case 'set-evaluation-approach':
|
|
5150
5223
|
return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
|
|
5224
|
+
case 'set-outcome-mode': {
|
|
5225
|
+
if (target.type !== 'textarea') {
|
|
5226
|
+
return testCase;
|
|
5227
|
+
}
|
|
5228
|
+
const mode = change.value;
|
|
5229
|
+
if (mode === 'static') {
|
|
5230
|
+
const { resolutionQuery: _, ...rest } = target;
|
|
5231
|
+
expectedOutcome[index] = {
|
|
5232
|
+
...rest,
|
|
5233
|
+
outcomeMode: 'static',
|
|
5234
|
+
value: '',
|
|
5235
|
+
};
|
|
5236
|
+
}
|
|
5237
|
+
else {
|
|
5238
|
+
expectedOutcome[index] = {
|
|
5239
|
+
...target,
|
|
5240
|
+
outcomeMode: 'dynamic',
|
|
5241
|
+
value: '',
|
|
5242
|
+
};
|
|
5243
|
+
}
|
|
5244
|
+
return { ...testCase, expectedOutcome };
|
|
5245
|
+
}
|
|
5246
|
+
case 'set-resolution-query': {
|
|
5247
|
+
if (target.type !== 'textarea' || target.outcomeMode !== 'dynamic') {
|
|
5248
|
+
return testCase;
|
|
5249
|
+
}
|
|
5250
|
+
expectedOutcome[index] = {
|
|
5251
|
+
...target,
|
|
5252
|
+
resolutionQuery: change.value,
|
|
5253
|
+
};
|
|
5254
|
+
return { ...testCase, expectedOutcome };
|
|
5255
|
+
}
|
|
5151
5256
|
}
|
|
5152
5257
|
}
|
|
5153
5258
|
/**
|
|
@@ -30015,13 +30120,20 @@ class EvaluationService {
|
|
|
30015
30120
|
console.warn('⚠️ No output to evaluate for test case:', testCase.id);
|
|
30016
30121
|
return;
|
|
30017
30122
|
}
|
|
30018
|
-
const fields = (testCase.expectedOutcome || []).
|
|
30019
|
-
|
|
30020
|
-
|
|
30021
|
-
|
|
30022
|
-
|
|
30023
|
-
|
|
30024
|
-
|
|
30123
|
+
const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
|
|
30124
|
+
if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
|
|
30125
|
+
return [];
|
|
30126
|
+
}
|
|
30127
|
+
return [
|
|
30128
|
+
{
|
|
30129
|
+
index,
|
|
30130
|
+
label: field.label,
|
|
30131
|
+
type: field.type,
|
|
30132
|
+
expectedValue: getFieldExpectedValue(field),
|
|
30133
|
+
evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
|
|
30134
|
+
},
|
|
30135
|
+
];
|
|
30136
|
+
});
|
|
30025
30137
|
const evaluationRequest = {
|
|
30026
30138
|
testCaseId: testCase.id,
|
|
30027
30139
|
question: testCase.question,
|
|
@@ -30116,7 +30228,7 @@ var FormFieldType;
|
|
|
30116
30228
|
FormFieldType["SELECT"] = "select";
|
|
30117
30229
|
})(FormFieldType || (FormFieldType = {}));
|
|
30118
30230
|
|
|
30119
|
-
const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange, }) => {
|
|
30231
|
+
const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, onExpectedOutcomeChange, }) => {
|
|
30120
30232
|
const emit = (detail) => onExpectedOutcomeChange({
|
|
30121
30233
|
detail,
|
|
30122
30234
|
});
|
|
@@ -30129,6 +30241,23 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
|
|
|
30129
30241
|
optionList,
|
|
30130
30242
|
defaultValue: EvaluationApproach.EXACT,
|
|
30131
30243
|
});
|
|
30244
|
+
/**
 * Builds the select-field config for choosing a field's outcome mode.
 *
 * @param {number} index - Position of the expected-outcome field; used to
 *   make the control name unique.
 * @returns {object} Config offering 'static' and 'dynamic', defaulting to 'static'.
 */
const buildOutcomeModeConfig = (index) => {
    const modes = ['static', 'dynamic'];
    const [defaultMode] = modes;
    return {
        name: `expectedOutcomeMode-${index}`,
        fieldType: FormFieldType.SELECT,
        label: 'Outcome Mode',
        placeholder: 'Select outcome mode',
        required: true,
        optionList: modes,
        defaultValue: defaultMode,
    };
};
|
|
30253
|
+
/**
 * Builds the textarea config for editing a dynamic field's resolution query.
 *
 * @param {number} index - Position of the expected-outcome field; used to
 *   make the control name unique.
 * @returns {object} Config for an optional two-row textarea.
 */
const buildResolutionQueryConfig = (index) => {
    const controlName = `expectedOutcomeResolutionQuery-${index}`;
    return {
        name: controlName,
        fieldType: FormFieldType.TEXT_AREA,
        label: 'Resolution Query',
        placeholder: 'Query used to resolve expected value',
        required: false,
        rows: 2,
    };
};
|
|
30132
30261
|
const renderEvaluationSelector = (field, index) => {
|
|
30133
30262
|
const optionList = getAllowedApproachesForFieldType(field.type);
|
|
30134
30263
|
return (h("app-select", { config: buildEvaluationConfig(index, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
|
|
@@ -30140,12 +30269,17 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
|
|
|
30140
30269
|
};
|
|
30141
30270
|
return (h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index) => {
|
|
30142
30271
|
if (field.type === 'textarea') {
|
|
30272
|
+
const isDynamic = dynamicResolutionSupported && field.outcomeMode === 'dynamic';
|
|
30143
30273
|
const config = {
|
|
30144
30274
|
name: `expectedOutcome-${index}`,
|
|
30145
30275
|
fieldType: FormFieldType.TEXT_AREA,
|
|
30146
30276
|
label: field.label,
|
|
30147
|
-
placeholder: field.placeholder,
|
|
30148
|
-
required:
|
|
30277
|
+
placeholder: isDynamic ? 'Resolved on run' : field.placeholder,
|
|
30278
|
+
required: !isDynamic,
|
|
30279
|
+
readOnly: isDynamic,
|
|
30280
|
+
helpText: isDynamic
|
|
30281
|
+
? 'Filled automatically when the test is run'
|
|
30282
|
+
: undefined,
|
|
30149
30283
|
rows: field.rows || 2,
|
|
30150
30284
|
};
|
|
30151
30285
|
return (h("div", { class: "expected-outcome-renderer__group" }, h("app-textarea", { config: config, value: field.value, onValueChange: (e) => emit({
|
|
@@ -30153,7 +30287,18 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
|
|
|
30153
30287
|
index,
|
|
30154
30288
|
operation: 'set-value',
|
|
30155
30289
|
value: e.detail.value,
|
|
30156
|
-
}) }),
|
|
30290
|
+
}) }), dynamicResolutionSupported && (h("app-select", { config: buildOutcomeModeConfig(index), value: field.outcomeMode || 'static', onValueChange: (e) => emit({
|
|
30291
|
+
testCaseId,
|
|
30292
|
+
index,
|
|
30293
|
+
operation: 'set-outcome-mode',
|
|
30294
|
+
value: e.detail.value,
|
|
30295
|
+
}) })), dynamicResolutionSupported &&
|
|
30296
|
+
field.outcomeMode === 'dynamic' && (h("app-textarea", { config: buildResolutionQueryConfig(index), value: field.resolutionQuery || '', onValueChange: (e) => emit({
|
|
30297
|
+
testCaseId,
|
|
30298
|
+
index,
|
|
30299
|
+
operation: 'set-resolution-query',
|
|
30300
|
+
value: e.detail.value,
|
|
30301
|
+
}) })), !isDynamic && renderEvaluationSelector(field, index)));
|
|
30157
30302
|
}
|
|
30158
30303
|
if (field.type === 'chips-input') {
|
|
30159
30304
|
const config = {
|
|
@@ -30200,7 +30345,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
|
|
|
30200
30345
|
})));
|
|
30201
30346
|
};
|
|
30202
30347
|
|
|
30203
|
-
const LLMTestCaseRow = ({ testCase, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, }) => {
|
|
30348
|
+
const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
|
|
30204
30349
|
const questionConfig = {
|
|
30205
30350
|
name: 'question',
|
|
30206
30351
|
fieldType: FormFieldType.TEXT_AREA,
|
|
@@ -30216,11 +30361,21 @@ const LLMTestCaseRow = ({ testCase, onRun, onDelete, handleTestCaseChange, onExp
|
|
|
30216
30361
|
key: 'question',
|
|
30217
30362
|
value: e.detail.value,
|
|
30218
30363
|
},
|
|
30219
|
-
}) }), h(
|
|
30364
|
+
}) }), h("chat-history", { chatHistoryEnabled: testCase.chatHistory?.enabled ?? false, chatHistoryValue: testCase.chatHistory?.value ?? '', onChatHistoryChange: (e) => {
|
|
30365
|
+
const { enabled, value } = e
|
|
30366
|
+
.detail;
|
|
30367
|
+
onChatHistoryChange({
|
|
30368
|
+
detail: {
|
|
30369
|
+
testCaseId: testCase.id,
|
|
30370
|
+
enabled,
|
|
30371
|
+
value,
|
|
30372
|
+
},
|
|
30373
|
+
});
|
|
30374
|
+
} }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
|
|
30220
30375
|
};
|
|
30221
30376
|
|
|
30222
|
-
const LLMTestCases = ({ testCases, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
|
|
30223
|
-
return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
|
|
30377
|
+
/**
 * Functional component rendering the test-case table: a row of column
 * headers, one `LLMTestCaseRow` per test case, and an "+ Add Question" button.
 *
 * @param {object} props
 * @param {Array<object>} props.testCases - Test cases to render, in order.
 * @param {boolean} [props.dynamicResolutionSupported=false] - Forwarded to every row.
 * @param {Function} props.onRun - Forwarded to every row.
 * @param {Function} props.onDelete - Forwarded to every row.
 * @param {Function} props.onAddTestCase - Click handler of the add button.
 * @param {Function} props.handleTestCaseChange - Forwarded to every row.
 * @param {Function} props.onExpectedOutcomeChange - Forwarded to every row.
 * @param {Function} props.onChatHistoryChange - Forwarded to every row.
 * @returns {*} The vnode produced by `h` for the `test-cases` container.
 */
const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
    const headerCells = ['Input', 'Output', 'Evaluation', 'Actions'].map((title) => h("div", { class: "test-cases__column-header" }, title));
    const headerRow = h("div", { class: "test-cases__column-headers" }, ...headerCells);
    const rows = testCases.map((testCase) => h(LLMTestCaseRow, {
        testCase,
        dynamicResolutionSupported,
        onRun,
        onDelete,
        handleTestCaseChange,
        onExpectedOutcomeChange,
        onChatHistoryChange,
    }));
    const addButton = h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question");
    const addSection = h("div", { class: "test-cases__add-section" }, addButton);
    return h("div", { class: "test-cases" }, headerRow, rows, addSection);
};
|
|
30225
30380
|
|
|
30226
30381
|
const tokensCss = () => `:host{--spacing:0.25rem;--spacing-1:calc(var(--spacing) * 1);--spacing-2:calc(var(--spacing) * 2);--spacing-3:calc(var(--spacing) * 3);--spacing-4:calc(var(--spacing) * 4);--spacing-5:calc(var(--spacing) * 5);--spacing-6:calc(var(--spacing) * 6);--spacing-8:calc(var(--spacing) * 8);--spacing-10:calc(var(--spacing) * 10);--spacing-12:calc(var(--spacing) * 12);--spacing-16:calc(var(--spacing) * 16);--spacing-20:calc(var(--spacing) * 20);--spacing-24:calc(var(--spacing) * 24);--radius-none:0;--radius-sm:0.125rem;--radius-md:0.375rem;--radius-lg:0.5rem;--radius-xl:0.75rem;--radius-2xl:1rem;--radius-3xl:1.5rem;--radius-full:9999px;--radius:var(--radius-lg);--font-size-xs:0.75rem;--font-size-sm:0.875rem;--font-size-base:1rem;--font-size-lg:1.125rem;--font-size-xl:1.25rem;--font-size-2xl:1.5rem;--font-size-3xl:1.875rem;--font-size-4xl:2.25rem;--font-weight-normal:400;--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--line-height-none:1;--line-height-tight:1.25;--line-height-snug:1.375;--line-height-normal:1.5;--line-height-relaxed:1.625;--line-height-loose:2;--letter-spacing-tight:-0.025em;--letter-spacing-normal:0;--letter-spacing-wide:0.05em;--shadow-sm:0 1px 2px 0 rgba(0, 0, 0, 0.05);--shadow-md:0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);--shadow-lg:0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);--shadow-xl:0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);--shadow-2xl:0 25px 50px -12px rgba(0, 0, 0, 
0.25);--border-width:1px;--z-base:0;--z-dropdown:1000;--z-sticky:1100;--z-modal:1200;--z-popover:1300;--z-tooltip:1400;--opacity-disabled:0.5;--opacity-hover:0.8;--opacity-muted:0.6;--max-w-sm:24rem;--max-w-md:28rem;--max-w-lg:32rem;--max-w-xl:42rem;--max-w-2xl:48rem;--max-w-full:100%;--breakpoint-sm:640px;--breakpoint-md:768px;--breakpoint-lg:1024px;--breakpoint-xl:1280px;--breakpoint-2xl:1536px;--background:#ffffff;--foreground:#0a0a0a;--card:#ffffff;--card-foreground:#0a0a0a;--popover:#ffffff;--popover-foreground:#0a0a0a;--primary:#0a0a0a;--primary-foreground:#fafafa;--secondary:#f4f4f5;--secondary-foreground:#0a0a0a;--muted:#f4f4f5;--muted-foreground:#71717a;--accent:#f4f4f5;--accent-foreground:#0a0a0a;--destructive:#ef4444;--destructive-foreground:#fafafa;--border:#e4e4e7;--input:#e4e4e7;--ring:#3b82f6;--success:#10b981;--success-foreground:#fafafa;--warning:#f59e0b;--warning-foreground:#fafafa;--info:#3b82f6;--info-foreground:#fafafa}:host([data-theme='dark']){--background:#0a0a0a;--foreground:#fafafa;--card:#171717;--card-foreground:#fafafa;--popover:#171717;--popover-foreground:#fafafa;--primary:#fafafa;--primary-foreground:#0a0a0a;--secondary:#27272a;--secondary-foreground:#fafafa;--muted:#27272a;--muted-foreground:#a1a1aa;--accent:#27272a;--accent-foreground:#fafafa;--destructive:#dc2626;--destructive-foreground:#fafafa;--border:#27272a;--input:#27272a;--ring:#3b82f6;--success:#059669;--success-foreground:#fafafa;--warning:#d97706;--warning-foreground:#fafafa;--info:#2563eb;--info-foreground:#fafafa}`;
|
|
@@ -30256,6 +30411,7 @@ const LLMTestRunner = class {
|
|
|
30256
30411
|
delayMs = 500;
|
|
30257
30412
|
useSave = false;
|
|
30258
30413
|
usePromptEditor = false;
|
|
30414
|
+
resolveExpectedOutcome;
|
|
30259
30415
|
initialTestCases;
|
|
30260
30416
|
defaultExpectedOutcomeSchema;
|
|
30261
30417
|
testCases = [
|
|
@@ -30269,6 +30425,7 @@ const LLMTestRunner = class {
|
|
|
30269
30425
|
value: '',
|
|
30270
30426
|
},
|
|
30271
30427
|
],
|
|
30428
|
+
chatHistory: { enabled: false, value: '' },
|
|
30272
30429
|
isRunning: false,
|
|
30273
30430
|
},
|
|
30274
30431
|
];
|
|
@@ -30326,6 +30483,12 @@ const LLMTestRunner = class {
|
|
|
30326
30483
|
const { testCaseId, key, value } = event.detail;
|
|
30327
30484
|
this.testCases = this.testCases.map(tc => tc.id === testCaseId ? { ...tc, [key]: value } : tc);
|
|
30328
30485
|
};
|
|
30486
|
+
/**
 * Stores a chat-history change on the matching test case.
 *
 * Reads `testCaseId`, `enabled` and `value` from `event.detail` and replaces
 * that test case's `chatHistory` with `{ enabled, value }` via `updateTestCase`.
 *
 * @param {{detail: {testCaseId: string, enabled: boolean, value: string}}} event
 */
handleChatHistoryChange = (event) => {
    const detail = event.detail;
    const chatHistory = { enabled: detail.enabled, value: detail.value };
    this.updateTestCase(detail.testCaseId, { chatHistory });
};
|
|
30329
30492
|
addNewTestCase() {
|
|
30330
30493
|
try {
|
|
30331
30494
|
const schema = this.getResolvedExpectedOutcomeSchema();
|
|
@@ -30342,38 +30505,66 @@ const LLMTestRunner = class {
|
|
|
30342
30505
|
updateTestCase(id, updates) {
|
|
30343
30506
|
this.testCases = this.testCases.map(tc => tc.id === id ? { ...tc, ...updates } : tc);
|
|
30344
30507
|
}
|
|
30508
|
+
requestLlmText(testCase) {
|
|
30509
|
+
return new Promise((resolve, reject) => {
|
|
30510
|
+
const payload = {
|
|
30511
|
+
prompt: testCase.question,
|
|
30512
|
+
resolve,
|
|
30513
|
+
reject,
|
|
30514
|
+
};
|
|
30515
|
+
if (testCase.chatHistory?.enabled) {
|
|
30516
|
+
payload.chatHistory = testCase.chatHistory.value;
|
|
30517
|
+
}
|
|
30518
|
+
this.llmRequest.emit(payload);
|
|
30519
|
+
});
|
|
30520
|
+
}
|
|
30521
|
+
throwError(reason) {
|
|
30522
|
+
throw reason instanceof Error ? reason : new Error(String(reason));
|
|
30523
|
+
}
|
|
30524
|
+
addErrorMessage(reason, fallback) {
|
|
30525
|
+
return reason instanceof Error ? reason.message : fallback;
|
|
30526
|
+
}
|
|
30345
30527
|
async runSingleTest(testCase) {
|
|
30346
30528
|
const startTime = Date.now();
|
|
30347
30529
|
this.updateTestCase(testCase.id, { isRunning: true });
|
|
30348
|
-
|
|
30349
|
-
this.
|
|
30350
|
-
|
|
30351
|
-
|
|
30352
|
-
|
|
30353
|
-
|
|
30354
|
-
|
|
30355
|
-
|
|
30356
|
-
|
|
30357
|
-
|
|
30358
|
-
|
|
30359
|
-
});
|
|
30360
|
-
await this.evaluateResponse({
|
|
30361
|
-
...testCase,
|
|
30362
|
-
output: aiResponse,
|
|
30363
|
-
responseTime: responseTime,
|
|
30364
|
-
});
|
|
30365
|
-
resolve();
|
|
30366
|
-
},
|
|
30367
|
-
reject: (error) => {
|
|
30368
|
-
this.updateTestCase(testCase.id, {
|
|
30369
|
-
isRunning: false,
|
|
30370
|
-
output: null,
|
|
30371
|
-
error: error instanceof Error ? error.message : 'Unknown error',
|
|
30372
|
-
});
|
|
30373
|
-
reject(error);
|
|
30374
|
-
},
|
|
30530
|
+
const [llmSettled, resolutionSettled] = await Promise.allSettled([
|
|
30531
|
+
this.requestLlmText(testCase),
|
|
30532
|
+
resolveDynamicExpectedOutcomes(testCase, this.resolveExpectedOutcome),
|
|
30533
|
+
]);
|
|
30534
|
+
const responseTime = Date.now() - startTime;
|
|
30535
|
+
if (llmSettled.status === 'rejected') {
|
|
30536
|
+
this.updateTestCase(testCase.id, {
|
|
30537
|
+
isRunning: false,
|
|
30538
|
+
output: null,
|
|
30539
|
+
error: this.addErrorMessage(llmSettled.reason, 'Unknown error'),
|
|
30540
|
+
responseTime,
|
|
30375
30541
|
});
|
|
30542
|
+
this.throwError(llmSettled.reason);
|
|
30543
|
+
}
|
|
30544
|
+
const aiResponse = llmSettled.value;
|
|
30545
|
+
if (resolutionSettled.status === 'rejected') {
|
|
30546
|
+
this.updateTestCase(testCase.id, {
|
|
30547
|
+
isRunning: false,
|
|
30548
|
+
output: aiResponse,
|
|
30549
|
+
error: this.addErrorMessage(resolutionSettled.reason, 'Failed to resolve dynamic expected outcome.'),
|
|
30550
|
+
responseTime,
|
|
30551
|
+
});
|
|
30552
|
+
this.throwError(resolutionSettled.reason);
|
|
30553
|
+
}
|
|
30554
|
+
const resolvedTestCase = resolutionSettled.value;
|
|
30555
|
+
const forEvaluationTestCase = {
|
|
30556
|
+
...resolvedTestCase,
|
|
30557
|
+
output: aiResponse,
|
|
30558
|
+
responseTime,
|
|
30559
|
+
};
|
|
30560
|
+
this.updateTestCase(testCase.id, {
|
|
30561
|
+
isRunning: false,
|
|
30562
|
+
output: aiResponse,
|
|
30563
|
+
error: null,
|
|
30564
|
+
responseTime,
|
|
30565
|
+
expectedOutcome: forEvaluationTestCase.expectedOutcome,
|
|
30376
30566
|
});
|
|
30567
|
+
await this.evaluateResponse(forEvaluationTestCase);
|
|
30377
30568
|
}
|
|
30378
30569
|
deleteTestCase(id) {
|
|
30379
30570
|
this.testCases = this.testCases.filter(tc => tc.id !== id);
|
|
@@ -30484,7 +30675,7 @@ const LLMTestRunner = class {
|
|
|
30484
30675
|
}
|
|
30485
30676
|
}
|
|
30486
30677
|
render() {
|
|
30487
|
-
return (h("div", { key: '
|
|
30678
|
+
return (h("div", { key: 'cc808096f929b2e1c570c53144aab195d177c187', class: "test-runner-container" }, h(LLMTestRunnerHeader, { key: 'b91cf3df7df0e95bfd4908a2f91c7310b5b7a09a', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), h(ErrorMessage, { key: 'c7991497173fa9843e7aa42f5283d0897ddff2e2', message: this.error, onClear: () => (this.error = '') }), h("div", { key: '2b57132564442b8047d8672c6adcba62cdc9ae87', class: "test-runner-container__content" }, h(LLMTestCases, { key: '146e9d8c76a34980a2a274dd856887c22e1ed0e9', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
|
|
30488
30679
|
}
|
|
30489
30680
|
};
|
|
30490
30681
|
LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));
|