llm-testrunner-components 1.2.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/dist/cjs/{app-chips_4.cjs.entry.js → app-chips_5.cjs.entry.js} +38 -6
- package/dist/cjs/app-chips_5.cjs.entry.js.map +1 -0
- package/dist/cjs/index.cjs.js +499 -68
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/cjs/llm-testrunner.cjs.js +1 -1
- package/dist/cjs/loader.cjs.js +1 -1
- package/dist/collection/collection-manifest.json +1 -0
- package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +22 -12
- package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +59 -15
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/chat-history.css +101 -0
- package/dist/collection/components/llm-test-runner/test-cases/chat-history.js +105 -0
- package/dist/collection/components/llm-test-runner/test-cases/chat-history.js.map +1 -0
- package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +45 -5
- package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +21 -0
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +12 -2
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js.map +1 -1
- package/dist/collection/index.js.map +1 -1
- package/dist/collection/lib/evaluation/actual-value-resolver.js +52 -0
- package/dist/collection/lib/evaluation/actual-value-resolver.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluation-engine.js +1 -1
- package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluation-service.js +55 -17
- package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
- package/dist/collection/lib/evaluation/types.js.map +1 -1
- package/dist/collection/lib/form/components/app-chips.js +1 -1
- package/dist/collection/lib/form/components/app-select.js +1 -1
- package/dist/collection/lib/form/components/app-textarea.js +2 -2
- package/dist/collection/lib/import-export/test-suite-exporter.js +4 -0
- package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
- package/dist/collection/lib/import-export/test-suite-importer.js +7 -1
- package/dist/collection/lib/import-export/test-suite-importer.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-factory.js +7 -0
- package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-mutations.js +58 -23
- package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
- package/dist/collection/schemas/expected-outcome.js +39 -0
- package/dist/collection/schemas/expected-outcome.js.map +1 -1
- package/dist/collection/schemas/model-response.js +7 -0
- package/dist/collection/schemas/model-response.js.map +1 -0
- package/dist/collection/schemas/test-case.js +8 -1
- package/dist/collection/schemas/test-case.js.map +1 -1
- package/dist/collection/types/expected-outcome.js.map +1 -1
- package/dist/collection/types/llm-test-runner.js.map +1 -1
- package/dist/collection/types/test-case.js.map +1 -1
- package/dist/components/app-chips.js +1 -1
- package/dist/components/app-select.js +1 -1
- package/dist/components/app-textarea.js +1 -1
- package/dist/components/chat-history.d.ts +11 -0
- package/dist/components/chat-history.js +2 -0
- package/dist/components/chat-history.js.map +1 -0
- package/dist/components/index.js +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/{p-CVtKFBJl.js → p-D2qDAxFN.js} +2 -2
- package/dist/components/{p-Dv7cB5FU.js → p-D4dHUFN9.js} +2 -2
- package/dist/components/{p-CE5-1jfZ.js → p-eN2dLrsr.js} +2 -2
- package/dist/components/p-kmtfMXcQ.js +2 -0
- package/dist/components/p-kmtfMXcQ.js.map +1 -0
- package/dist/components/{p-BcygfrMf.js → p-wzA48RFK.js} +3 -3
- package/dist/components/p-wzA48RFK.js.map +1 -0
- package/dist/esm/{app-chips_4.entry.js → app-chips_5.entry.js} +38 -7
- package/dist/esm/app-chips_5.entry.js.map +1 -0
- package/dist/esm/index.js +499 -68
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm-testrunner.js +1 -1
- package/dist/esm/loader.js +1 -1
- package/dist/llm-testrunner/index.esm.js +2 -2
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
- package/dist/llm-testrunner/p-5bf1fc78.entry.js +2 -0
- package/dist/llm-testrunner/p-5bf1fc78.entry.js.map +1 -0
- package/dist/react/components.d.ts +6 -1
- package/dist/react/components.d.ts.map +1 -1
- package/dist/react/components.js +9 -0
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +4 -4
- package/dist/types/components/llm-test-runner/test-cases/chat-history.d.ts +14 -0
- package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +1 -0
- package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +6 -0
- package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +3 -0
- package/dist/types/components/llm-test-runner/test-cases/output/response-output.d.ts +2 -1
- package/dist/types/components.d.ts +55 -2
- package/dist/types/index.d.ts +1 -1
- package/dist/types/lib/evaluation/actual-value-resolver.d.ts +9 -0
- package/dist/types/lib/evaluation/evaluation-service.d.ts +2 -2
- package/dist/types/lib/evaluation/types.d.ts +1 -1
- package/dist/types/lib/import-export/test-suite-exporter.d.ts +4 -0
- package/dist/types/lib/import-export/test-suite-importer.d.ts +1 -1
- package/dist/types/lib/test-cases/test-case-mutations.d.ts +10 -1
- package/dist/types/schemas/expected-outcome.d.ts +116 -0
- package/dist/types/schemas/model-response.d.ts +7 -0
- package/dist/types/schemas/test-case.d.ts +93 -1
- package/dist/types/types/expected-outcome.d.ts +1 -1
- package/dist/types/types/llm-test-runner.d.ts +6 -3
- package/dist/types/types/test-case.d.ts +1 -1
- package/package.json +1 -1
- package/dist/cjs/app-chips_4.cjs.entry.js.map +0 -1
- package/dist/components/p-BcygfrMf.js.map +0 -1
- package/dist/esm/app-chips_4.entry.js.map +0 -1
- package/dist/llm-testrunner/p-5df053b4.entry.js +0 -2
- package/dist/llm-testrunner/p-5df053b4.entry.js.map +0 -1
- /package/dist/components/{p-CVtKFBJl.js.map → p-D2qDAxFN.js.map} +0 -0
- /package/dist/components/{p-Dv7cB5FU.js.map → p-D4dHUFN9.js.map} +0 -0
- /package/dist/components/{p-CE5-1jfZ.js.map → p-eN2dLrsr.js.map} +0 -0
package/dist/cjs/index.cjs.js
CHANGED
|
@@ -106,6 +106,10 @@ function formatTestSuiteAsJson(testCases) {
|
|
|
106
106
|
id: testCase.id,
|
|
107
107
|
question: testCase.question,
|
|
108
108
|
expectedOutcome: testCase.expectedOutcome,
|
|
109
|
+
chatHistory: {
|
|
110
|
+
enabled: testCase.chatHistory.enabled,
|
|
111
|
+
value: testCase.chatHistory.value,
|
|
112
|
+
},
|
|
109
113
|
}));
|
|
110
114
|
return JSON.stringify(exportData, null, 2);
|
|
111
115
|
}
|
|
@@ -291,6 +295,7 @@ const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
|
|
|
291
295
|
function normalizeExpectedOutcomeField(field) {
|
|
292
296
|
return {
|
|
293
297
|
...field,
|
|
298
|
+
evaluationSource: field.evaluationSource || { type: 'text' },
|
|
294
299
|
evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
|
|
295
300
|
};
|
|
296
301
|
}
|
|
@@ -303,6 +308,7 @@ function createTestCase(expectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA)
|
|
|
303
308
|
id: v4(),
|
|
304
309
|
question: '',
|
|
305
310
|
expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),
|
|
311
|
+
chatHistory: { enabled: false, value: '' },
|
|
306
312
|
isRunning: false,
|
|
307
313
|
};
|
|
308
314
|
}
|
|
@@ -313,6 +319,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
313
319
|
type: 'text',
|
|
314
320
|
label: schemaField.label,
|
|
315
321
|
placeholder: schemaField.placeholder,
|
|
322
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
316
323
|
value: '',
|
|
317
324
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
318
325
|
};
|
|
@@ -321,6 +328,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
321
328
|
type: 'textarea',
|
|
322
329
|
label: schemaField.label,
|
|
323
330
|
placeholder: schemaField.placeholder,
|
|
331
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
324
332
|
rows: schemaField.rows,
|
|
325
333
|
value: '',
|
|
326
334
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
@@ -330,6 +338,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
330
338
|
type: 'chips-input',
|
|
331
339
|
label: schemaField.label,
|
|
332
340
|
placeholder: schemaField.placeholder,
|
|
341
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
333
342
|
value: [],
|
|
334
343
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
335
344
|
};
|
|
@@ -338,6 +347,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
338
347
|
type: 'select',
|
|
339
348
|
label: schemaField.label,
|
|
340
349
|
placeholder: schemaField.placeholder,
|
|
350
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
341
351
|
value: schemaField.options[0],
|
|
342
352
|
options: schemaField.options,
|
|
343
353
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
@@ -362,6 +372,7 @@ function createExpectedOutcomeFromSchema(expectedOutcomeSchema) {
|
|
|
362
372
|
function createTestCaseFromInput(data) {
|
|
363
373
|
return {
|
|
364
374
|
...data,
|
|
375
|
+
chatHistory: data.chatHistory ?? { enabled: false, value: '' },
|
|
365
376
|
expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),
|
|
366
377
|
};
|
|
367
378
|
}
|
|
@@ -2566,6 +2577,122 @@ function handleIntersectionResults(result, left, right) {
|
|
|
2566
2577
|
result.value = merged.data;
|
|
2567
2578
|
return result;
|
|
2568
2579
|
}
|
|
2580
|
+
const $ZodRecord = /*@__PURE__*/ $constructor("$ZodRecord", (inst, def) => {
|
|
2581
|
+
$ZodType.init(inst, def);
|
|
2582
|
+
inst._zod.parse = (payload, ctx) => {
|
|
2583
|
+
const input = payload.value;
|
|
2584
|
+
if (!isPlainObject(input)) {
|
|
2585
|
+
payload.issues.push({
|
|
2586
|
+
expected: "record",
|
|
2587
|
+
code: "invalid_type",
|
|
2588
|
+
input,
|
|
2589
|
+
inst,
|
|
2590
|
+
});
|
|
2591
|
+
return payload;
|
|
2592
|
+
}
|
|
2593
|
+
const proms = [];
|
|
2594
|
+
const values = def.keyType._zod.values;
|
|
2595
|
+
if (values) {
|
|
2596
|
+
payload.value = {};
|
|
2597
|
+
const recordKeys = new Set();
|
|
2598
|
+
for (const key of values) {
|
|
2599
|
+
if (typeof key === "string" || typeof key === "number" || typeof key === "symbol") {
|
|
2600
|
+
recordKeys.add(typeof key === "number" ? key.toString() : key);
|
|
2601
|
+
const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
|
|
2602
|
+
if (result instanceof Promise) {
|
|
2603
|
+
proms.push(result.then((result) => {
|
|
2604
|
+
if (result.issues.length) {
|
|
2605
|
+
payload.issues.push(...prefixIssues(key, result.issues));
|
|
2606
|
+
}
|
|
2607
|
+
payload.value[key] = result.value;
|
|
2608
|
+
}));
|
|
2609
|
+
}
|
|
2610
|
+
else {
|
|
2611
|
+
if (result.issues.length) {
|
|
2612
|
+
payload.issues.push(...prefixIssues(key, result.issues));
|
|
2613
|
+
}
|
|
2614
|
+
payload.value[key] = result.value;
|
|
2615
|
+
}
|
|
2616
|
+
}
|
|
2617
|
+
}
|
|
2618
|
+
let unrecognized;
|
|
2619
|
+
for (const key in input) {
|
|
2620
|
+
if (!recordKeys.has(key)) {
|
|
2621
|
+
unrecognized = unrecognized ?? [];
|
|
2622
|
+
unrecognized.push(key);
|
|
2623
|
+
}
|
|
2624
|
+
}
|
|
2625
|
+
if (unrecognized && unrecognized.length > 0) {
|
|
2626
|
+
payload.issues.push({
|
|
2627
|
+
code: "unrecognized_keys",
|
|
2628
|
+
input,
|
|
2629
|
+
inst,
|
|
2630
|
+
keys: unrecognized,
|
|
2631
|
+
});
|
|
2632
|
+
}
|
|
2633
|
+
}
|
|
2634
|
+
else {
|
|
2635
|
+
payload.value = {};
|
|
2636
|
+
for (const key of Reflect.ownKeys(input)) {
|
|
2637
|
+
if (key === "__proto__")
|
|
2638
|
+
continue;
|
|
2639
|
+
let keyResult = def.keyType._zod.run({ value: key, issues: [] }, ctx);
|
|
2640
|
+
if (keyResult instanceof Promise) {
|
|
2641
|
+
throw new Error("Async schemas not supported in object keys currently");
|
|
2642
|
+
}
|
|
2643
|
+
// Numeric string fallback: if key is a numeric string and failed, retry with Number(key)
|
|
2644
|
+
// This handles z.number(), z.literal([1, 2, 3]), and unions containing numeric literals
|
|
2645
|
+
const checkNumericKey = typeof key === "string" && number$1.test(key) && keyResult.issues.length;
|
|
2646
|
+
if (checkNumericKey) {
|
|
2647
|
+
const retryResult = def.keyType._zod.run({ value: Number(key), issues: [] }, ctx);
|
|
2648
|
+
if (retryResult instanceof Promise) {
|
|
2649
|
+
throw new Error("Async schemas not supported in object keys currently");
|
|
2650
|
+
}
|
|
2651
|
+
if (retryResult.issues.length === 0) {
|
|
2652
|
+
keyResult = retryResult;
|
|
2653
|
+
}
|
|
2654
|
+
}
|
|
2655
|
+
if (keyResult.issues.length) {
|
|
2656
|
+
if (def.mode === "loose") {
|
|
2657
|
+
// Pass through unchanged
|
|
2658
|
+
payload.value[key] = input[key];
|
|
2659
|
+
}
|
|
2660
|
+
else {
|
|
2661
|
+
// Default "strict" behavior: error on invalid key
|
|
2662
|
+
payload.issues.push({
|
|
2663
|
+
code: "invalid_key",
|
|
2664
|
+
origin: "record",
|
|
2665
|
+
issues: keyResult.issues.map((iss) => finalizeIssue(iss, ctx, config())),
|
|
2666
|
+
input: key,
|
|
2667
|
+
path: [key],
|
|
2668
|
+
inst,
|
|
2669
|
+
});
|
|
2670
|
+
}
|
|
2671
|
+
continue;
|
|
2672
|
+
}
|
|
2673
|
+
const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
|
|
2674
|
+
if (result instanceof Promise) {
|
|
2675
|
+
proms.push(result.then((result) => {
|
|
2676
|
+
if (result.issues.length) {
|
|
2677
|
+
payload.issues.push(...prefixIssues(key, result.issues));
|
|
2678
|
+
}
|
|
2679
|
+
payload.value[keyResult.value] = result.value;
|
|
2680
|
+
}));
|
|
2681
|
+
}
|
|
2682
|
+
else {
|
|
2683
|
+
if (result.issues.length) {
|
|
2684
|
+
payload.issues.push(...prefixIssues(key, result.issues));
|
|
2685
|
+
}
|
|
2686
|
+
payload.value[keyResult.value] = result.value;
|
|
2687
|
+
}
|
|
2688
|
+
}
|
|
2689
|
+
}
|
|
2690
|
+
if (proms.length) {
|
|
2691
|
+
return Promise.all(proms).then(() => payload);
|
|
2692
|
+
}
|
|
2693
|
+
return payload;
|
|
2694
|
+
};
|
|
2695
|
+
});
|
|
2569
2696
|
const $ZodEnum = /*@__PURE__*/ $constructor("$ZodEnum", (inst, def) => {
|
|
2570
2697
|
$ZodType.init(inst, def);
|
|
2571
2698
|
const values = getEnumValues(def.entries);
|
|
@@ -4149,6 +4276,49 @@ const intersectionProcessor = (schema, ctx, json, params) => {
|
|
|
4149
4276
|
];
|
|
4150
4277
|
json.allOf = allOf;
|
|
4151
4278
|
};
|
|
4279
|
+
const recordProcessor = (schema, ctx, _json, params) => {
|
|
4280
|
+
const json = _json;
|
|
4281
|
+
const def = schema._zod.def;
|
|
4282
|
+
json.type = "object";
|
|
4283
|
+
// For looseRecord with regex patterns, use patternProperties
|
|
4284
|
+
// This correctly represents "only validate keys matching the pattern" semantics
|
|
4285
|
+
// and composes well with allOf (intersections)
|
|
4286
|
+
const keyType = def.keyType;
|
|
4287
|
+
const keyBag = keyType._zod.bag;
|
|
4288
|
+
const patterns = keyBag?.patterns;
|
|
4289
|
+
if (def.mode === "loose" && patterns && patterns.size > 0) {
|
|
4290
|
+
// Use patternProperties for looseRecord with regex patterns
|
|
4291
|
+
const valueSchema = process$1(def.valueType, ctx, {
|
|
4292
|
+
...params,
|
|
4293
|
+
path: [...params.path, "patternProperties", "*"],
|
|
4294
|
+
});
|
|
4295
|
+
json.patternProperties = {};
|
|
4296
|
+
for (const pattern of patterns) {
|
|
4297
|
+
json.patternProperties[pattern.source] = valueSchema;
|
|
4298
|
+
}
|
|
4299
|
+
}
|
|
4300
|
+
else {
|
|
4301
|
+
// Default behavior: use propertyNames + additionalProperties
|
|
4302
|
+
if (ctx.target === "draft-07" || ctx.target === "draft-2020-12") {
|
|
4303
|
+
json.propertyNames = process$1(def.keyType, ctx, {
|
|
4304
|
+
...params,
|
|
4305
|
+
path: [...params.path, "propertyNames"],
|
|
4306
|
+
});
|
|
4307
|
+
}
|
|
4308
|
+
json.additionalProperties = process$1(def.valueType, ctx, {
|
|
4309
|
+
...params,
|
|
4310
|
+
path: [...params.path, "additionalProperties"],
|
|
4311
|
+
});
|
|
4312
|
+
}
|
|
4313
|
+
// Add required for keys with discrete values (enum, literal, etc.)
|
|
4314
|
+
const keyValues = keyType._zod.values;
|
|
4315
|
+
if (keyValues) {
|
|
4316
|
+
const validKeyValues = [...keyValues].filter((v) => typeof v === "string" || typeof v === "number");
|
|
4317
|
+
if (validKeyValues.length > 0) {
|
|
4318
|
+
json.required = validKeyValues;
|
|
4319
|
+
}
|
|
4320
|
+
}
|
|
4321
|
+
};
|
|
4152
4322
|
const nullableProcessor = (schema, ctx, json, params) => {
|
|
4153
4323
|
const def = schema._zod.def;
|
|
4154
4324
|
const inner = process$1(def.innerType, ctx, params);
|
|
@@ -4703,6 +4873,21 @@ function intersection(left, right) {
|
|
|
4703
4873
|
right: right,
|
|
4704
4874
|
});
|
|
4705
4875
|
}
|
|
4876
|
+
const ZodRecord = /*@__PURE__*/ $constructor("ZodRecord", (inst, def) => {
|
|
4877
|
+
$ZodRecord.init(inst, def);
|
|
4878
|
+
ZodType.init(inst, def);
|
|
4879
|
+
inst._zod.processJSONSchema = (ctx, json, params) => recordProcessor(inst, ctx, json, params);
|
|
4880
|
+
inst.keyType = def.keyType;
|
|
4881
|
+
inst.valueType = def.valueType;
|
|
4882
|
+
});
|
|
4883
|
+
function record(keyType, valueType, params) {
|
|
4884
|
+
return new ZodRecord({
|
|
4885
|
+
type: "record",
|
|
4886
|
+
keyType,
|
|
4887
|
+
valueType: valueType,
|
|
4888
|
+
...normalizeParams(params),
|
|
4889
|
+
});
|
|
4890
|
+
}
|
|
4706
4891
|
const ZodEnum = /*@__PURE__*/ $constructor("ZodEnum", (inst, def) => {
|
|
4707
4892
|
$ZodEnum.init(inst, def);
|
|
4708
4893
|
ZodType.init(inst, def);
|
|
@@ -4940,7 +5125,7 @@ const ZodCustom = /*@__PURE__*/ $constructor("ZodCustom", (inst, def) => {
|
|
|
4940
5125
|
inst._zod.processJSONSchema = (ctx, json, params) => customProcessor(inst, ctx);
|
|
4941
5126
|
});
|
|
4942
5127
|
function custom(fn, _params) {
|
|
4943
|
-
return _custom(ZodCustom, (() => true), _params);
|
|
5128
|
+
return _custom(ZodCustom, fn ?? (() => true), _params);
|
|
4944
5129
|
}
|
|
4945
5130
|
function refine(fn, _params = {}) {
|
|
4946
5131
|
return _refine(ZodCustom, fn, _params);
|
|
@@ -4955,6 +5140,19 @@ const optionalPositiveInt = number().int().positive().optional();
|
|
|
4955
5140
|
const optionalString = string().optional();
|
|
4956
5141
|
const selectOptionsSchema = array(nonEmptyString).min(1);
|
|
4957
5142
|
const optionalNumber = number().optional();
|
|
5143
|
+
const textEvaluationSourceSchema = object({
|
|
5144
|
+
type: literal('text'),
|
|
5145
|
+
});
|
|
5146
|
+
const customEvaluationSourceSchema = object({
|
|
5147
|
+
type: literal('custom'),
|
|
5148
|
+
extractorId: nonEmptyString,
|
|
5149
|
+
});
|
|
5150
|
+
const evaluationSourceExtractorSchema = custom(value => typeof value === 'function', 'Extractor must be a function.');
|
|
5151
|
+
record(string().min(1), evaluationSourceExtractorSchema);
|
|
5152
|
+
const evaluationSourceSchema = discriminatedUnion('type', [
|
|
5153
|
+
textEvaluationSourceSchema,
|
|
5154
|
+
customEvaluationSourceSchema,
|
|
5155
|
+
]);
|
|
4958
5156
|
const expectedOutcomeModeSchema = _enum(['static', 'dynamic']);
|
|
4959
5157
|
const evaluationParametersSchema = object({
|
|
4960
5158
|
approach: _enum(EvaluationApproach),
|
|
@@ -4972,6 +5170,7 @@ const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine(
|
|
|
4972
5170
|
const defaultExpectedOutcomeBaseSchema = object({
|
|
4973
5171
|
label: nonEmptyString,
|
|
4974
5172
|
placeholder: optionalString,
|
|
5173
|
+
evaluationSource: evaluationSourceSchema.optional(),
|
|
4975
5174
|
});
|
|
4976
5175
|
const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
|
|
4977
5176
|
text: baseSchema.extend({
|
|
@@ -5064,18 +5263,55 @@ function validateExpectedOutcomeSchema(schema) {
|
|
|
5064
5263
|
throw new Error(`Invalid expectedOutcomeSchema: ${parsed.error.issues[0].message}`);
|
|
5065
5264
|
}
|
|
5066
5265
|
}
|
|
5266
|
+
function validateExpectedOutcomeArrayWithExtractors(expectedOutcome, allowedExtractorIds) {
|
|
5267
|
+
const allowed = new Set(allowedExtractorIds);
|
|
5268
|
+
const schema = expectedOutcomeArraySchema.superRefine((fields, ctx) => {
|
|
5269
|
+
fields.forEach((field, index) => {
|
|
5270
|
+
if (field.evaluationSource?.type !== 'custom') {
|
|
5271
|
+
return;
|
|
5272
|
+
}
|
|
5273
|
+
if (allowed.has(field.evaluationSource.extractorId)) {
|
|
5274
|
+
return;
|
|
5275
|
+
}
|
|
5276
|
+
ctx.addIssue({
|
|
5277
|
+
code: 'custom',
|
|
5278
|
+
path: [index, 'evaluationSource', 'extractorId'],
|
|
5279
|
+
message: `Invalid expectedOutcome: Extractor "${field.evaluationSource.extractorId}" is not registered.`,
|
|
5280
|
+
});
|
|
5281
|
+
});
|
|
5282
|
+
});
|
|
5283
|
+
const parsed = schema.safeParse(expectedOutcome);
|
|
5284
|
+
if (!parsed.success) {
|
|
5285
|
+
throw new Error(parsed.error.issues[0].message);
|
|
5286
|
+
}
|
|
5287
|
+
}
|
|
5288
|
+
function getExtractorIds(extractors) {
|
|
5289
|
+
return Object.keys(extractors || {});
|
|
5290
|
+
}
|
|
5067
5291
|
|
|
5292
|
+
const modelResponseMetadataSchema = record(string(), unknown());
|
|
5293
|
+
const modelResponsePayloadSchema = object({
|
|
5294
|
+
text: string().optional(),
|
|
5295
|
+
metadata: modelResponseMetadataSchema.optional(),
|
|
5296
|
+
});
|
|
5297
|
+
|
|
5298
|
+
const testCaseChatHistorySchema = object({
|
|
5299
|
+
enabled: boolean(),
|
|
5300
|
+
value: string(),
|
|
5301
|
+
});
|
|
5068
5302
|
const testCaseInputSchema = object({
|
|
5069
5303
|
id: string(),
|
|
5070
5304
|
question: string(),
|
|
5071
5305
|
expectedOutcome: expectedOutcomeArraySchema,
|
|
5306
|
+
chatHistory: testCaseChatHistorySchema.optional(),
|
|
5072
5307
|
});
|
|
5073
5308
|
const testCaseInputArraySchema = array(testCaseInputSchema);
|
|
5074
5309
|
object({
|
|
5075
5310
|
id: string(),
|
|
5076
5311
|
question: string(),
|
|
5077
5312
|
expectedOutcome: expectedOutcomeArraySchema,
|
|
5078
|
-
output:
|
|
5313
|
+
output: modelResponsePayloadSchema.optional(),
|
|
5314
|
+
chatHistory: testCaseChatHistorySchema,
|
|
5079
5315
|
isRunning: boolean().optional(),
|
|
5080
5316
|
error: string().optional(),
|
|
5081
5317
|
evaluationResult: custom().optional(),
|
|
@@ -5097,10 +5333,15 @@ function validateTestCaseInputArray(data) {
|
|
|
5097
5333
|
* @param jsonContent - The JSON string to parse and validate
|
|
5098
5334
|
* @returns Validation result with test cases or error message
|
|
5099
5335
|
*/
|
|
5100
|
-
function importTestSuite(jsonContent) {
|
|
5336
|
+
function importTestSuite(jsonContent, allowedExtractorIds = []) {
|
|
5101
5337
|
try {
|
|
5102
5338
|
const parsed = JSON.parse(jsonContent);
|
|
5103
5339
|
validateTestCaseInputArray(parsed);
|
|
5340
|
+
if (allowedExtractorIds.length > 0) {
|
|
5341
|
+
parsed.forEach((testCase) => {
|
|
5342
|
+
validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, allowedExtractorIds);
|
|
5343
|
+
});
|
|
5344
|
+
}
|
|
5104
5345
|
const testCases = parsed.map((item, index) => {
|
|
5105
5346
|
try {
|
|
5106
5347
|
return createTestCaseFromInput(item);
|
|
@@ -5126,7 +5367,7 @@ function importTestSuite(jsonContent) {
|
|
|
5126
5367
|
}
|
|
5127
5368
|
|
|
5128
5369
|
const MISSING_RESOLVER_MESSAGE = 'resolveExpectedOutcome is required when a test case has dynamic expected outcomes.';
|
|
5129
|
-
function isDynamicTextareaField(field) {
|
|
5370
|
+
function isDynamicTextareaField$1(field) {
|
|
5130
5371
|
return field.type === 'textarea' && field.outcomeMode === 'dynamic';
|
|
5131
5372
|
}
|
|
5132
5373
|
function applyResolvedDynamicValues(testCase, resolvedValues) {
|
|
@@ -5136,7 +5377,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
|
|
|
5136
5377
|
const expectedOutcome = [...(testCase.expectedOutcome || [])];
|
|
5137
5378
|
for (const resolved of resolvedValues) {
|
|
5138
5379
|
const field = expectedOutcome[resolved.index];
|
|
5139
|
-
if (!field || !isDynamicTextareaField(field)) {
|
|
5380
|
+
if (!field || !isDynamicTextareaField$1(field)) {
|
|
5140
5381
|
continue;
|
|
5141
5382
|
}
|
|
5142
5383
|
expectedOutcome[resolved.index] = {
|
|
@@ -5151,7 +5392,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
|
|
|
5151
5392
|
}
|
|
5152
5393
|
async function resolveDynamicExpectedOutcomes(testCase, resolver) {
|
|
5153
5394
|
const dynamicFields = (testCase.expectedOutcome || []).flatMap((field, index) => {
|
|
5154
|
-
if (!isDynamicTextareaField(field)) {
|
|
5395
|
+
if (!isDynamicTextareaField$1(field)) {
|
|
5155
5396
|
return [];
|
|
5156
5397
|
}
|
|
5157
5398
|
return [{ field, index }];
|
|
@@ -5169,6 +5410,15 @@ async function resolveDynamicExpectedOutcomes(testCase, resolver) {
|
|
|
5169
5410
|
return applyResolvedDynamicValues(testCase, resolvedValues);
|
|
5170
5411
|
}
|
|
5171
5412
|
|
|
5413
|
+
function isChipsInputField(field) {
|
|
5414
|
+
return field.type === 'chips-input';
|
|
5415
|
+
}
|
|
5416
|
+
function isTextareaField(field) {
|
|
5417
|
+
return field.type === 'textarea';
|
|
5418
|
+
}
|
|
5419
|
+
function isDynamicTextareaField(field) {
|
|
5420
|
+
return isTextareaField(field) && field.outcomeMode === 'dynamic';
|
|
5421
|
+
}
|
|
5172
5422
|
function applyExpectedOutcomeChange(testCase, change) {
|
|
5173
5423
|
const { index } = change;
|
|
5174
5424
|
const expectedOutcome = [...(testCase.expectedOutcome || [])];
|
|
@@ -5176,73 +5426,99 @@ function applyExpectedOutcomeChange(testCase, change) {
|
|
|
5176
5426
|
if (!target) {
|
|
5177
5427
|
return testCase;
|
|
5178
5428
|
}
|
|
5429
|
+
const commit = (updatedField) => {
|
|
5430
|
+
expectedOutcome[index] = updatedField;
|
|
5431
|
+
return { ...testCase, expectedOutcome };
|
|
5432
|
+
};
|
|
5179
5433
|
switch (change.operation) {
|
|
5180
5434
|
case 'set-value': {
|
|
5181
|
-
if (target
|
|
5435
|
+
if (isChipsInputField(target)) {
|
|
5182
5436
|
return testCase;
|
|
5183
5437
|
}
|
|
5184
|
-
if (target
|
|
5438
|
+
if (isDynamicTextareaField(target)) {
|
|
5185
5439
|
return testCase;
|
|
5186
5440
|
}
|
|
5187
|
-
|
|
5441
|
+
return commit({
|
|
5188
5442
|
...target,
|
|
5189
5443
|
value: change.value,
|
|
5190
|
-
};
|
|
5191
|
-
return { ...testCase, expectedOutcome };
|
|
5444
|
+
});
|
|
5192
5445
|
}
|
|
5193
5446
|
case 'add-chip': {
|
|
5194
|
-
if (target
|
|
5447
|
+
if (!isChipsInputField(target)) {
|
|
5195
5448
|
return testCase;
|
|
5196
5449
|
}
|
|
5197
|
-
|
|
5450
|
+
return commit({
|
|
5198
5451
|
...target,
|
|
5199
5452
|
value: [...target.value, change.value],
|
|
5200
|
-
};
|
|
5201
|
-
return { ...testCase, expectedOutcome };
|
|
5453
|
+
});
|
|
5202
5454
|
}
|
|
5203
5455
|
case 'remove-chip': {
|
|
5204
|
-
if (target
|
|
5456
|
+
if (!isChipsInputField(target)) {
|
|
5205
5457
|
return testCase;
|
|
5206
5458
|
}
|
|
5207
|
-
|
|
5459
|
+
return commit({
|
|
5208
5460
|
...target,
|
|
5209
5461
|
value: target.value.filter(chip => chip !== change.value),
|
|
5210
|
-
};
|
|
5211
|
-
return { ...testCase, expectedOutcome };
|
|
5462
|
+
});
|
|
5212
5463
|
}
|
|
5213
5464
|
case 'set-evaluation-approach':
|
|
5214
5465
|
return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
|
|
5215
5466
|
case 'set-outcome-mode': {
|
|
5216
|
-
if (target
|
|
5467
|
+
if (!isTextareaField(target)) {
|
|
5217
5468
|
return testCase;
|
|
5218
5469
|
}
|
|
5219
5470
|
const mode = change.value;
|
|
5220
5471
|
if (mode === 'static') {
|
|
5221
5472
|
const { resolutionQuery: _, ...rest } = target;
|
|
5222
|
-
|
|
5473
|
+
return commit({
|
|
5223
5474
|
...rest,
|
|
5224
5475
|
outcomeMode: 'static',
|
|
5225
5476
|
value: '',
|
|
5226
|
-
};
|
|
5477
|
+
});
|
|
5227
5478
|
}
|
|
5228
5479
|
else {
|
|
5229
|
-
|
|
5480
|
+
return commit({
|
|
5230
5481
|
...target,
|
|
5231
5482
|
outcomeMode: 'dynamic',
|
|
5232
5483
|
value: '',
|
|
5233
|
-
};
|
|
5484
|
+
});
|
|
5234
5485
|
}
|
|
5235
|
-
return { ...testCase, expectedOutcome };
|
|
5236
5486
|
}
|
|
5237
5487
|
case 'set-resolution-query': {
|
|
5238
|
-
if (target
|
|
5488
|
+
if (!isDynamicTextareaField(target)) {
|
|
5239
5489
|
return testCase;
|
|
5240
5490
|
}
|
|
5241
|
-
|
|
5491
|
+
return commit({
|
|
5242
5492
|
...target,
|
|
5243
5493
|
resolutionQuery: change.value,
|
|
5244
|
-
};
|
|
5245
|
-
|
|
5494
|
+
});
|
|
5495
|
+
}
|
|
5496
|
+
case 'set-evaluation-source-type': {
|
|
5497
|
+
if (change.value === 'text') {
|
|
5498
|
+
return commit({
|
|
5499
|
+
...target,
|
|
5500
|
+
evaluationSource: { type: 'text' },
|
|
5501
|
+
});
|
|
5502
|
+
}
|
|
5503
|
+
const extractorId = target.evaluationSource?.type === 'custom'
|
|
5504
|
+
? target.evaluationSource.extractorId
|
|
5505
|
+
: (change.fallbackExtractorId ?? '');
|
|
5506
|
+
return commit({
|
|
5507
|
+
...target,
|
|
5508
|
+
evaluationSource: {
|
|
5509
|
+
type: 'custom',
|
|
5510
|
+
extractorId,
|
|
5511
|
+
},
|
|
5512
|
+
});
|
|
5513
|
+
}
|
|
5514
|
+
case 'set-evaluation-source-extractor': {
|
|
5515
|
+
return commit({
|
|
5516
|
+
...target,
|
|
5517
|
+
evaluationSource: {
|
|
5518
|
+
type: 'custom',
|
|
5519
|
+
extractorId: change.value,
|
|
5520
|
+
},
|
|
5521
|
+
});
|
|
5246
5522
|
}
|
|
5247
5523
|
}
|
|
5248
5524
|
}
|
|
@@ -30023,7 +30299,7 @@ class LLMEvaluationEngine {
|
|
|
30023
30299
|
const fieldRequest = {
|
|
30024
30300
|
testCaseId: request.testCaseId,
|
|
30025
30301
|
question: request.question,
|
|
30026
|
-
actualResponse:
|
|
30302
|
+
actualResponse: field.actualResponse,
|
|
30027
30303
|
expectedOutcome: field.expectedValue,
|
|
30028
30304
|
evaluationParameters: field.evaluationParameters,
|
|
30029
30305
|
};
|
|
@@ -30093,6 +30369,58 @@ class LLMEvaluationEngine {
|
|
|
30093
30369
|
}
|
|
30094
30370
|
}
|
|
30095
30371
|
|
|
30372
|
+
function toTextSource() {
|
|
30373
|
+
return { type: 'text' };
|
|
30374
|
+
}
|
|
30375
|
+
/**
 * Resolves the string a field is evaluated against.
 *
 * When the field's evaluation source is `text` (also the default when the
 * field declares no source), the trimmed `output.text` is used. Otherwise the
 * extractor registered under `source.extractorId` is called with the output
 * (or `{}` when there is no output) and its trimmed string result is used.
 *
 * The function never throws for bad data; every failure is reported through
 * the returned object.
 *
 * @param {object} field - Expected-outcome field; `field.evaluationSource` is read.
 * @param {object} [output] - Model output; `output.text` is read for text sources.
 * @param {object} [extractors] - Map of extractor id to (possibly async) function.
 * @returns {Promise<{success: true, value: string} | {success: false, error: string}>}
 */
async function resolveActualValue(field, output, extractors) {
    const source = field.evaluationSource || toTextSource();
    if (source.type === 'text') {
        // A non-string `text` is handled like a missing response instead of
        // letting `.trim()` throw out of this function.
        const text = typeof output?.text === 'string' ? output.text.trim() : '';
        if (!text) {
            return {
                success: false,
                error: 'Model response text is empty.',
            };
        }
        return { success: true, value: text };
    }
    const { extractorId } = source;
    // Only own properties count as registered extractors. A plain lookup would
    // also resolve inherited members such as `toString` or `constructor` and
    // call them as if they were user-supplied extractors.
    const isRegistered = extractors != null &&
        Object.prototype.hasOwnProperty.call(extractors, extractorId);
    const extractor = isRegistered ? extractors[extractorId] : undefined;
    if (typeof extractor !== 'function') {
        return {
            success: false,
            error: `Extractor "${extractorId}" is not registered.`,
        };
    }
    try {
        const extractedRaw = await extractor(output || {});
        if (typeof extractedRaw !== 'string') {
            return {
                success: false,
                error: `Extractor "${extractorId}" must return a string.`,
            };
        }
        const extracted = extractedRaw.trim();
        if (!extracted) {
            return {
                success: false,
                error: `Extractor "${extractorId}" returned an empty value.`,
            };
        }
        return {
            success: true,
            value: extracted,
        };
    }
    catch (error) {
        const fallbackMessage = `Extractor "${extractorId}" failed.`;
        return {
            success: false,
            // An Error with an empty message would otherwise yield an empty,
            // falsy error string that downstream checks cannot distinguish
            // from "no error".
            error: error instanceof Error
                ? (error.message || fallbackMessage)
                : fallbackMessage,
        };
    }
}
|
|
30423
|
+
|
|
30096
30424
|
/**
|
|
30097
30425
|
* Service for evaluating test case responses
|
|
30098
30426
|
*/
|
|
@@ -30106,34 +30434,71 @@ class EvaluationService {
|
|
|
30106
30434
|
* @param testCase - The test case to evaluate
|
|
30107
30435
|
* @param onResult - Callback to handle the evaluation result
|
|
30108
30436
|
*/
|
|
30109
|
-
async evaluateTestCase(testCase, onResult) {
|
|
30110
|
-
|
|
30111
|
-
|
|
30112
|
-
|
|
30113
|
-
}
|
|
30114
|
-
const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
|
|
30437
|
+
async evaluateTestCase(testCase, onResult, extractors) {
|
|
30438
|
+
const fields = [];
|
|
30439
|
+
const failedFields = [];
|
|
30440
|
+
for (const [index, field] of (testCase.expectedOutcome || []).entries()) {
|
|
30115
30441
|
if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
|
|
30116
|
-
|
|
30442
|
+
continue;
|
|
30117
30443
|
}
|
|
30118
|
-
|
|
30119
|
-
|
|
30444
|
+
const evaluationParameters = normalizeEvaluationParametersForField(field.type, field.evaluationParameters);
|
|
30445
|
+
const expectedValue = getFieldExpectedValue(field);
|
|
30446
|
+
const resolvedActualValue = await resolveActualValue(field, testCase.output, extractors);
|
|
30447
|
+
if (resolvedActualValue.success) {
|
|
30448
|
+
fields.push({
|
|
30120
30449
|
index,
|
|
30121
30450
|
label: field.label,
|
|
30122
30451
|
type: field.type,
|
|
30123
|
-
expectedValue
|
|
30124
|
-
|
|
30125
|
-
|
|
30126
|
-
|
|
30127
|
-
|
|
30452
|
+
expectedValue,
|
|
30453
|
+
actualResponse: resolvedActualValue.value,
|
|
30454
|
+
evaluationParameters,
|
|
30455
|
+
});
|
|
30456
|
+
}
|
|
30457
|
+
else {
|
|
30458
|
+
failedFields.push({
|
|
30459
|
+
index,
|
|
30460
|
+
label: field.label,
|
|
30461
|
+
type: field.type,
|
|
30462
|
+
expectedValue,
|
|
30463
|
+
passed: false,
|
|
30464
|
+
keywordMatches: [],
|
|
30465
|
+
evaluationParameters,
|
|
30466
|
+
evaluationApproachResult: {
|
|
30467
|
+
score: 0,
|
|
30468
|
+
approachUsed: evaluationParameters.approach,
|
|
30469
|
+
},
|
|
30470
|
+
error: 'error' in resolvedActualValue
|
|
30471
|
+
? resolvedActualValue.error
|
|
30472
|
+
: 'Failed to resolve actual value.',
|
|
30473
|
+
});
|
|
30474
|
+
}
|
|
30475
|
+
}
|
|
30476
|
+
if (fields.length === 0) {
|
|
30477
|
+
if (failedFields.length === 0) {
|
|
30478
|
+
console.warn('⚠️ No evaluable fields for test case:', testCase.id);
|
|
30479
|
+
return;
|
|
30480
|
+
}
|
|
30481
|
+
onResult({
|
|
30482
|
+
testCaseId: testCase.id,
|
|
30483
|
+
passed: false,
|
|
30484
|
+
keywordMatches: [],
|
|
30485
|
+
fieldResults: failedFields,
|
|
30486
|
+
timestamp: new Date().toISOString(),
|
|
30487
|
+
});
|
|
30488
|
+
return;
|
|
30489
|
+
}
|
|
30128
30490
|
const evaluationRequest = {
|
|
30129
30491
|
testCaseId: testCase.id,
|
|
30130
30492
|
question: testCase.question,
|
|
30131
|
-
actualResponse: testCase.output,
|
|
30132
30493
|
fields,
|
|
30133
30494
|
};
|
|
30134
30495
|
await this.engine.evaluateResponse(evaluationRequest, (result) => {
|
|
30135
|
-
|
|
30136
|
-
onResult(
|
|
30496
|
+
const combinedResults = [...(result.fieldResults || []), ...failedFields].sort((a, b) => a.index - b.index);
|
|
30497
|
+
onResult({
|
|
30498
|
+
...result,
|
|
30499
|
+
passed: combinedResults.every(field => field.passed && !field.error),
|
|
30500
|
+
fieldResults: combinedResults,
|
|
30501
|
+
});
|
|
30137
30502
|
});
|
|
30138
30503
|
}
|
|
30139
30504
|
}
|
|
@@ -30181,7 +30546,7 @@ const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isR
|
|
|
30181
30546
|
};
|
|
30182
30547
|
|
|
30183
30548
|
/**
 * Functional component for the output cell of a test-case row.
 *
 * Shows `output.text` when it is present and non-empty; otherwise shows a
 * placeholder that reads 'Running...' while the test is running and is empty
 * when it is not.
 */
const ResponseOutput = ({ output, isRunning, }) => {
    const inner = output?.text
        ? index.h("div", { class: "response-output__content" }, output.text)
        : index.h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : '');
    return index.h("div", { class: "response-output" }, inner);
};
|
|
30186
30551
|
|
|
30187
30552
|
const EvaluationSummary = ({ result, isRunning, }) => {
|
|
@@ -30219,7 +30584,9 @@ var FormFieldType;
|
|
|
30219
30584
|
FormFieldType["SELECT"] = "select";
|
|
30220
30585
|
})(FormFieldType || (FormFieldType = {}));
|
|
30221
30586
|
|
|
30222
|
-
const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, onExpectedOutcomeChange, }) => {
|
|
30587
|
+
const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, extractorIds = [], onExpectedOutcomeChange, }) => {
|
|
30588
|
+
const hasExtractorOptions = extractorIds.length > 0;
|
|
30589
|
+
const firstExtractorId = extractorIds[0];
|
|
30223
30590
|
const emit = (detail) => onExpectedOutcomeChange({
|
|
30224
30591
|
detail,
|
|
30225
30592
|
});
|
|
@@ -30249,6 +30616,23 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
|
|
|
30249
30616
|
required: false,
|
|
30250
30617
|
rows: 2,
|
|
30251
30618
|
});
|
|
30619
|
+
const buildEvaluationSourceConfig = (index) => ({
|
|
30620
|
+
name: `expectedOutcomeEvaluationSource-${index}`,
|
|
30621
|
+
fieldType: FormFieldType.SELECT,
|
|
30622
|
+
label: 'Evaluation Source',
|
|
30623
|
+
placeholder: 'Select evaluation source',
|
|
30624
|
+
required: true,
|
|
30625
|
+
optionList: ['text', 'custom'],
|
|
30626
|
+
defaultValue: 'text',
|
|
30627
|
+
});
|
|
30628
|
+
const buildExtractorConfig = (index) => ({
|
|
30629
|
+
name: `expectedOutcomeEvaluationSourceExtractor-${index}`,
|
|
30630
|
+
fieldType: FormFieldType.SELECT,
|
|
30631
|
+
label: 'Extractor',
|
|
30632
|
+
placeholder: 'Select extractor',
|
|
30633
|
+
required: true,
|
|
30634
|
+
optionList: extractorIds,
|
|
30635
|
+
});
|
|
30252
30636
|
const renderEvaluationSelector = (field, index$1) => {
|
|
30253
30637
|
const optionList = getAllowedApproachesForFieldType(field.type);
|
|
30254
30638
|
return (index.h("app-select", { config: buildEvaluationConfig(index$1, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
|
|
@@ -30258,6 +30642,27 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
|
|
|
30258
30642
|
value: e.detail.value,
|
|
30259
30643
|
}) }));
|
|
30260
30644
|
};
|
|
30645
|
+
const renderEvaluationSourceSelector = (field, index$1) => {
|
|
30646
|
+
if (!hasExtractorOptions) {
|
|
30647
|
+
return null;
|
|
30648
|
+
}
|
|
30649
|
+
const sourceType = field.evaluationSource?.type || 'text';
|
|
30650
|
+
return (index.h("div", null, index.h("app-select", { config: buildEvaluationSourceConfig(index$1), value: sourceType, onValueChange: (e) => emit({
|
|
30651
|
+
testCaseId,
|
|
30652
|
+
index: index$1,
|
|
30653
|
+
operation: 'set-evaluation-source-type',
|
|
30654
|
+
value: e.detail.value,
|
|
30655
|
+
fallbackExtractorId: firstExtractorId,
|
|
30656
|
+
}) }), sourceType === 'custom' && (index.h("app-select", { config: buildExtractorConfig(index$1), value: field.evaluationSource?.type === 'custom'
|
|
30657
|
+
? field.evaluationSource.extractorId
|
|
30658
|
+
: '', onValueChange: (e) => emit({
|
|
30659
|
+
testCaseId,
|
|
30660
|
+
index: index$1,
|
|
30661
|
+
operation: 'set-evaluation-source-extractor',
|
|
30662
|
+
value: e.detail.value,
|
|
30663
|
+
}) }))));
|
|
30664
|
+
};
|
|
30665
|
+
const renderEvaluationOptions = (field, index$1) => (index.h("details", { class: "expected-outcome-renderer__options" }, index.h("summary", { class: "expected-outcome-renderer__options-summary" }, "More options"), index.h("div", { class: "expected-outcome-renderer__options-content" }, renderEvaluationSelector(field, index$1), renderEvaluationSourceSelector(field, index$1))));
|
|
30261
30666
|
return (index.h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index$1) => {
|
|
30262
30667
|
if (field.type === 'textarea') {
|
|
30263
30668
|
const isDynamic = dynamicResolutionSupported && field.outcomeMode === 'dynamic';
|
|
@@ -30289,7 +30694,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
|
|
|
30289
30694
|
index: index$1,
|
|
30290
30695
|
operation: 'set-resolution-query',
|
|
30291
30696
|
value: e.detail.value,
|
|
30292
|
-
}) })), !isDynamic &&
|
|
30697
|
+
}) })), !isDynamic && renderEvaluationOptions(field, index$1)));
|
|
30293
30698
|
}
|
|
30294
30699
|
if (field.type === 'chips-input') {
|
|
30295
30700
|
const config = {
|
|
@@ -30309,7 +30714,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
|
|
|
30309
30714
|
index: index$1,
|
|
30310
30715
|
operation: 'remove-chip',
|
|
30311
30716
|
value: e.detail.value,
|
|
30312
|
-
}) }),
|
|
30717
|
+
}) }), renderEvaluationOptions(field, index$1)));
|
|
30313
30718
|
}
|
|
30314
30719
|
if (field.type === 'select') {
|
|
30315
30720
|
const config = {
|
|
@@ -30325,18 +30730,18 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
|
|
|
30325
30730
|
index: index$1,
|
|
30326
30731
|
operation: 'set-value',
|
|
30327
30732
|
value: e.detail.value,
|
|
30328
|
-
}) }),
|
|
30733
|
+
}) }), renderEvaluationOptions(field, index$1)));
|
|
30329
30734
|
}
|
|
30330
30735
|
return (index.h("div", { class: "expected-outcome-renderer__group" }, index.h("div", { class: "expected-outcome-renderer__text" }, index.h("label", null, field.label), index.h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
|
|
30331
30736
|
testCaseId,
|
|
30332
30737
|
index: index$1,
|
|
30333
30738
|
operation: 'set-value',
|
|
30334
30739
|
value: e.target.value,
|
|
30335
|
-
}) })),
|
|
30740
|
+
}) })), renderEvaluationOptions(field, index$1)));
|
|
30336
30741
|
})));
|
|
30337
30742
|
};
|
|
30338
30743
|
|
|
30339
|
-
const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, }) => {
|
|
30744
|
+
const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
|
|
30340
30745
|
const questionConfig = {
|
|
30341
30746
|
name: 'question',
|
|
30342
30747
|
fieldType: FormFieldType.TEXT_AREA,
|
|
@@ -30352,11 +30757,21 @@ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, o
|
|
|
30352
30757
|
key: 'question',
|
|
30353
30758
|
value: e.detail.value,
|
|
30354
30759
|
},
|
|
30355
|
-
}) }), index.h(
|
|
30760
|
+
}) }), index.h("chat-history", { chatHistoryEnabled: testCase.chatHistory?.enabled ?? false, chatHistoryValue: testCase.chatHistory?.value ?? '', onChatHistoryChange: (e) => {
|
|
30761
|
+
const { enabled, value } = e
|
|
30762
|
+
.detail;
|
|
30763
|
+
onChatHistoryChange({
|
|
30764
|
+
detail: {
|
|
30765
|
+
testCaseId: testCase.id,
|
|
30766
|
+
enabled,
|
|
30767
|
+
value,
|
|
30768
|
+
},
|
|
30769
|
+
});
|
|
30770
|
+
} }), index.h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onExpectedOutcomeChange: onExpectedOutcomeChange })), index.h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), index.h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), index.h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
|
|
30356
30771
|
};
|
|
30357
30772
|
|
|
30358
|
-
/**
 * Functional component for the test-case table: a row of column headers, one
 * `LLMTestCaseRow` per test case, and an "+ Add Question" button.
 *
 * All handlers and the `dynamicResolutionSupported` / `extractorIds` options
 * are forwarded unchanged to every row.
 */
const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
    const headerCell = (label) => index.h("div", { class: "test-cases__column-header" }, label);
    const headerRow = index.h("div", { class: "test-cases__column-headers" }, headerCell("Input"), headerCell("Output"), headerCell("Evaluation"), headerCell("Actions"));
    const rows = testCases.map(testCase => (index.h(LLMTestCaseRow, {
        testCase: testCase,
        dynamicResolutionSupported: dynamicResolutionSupported,
        extractorIds: extractorIds,
        onRun: onRun,
        onDelete: onDelete,
        handleTestCaseChange: handleTestCaseChange,
        onExpectedOutcomeChange: onExpectedOutcomeChange,
        onChatHistoryChange: onChatHistoryChange,
    })));
    const addButton = index.h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question");
    const addSection = index.h("div", { class: "test-cases__add-section" }, addButton);
    return index.h("div", { class: "test-cases" }, headerRow, rows, addSection);
};
|
|
30361
30776
|
|
|
30362
30777
|
const tokensCss = () => `:host{--spacing:0.25rem;--spacing-1:calc(var(--spacing) * 1);--spacing-2:calc(var(--spacing) * 2);--spacing-3:calc(var(--spacing) * 3);--spacing-4:calc(var(--spacing) * 4);--spacing-5:calc(var(--spacing) * 5);--spacing-6:calc(var(--spacing) * 6);--spacing-8:calc(var(--spacing) * 8);--spacing-10:calc(var(--spacing) * 10);--spacing-12:calc(var(--spacing) * 12);--spacing-16:calc(var(--spacing) * 16);--spacing-20:calc(var(--spacing) * 20);--spacing-24:calc(var(--spacing) * 24);--radius-none:0;--radius-sm:0.125rem;--radius-md:0.375rem;--radius-lg:0.5rem;--radius-xl:0.75rem;--radius-2xl:1rem;--radius-3xl:1.5rem;--radius-full:9999px;--radius:var(--radius-lg);--font-size-xs:0.75rem;--font-size-sm:0.875rem;--font-size-base:1rem;--font-size-lg:1.125rem;--font-size-xl:1.25rem;--font-size-2xl:1.5rem;--font-size-3xl:1.875rem;--font-size-4xl:2.25rem;--font-weight-normal:400;--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--line-height-none:1;--line-height-tight:1.25;--line-height-snug:1.375;--line-height-normal:1.5;--line-height-relaxed:1.625;--line-height-loose:2;--letter-spacing-tight:-0.025em;--letter-spacing-normal:0;--letter-spacing-wide:0.05em;--shadow-sm:0 1px 2px 0 rgba(0, 0, 0, 0.05);--shadow-md:0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);--shadow-lg:0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);--shadow-xl:0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);--shadow-2xl:0 25px 50px -12px rgba(0, 0, 0, 
0.25);--border-width:1px;--z-base:0;--z-dropdown:1000;--z-sticky:1100;--z-modal:1200;--z-popover:1300;--z-tooltip:1400;--opacity-disabled:0.5;--opacity-hover:0.8;--opacity-muted:0.6;--max-w-sm:24rem;--max-w-md:28rem;--max-w-lg:32rem;--max-w-xl:42rem;--max-w-2xl:48rem;--max-w-full:100%;--breakpoint-sm:640px;--breakpoint-md:768px;--breakpoint-lg:1024px;--breakpoint-xl:1280px;--breakpoint-2xl:1536px;--background:#ffffff;--foreground:#0a0a0a;--card:#ffffff;--card-foreground:#0a0a0a;--popover:#ffffff;--popover-foreground:#0a0a0a;--primary:#0a0a0a;--primary-foreground:#fafafa;--secondary:#f4f4f5;--secondary-foreground:#0a0a0a;--muted:#f4f4f5;--muted-foreground:#71717a;--accent:#f4f4f5;--accent-foreground:#0a0a0a;--destructive:#ef4444;--destructive-foreground:#fafafa;--border:#e4e4e7;--input:#e4e4e7;--ring:#3b82f6;--success:#10b981;--success-foreground:#fafafa;--warning:#f59e0b;--warning-foreground:#fafafa;--info:#3b82f6;--info-foreground:#fafafa}:host([data-theme='dark']){--background:#0a0a0a;--foreground:#fafafa;--card:#171717;--card-foreground:#fafafa;--popover:#171717;--popover-foreground:#fafafa;--primary:#fafafa;--primary-foreground:#0a0a0a;--secondary:#27272a;--secondary-foreground:#fafafa;--muted:#27272a;--muted-foreground:#a1a1aa;--accent:#27272a;--accent-foreground:#fafafa;--destructive:#dc2626;--destructive-foreground:#fafafa;--border:#27272a;--input:#27272a;--ring:#3b82f6;--success:#059669;--success-foreground:#fafafa;--warning:#d97706;--warning-foreground:#fafafa;--info:#2563eb;--info-foreground:#fafafa}`;
|
|
@@ -30367,7 +30782,7 @@ const llmTestRunnerHeaderCss = () => `.test-runner-header{display:flex;justify-c
|
|
|
30367
30782
|
|
|
30368
30783
|
const llmTestCasesCss = () => `.test-cases{background:var(--background)}.test-cases__column-headers{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);background:var(--border);border-bottom:2px solid var(--border)}.test-cases__column-header{background:var(--muted);padding:var(--spacing-4) var(--spacing-5);font-weight:var(--font-weight-semibold);color:var(--foreground);font-size:var(--font-size-sm);text-transform:uppercase;letter-spacing:var(--letter-spacing-wide)}.test-cases__add-section{padding:var(--spacing-6);text-align:center;background:var(--muted);border-top:var(--border-width) solid var(--border)}@media (max-width: 1200px){.test-cases__column-headers{display:none}}`;
|
|
30369
30784
|
|
|
30370
|
-
const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
|
|
30785
|
+
const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}.expected-outcome-renderer__options{border:var(--border-width) solid var(--border);border-radius:var(--radius-sm);background:var(--muted)}.expected-outcome-renderer__options-summary{cursor:pointer;font-size:var(--font-size-sm);color:var(--foreground);padding:var(--spacing-2) var(--spacing-3);user-select:none}.expected-outcome-renderer__options-content{display:flex;flex-direction:column;gap:var(--spacing-2);padding:0 var(--spacing-3) var(--spacing-3)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
|
|
30371
30786
|
|
|
30372
30787
|
const rowActionsCss = () => `.row-actions{height:100%;padding:var(--spacing-5);background:var(--background);display:flex;flex-direction:column;gap:var(--spacing-3);align-items:center;justify-content:flex-start;align-self:flex-start}@media (max-width: 1200px){.row-actions{border-right:none;border-bottom:var(--border-width) solid var(--border);flex-direction:row;justify-content:center}}@media (max-width: 768px){.row-actions{padding:var(--spacing-4)}}`;
|
|
30373
30788
|
|
|
@@ -30393,6 +30808,7 @@ const LLMTestRunner = class {
|
|
|
30393
30808
|
useSave = false;
|
|
30394
30809
|
usePromptEditor = false;
|
|
30395
30810
|
resolveExpectedOutcome;
|
|
30811
|
+
evaluationSourceExtractors;
|
|
30396
30812
|
initialTestCases;
|
|
30397
30813
|
defaultExpectedOutcomeSchema;
|
|
30398
30814
|
testCases = [
|
|
@@ -30406,6 +30822,7 @@ const LLMTestRunner = class {
|
|
|
30406
30822
|
value: '',
|
|
30407
30823
|
},
|
|
30408
30824
|
],
|
|
30825
|
+
chatHistory: { enabled: false, value: '' },
|
|
30409
30826
|
isRunning: false,
|
|
30410
30827
|
},
|
|
30411
30828
|
];
|
|
@@ -30428,6 +30845,12 @@ const LLMTestRunner = class {
|
|
|
30428
30845
|
// Initialize testCases from prop if provided
|
|
30429
30846
|
if (this.initialTestCases !== undefined) {
|
|
30430
30847
|
validateTestCaseInputArray(this.initialTestCases);
|
|
30848
|
+
const extractorIds = getExtractorIds(this.evaluationSourceExtractors);
|
|
30849
|
+
if (extractorIds.length > 0) {
|
|
30850
|
+
this.initialTestCases.forEach(testCase => {
|
|
30851
|
+
validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, extractorIds);
|
|
30852
|
+
});
|
|
30853
|
+
}
|
|
30431
30854
|
this.testCases = this.initialTestCases.map((rawTestCase, index) => {
|
|
30432
30855
|
try {
|
|
30433
30856
|
return createTestCaseFromInput(rawTestCase);
|
|
@@ -30451,8 +30874,6 @@ const LLMTestRunner = class {
|
|
|
30451
30874
|
this.testCases = [];
|
|
30452
30875
|
}
|
|
30453
30876
|
}
|
|
30454
|
-
componentDidLoad() { }
|
|
30455
|
-
disconnectedCallback() { }
|
|
30456
30877
|
async resetSavingState() {
|
|
30457
30878
|
this.isSaving = false;
|
|
30458
30879
|
}
|
|
@@ -30463,6 +30884,12 @@ const LLMTestRunner = class {
|
|
|
30463
30884
|
const { testCaseId, key, value } = event.detail;
|
|
30464
30885
|
this.testCases = this.testCases.map(tc => tc.id === testCaseId ? { ...tc, [key]: value } : tc);
|
|
30465
30886
|
};
|
|
30887
|
+
/**
 * Handles a chat-history change event by replacing the `chatHistory` of the
 * test case named in the event detail with the new `enabled` / `value` pair.
 */
handleChatHistoryChange = (event) => {
    const { detail } = event;
    const targetId = detail.testCaseId;
    const chatHistory = { enabled: detail.enabled, value: detail.value };
    this.updateTestCase(targetId, { chatHistory });
};
|
|
30466
30893
|
addNewTestCase() {
|
|
30467
30894
|
try {
|
|
30468
30895
|
const schema = this.getResolvedExpectedOutcomeSchema();
|
|
@@ -30479,13 +30906,17 @@ const LLMTestRunner = class {
|
|
|
30479
30906
|
updateTestCase(id, updates) {
|
|
30480
30907
|
this.testCases = this.testCases.map(tc => tc.id === id ? { ...tc, ...updates } : tc);
|
|
30481
30908
|
}
|
|
30482
|
-
|
|
30909
|
+
requestLlmResponse(testCase) {
|
|
30483
30910
|
return new Promise((resolve, reject) => {
|
|
30484
|
-
|
|
30911
|
+
const payload = {
|
|
30485
30912
|
prompt: testCase.question,
|
|
30486
30913
|
resolve,
|
|
30487
30914
|
reject,
|
|
30488
|
-
}
|
|
30915
|
+
};
|
|
30916
|
+
if (testCase.chatHistory?.enabled) {
|
|
30917
|
+
payload.chatHistory = testCase.chatHistory.value;
|
|
30918
|
+
}
|
|
30919
|
+
this.llmRequest.emit(payload);
|
|
30489
30920
|
});
|
|
30490
30921
|
}
|
|
30491
30922
|
throwError(reason) {
|
|
@@ -30498,14 +30929,14 @@ const LLMTestRunner = class {
|
|
|
30498
30929
|
const startTime = Date.now();
|
|
30499
30930
|
this.updateTestCase(testCase.id, { isRunning: true });
|
|
30500
30931
|
const [llmSettled, resolutionSettled] = await Promise.allSettled([
|
|
30501
|
-
this.
|
|
30932
|
+
this.requestLlmResponse(testCase),
|
|
30502
30933
|
resolveDynamicExpectedOutcomes(testCase, this.resolveExpectedOutcome),
|
|
30503
30934
|
]);
|
|
30504
30935
|
const responseTime = Date.now() - startTime;
|
|
30505
30936
|
if (llmSettled.status === 'rejected') {
|
|
30506
30937
|
this.updateTestCase(testCase.id, {
|
|
30507
30938
|
isRunning: false,
|
|
30508
|
-
output:
|
|
30939
|
+
output: undefined,
|
|
30509
30940
|
error: this.addErrorMessage(llmSettled.reason, 'Unknown error'),
|
|
30510
30941
|
responseTime,
|
|
30511
30942
|
});
|
|
@@ -30553,7 +30984,7 @@ const LLMTestRunner = class {
|
|
|
30553
30984
|
this.updateTestCase(testCase.id, {
|
|
30554
30985
|
evaluationResult: result,
|
|
30555
30986
|
});
|
|
30556
|
-
});
|
|
30987
|
+
}, this.evaluationSourceExtractors);
|
|
30557
30988
|
}
|
|
30558
30989
|
async runAllTests() {
|
|
30559
30990
|
this.isRunningAll = true;
|
|
@@ -30584,7 +31015,7 @@ const LLMTestRunner = class {
|
|
|
30584
31015
|
this.error = '';
|
|
30585
31016
|
try {
|
|
30586
31017
|
const content = await readFileAsync(file);
|
|
30587
|
-
const result = importTestSuite(content);
|
|
31018
|
+
const result = importTestSuite(content, getExtractorIds(this.evaluationSourceExtractors));
|
|
30588
31019
|
if (!result.success) {
|
|
30589
31020
|
this.error = result.error || 'Unknown error occurred during import.';
|
|
30590
31021
|
return;
|
|
@@ -30645,7 +31076,7 @@ const LLMTestRunner = class {
|
|
|
30645
31076
|
}
|
|
30646
31077
|
}
|
|
30647
31078
|
render() {
|
|
30648
|
-
return (index.h("div", { key: '
|
|
31079
|
+
return (index.h("div", { key: '7433beaa1d60d48f65600c43e11b302b892a7bca', class: "test-runner-container" }, index.h(LLMTestRunnerHeader, { key: '8083cc39376e7a710bd3f52efb184b959e885a87', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), index.h(ErrorMessage, { key: 'ddced98c13cd595c4cfb6eef11b27cb173769518', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: '8d6f65c4d68d34869b644709eacb97fec93683c6', class: "test-runner-container__content" }, index.h(LLMTestCases, { key: '5ccb186132b23af6209209b0a14086e03cf790af', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, extractorIds: getExtractorIds(this.evaluationSourceExtractors), onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
|
|
30649
31080
|
}
|
|
30650
31081
|
};
|
|
30651
31082
|
LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));
|