llm-testrunner-components 1.2.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/dist/cjs/{app-chips_4.cjs.entry.js → app-chips_5.cjs.entry.js} +38 -6
- package/dist/cjs/app-chips_5.cjs.entry.js.map +1 -0
- package/dist/cjs/index.cjs.js +499 -68
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/cjs/llm-testrunner.cjs.js +1 -1
- package/dist/cjs/loader.cjs.js +1 -1
- package/dist/collection/collection-manifest.json +1 -0
- package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +22 -12
- package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +59 -15
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/chat-history.css +101 -0
- package/dist/collection/components/llm-test-runner/test-cases/chat-history.js +105 -0
- package/dist/collection/components/llm-test-runner/test-cases/chat-history.js.map +1 -0
- package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +45 -5
- package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +21 -0
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +12 -2
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js.map +1 -1
- package/dist/collection/index.js.map +1 -1
- package/dist/collection/lib/evaluation/actual-value-resolver.js +52 -0
- package/dist/collection/lib/evaluation/actual-value-resolver.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluation-engine.js +1 -1
- package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluation-service.js +55 -17
- package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
- package/dist/collection/lib/evaluation/types.js.map +1 -1
- package/dist/collection/lib/form/components/app-chips.js +1 -1
- package/dist/collection/lib/form/components/app-select.js +1 -1
- package/dist/collection/lib/form/components/app-textarea.js +2 -2
- package/dist/collection/lib/import-export/test-suite-exporter.js +4 -0
- package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
- package/dist/collection/lib/import-export/test-suite-importer.js +7 -1
- package/dist/collection/lib/import-export/test-suite-importer.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-factory.js +7 -0
- package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-mutations.js +58 -23
- package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
- package/dist/collection/schemas/expected-outcome.js +39 -0
- package/dist/collection/schemas/expected-outcome.js.map +1 -1
- package/dist/collection/schemas/model-response.js +7 -0
- package/dist/collection/schemas/model-response.js.map +1 -0
- package/dist/collection/schemas/test-case.js +8 -1
- package/dist/collection/schemas/test-case.js.map +1 -1
- package/dist/collection/types/expected-outcome.js.map +1 -1
- package/dist/collection/types/llm-test-runner.js.map +1 -1
- package/dist/collection/types/test-case.js.map +1 -1
- package/dist/components/app-chips.js +1 -1
- package/dist/components/app-select.js +1 -1
- package/dist/components/app-textarea.js +1 -1
- package/dist/components/chat-history.d.ts +11 -0
- package/dist/components/chat-history.js +2 -0
- package/dist/components/chat-history.js.map +1 -0
- package/dist/components/index.js +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/{p-CVtKFBJl.js → p-D2qDAxFN.js} +2 -2
- package/dist/components/{p-Dv7cB5FU.js → p-D4dHUFN9.js} +2 -2
- package/dist/components/{p-CE5-1jfZ.js → p-eN2dLrsr.js} +2 -2
- package/dist/components/p-kmtfMXcQ.js +2 -0
- package/dist/components/p-kmtfMXcQ.js.map +1 -0
- package/dist/components/{p-BcygfrMf.js → p-wzA48RFK.js} +3 -3
- package/dist/components/p-wzA48RFK.js.map +1 -0
- package/dist/esm/{app-chips_4.entry.js → app-chips_5.entry.js} +38 -7
- package/dist/esm/app-chips_5.entry.js.map +1 -0
- package/dist/esm/index.js +499 -68
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm-testrunner.js +1 -1
- package/dist/esm/loader.js +1 -1
- package/dist/llm-testrunner/index.esm.js +2 -2
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
- package/dist/llm-testrunner/p-5bf1fc78.entry.js +2 -0
- package/dist/llm-testrunner/p-5bf1fc78.entry.js.map +1 -0
- package/dist/react/components.d.ts +6 -1
- package/dist/react/components.d.ts.map +1 -1
- package/dist/react/components.js +9 -0
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +4 -4
- package/dist/types/components/llm-test-runner/test-cases/chat-history.d.ts +14 -0
- package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +1 -0
- package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +6 -0
- package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +3 -0
- package/dist/types/components/llm-test-runner/test-cases/output/response-output.d.ts +2 -1
- package/dist/types/components.d.ts +55 -2
- package/dist/types/index.d.ts +1 -1
- package/dist/types/lib/evaluation/actual-value-resolver.d.ts +9 -0
- package/dist/types/lib/evaluation/evaluation-service.d.ts +2 -2
- package/dist/types/lib/evaluation/types.d.ts +1 -1
- package/dist/types/lib/import-export/test-suite-exporter.d.ts +4 -0
- package/dist/types/lib/import-export/test-suite-importer.d.ts +1 -1
- package/dist/types/lib/test-cases/test-case-mutations.d.ts +10 -1
- package/dist/types/schemas/expected-outcome.d.ts +116 -0
- package/dist/types/schemas/model-response.d.ts +7 -0
- package/dist/types/schemas/test-case.d.ts +93 -1
- package/dist/types/types/expected-outcome.d.ts +1 -1
- package/dist/types/types/llm-test-runner.d.ts +6 -3
- package/dist/types/types/test-case.d.ts +1 -1
- package/package.json +1 -1
- package/dist/cjs/app-chips_4.cjs.entry.js.map +0 -1
- package/dist/components/p-BcygfrMf.js.map +0 -1
- package/dist/esm/app-chips_4.entry.js.map +0 -1
- package/dist/llm-testrunner/p-5df053b4.entry.js +0 -2
- package/dist/llm-testrunner/p-5df053b4.entry.js.map +0 -1
- /package/dist/components/{p-CVtKFBJl.js.map → p-D2qDAxFN.js.map} +0 -0
- /package/dist/components/{p-Dv7cB5FU.js.map → p-D4dHUFN9.js.map} +0 -0
- /package/dist/components/{p-CE5-1jfZ.js.map → p-eN2dLrsr.js.map} +0 -0
package/dist/esm/index.js
CHANGED
|
@@ -103,6 +103,10 @@ function formatTestSuiteAsJson(testCases) {
|
|
|
103
103
|
id: testCase.id,
|
|
104
104
|
question: testCase.question,
|
|
105
105
|
expectedOutcome: testCase.expectedOutcome,
|
|
106
|
+
chatHistory: {
|
|
107
|
+
enabled: testCase.chatHistory.enabled,
|
|
108
|
+
value: testCase.chatHistory.value,
|
|
109
|
+
},
|
|
106
110
|
}));
|
|
107
111
|
return JSON.stringify(exportData, null, 2);
|
|
108
112
|
}
|
|
@@ -288,6 +292,7 @@ const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
|
|
|
288
292
|
function normalizeExpectedOutcomeField(field) {
|
|
289
293
|
return {
|
|
290
294
|
...field,
|
|
295
|
+
evaluationSource: field.evaluationSource || { type: 'text' },
|
|
291
296
|
evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
|
|
292
297
|
};
|
|
293
298
|
}
|
|
@@ -300,6 +305,7 @@ function createTestCase(expectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA)
|
|
|
300
305
|
id: v4(),
|
|
301
306
|
question: '',
|
|
302
307
|
expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),
|
|
308
|
+
chatHistory: { enabled: false, value: '' },
|
|
303
309
|
isRunning: false,
|
|
304
310
|
};
|
|
305
311
|
}
|
|
@@ -310,6 +316,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
310
316
|
type: 'text',
|
|
311
317
|
label: schemaField.label,
|
|
312
318
|
placeholder: schemaField.placeholder,
|
|
319
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
313
320
|
value: '',
|
|
314
321
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
315
322
|
};
|
|
@@ -318,6 +325,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
318
325
|
type: 'textarea',
|
|
319
326
|
label: schemaField.label,
|
|
320
327
|
placeholder: schemaField.placeholder,
|
|
328
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
321
329
|
rows: schemaField.rows,
|
|
322
330
|
value: '',
|
|
323
331
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
@@ -327,6 +335,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
327
335
|
type: 'chips-input',
|
|
328
336
|
label: schemaField.label,
|
|
329
337
|
placeholder: schemaField.placeholder,
|
|
338
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
330
339
|
value: [],
|
|
331
340
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
332
341
|
};
|
|
@@ -335,6 +344,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
335
344
|
type: 'select',
|
|
336
345
|
label: schemaField.label,
|
|
337
346
|
placeholder: schemaField.placeholder,
|
|
347
|
+
evaluationSource: schemaField.evaluationSource || { type: 'text' },
|
|
338
348
|
value: schemaField.options[0],
|
|
339
349
|
options: schemaField.options,
|
|
340
350
|
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
@@ -359,6 +369,7 @@ function createExpectedOutcomeFromSchema(expectedOutcomeSchema) {
|
|
|
359
369
|
function createTestCaseFromInput(data) {
|
|
360
370
|
return {
|
|
361
371
|
...data,
|
|
372
|
+
chatHistory: data.chatHistory ?? { enabled: false, value: '' },
|
|
362
373
|
expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),
|
|
363
374
|
};
|
|
364
375
|
}
|
|
@@ -2563,6 +2574,122 @@ function handleIntersectionResults(result, left, right) {
|
|
|
2563
2574
|
result.value = merged.data;
|
|
2564
2575
|
return result;
|
|
2565
2576
|
}
|
|
2577
|
+
const $ZodRecord = /*@__PURE__*/ $constructor("$ZodRecord", (inst, def) => {
|
|
2578
|
+
$ZodType.init(inst, def);
|
|
2579
|
+
inst._zod.parse = (payload, ctx) => {
|
|
2580
|
+
const input = payload.value;
|
|
2581
|
+
if (!isPlainObject(input)) {
|
|
2582
|
+
payload.issues.push({
|
|
2583
|
+
expected: "record",
|
|
2584
|
+
code: "invalid_type",
|
|
2585
|
+
input,
|
|
2586
|
+
inst,
|
|
2587
|
+
});
|
|
2588
|
+
return payload;
|
|
2589
|
+
}
|
|
2590
|
+
const proms = [];
|
|
2591
|
+
const values = def.keyType._zod.values;
|
|
2592
|
+
if (values) {
|
|
2593
|
+
payload.value = {};
|
|
2594
|
+
const recordKeys = new Set();
|
|
2595
|
+
for (const key of values) {
|
|
2596
|
+
if (typeof key === "string" || typeof key === "number" || typeof key === "symbol") {
|
|
2597
|
+
recordKeys.add(typeof key === "number" ? key.toString() : key);
|
|
2598
|
+
const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
|
|
2599
|
+
if (result instanceof Promise) {
|
|
2600
|
+
proms.push(result.then((result) => {
|
|
2601
|
+
if (result.issues.length) {
|
|
2602
|
+
payload.issues.push(...prefixIssues(key, result.issues));
|
|
2603
|
+
}
|
|
2604
|
+
payload.value[key] = result.value;
|
|
2605
|
+
}));
|
|
2606
|
+
}
|
|
2607
|
+
else {
|
|
2608
|
+
if (result.issues.length) {
|
|
2609
|
+
payload.issues.push(...prefixIssues(key, result.issues));
|
|
2610
|
+
}
|
|
2611
|
+
payload.value[key] = result.value;
|
|
2612
|
+
}
|
|
2613
|
+
}
|
|
2614
|
+
}
|
|
2615
|
+
let unrecognized;
|
|
2616
|
+
for (const key in input) {
|
|
2617
|
+
if (!recordKeys.has(key)) {
|
|
2618
|
+
unrecognized = unrecognized ?? [];
|
|
2619
|
+
unrecognized.push(key);
|
|
2620
|
+
}
|
|
2621
|
+
}
|
|
2622
|
+
if (unrecognized && unrecognized.length > 0) {
|
|
2623
|
+
payload.issues.push({
|
|
2624
|
+
code: "unrecognized_keys",
|
|
2625
|
+
input,
|
|
2626
|
+
inst,
|
|
2627
|
+
keys: unrecognized,
|
|
2628
|
+
});
|
|
2629
|
+
}
|
|
2630
|
+
}
|
|
2631
|
+
else {
|
|
2632
|
+
payload.value = {};
|
|
2633
|
+
for (const key of Reflect.ownKeys(input)) {
|
|
2634
|
+
if (key === "__proto__")
|
|
2635
|
+
continue;
|
|
2636
|
+
let keyResult = def.keyType._zod.run({ value: key, issues: [] }, ctx);
|
|
2637
|
+
if (keyResult instanceof Promise) {
|
|
2638
|
+
throw new Error("Async schemas not supported in object keys currently");
|
|
2639
|
+
}
|
|
2640
|
+
// Numeric string fallback: if key is a numeric string and failed, retry with Number(key)
|
|
2641
|
+
// This handles z.number(), z.literal([1, 2, 3]), and unions containing numeric literals
|
|
2642
|
+
const checkNumericKey = typeof key === "string" && number$1.test(key) && keyResult.issues.length;
|
|
2643
|
+
if (checkNumericKey) {
|
|
2644
|
+
const retryResult = def.keyType._zod.run({ value: Number(key), issues: [] }, ctx);
|
|
2645
|
+
if (retryResult instanceof Promise) {
|
|
2646
|
+
throw new Error("Async schemas not supported in object keys currently");
|
|
2647
|
+
}
|
|
2648
|
+
if (retryResult.issues.length === 0) {
|
|
2649
|
+
keyResult = retryResult;
|
|
2650
|
+
}
|
|
2651
|
+
}
|
|
2652
|
+
if (keyResult.issues.length) {
|
|
2653
|
+
if (def.mode === "loose") {
|
|
2654
|
+
// Pass through unchanged
|
|
2655
|
+
payload.value[key] = input[key];
|
|
2656
|
+
}
|
|
2657
|
+
else {
|
|
2658
|
+
// Default "strict" behavior: error on invalid key
|
|
2659
|
+
payload.issues.push({
|
|
2660
|
+
code: "invalid_key",
|
|
2661
|
+
origin: "record",
|
|
2662
|
+
issues: keyResult.issues.map((iss) => finalizeIssue(iss, ctx, config())),
|
|
2663
|
+
input: key,
|
|
2664
|
+
path: [key],
|
|
2665
|
+
inst,
|
|
2666
|
+
});
|
|
2667
|
+
}
|
|
2668
|
+
continue;
|
|
2669
|
+
}
|
|
2670
|
+
const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
|
|
2671
|
+
if (result instanceof Promise) {
|
|
2672
|
+
proms.push(result.then((result) => {
|
|
2673
|
+
if (result.issues.length) {
|
|
2674
|
+
payload.issues.push(...prefixIssues(key, result.issues));
|
|
2675
|
+
}
|
|
2676
|
+
payload.value[keyResult.value] = result.value;
|
|
2677
|
+
}));
|
|
2678
|
+
}
|
|
2679
|
+
else {
|
|
2680
|
+
if (result.issues.length) {
|
|
2681
|
+
payload.issues.push(...prefixIssues(key, result.issues));
|
|
2682
|
+
}
|
|
2683
|
+
payload.value[keyResult.value] = result.value;
|
|
2684
|
+
}
|
|
2685
|
+
}
|
|
2686
|
+
}
|
|
2687
|
+
if (proms.length) {
|
|
2688
|
+
return Promise.all(proms).then(() => payload);
|
|
2689
|
+
}
|
|
2690
|
+
return payload;
|
|
2691
|
+
};
|
|
2692
|
+
});
|
|
2566
2693
|
const $ZodEnum = /*@__PURE__*/ $constructor("$ZodEnum", (inst, def) => {
|
|
2567
2694
|
$ZodType.init(inst, def);
|
|
2568
2695
|
const values = getEnumValues(def.entries);
|
|
@@ -4146,6 +4273,49 @@ const intersectionProcessor = (schema, ctx, json, params) => {
|
|
|
4146
4273
|
];
|
|
4147
4274
|
json.allOf = allOf;
|
|
4148
4275
|
};
|
|
4276
|
+
const recordProcessor = (schema, ctx, _json, params) => {
|
|
4277
|
+
const json = _json;
|
|
4278
|
+
const def = schema._zod.def;
|
|
4279
|
+
json.type = "object";
|
|
4280
|
+
// For looseRecord with regex patterns, use patternProperties
|
|
4281
|
+
// This correctly represents "only validate keys matching the pattern" semantics
|
|
4282
|
+
// and composes well with allOf (intersections)
|
|
4283
|
+
const keyType = def.keyType;
|
|
4284
|
+
const keyBag = keyType._zod.bag;
|
|
4285
|
+
const patterns = keyBag?.patterns;
|
|
4286
|
+
if (def.mode === "loose" && patterns && patterns.size > 0) {
|
|
4287
|
+
// Use patternProperties for looseRecord with regex patterns
|
|
4288
|
+
const valueSchema = process$1(def.valueType, ctx, {
|
|
4289
|
+
...params,
|
|
4290
|
+
path: [...params.path, "patternProperties", "*"],
|
|
4291
|
+
});
|
|
4292
|
+
json.patternProperties = {};
|
|
4293
|
+
for (const pattern of patterns) {
|
|
4294
|
+
json.patternProperties[pattern.source] = valueSchema;
|
|
4295
|
+
}
|
|
4296
|
+
}
|
|
4297
|
+
else {
|
|
4298
|
+
// Default behavior: use propertyNames + additionalProperties
|
|
4299
|
+
if (ctx.target === "draft-07" || ctx.target === "draft-2020-12") {
|
|
4300
|
+
json.propertyNames = process$1(def.keyType, ctx, {
|
|
4301
|
+
...params,
|
|
4302
|
+
path: [...params.path, "propertyNames"],
|
|
4303
|
+
});
|
|
4304
|
+
}
|
|
4305
|
+
json.additionalProperties = process$1(def.valueType, ctx, {
|
|
4306
|
+
...params,
|
|
4307
|
+
path: [...params.path, "additionalProperties"],
|
|
4308
|
+
});
|
|
4309
|
+
}
|
|
4310
|
+
// Add required for keys with discrete values (enum, literal, etc.)
|
|
4311
|
+
const keyValues = keyType._zod.values;
|
|
4312
|
+
if (keyValues) {
|
|
4313
|
+
const validKeyValues = [...keyValues].filter((v) => typeof v === "string" || typeof v === "number");
|
|
4314
|
+
if (validKeyValues.length > 0) {
|
|
4315
|
+
json.required = validKeyValues;
|
|
4316
|
+
}
|
|
4317
|
+
}
|
|
4318
|
+
};
|
|
4149
4319
|
const nullableProcessor = (schema, ctx, json, params) => {
|
|
4150
4320
|
const def = schema._zod.def;
|
|
4151
4321
|
const inner = process$1(def.innerType, ctx, params);
|
|
@@ -4700,6 +4870,21 @@ function intersection(left, right) {
|
|
|
4700
4870
|
right: right,
|
|
4701
4871
|
});
|
|
4702
4872
|
}
|
|
4873
|
+
const ZodRecord = /*@__PURE__*/ $constructor("ZodRecord", (inst, def) => {
|
|
4874
|
+
$ZodRecord.init(inst, def);
|
|
4875
|
+
ZodType.init(inst, def);
|
|
4876
|
+
inst._zod.processJSONSchema = (ctx, json, params) => recordProcessor(inst, ctx, json, params);
|
|
4877
|
+
inst.keyType = def.keyType;
|
|
4878
|
+
inst.valueType = def.valueType;
|
|
4879
|
+
});
|
|
4880
|
+
function record(keyType, valueType, params) {
|
|
4881
|
+
return new ZodRecord({
|
|
4882
|
+
type: "record",
|
|
4883
|
+
keyType,
|
|
4884
|
+
valueType: valueType,
|
|
4885
|
+
...normalizeParams(params),
|
|
4886
|
+
});
|
|
4887
|
+
}
|
|
4703
4888
|
const ZodEnum = /*@__PURE__*/ $constructor("ZodEnum", (inst, def) => {
|
|
4704
4889
|
$ZodEnum.init(inst, def);
|
|
4705
4890
|
ZodType.init(inst, def);
|
|
@@ -4937,7 +5122,7 @@ const ZodCustom = /*@__PURE__*/ $constructor("ZodCustom", (inst, def) => {
|
|
|
4937
5122
|
inst._zod.processJSONSchema = (ctx, json, params) => customProcessor(inst, ctx);
|
|
4938
5123
|
});
|
|
4939
5124
|
function custom(fn, _params) {
|
|
4940
|
-
return _custom(ZodCustom, (() => true), _params);
|
|
5125
|
+
return _custom(ZodCustom, fn ?? (() => true), _params);
|
|
4941
5126
|
}
|
|
4942
5127
|
function refine(fn, _params = {}) {
|
|
4943
5128
|
return _refine(ZodCustom, fn, _params);
|
|
@@ -4952,6 +5137,19 @@ const optionalPositiveInt = number().int().positive().optional();
|
|
|
4952
5137
|
const optionalString = string().optional();
|
|
4953
5138
|
const selectOptionsSchema = array(nonEmptyString).min(1);
|
|
4954
5139
|
const optionalNumber = number().optional();
|
|
5140
|
+
const textEvaluationSourceSchema = object({
|
|
5141
|
+
type: literal('text'),
|
|
5142
|
+
});
|
|
5143
|
+
const customEvaluationSourceSchema = object({
|
|
5144
|
+
type: literal('custom'),
|
|
5145
|
+
extractorId: nonEmptyString,
|
|
5146
|
+
});
|
|
5147
|
+
const evaluationSourceExtractorSchema = custom(value => typeof value === 'function', 'Extractor must be a function.');
|
|
5148
|
+
record(string().min(1), evaluationSourceExtractorSchema);
|
|
5149
|
+
const evaluationSourceSchema = discriminatedUnion('type', [
|
|
5150
|
+
textEvaluationSourceSchema,
|
|
5151
|
+
customEvaluationSourceSchema,
|
|
5152
|
+
]);
|
|
4955
5153
|
const expectedOutcomeModeSchema = _enum(['static', 'dynamic']);
|
|
4956
5154
|
const evaluationParametersSchema = object({
|
|
4957
5155
|
approach: _enum(EvaluationApproach),
|
|
@@ -4969,6 +5167,7 @@ const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine(
|
|
|
4969
5167
|
const defaultExpectedOutcomeBaseSchema = object({
|
|
4970
5168
|
label: nonEmptyString,
|
|
4971
5169
|
placeholder: optionalString,
|
|
5170
|
+
evaluationSource: evaluationSourceSchema.optional(),
|
|
4972
5171
|
});
|
|
4973
5172
|
const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
|
|
4974
5173
|
text: baseSchema.extend({
|
|
@@ -5061,18 +5260,55 @@ function validateExpectedOutcomeSchema(schema) {
|
|
|
5061
5260
|
throw new Error(`Invalid expectedOutcomeSchema: ${parsed.error.issues[0].message}`);
|
|
5062
5261
|
}
|
|
5063
5262
|
}
|
|
5263
|
+
function validateExpectedOutcomeArrayWithExtractors(expectedOutcome, allowedExtractorIds) {
|
|
5264
|
+
const allowed = new Set(allowedExtractorIds);
|
|
5265
|
+
const schema = expectedOutcomeArraySchema.superRefine((fields, ctx) => {
|
|
5266
|
+
fields.forEach((field, index) => {
|
|
5267
|
+
if (field.evaluationSource?.type !== 'custom') {
|
|
5268
|
+
return;
|
|
5269
|
+
}
|
|
5270
|
+
if (allowed.has(field.evaluationSource.extractorId)) {
|
|
5271
|
+
return;
|
|
5272
|
+
}
|
|
5273
|
+
ctx.addIssue({
|
|
5274
|
+
code: 'custom',
|
|
5275
|
+
path: [index, 'evaluationSource', 'extractorId'],
|
|
5276
|
+
message: `Invalid expectedOutcome: Extractor "${field.evaluationSource.extractorId}" is not registered.`,
|
|
5277
|
+
});
|
|
5278
|
+
});
|
|
5279
|
+
});
|
|
5280
|
+
const parsed = schema.safeParse(expectedOutcome);
|
|
5281
|
+
if (!parsed.success) {
|
|
5282
|
+
throw new Error(parsed.error.issues[0].message);
|
|
5283
|
+
}
|
|
5284
|
+
}
|
|
5285
|
+
function getExtractorIds(extractors) {
|
|
5286
|
+
return Object.keys(extractors || {});
|
|
5287
|
+
}
|
|
5064
5288
|
|
|
5289
|
+
const modelResponseMetadataSchema = record(string(), unknown());
|
|
5290
|
+
const modelResponsePayloadSchema = object({
|
|
5291
|
+
text: string().optional(),
|
|
5292
|
+
metadata: modelResponseMetadataSchema.optional(),
|
|
5293
|
+
});
|
|
5294
|
+
|
|
5295
|
+
const testCaseChatHistorySchema = object({
|
|
5296
|
+
enabled: boolean(),
|
|
5297
|
+
value: string(),
|
|
5298
|
+
});
|
|
5065
5299
|
const testCaseInputSchema = object({
|
|
5066
5300
|
id: string(),
|
|
5067
5301
|
question: string(),
|
|
5068
5302
|
expectedOutcome: expectedOutcomeArraySchema,
|
|
5303
|
+
chatHistory: testCaseChatHistorySchema.optional(),
|
|
5069
5304
|
});
|
|
5070
5305
|
const testCaseInputArraySchema = array(testCaseInputSchema);
|
|
5071
5306
|
object({
|
|
5072
5307
|
id: string(),
|
|
5073
5308
|
question: string(),
|
|
5074
5309
|
expectedOutcome: expectedOutcomeArraySchema,
|
|
5075
|
-
output:
|
|
5310
|
+
output: modelResponsePayloadSchema.optional(),
|
|
5311
|
+
chatHistory: testCaseChatHistorySchema,
|
|
5076
5312
|
isRunning: boolean().optional(),
|
|
5077
5313
|
error: string().optional(),
|
|
5078
5314
|
evaluationResult: custom().optional(),
|
|
@@ -5094,10 +5330,15 @@ function validateTestCaseInputArray(data) {
|
|
|
5094
5330
|
* @param jsonContent - The JSON string to parse and validate
|
|
5095
5331
|
* @returns Validation result with test cases or error message
|
|
5096
5332
|
*/
|
|
5097
|
-
function importTestSuite(jsonContent) {
|
|
5333
|
+
function importTestSuite(jsonContent, allowedExtractorIds = []) {
|
|
5098
5334
|
try {
|
|
5099
5335
|
const parsed = JSON.parse(jsonContent);
|
|
5100
5336
|
validateTestCaseInputArray(parsed);
|
|
5337
|
+
if (allowedExtractorIds.length > 0) {
|
|
5338
|
+
parsed.forEach((testCase) => {
|
|
5339
|
+
validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, allowedExtractorIds);
|
|
5340
|
+
});
|
|
5341
|
+
}
|
|
5101
5342
|
const testCases = parsed.map((item, index) => {
|
|
5102
5343
|
try {
|
|
5103
5344
|
return createTestCaseFromInput(item);
|
|
@@ -5123,7 +5364,7 @@ function importTestSuite(jsonContent) {
|
|
|
5123
5364
|
}
|
|
5124
5365
|
|
|
5125
5366
|
const MISSING_RESOLVER_MESSAGE = 'resolveExpectedOutcome is required when a test case has dynamic expected outcomes.';
|
|
5126
|
-
function isDynamicTextareaField(field) {
|
|
5367
|
+
function isDynamicTextareaField$1(field) {
|
|
5127
5368
|
return field.type === 'textarea' && field.outcomeMode === 'dynamic';
|
|
5128
5369
|
}
|
|
5129
5370
|
function applyResolvedDynamicValues(testCase, resolvedValues) {
|
|
@@ -5133,7 +5374,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
|
|
|
5133
5374
|
const expectedOutcome = [...(testCase.expectedOutcome || [])];
|
|
5134
5375
|
for (const resolved of resolvedValues) {
|
|
5135
5376
|
const field = expectedOutcome[resolved.index];
|
|
5136
|
-
if (!field || !isDynamicTextareaField(field)) {
|
|
5377
|
+
if (!field || !isDynamicTextareaField$1(field)) {
|
|
5137
5378
|
continue;
|
|
5138
5379
|
}
|
|
5139
5380
|
expectedOutcome[resolved.index] = {
|
|
@@ -5148,7 +5389,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
|
|
|
5148
5389
|
}
|
|
5149
5390
|
async function resolveDynamicExpectedOutcomes(testCase, resolver) {
|
|
5150
5391
|
const dynamicFields = (testCase.expectedOutcome || []).flatMap((field, index) => {
|
|
5151
|
-
if (!isDynamicTextareaField(field)) {
|
|
5392
|
+
if (!isDynamicTextareaField$1(field)) {
|
|
5152
5393
|
return [];
|
|
5153
5394
|
}
|
|
5154
5395
|
return [{ field, index }];
|
|
@@ -5166,6 +5407,15 @@ async function resolveDynamicExpectedOutcomes(testCase, resolver) {
|
|
|
5166
5407
|
return applyResolvedDynamicValues(testCase, resolvedValues);
|
|
5167
5408
|
}
|
|
5168
5409
|
|
|
5410
|
+
function isChipsInputField(field) {
|
|
5411
|
+
return field.type === 'chips-input';
|
|
5412
|
+
}
|
|
5413
|
+
function isTextareaField(field) {
|
|
5414
|
+
return field.type === 'textarea';
|
|
5415
|
+
}
|
|
5416
|
+
function isDynamicTextareaField(field) {
|
|
5417
|
+
return isTextareaField(field) && field.outcomeMode === 'dynamic';
|
|
5418
|
+
}
|
|
5169
5419
|
function applyExpectedOutcomeChange(testCase, change) {
|
|
5170
5420
|
const { index } = change;
|
|
5171
5421
|
const expectedOutcome = [...(testCase.expectedOutcome || [])];
|
|
@@ -5173,73 +5423,99 @@ function applyExpectedOutcomeChange(testCase, change) {
|
|
|
5173
5423
|
if (!target) {
|
|
5174
5424
|
return testCase;
|
|
5175
5425
|
}
|
|
5426
|
+
const commit = (updatedField) => {
|
|
5427
|
+
expectedOutcome[index] = updatedField;
|
|
5428
|
+
return { ...testCase, expectedOutcome };
|
|
5429
|
+
};
|
|
5176
5430
|
switch (change.operation) {
|
|
5177
5431
|
case 'set-value': {
|
|
5178
|
-
if (target
|
|
5432
|
+
if (isChipsInputField(target)) {
|
|
5179
5433
|
return testCase;
|
|
5180
5434
|
}
|
|
5181
|
-
if (target
|
|
5435
|
+
if (isDynamicTextareaField(target)) {
|
|
5182
5436
|
return testCase;
|
|
5183
5437
|
}
|
|
5184
|
-
|
|
5438
|
+
return commit({
|
|
5185
5439
|
...target,
|
|
5186
5440
|
value: change.value,
|
|
5187
|
-
};
|
|
5188
|
-
return { ...testCase, expectedOutcome };
|
|
5441
|
+
});
|
|
5189
5442
|
}
|
|
5190
5443
|
case 'add-chip': {
|
|
5191
|
-
if (target
|
|
5444
|
+
if (!isChipsInputField(target)) {
|
|
5192
5445
|
return testCase;
|
|
5193
5446
|
}
|
|
5194
|
-
|
|
5447
|
+
return commit({
|
|
5195
5448
|
...target,
|
|
5196
5449
|
value: [...target.value, change.value],
|
|
5197
|
-
};
|
|
5198
|
-
return { ...testCase, expectedOutcome };
|
|
5450
|
+
});
|
|
5199
5451
|
}
|
|
5200
5452
|
case 'remove-chip': {
|
|
5201
|
-
if (target
|
|
5453
|
+
if (!isChipsInputField(target)) {
|
|
5202
5454
|
return testCase;
|
|
5203
5455
|
}
|
|
5204
|
-
|
|
5456
|
+
return commit({
|
|
5205
5457
|
...target,
|
|
5206
5458
|
value: target.value.filter(chip => chip !== change.value),
|
|
5207
|
-
};
|
|
5208
|
-
return { ...testCase, expectedOutcome };
|
|
5459
|
+
});
|
|
5209
5460
|
}
|
|
5210
5461
|
case 'set-evaluation-approach':
|
|
5211
5462
|
return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
|
|
5212
5463
|
case 'set-outcome-mode': {
|
|
5213
|
-
if (target
|
|
5464
|
+
if (!isTextareaField(target)) {
|
|
5214
5465
|
return testCase;
|
|
5215
5466
|
}
|
|
5216
5467
|
const mode = change.value;
|
|
5217
5468
|
if (mode === 'static') {
|
|
5218
5469
|
const { resolutionQuery: _, ...rest } = target;
|
|
5219
|
-
|
|
5470
|
+
return commit({
|
|
5220
5471
|
...rest,
|
|
5221
5472
|
outcomeMode: 'static',
|
|
5222
5473
|
value: '',
|
|
5223
|
-
};
|
|
5474
|
+
});
|
|
5224
5475
|
}
|
|
5225
5476
|
else {
|
|
5226
|
-
|
|
5477
|
+
return commit({
|
|
5227
5478
|
...target,
|
|
5228
5479
|
outcomeMode: 'dynamic',
|
|
5229
5480
|
value: '',
|
|
5230
|
-
};
|
|
5481
|
+
});
|
|
5231
5482
|
}
|
|
5232
|
-
return { ...testCase, expectedOutcome };
|
|
5233
5483
|
}
|
|
5234
5484
|
case 'set-resolution-query': {
|
|
5235
|
-
if (target
|
|
5485
|
+
if (!isDynamicTextareaField(target)) {
|
|
5236
5486
|
return testCase;
|
|
5237
5487
|
}
|
|
5238
|
-
|
|
5488
|
+
return commit({
|
|
5239
5489
|
...target,
|
|
5240
5490
|
resolutionQuery: change.value,
|
|
5241
|
-
};
|
|
5242
|
-
|
|
5491
|
+
});
|
|
5492
|
+
}
|
|
5493
|
+
case 'set-evaluation-source-type': {
|
|
5494
|
+
if (change.value === 'text') {
|
|
5495
|
+
return commit({
|
|
5496
|
+
...target,
|
|
5497
|
+
evaluationSource: { type: 'text' },
|
|
5498
|
+
});
|
|
5499
|
+
}
|
|
5500
|
+
const extractorId = target.evaluationSource?.type === 'custom'
|
|
5501
|
+
? target.evaluationSource.extractorId
|
|
5502
|
+
: (change.fallbackExtractorId ?? '');
|
|
5503
|
+
return commit({
|
|
5504
|
+
...target,
|
|
5505
|
+
evaluationSource: {
|
|
5506
|
+
type: 'custom',
|
|
5507
|
+
extractorId,
|
|
5508
|
+
},
|
|
5509
|
+
});
|
|
5510
|
+
}
|
|
5511
|
+
case 'set-evaluation-source-extractor': {
|
|
5512
|
+
return commit({
|
|
5513
|
+
...target,
|
|
5514
|
+
evaluationSource: {
|
|
5515
|
+
type: 'custom',
|
|
5516
|
+
extractorId: change.value,
|
|
5517
|
+
},
|
|
5518
|
+
});
|
|
5243
5519
|
}
|
|
5244
5520
|
}
|
|
5245
5521
|
}
|
|
@@ -30020,7 +30296,7 @@ class LLMEvaluationEngine {
|
|
|
30020
30296
|
const fieldRequest = {
|
|
30021
30297
|
testCaseId: request.testCaseId,
|
|
30022
30298
|
question: request.question,
|
|
30023
|
-
actualResponse:
|
|
30299
|
+
actualResponse: field.actualResponse,
|
|
30024
30300
|
expectedOutcome: field.expectedValue,
|
|
30025
30301
|
evaluationParameters: field.evaluationParameters,
|
|
30026
30302
|
};
|
|
@@ -30090,6 +30366,58 @@ class LLMEvaluationEngine {
|
|
|
30090
30366
|
}
|
|
30091
30367
|
}
|
|
30092
30368
|
|
|
30369
|
+
function toTextSource() {
|
|
30370
|
+
return { type: 'text' };
|
|
30371
|
+
}
|
|
30372
|
+
/**
 * Resolves the string that one expected-outcome field is evaluated against.
 *
 * - `text` source (the default when the field declares no source): the
 *   trimmed `output.text`.
 * - `custom` source: the trimmed string returned by the extractor registered
 *   under `field.evaluationSource.extractorId`, called with `output`
 *   (or `{}` when `output` is missing).
 *
 * This function never rejects for extractor problems; every failure is
 * reported through the returned object.
 *
 * @param {object} field - Expected-outcome field; `field.evaluationSource` is optional.
 * @param {object} [output] - Model output; only `output.text` is read directly here.
 * @param {object} [extractors] - Map of extractor id to (possibly async) extractor function.
 * @returns {Promise<{success: true, value: string} | {success: false, error: string}>}
 */
async function resolveActualValue(field, output, extractors) {
    const source = field.evaluationSource || toTextSource();
    if (source.type === 'text') {
        // Guard the type first: calling `.trim()` on a non-string `text`
        // would throw outside of any try/catch and abort the whole evaluation.
        const rawText = output?.text;
        const text = typeof rawText === 'string' ? rawText.trim() : '';
        if (!text) {
            return {
                success: false,
                error: 'Model response text is empty.',
            };
        }
        return { success: true, value: text };
    }
    const { extractorId } = source;
    // Only own properties count as registered extractors. A plain lookup would
    // also resolve inherited members such as `toString` or `constructor`, and
    // calling those yields a bogus "successful" value like "[object Object]".
    // NOTE(review): assumes `extractors` is a plain object keyed by extractor id - confirm.
    const extractor = extractors != null && Object.hasOwn(extractors, extractorId)
        ? extractors[extractorId]
        : undefined;
    if (typeof extractor !== 'function') {
        return {
            success: false,
            error: `Extractor "${extractorId}" is not registered.`,
        };
    }
    try {
        const extractedRaw = await extractor(output || {});
        if (typeof extractedRaw !== 'string') {
            return {
                success: false,
                error: `Extractor "${extractorId}" must return a string.`,
            };
        }
        const extracted = extractedRaw.trim();
        if (!extracted) {
            return {
                success: false,
                error: `Extractor "${extractorId}" returned an empty value.`,
            };
        }
        return {
            success: true,
            value: extracted,
        };
    }
    catch (error) {
        // Fall back to the generic message when the thrown value is not an
        // Error or carries an empty message, so `error` is never blank.
        return {
            success: false,
            error: error instanceof Error && error.message
                ? error.message
                : `Extractor "${extractorId}" failed.`,
        };
    }
}
|
|
30420
|
+
|
|
30093
30421
|
/**
|
|
30094
30422
|
* Service for evaluating test case responses
|
|
30095
30423
|
*/
|
|
@@ -30103,34 +30431,71 @@ class EvaluationService {
|
|
|
30103
30431
|
* @param testCase - The test case to evaluate
|
|
30104
30432
|
* @param onResult - Callback to handle the evaluation result
|
|
30105
30433
|
*/
|
|
30106
|
-
async evaluateTestCase(testCase, onResult) {
|
|
30107
|
-
|
|
30108
|
-
|
|
30109
|
-
|
|
30110
|
-
}
|
|
30111
|
-
const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
|
|
30434
|
+
async evaluateTestCase(testCase, onResult, extractors) {
|
|
30435
|
+
const fields = [];
|
|
30436
|
+
const failedFields = [];
|
|
30437
|
+
for (const [index, field] of (testCase.expectedOutcome || []).entries()) {
|
|
30112
30438
|
if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
|
|
30113
|
-
|
|
30439
|
+
continue;
|
|
30114
30440
|
}
|
|
30115
|
-
|
|
30116
|
-
|
|
30441
|
+
const evaluationParameters = normalizeEvaluationParametersForField(field.type, field.evaluationParameters);
|
|
30442
|
+
const expectedValue = getFieldExpectedValue(field);
|
|
30443
|
+
const resolvedActualValue = await resolveActualValue(field, testCase.output, extractors);
|
|
30444
|
+
if (resolvedActualValue.success) {
|
|
30445
|
+
fields.push({
|
|
30117
30446
|
index,
|
|
30118
30447
|
label: field.label,
|
|
30119
30448
|
type: field.type,
|
|
30120
|
-
expectedValue
|
|
30121
|
-
|
|
30122
|
-
|
|
30123
|
-
|
|
30124
|
-
|
|
30449
|
+
expectedValue,
|
|
30450
|
+
actualResponse: resolvedActualValue.value,
|
|
30451
|
+
evaluationParameters,
|
|
30452
|
+
});
|
|
30453
|
+
}
|
|
30454
|
+
else {
|
|
30455
|
+
failedFields.push({
|
|
30456
|
+
index,
|
|
30457
|
+
label: field.label,
|
|
30458
|
+
type: field.type,
|
|
30459
|
+
expectedValue,
|
|
30460
|
+
passed: false,
|
|
30461
|
+
keywordMatches: [],
|
|
30462
|
+
evaluationParameters,
|
|
30463
|
+
evaluationApproachResult: {
|
|
30464
|
+
score: 0,
|
|
30465
|
+
approachUsed: evaluationParameters.approach,
|
|
30466
|
+
},
|
|
30467
|
+
error: 'error' in resolvedActualValue
|
|
30468
|
+
? resolvedActualValue.error
|
|
30469
|
+
: 'Failed to resolve actual value.',
|
|
30470
|
+
});
|
|
30471
|
+
}
|
|
30472
|
+
}
|
|
30473
|
+
if (fields.length === 0) {
|
|
30474
|
+
if (failedFields.length === 0) {
|
|
30475
|
+
console.warn('⚠️ No evaluable fields for test case:', testCase.id);
|
|
30476
|
+
return;
|
|
30477
|
+
}
|
|
30478
|
+
onResult({
|
|
30479
|
+
testCaseId: testCase.id,
|
|
30480
|
+
passed: false,
|
|
30481
|
+
keywordMatches: [],
|
|
30482
|
+
fieldResults: failedFields,
|
|
30483
|
+
timestamp: new Date().toISOString(),
|
|
30484
|
+
});
|
|
30485
|
+
return;
|
|
30486
|
+
}
|
|
30125
30487
|
const evaluationRequest = {
|
|
30126
30488
|
testCaseId: testCase.id,
|
|
30127
30489
|
question: testCase.question,
|
|
30128
|
-
actualResponse: testCase.output,
|
|
30129
30490
|
fields,
|
|
30130
30491
|
};
|
|
30131
30492
|
await this.engine.evaluateResponse(evaluationRequest, (result) => {
|
|
30132
|
-
|
|
30133
|
-
onResult(
|
|
30493
|
+
const combinedResults = [...(result.fieldResults || []), ...failedFields].sort((a, b) => a.index - b.index);
|
|
30494
|
+
onResult({
|
|
30495
|
+
...result,
|
|
30496
|
+
passed: combinedResults.every(field => field.passed && !field.error),
|
|
30497
|
+
fieldResults: combinedResults,
|
|
30498
|
+
});
|
|
30134
30499
|
});
|
|
30135
30500
|
}
|
|
30136
30501
|
}
|
|
@@ -30178,7 +30543,7 @@ const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isR
|
|
|
30178
30543
|
};
|
|
30179
30544
|
|
|
30180
30545
|
/**
 * Functional component for the output cell of a test-case row.
 *
 * Renders `output.text` when it is truthy; otherwise renders a placeholder
 * that reads 'Running...' while `isRunning` is truthy and is empty otherwise.
 */
const ResponseOutput = ({ output, isRunning, }) => {
    const text = output?.text;
    if (text) {
        return h("div", { class: "response-output" }, h("div", { class: "response-output__content" }, text));
    }
    const placeholder = isRunning ? 'Running...' : '';
    return h("div", { class: "response-output" }, h("div", { class: "response-output__placeholder" }, placeholder));
};
|
|
30183
30548
|
|
|
30184
30549
|
const EvaluationSummary = ({ result, isRunning, }) => {
|
|
@@ -30216,7 +30581,9 @@ var FormFieldType;
|
|
|
30216
30581
|
FormFieldType["SELECT"] = "select";
|
|
30217
30582
|
})(FormFieldType || (FormFieldType = {}));
|
|
30218
30583
|
|
|
30219
|
-
const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, onExpectedOutcomeChange, }) => {
|
|
30584
|
+
const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, extractorIds = [], onExpectedOutcomeChange, }) => {
|
|
30585
|
+
const hasExtractorOptions = extractorIds.length > 0;
|
|
30586
|
+
const firstExtractorId = extractorIds[0];
|
|
30220
30587
|
const emit = (detail) => onExpectedOutcomeChange({
|
|
30221
30588
|
detail,
|
|
30222
30589
|
});
|
|
@@ -30246,6 +30613,23 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
|
|
|
30246
30613
|
required: false,
|
|
30247
30614
|
rows: 2,
|
|
30248
30615
|
});
|
|
30616
|
+
const buildEvaluationSourceConfig = (index) => ({
|
|
30617
|
+
name: `expectedOutcomeEvaluationSource-${index}`,
|
|
30618
|
+
fieldType: FormFieldType.SELECT,
|
|
30619
|
+
label: 'Evaluation Source',
|
|
30620
|
+
placeholder: 'Select evaluation source',
|
|
30621
|
+
required: true,
|
|
30622
|
+
optionList: ['text', 'custom'],
|
|
30623
|
+
defaultValue: 'text',
|
|
30624
|
+
});
|
|
30625
|
+
const buildExtractorConfig = (index) => ({
|
|
30626
|
+
name: `expectedOutcomeEvaluationSourceExtractor-${index}`,
|
|
30627
|
+
fieldType: FormFieldType.SELECT,
|
|
30628
|
+
label: 'Extractor',
|
|
30629
|
+
placeholder: 'Select extractor',
|
|
30630
|
+
required: true,
|
|
30631
|
+
optionList: extractorIds,
|
|
30632
|
+
});
|
|
30249
30633
|
const renderEvaluationSelector = (field, index) => {
|
|
30250
30634
|
const optionList = getAllowedApproachesForFieldType(field.type);
|
|
30251
30635
|
return (h("app-select", { config: buildEvaluationConfig(index, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
|
|
@@ -30255,6 +30639,27 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
|
|
|
30255
30639
|
value: e.detail.value,
|
|
30256
30640
|
}) }));
|
|
30257
30641
|
};
|
|
30642
|
+
const renderEvaluationSourceSelector = (field, index) => {
|
|
30643
|
+
if (!hasExtractorOptions) {
|
|
30644
|
+
return null;
|
|
30645
|
+
}
|
|
30646
|
+
const sourceType = field.evaluationSource?.type || 'text';
|
|
30647
|
+
return (h("div", null, h("app-select", { config: buildEvaluationSourceConfig(index), value: sourceType, onValueChange: (e) => emit({
|
|
30648
|
+
testCaseId,
|
|
30649
|
+
index,
|
|
30650
|
+
operation: 'set-evaluation-source-type',
|
|
30651
|
+
value: e.detail.value,
|
|
30652
|
+
fallbackExtractorId: firstExtractorId,
|
|
30653
|
+
}) }), sourceType === 'custom' && (h("app-select", { config: buildExtractorConfig(index), value: field.evaluationSource?.type === 'custom'
|
|
30654
|
+
? field.evaluationSource.extractorId
|
|
30655
|
+
: '', onValueChange: (e) => emit({
|
|
30656
|
+
testCaseId,
|
|
30657
|
+
index,
|
|
30658
|
+
operation: 'set-evaluation-source-extractor',
|
|
30659
|
+
value: e.detail.value,
|
|
30660
|
+
}) }))));
|
|
30661
|
+
};
|
|
30662
|
+
const renderEvaluationOptions = (field, index) => (h("details", { class: "expected-outcome-renderer__options" }, h("summary", { class: "expected-outcome-renderer__options-summary" }, "More options"), h("div", { class: "expected-outcome-renderer__options-content" }, renderEvaluationSelector(field, index), renderEvaluationSourceSelector(field, index))));
|
|
30258
30663
|
return (h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index) => {
|
|
30259
30664
|
if (field.type === 'textarea') {
|
|
30260
30665
|
const isDynamic = dynamicResolutionSupported && field.outcomeMode === 'dynamic';
|
|
@@ -30286,7 +30691,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
|
|
|
30286
30691
|
index,
|
|
30287
30692
|
operation: 'set-resolution-query',
|
|
30288
30693
|
value: e.detail.value,
|
|
30289
|
-
}) })), !isDynamic &&
|
|
30694
|
+
}) })), !isDynamic && renderEvaluationOptions(field, index)));
|
|
30290
30695
|
}
|
|
30291
30696
|
if (field.type === 'chips-input') {
|
|
30292
30697
|
const config = {
|
|
@@ -30306,7 +30711,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
|
|
|
30306
30711
|
index,
|
|
30307
30712
|
operation: 'remove-chip',
|
|
30308
30713
|
value: e.detail.value,
|
|
30309
|
-
}) }),
|
|
30714
|
+
}) }), renderEvaluationOptions(field, index)));
|
|
30310
30715
|
}
|
|
30311
30716
|
if (field.type === 'select') {
|
|
30312
30717
|
const config = {
|
|
@@ -30322,18 +30727,18 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
|
|
|
30322
30727
|
index,
|
|
30323
30728
|
operation: 'set-value',
|
|
30324
30729
|
value: e.detail.value,
|
|
30325
|
-
}) }),
|
|
30730
|
+
}) }), renderEvaluationOptions(field, index)));
|
|
30326
30731
|
}
|
|
30327
30732
|
return (h("div", { class: "expected-outcome-renderer__group" }, h("div", { class: "expected-outcome-renderer__text" }, h("label", null, field.label), h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
|
|
30328
30733
|
testCaseId,
|
|
30329
30734
|
index,
|
|
30330
30735
|
operation: 'set-value',
|
|
30331
30736
|
value: e.target.value,
|
|
30332
|
-
}) })),
|
|
30737
|
+
}) })), renderEvaluationOptions(field, index)));
|
|
30333
30738
|
})));
|
|
30334
30739
|
};
|
|
30335
30740
|
|
|
30336
|
-
const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, }) => {
|
|
30741
|
+
const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
|
|
30337
30742
|
const questionConfig = {
|
|
30338
30743
|
name: 'question',
|
|
30339
30744
|
fieldType: FormFieldType.TEXT_AREA,
|
|
@@ -30349,11 +30754,21 @@ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, o
|
|
|
30349
30754
|
key: 'question',
|
|
30350
30755
|
value: e.detail.value,
|
|
30351
30756
|
},
|
|
30352
|
-
}) }), h(
|
|
30757
|
+
}) }), h("chat-history", { chatHistoryEnabled: testCase.chatHistory?.enabled ?? false, chatHistoryValue: testCase.chatHistory?.value ?? '', onChatHistoryChange: (e) => {
|
|
30758
|
+
const { enabled, value } = e
|
|
30759
|
+
.detail;
|
|
30760
|
+
onChatHistoryChange({
|
|
30761
|
+
detail: {
|
|
30762
|
+
testCaseId: testCase.id,
|
|
30763
|
+
enabled,
|
|
30764
|
+
value,
|
|
30765
|
+
},
|
|
30766
|
+
});
|
|
30767
|
+
} }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
|
|
30353
30768
|
};
|
|
30354
30769
|
|
|
30355
|
-
const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
|
|
30356
|
-
return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
|
|
30770
|
+
/**
 * Functional component for the test-case table: a header row with the four
 * column titles, one LLMTestCaseRow per test case, and a footer with the
 * "+ Add Question" button.
 *
 * All handlers and the `dynamicResolutionSupported` / `extractorIds` options
 * are passed through unchanged to every row.
 */
const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
    return (h("div", { class: "test-cases" },
        h("div", { class: "test-cases__column-headers" },
            h("div", { class: "test-cases__column-header" }, "Input"),
            h("div", { class: "test-cases__column-header" }, "Output"),
            h("div", { class: "test-cases__column-header" }, "Evaluation"),
            h("div", { class: "test-cases__column-header" }, "Actions")),
        // A missing list renders as an empty table instead of throwing,
        // matching how ExpectedOutcomeRenderer guards `fields`.
        (testCases || []).map(testCase => (h(LLMTestCaseRow, {
            testCase: testCase,
            dynamicResolutionSupported: dynamicResolutionSupported,
            extractorIds: extractorIds,
            onRun: onRun,
            onDelete: onDelete,
            handleTestCaseChange: handleTestCaseChange,
            onExpectedOutcomeChange: onExpectedOutcomeChange,
            onChatHistoryChange: onChatHistoryChange,
        }))),
        h("div", { class: "test-cases__add-section" },
            h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
};
|
|
30358
30773
|
|
|
30359
30774
|
/**
 * Returns the design-token stylesheet: CSS custom properties declared on
 * `:host` (spacing, radii, font sizes/weights, line heights, letter spacing,
 * shadows, z-indexes, opacities, max widths, breakpoints and colors), plus
 * the color overrides applied under `:host([data-theme='dark'])`.
 */
const tokensCss = () => `:host{--spacing:0.25rem;--spacing-1:calc(var(--spacing) * 1);--spacing-2:calc(var(--spacing) * 2);--spacing-3:calc(var(--spacing) * 3);--spacing-4:calc(var(--spacing) * 4);--spacing-5:calc(var(--spacing) * 5);--spacing-6:calc(var(--spacing) * 6);--spacing-8:calc(var(--spacing) * 8);--spacing-10:calc(var(--spacing) * 10);--spacing-12:calc(var(--spacing) * 12);--spacing-16:calc(var(--spacing) * 16);--spacing-20:calc(var(--spacing) * 20);--spacing-24:calc(var(--spacing) * 24);--radius-none:0;--radius-sm:0.125rem;--radius-md:0.375rem;--radius-lg:0.5rem;--radius-xl:0.75rem;--radius-2xl:1rem;--radius-3xl:1.5rem;--radius-full:9999px;--radius:var(--radius-lg);--font-size-xs:0.75rem;--font-size-sm:0.875rem;--font-size-base:1rem;--font-size-lg:1.125rem;--font-size-xl:1.25rem;--font-size-2xl:1.5rem;--font-size-3xl:1.875rem;--font-size-4xl:2.25rem;--font-weight-normal:400;--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--line-height-none:1;--line-height-tight:1.25;--line-height-snug:1.375;--line-height-normal:1.5;--line-height-relaxed:1.625;--line-height-loose:2;--letter-spacing-tight:-0.025em;--letter-spacing-normal:0;--letter-spacing-wide:0.05em;--shadow-sm:0 1px 2px 0 rgba(0, 0, 0, 0.05);--shadow-md:0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);--shadow-lg:0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);--shadow-xl:0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);--shadow-2xl:0 25px 50px -12px rgba(0, 0, 0, 
0.25);--border-width:1px;--z-base:0;--z-dropdown:1000;--z-sticky:1100;--z-modal:1200;--z-popover:1300;--z-tooltip:1400;--opacity-disabled:0.5;--opacity-hover:0.8;--opacity-muted:0.6;--max-w-sm:24rem;--max-w-md:28rem;--max-w-lg:32rem;--max-w-xl:42rem;--max-w-2xl:48rem;--max-w-full:100%;--breakpoint-sm:640px;--breakpoint-md:768px;--breakpoint-lg:1024px;--breakpoint-xl:1280px;--breakpoint-2xl:1536px;--background:#ffffff;--foreground:#0a0a0a;--card:#ffffff;--card-foreground:#0a0a0a;--popover:#ffffff;--popover-foreground:#0a0a0a;--primary:#0a0a0a;--primary-foreground:#fafafa;--secondary:#f4f4f5;--secondary-foreground:#0a0a0a;--muted:#f4f4f5;--muted-foreground:#71717a;--accent:#f4f4f5;--accent-foreground:#0a0a0a;--destructive:#ef4444;--destructive-foreground:#fafafa;--border:#e4e4e7;--input:#e4e4e7;--ring:#3b82f6;--success:#10b981;--success-foreground:#fafafa;--warning:#f59e0b;--warning-foreground:#fafafa;--info:#3b82f6;--info-foreground:#fafafa}:host([data-theme='dark']){--background:#0a0a0a;--foreground:#fafafa;--card:#171717;--card-foreground:#fafafa;--popover:#171717;--popover-foreground:#fafafa;--primary:#fafafa;--primary-foreground:#0a0a0a;--secondary:#27272a;--secondary-foreground:#fafafa;--muted:#27272a;--muted-foreground:#a1a1aa;--accent:#27272a;--accent-foreground:#fafafa;--destructive:#dc2626;--destructive-foreground:#fafafa;--border:#27272a;--input:#27272a;--ring:#3b82f6;--success:#059669;--success-foreground:#fafafa;--warning:#d97706;--warning-foreground:#fafafa;--info:#2563eb;--info-foreground:#fafafa}`;
|
|
@@ -30364,7 +30779,7 @@ const llmTestRunnerHeaderCss = () => `.test-runner-header{display:flex;justify-c
|
|
|
30364
30779
|
|
|
30365
30780
|
/**
 * Returns the stylesheet for the test-case table: the four-column header
 * grid (`1fr 1.5fr 0.5fr 120px`), the header cells and the "add" footer
 * section. The column headers are hidden at viewport widths of 1200px and below.
 */
const llmTestCasesCss = () => `.test-cases{background:var(--background)}.test-cases__column-headers{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);background:var(--border);border-bottom:2px solid var(--border)}.test-cases__column-header{background:var(--muted);padding:var(--spacing-4) var(--spacing-5);font-weight:var(--font-weight-semibold);color:var(--foreground);font-size:var(--font-size-sm);text-transform:uppercase;letter-spacing:var(--letter-spacing-wide)}.test-cases__add-section{padding:var(--spacing-6);text-align:center;background:var(--muted);border-top:var(--border-width) solid var(--border)}@media (max-width: 1200px){.test-cases__column-headers{display:none}}`;
|
|
30366
30781
|
|
|
30367
|
-
const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
|
|
30782
|
+
/**
 * Returns the stylesheet for a single test-case row: the row grid (same
 * column template as the table header), the input column, the expected-outcome
 * renderer groups, and the collapsible options panel
 * (`.expected-outcome-renderer__options*`). The row collapses to a single
 * column at 1200px and below; padding and min-height are relaxed at 768px and below.
 */
const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}.expected-outcome-renderer__options{border:var(--border-width) solid var(--border);border-radius:var(--radius-sm);background:var(--muted)}.expected-outcome-renderer__options-summary{cursor:pointer;font-size:var(--font-size-sm);color:var(--foreground);padding:var(--spacing-2) var(--spacing-3);user-select:none}.expected-outcome-renderer__options-content{display:flex;flex-direction:column;gap:var(--spacing-2);padding:0 var(--spacing-3) var(--spacing-3)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
|
|
30368
30783
|
|
|
30369
30784
|
/**
 * Returns the stylesheet for the `.row-actions` cell: a vertical flex stack
 * that becomes a centered horizontal row at 1200px and below, with reduced
 * padding at 768px and below.
 */
const rowActionsCss = () => `.row-actions{height:100%;padding:var(--spacing-5);background:var(--background);display:flex;flex-direction:column;gap:var(--spacing-3);align-items:center;justify-content:flex-start;align-self:flex-start}@media (max-width: 1200px){.row-actions{border-right:none;border-bottom:var(--border-width) solid var(--border);flex-direction:row;justify-content:center}}@media (max-width: 768px){.row-actions{padding:var(--spacing-4)}}`;
|
|
30370
30785
|
|
|
@@ -30390,6 +30805,7 @@ const LLMTestRunner = class {
|
|
|
30390
30805
|
useSave = false;
|
|
30391
30806
|
usePromptEditor = false;
|
|
30392
30807
|
resolveExpectedOutcome;
|
|
30808
|
+
evaluationSourceExtractors;
|
|
30393
30809
|
initialTestCases;
|
|
30394
30810
|
defaultExpectedOutcomeSchema;
|
|
30395
30811
|
testCases = [
|
|
@@ -30403,6 +30819,7 @@ const LLMTestRunner = class {
|
|
|
30403
30819
|
value: '',
|
|
30404
30820
|
},
|
|
30405
30821
|
],
|
|
30822
|
+
chatHistory: { enabled: false, value: '' },
|
|
30406
30823
|
isRunning: false,
|
|
30407
30824
|
},
|
|
30408
30825
|
];
|
|
@@ -30425,6 +30842,12 @@ const LLMTestRunner = class {
|
|
|
30425
30842
|
// Initialize testCases from prop if provided
|
|
30426
30843
|
if (this.initialTestCases !== undefined) {
|
|
30427
30844
|
validateTestCaseInputArray(this.initialTestCases);
|
|
30845
|
+
const extractorIds = getExtractorIds(this.evaluationSourceExtractors);
|
|
30846
|
+
if (extractorIds.length > 0) {
|
|
30847
|
+
this.initialTestCases.forEach(testCase => {
|
|
30848
|
+
validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, extractorIds);
|
|
30849
|
+
});
|
|
30850
|
+
}
|
|
30428
30851
|
this.testCases = this.initialTestCases.map((rawTestCase, index) => {
|
|
30429
30852
|
try {
|
|
30430
30853
|
return createTestCaseFromInput(rawTestCase);
|
|
@@ -30448,8 +30871,6 @@ const LLMTestRunner = class {
|
|
|
30448
30871
|
this.testCases = [];
|
|
30449
30872
|
}
|
|
30450
30873
|
}
|
|
30451
|
-
componentDidLoad() { }
|
|
30452
|
-
disconnectedCallback() { }
|
|
30453
30874
|
async resetSavingState() {
|
|
30454
30875
|
this.isSaving = false;
|
|
30455
30876
|
}
|
|
@@ -30460,6 +30881,12 @@ const LLMTestRunner = class {
|
|
|
30460
30881
|
const { testCaseId, key, value } = event.detail;
|
|
30461
30882
|
this.testCases = this.testCases.map(tc => tc.id === testCaseId ? { ...tc, [key]: value } : tc);
|
|
30462
30883
|
};
|
|
30884
|
+
// Stores the chat-history state carried by a row's change event
// (`event.detail` = { testCaseId, enabled, value }) on the matching test case.
handleChatHistoryChange = (event) => {
    const detail = event.detail;
    const chatHistory = { enabled: detail.enabled, value: detail.value };
    this.updateTestCase(detail.testCaseId, { chatHistory });
};
|
|
30463
30890
|
addNewTestCase() {
|
|
30464
30891
|
try {
|
|
30465
30892
|
const schema = this.getResolvedExpectedOutcomeSchema();
|
|
@@ -30476,13 +30903,17 @@ const LLMTestRunner = class {
|
|
|
30476
30903
|
updateTestCase(id, updates) {
|
|
30477
30904
|
this.testCases = this.testCases.map(tc => tc.id === id ? { ...tc, ...updates } : tc);
|
|
30478
30905
|
}
|
|
30479
|
-
|
|
30906
|
+
requestLlmResponse(testCase) {
|
|
30480
30907
|
return new Promise((resolve, reject) => {
|
|
30481
|
-
|
|
30908
|
+
const payload = {
|
|
30482
30909
|
prompt: testCase.question,
|
|
30483
30910
|
resolve,
|
|
30484
30911
|
reject,
|
|
30485
|
-
}
|
|
30912
|
+
};
|
|
30913
|
+
if (testCase.chatHistory?.enabled) {
|
|
30914
|
+
payload.chatHistory = testCase.chatHistory.value;
|
|
30915
|
+
}
|
|
30916
|
+
this.llmRequest.emit(payload);
|
|
30486
30917
|
});
|
|
30487
30918
|
}
|
|
30488
30919
|
throwError(reason) {
|
|
@@ -30495,14 +30926,14 @@ const LLMTestRunner = class {
|
|
|
30495
30926
|
const startTime = Date.now();
|
|
30496
30927
|
this.updateTestCase(testCase.id, { isRunning: true });
|
|
30497
30928
|
const [llmSettled, resolutionSettled] = await Promise.allSettled([
|
|
30498
|
-
this.
|
|
30929
|
+
this.requestLlmResponse(testCase),
|
|
30499
30930
|
resolveDynamicExpectedOutcomes(testCase, this.resolveExpectedOutcome),
|
|
30500
30931
|
]);
|
|
30501
30932
|
const responseTime = Date.now() - startTime;
|
|
30502
30933
|
if (llmSettled.status === 'rejected') {
|
|
30503
30934
|
this.updateTestCase(testCase.id, {
|
|
30504
30935
|
isRunning: false,
|
|
30505
|
-
output:
|
|
30936
|
+
output: undefined,
|
|
30506
30937
|
error: this.addErrorMessage(llmSettled.reason, 'Unknown error'),
|
|
30507
30938
|
responseTime,
|
|
30508
30939
|
});
|
|
@@ -30550,7 +30981,7 @@ const LLMTestRunner = class {
|
|
|
30550
30981
|
this.updateTestCase(testCase.id, {
|
|
30551
30982
|
evaluationResult: result,
|
|
30552
30983
|
});
|
|
30553
|
-
});
|
|
30984
|
+
}, this.evaluationSourceExtractors);
|
|
30554
30985
|
}
|
|
30555
30986
|
async runAllTests() {
|
|
30556
30987
|
this.isRunningAll = true;
|
|
@@ -30581,7 +31012,7 @@ const LLMTestRunner = class {
|
|
|
30581
31012
|
this.error = '';
|
|
30582
31013
|
try {
|
|
30583
31014
|
const content = await readFileAsync(file);
|
|
30584
|
-
const result = importTestSuite(content);
|
|
31015
|
+
const result = importTestSuite(content, getExtractorIds(this.evaluationSourceExtractors));
|
|
30585
31016
|
if (!result.success) {
|
|
30586
31017
|
this.error = result.error || 'Unknown error occurred during import.';
|
|
30587
31018
|
return;
|
|
@@ -30642,7 +31073,7 @@ const LLMTestRunner = class {
|
|
|
30642
31073
|
}
|
|
30643
31074
|
}
|
|
30644
31075
|
render() {
|
|
30645
|
-
return (h("div", { key: '
|
|
31076
|
+
return (h("div", { key: '7433beaa1d60d48f65600c43e11b302b892a7bca', class: "test-runner-container" }, h(LLMTestRunnerHeader, { key: '8083cc39376e7a710bd3f52efb184b959e885a87', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), h(ErrorMessage, { key: 'ddced98c13cd595c4cfb6eef11b27cb173769518', message: this.error, onClear: () => (this.error = '') }), h("div", { key: '8d6f65c4d68d34869b644709eacb97fec93683c6', class: "test-runner-container__content" }, h(LLMTestCases, { key: '5ccb186132b23af6209209b0a14086e03cf790af', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, extractorIds: getExtractorIds(this.evaluationSourceExtractors), onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
|
|
30646
31077
|
}
|
|
30647
31078
|
};
|
|
30648
31079
|
LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));
|