llm-testrunner-components 1.2.4 → 1.3.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (93)
  1. package/README.md +9 -5
  2. package/dist/cjs/{app-chips_5.cjs.entry.js → app-chips_4.cjs.entry.js} +20 -22
  3. package/dist/cjs/app-chips_4.cjs.entry.js.map +1 -0
  4. package/dist/cjs/index.cjs.js +464 -66
  5. package/dist/cjs/index.cjs.js.map +1 -1
  6. package/dist/cjs/llm-test-runner.cjs.entry.js +11 -0
  7. package/dist/cjs/llm-test-runner.cjs.entry.js.map +1 -0
  8. package/dist/cjs/llm-testrunner.cjs.js +1 -1
  9. package/dist/cjs/loader.cjs.js +1 -1
  10. package/dist/collection/components/llm-test-runner/llm-test-runner.js +46 -13
  11. package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
  12. package/dist/collection/components/llm-test-runner/test-cases/chat-history.css +5 -5
  13. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +45 -5
  14. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
  15. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +21 -0
  16. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +2 -2
  17. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
  18. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
  19. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
  20. package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js +1 -1
  21. package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js.map +1 -1
  22. package/dist/collection/demo/demo-modes.js +130 -0
  23. package/dist/collection/demo/vanilla-demo.js +56 -0
  24. package/dist/collection/lib/evaluation/actual-value-resolver.js +52 -0
  25. package/dist/collection/lib/evaluation/actual-value-resolver.js.map +1 -0
  26. package/dist/collection/lib/evaluation/evaluation-engine.js +1 -1
  27. package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
  28. package/dist/collection/lib/evaluation/evaluation-service.js +55 -17
  29. package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
  30. package/dist/collection/lib/evaluation/types.js.map +1 -1
  31. package/dist/collection/lib/form/components/app-textarea.css +2 -2
  32. package/dist/collection/lib/import-export/test-suite-importer.js +7 -1
  33. package/dist/collection/lib/import-export/test-suite-importer.js.map +1 -1
  34. package/dist/collection/lib/test-cases/test-case-factory.js +5 -0
  35. package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
  36. package/dist/collection/lib/test-cases/test-case-mutations.js +58 -23
  37. package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
  38. package/dist/collection/schemas/expected-outcome.js +39 -0
  39. package/dist/collection/schemas/expected-outcome.js.map +1 -1
  40. package/dist/collection/schemas/model-response.js +7 -0
  41. package/dist/collection/schemas/model-response.js.map +1 -0
  42. package/dist/collection/schemas/test-case.js +2 -1
  43. package/dist/collection/schemas/test-case.js.map +1 -1
  44. package/dist/collection/types/expected-outcome.js.map +1 -1
  45. package/dist/collection/types/llm-test-runner.js.map +1 -1
  46. package/dist/components/app-textarea.js +1 -1
  47. package/dist/components/chat-history.js +1 -1
  48. package/dist/components/index.js +1 -1
  49. package/dist/components/llm-test-runner.js +1 -1
  50. package/dist/components/{p-B87Lt3z4.js → p-D3eincg_.js} +3 -3
  51. package/dist/components/p-D3eincg_.js.map +1 -0
  52. package/dist/components/{p-D2qDAxFN.js → p-D6BL2E3J.js} +2 -2
  53. package/dist/components/{p-D2qDAxFN.js.map → p-D6BL2E3J.js.map} +1 -1
  54. package/dist/components/p-kmtfMXcQ.js +2 -0
  55. package/dist/components/p-kmtfMXcQ.js.map +1 -0
  56. package/dist/esm/{app-chips_5.entry.js → app-chips_4.entry.js} +4 -5
  57. package/dist/esm/app-chips_4.entry.js.map +1 -0
  58. package/dist/esm/index.js +464 -66
  59. package/dist/esm/index.js.map +1 -1
  60. package/dist/esm/llm-test-runner.entry.js +5 -0
  61. package/dist/esm/llm-test-runner.entry.js.map +1 -0
  62. package/dist/esm/llm-testrunner.js +1 -1
  63. package/dist/esm/loader.js +1 -1
  64. package/dist/llm-testrunner/index.esm.js +2 -2
  65. package/dist/llm-testrunner/index.esm.js.map +1 -1
  66. package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
  67. package/dist/llm-testrunner/p-c3fec0bb.entry.js +2 -0
  68. package/dist/llm-testrunner/{p-21202f12.entry.js.map → p-c3fec0bb.entry.js.map} +1 -1
  69. package/dist/llm-testrunner/p-caccdb4b.entry.js +2 -0
  70. package/dist/llm-testrunner/p-caccdb4b.entry.js.map +1 -0
  71. package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +3 -4
  72. package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +1 -0
  73. package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +1 -0
  74. package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +1 -0
  75. package/dist/types/components/llm-test-runner/test-cases/output/response-output.d.ts +2 -1
  76. package/dist/types/components.d.ts +4 -2
  77. package/dist/types/lib/evaluation/actual-value-resolver.d.ts +9 -0
  78. package/dist/types/lib/evaluation/evaluation-service.d.ts +2 -2
  79. package/dist/types/lib/evaluation/types.d.ts +1 -1
  80. package/dist/types/lib/import-export/test-suite-importer.d.ts +1 -1
  81. package/dist/types/lib/test-cases/test-case-mutations.d.ts +10 -1
  82. package/dist/types/schemas/expected-outcome.d.ts +116 -0
  83. package/dist/types/schemas/model-response.d.ts +7 -0
  84. package/dist/types/schemas/test-case.d.ts +76 -1
  85. package/dist/types/types/expected-outcome.d.ts +1 -1
  86. package/dist/types/types/llm-test-runner.d.ts +4 -2
  87. package/package.json +1 -1
  88. package/dist/cjs/app-chips_5.cjs.entry.js.map +0 -1
  89. package/dist/components/p-B87Lt3z4.js.map +0 -1
  90. package/dist/components/p-Bx2jqguC.js +0 -2
  91. package/dist/components/p-Bx2jqguC.js.map +0 -1
  92. package/dist/esm/app-chips_5.entry.js.map +0 -1
  93. package/dist/llm-testrunner/p-21202f12.entry.js +0 -2
@@ -295,6 +295,7 @@ const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
295
295
  function normalizeExpectedOutcomeField(field) {
296
296
  return {
297
297
  ...field,
298
+ evaluationSource: field.evaluationSource || { type: 'text' },
298
299
  evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
299
300
  };
300
301
  }
@@ -318,6 +319,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
318
319
  type: 'text',
319
320
  label: schemaField.label,
320
321
  placeholder: schemaField.placeholder,
322
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
321
323
  value: '',
322
324
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
323
325
  };
@@ -326,6 +328,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
326
328
  type: 'textarea',
327
329
  label: schemaField.label,
328
330
  placeholder: schemaField.placeholder,
331
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
329
332
  rows: schemaField.rows,
330
333
  value: '',
331
334
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -335,6 +338,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
335
338
  type: 'chips-input',
336
339
  label: schemaField.label,
337
340
  placeholder: schemaField.placeholder,
341
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
338
342
  value: [],
339
343
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
340
344
  };
@@ -343,6 +347,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
343
347
  type: 'select',
344
348
  label: schemaField.label,
345
349
  placeholder: schemaField.placeholder,
350
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
346
351
  value: schemaField.options[0],
347
352
  options: schemaField.options,
348
353
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -2572,6 +2577,122 @@ function handleIntersectionResults(result, left, right) {
2572
2577
  result.value = merged.data;
2573
2578
  return result;
2574
2579
  }
2580
+ const $ZodRecord = /*@__PURE__*/ $constructor("$ZodRecord", (inst, def) => {
2581
+ $ZodType.init(inst, def);
2582
+ inst._zod.parse = (payload, ctx) => {
2583
+ const input = payload.value;
2584
+ if (!isPlainObject(input)) {
2585
+ payload.issues.push({
2586
+ expected: "record",
2587
+ code: "invalid_type",
2588
+ input,
2589
+ inst,
2590
+ });
2591
+ return payload;
2592
+ }
2593
+ const proms = [];
2594
+ const values = def.keyType._zod.values;
2595
+ if (values) {
2596
+ payload.value = {};
2597
+ const recordKeys = new Set();
2598
+ for (const key of values) {
2599
+ if (typeof key === "string" || typeof key === "number" || typeof key === "symbol") {
2600
+ recordKeys.add(typeof key === "number" ? key.toString() : key);
2601
+ const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
2602
+ if (result instanceof Promise) {
2603
+ proms.push(result.then((result) => {
2604
+ if (result.issues.length) {
2605
+ payload.issues.push(...prefixIssues(key, result.issues));
2606
+ }
2607
+ payload.value[key] = result.value;
2608
+ }));
2609
+ }
2610
+ else {
2611
+ if (result.issues.length) {
2612
+ payload.issues.push(...prefixIssues(key, result.issues));
2613
+ }
2614
+ payload.value[key] = result.value;
2615
+ }
2616
+ }
2617
+ }
2618
+ let unrecognized;
2619
+ for (const key in input) {
2620
+ if (!recordKeys.has(key)) {
2621
+ unrecognized = unrecognized ?? [];
2622
+ unrecognized.push(key);
2623
+ }
2624
+ }
2625
+ if (unrecognized && unrecognized.length > 0) {
2626
+ payload.issues.push({
2627
+ code: "unrecognized_keys",
2628
+ input,
2629
+ inst,
2630
+ keys: unrecognized,
2631
+ });
2632
+ }
2633
+ }
2634
+ else {
2635
+ payload.value = {};
2636
+ for (const key of Reflect.ownKeys(input)) {
2637
+ if (key === "__proto__")
2638
+ continue;
2639
+ let keyResult = def.keyType._zod.run({ value: key, issues: [] }, ctx);
2640
+ if (keyResult instanceof Promise) {
2641
+ throw new Error("Async schemas not supported in object keys currently");
2642
+ }
2643
+ // Numeric string fallback: if key is a numeric string and failed, retry with Number(key)
2644
+ // This handles z.number(), z.literal([1, 2, 3]), and unions containing numeric literals
2645
+ const checkNumericKey = typeof key === "string" && number$1.test(key) && keyResult.issues.length;
2646
+ if (checkNumericKey) {
2647
+ const retryResult = def.keyType._zod.run({ value: Number(key), issues: [] }, ctx);
2648
+ if (retryResult instanceof Promise) {
2649
+ throw new Error("Async schemas not supported in object keys currently");
2650
+ }
2651
+ if (retryResult.issues.length === 0) {
2652
+ keyResult = retryResult;
2653
+ }
2654
+ }
2655
+ if (keyResult.issues.length) {
2656
+ if (def.mode === "loose") {
2657
+ // Pass through unchanged
2658
+ payload.value[key] = input[key];
2659
+ }
2660
+ else {
2661
+ // Default "strict" behavior: error on invalid key
2662
+ payload.issues.push({
2663
+ code: "invalid_key",
2664
+ origin: "record",
2665
+ issues: keyResult.issues.map((iss) => finalizeIssue(iss, ctx, config())),
2666
+ input: key,
2667
+ path: [key],
2668
+ inst,
2669
+ });
2670
+ }
2671
+ continue;
2672
+ }
2673
+ const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
2674
+ if (result instanceof Promise) {
2675
+ proms.push(result.then((result) => {
2676
+ if (result.issues.length) {
2677
+ payload.issues.push(...prefixIssues(key, result.issues));
2678
+ }
2679
+ payload.value[keyResult.value] = result.value;
2680
+ }));
2681
+ }
2682
+ else {
2683
+ if (result.issues.length) {
2684
+ payload.issues.push(...prefixIssues(key, result.issues));
2685
+ }
2686
+ payload.value[keyResult.value] = result.value;
2687
+ }
2688
+ }
2689
+ }
2690
+ if (proms.length) {
2691
+ return Promise.all(proms).then(() => payload);
2692
+ }
2693
+ return payload;
2694
+ };
2695
+ });
2575
2696
  const $ZodEnum = /*@__PURE__*/ $constructor("$ZodEnum", (inst, def) => {
2576
2697
  $ZodType.init(inst, def);
2577
2698
  const values = getEnumValues(def.entries);
@@ -4155,6 +4276,49 @@ const intersectionProcessor = (schema, ctx, json, params) => {
4155
4276
  ];
4156
4277
  json.allOf = allOf;
4157
4278
  };
4279
+ const recordProcessor = (schema, ctx, _json, params) => {
4280
+ const json = _json;
4281
+ const def = schema._zod.def;
4282
+ json.type = "object";
4283
+ // For looseRecord with regex patterns, use patternProperties
4284
+ // This correctly represents "only validate keys matching the pattern" semantics
4285
+ // and composes well with allOf (intersections)
4286
+ const keyType = def.keyType;
4287
+ const keyBag = keyType._zod.bag;
4288
+ const patterns = keyBag?.patterns;
4289
+ if (def.mode === "loose" && patterns && patterns.size > 0) {
4290
+ // Use patternProperties for looseRecord with regex patterns
4291
+ const valueSchema = process$1(def.valueType, ctx, {
4292
+ ...params,
4293
+ path: [...params.path, "patternProperties", "*"],
4294
+ });
4295
+ json.patternProperties = {};
4296
+ for (const pattern of patterns) {
4297
+ json.patternProperties[pattern.source] = valueSchema;
4298
+ }
4299
+ }
4300
+ else {
4301
+ // Default behavior: use propertyNames + additionalProperties
4302
+ if (ctx.target === "draft-07" || ctx.target === "draft-2020-12") {
4303
+ json.propertyNames = process$1(def.keyType, ctx, {
4304
+ ...params,
4305
+ path: [...params.path, "propertyNames"],
4306
+ });
4307
+ }
4308
+ json.additionalProperties = process$1(def.valueType, ctx, {
4309
+ ...params,
4310
+ path: [...params.path, "additionalProperties"],
4311
+ });
4312
+ }
4313
+ // Add required for keys with discrete values (enum, literal, etc.)
4314
+ const keyValues = keyType._zod.values;
4315
+ if (keyValues) {
4316
+ const validKeyValues = [...keyValues].filter((v) => typeof v === "string" || typeof v === "number");
4317
+ if (validKeyValues.length > 0) {
4318
+ json.required = validKeyValues;
4319
+ }
4320
+ }
4321
+ };
4158
4322
  const nullableProcessor = (schema, ctx, json, params) => {
4159
4323
  const def = schema._zod.def;
4160
4324
  const inner = process$1(def.innerType, ctx, params);
@@ -4709,6 +4873,21 @@ function intersection(left, right) {
4709
4873
  right: right,
4710
4874
  });
4711
4875
  }
4876
+ const ZodRecord = /*@__PURE__*/ $constructor("ZodRecord", (inst, def) => {
4877
+ $ZodRecord.init(inst, def);
4878
+ ZodType.init(inst, def);
4879
+ inst._zod.processJSONSchema = (ctx, json, params) => recordProcessor(inst, ctx, json, params);
4880
+ inst.keyType = def.keyType;
4881
+ inst.valueType = def.valueType;
4882
+ });
4883
+ function record(keyType, valueType, params) {
4884
+ return new ZodRecord({
4885
+ type: "record",
4886
+ keyType,
4887
+ valueType: valueType,
4888
+ ...normalizeParams(params),
4889
+ });
4890
+ }
4712
4891
  const ZodEnum = /*@__PURE__*/ $constructor("ZodEnum", (inst, def) => {
4713
4892
  $ZodEnum.init(inst, def);
4714
4893
  ZodType.init(inst, def);
@@ -4946,7 +5125,7 @@ const ZodCustom = /*@__PURE__*/ $constructor("ZodCustom", (inst, def) => {
4946
5125
  inst._zod.processJSONSchema = (ctx, json, params) => customProcessor(inst, ctx);
4947
5126
  });
4948
5127
  function custom(fn, _params) {
4949
- return _custom(ZodCustom, (() => true), _params);
5128
+ return _custom(ZodCustom, fn ?? (() => true), _params);
4950
5129
  }
4951
5130
  function refine(fn, _params = {}) {
4952
5131
  return _refine(ZodCustom, fn, _params);
@@ -4961,6 +5140,19 @@ const optionalPositiveInt = number().int().positive().optional();
4961
5140
  const optionalString = string().optional();
4962
5141
  const selectOptionsSchema = array(nonEmptyString).min(1);
4963
5142
  const optionalNumber = number().optional();
5143
+ const textEvaluationSourceSchema = object({
5144
+ type: literal('text'),
5145
+ });
5146
+ const customEvaluationSourceSchema = object({
5147
+ type: literal('custom'),
5148
+ extractorId: nonEmptyString,
5149
+ });
5150
+ const evaluationSourceExtractorSchema = custom(value => typeof value === 'function', 'Extractor must be a function.');
5151
+ record(string().min(1), evaluationSourceExtractorSchema);
5152
+ const evaluationSourceSchema = discriminatedUnion('type', [
5153
+ textEvaluationSourceSchema,
5154
+ customEvaluationSourceSchema,
5155
+ ]);
4964
5156
  const expectedOutcomeModeSchema = _enum(['static', 'dynamic']);
4965
5157
  const evaluationParametersSchema = object({
4966
5158
  approach: _enum(EvaluationApproach),
@@ -4978,6 +5170,7 @@ const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine(
4978
5170
  const defaultExpectedOutcomeBaseSchema = object({
4979
5171
  label: nonEmptyString,
4980
5172
  placeholder: optionalString,
5173
+ evaluationSource: evaluationSourceSchema.optional(),
4981
5174
  });
4982
5175
  const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
4983
5176
  text: baseSchema.extend({
@@ -5070,6 +5263,37 @@ function validateExpectedOutcomeSchema(schema) {
5070
5263
  throw new Error(`Invalid expectedOutcomeSchema: ${parsed.error.issues[0].message}`);
5071
5264
  }
5072
5265
  }
5266
+ function validateExpectedOutcomeArrayWithExtractors(expectedOutcome, allowedExtractorIds) {
5267
+ const allowed = new Set(allowedExtractorIds);
5268
+ const schema = expectedOutcomeArraySchema.superRefine((fields, ctx) => {
5269
+ fields.forEach((field, index) => {
5270
+ if (field.evaluationSource?.type !== 'custom') {
5271
+ return;
5272
+ }
5273
+ if (allowed.has(field.evaluationSource.extractorId)) {
5274
+ return;
5275
+ }
5276
+ ctx.addIssue({
5277
+ code: 'custom',
5278
+ path: [index, 'evaluationSource', 'extractorId'],
5279
+ message: `Invalid expectedOutcome: Extractor "${field.evaluationSource.extractorId}" is not registered.`,
5280
+ });
5281
+ });
5282
+ });
5283
+ const parsed = schema.safeParse(expectedOutcome);
5284
+ if (!parsed.success) {
5285
+ throw new Error(parsed.error.issues[0].message);
5286
+ }
5287
+ }
5288
+ function getExtractorIds(extractors) {
5289
+ return Object.keys(extractors || {});
5290
+ }
5291
+
5292
+ const modelResponseMetadataSchema = record(string(), unknown());
5293
+ const modelResponsePayloadSchema = object({
5294
+ text: string().optional(),
5295
+ metadata: modelResponseMetadataSchema.optional(),
5296
+ });
5073
5297
 
5074
5298
  const testCaseChatHistorySchema = object({
5075
5299
  enabled: boolean(),
@@ -5086,8 +5310,8 @@ object({
5086
5310
  id: string(),
5087
5311
  question: string(),
5088
5312
  expectedOutcome: expectedOutcomeArraySchema,
5313
+ output: modelResponsePayloadSchema.optional(),
5089
5314
  chatHistory: testCaseChatHistorySchema,
5090
- output: string().optional(),
5091
5315
  isRunning: boolean().optional(),
5092
5316
  error: string().optional(),
5093
5317
  evaluationResult: custom().optional(),
@@ -5109,10 +5333,15 @@ function validateTestCaseInputArray(data) {
5109
5333
  * @param jsonContent - The JSON string to parse and validate
5110
5334
  * @returns Validation result with test cases or error message
5111
5335
  */
5112
- function importTestSuite(jsonContent) {
5336
+ function importTestSuite(jsonContent, allowedExtractorIds = []) {
5113
5337
  try {
5114
5338
  const parsed = JSON.parse(jsonContent);
5115
5339
  validateTestCaseInputArray(parsed);
5340
+ if (allowedExtractorIds.length > 0) {
5341
+ parsed.forEach((testCase) => {
5342
+ validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, allowedExtractorIds);
5343
+ });
5344
+ }
5116
5345
  const testCases = parsed.map((item, index) => {
5117
5346
  try {
5118
5347
  return createTestCaseFromInput(item);
@@ -5138,7 +5367,7 @@ function importTestSuite(jsonContent) {
5138
5367
  }
5139
5368
 
5140
5369
  const MISSING_RESOLVER_MESSAGE = 'resolveExpectedOutcome is required when a test case has dynamic expected outcomes.';
5141
- function isDynamicTextareaField(field) {
5370
+ function isDynamicTextareaField$1(field) {
5142
5371
  return field.type === 'textarea' && field.outcomeMode === 'dynamic';
5143
5372
  }
5144
5373
  function applyResolvedDynamicValues(testCase, resolvedValues) {
@@ -5148,7 +5377,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
5148
5377
  const expectedOutcome = [...(testCase.expectedOutcome || [])];
5149
5378
  for (const resolved of resolvedValues) {
5150
5379
  const field = expectedOutcome[resolved.index];
5151
- if (!field || !isDynamicTextareaField(field)) {
5380
+ if (!field || !isDynamicTextareaField$1(field)) {
5152
5381
  continue;
5153
5382
  }
5154
5383
  expectedOutcome[resolved.index] = {
@@ -5163,7 +5392,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
5163
5392
  }
5164
5393
  async function resolveDynamicExpectedOutcomes(testCase, resolver) {
5165
5394
  const dynamicFields = (testCase.expectedOutcome || []).flatMap((field, index) => {
5166
- if (!isDynamicTextareaField(field)) {
5395
+ if (!isDynamicTextareaField$1(field)) {
5167
5396
  return [];
5168
5397
  }
5169
5398
  return [{ field, index }];
@@ -5181,6 +5410,15 @@ async function resolveDynamicExpectedOutcomes(testCase, resolver) {
5181
5410
  return applyResolvedDynamicValues(testCase, resolvedValues);
5182
5411
  }
5183
5412
 
5413
+ function isChipsInputField(field) {
5414
+ return field.type === 'chips-input';
5415
+ }
5416
+ function isTextareaField(field) {
5417
+ return field.type === 'textarea';
5418
+ }
5419
+ function isDynamicTextareaField(field) {
5420
+ return isTextareaField(field) && field.outcomeMode === 'dynamic';
5421
+ }
5184
5422
  function applyExpectedOutcomeChange(testCase, change) {
5185
5423
  const { index } = change;
5186
5424
  const expectedOutcome = [...(testCase.expectedOutcome || [])];
@@ -5188,73 +5426,99 @@ function applyExpectedOutcomeChange(testCase, change) {
5188
5426
  if (!target) {
5189
5427
  return testCase;
5190
5428
  }
5429
+ const commit = (updatedField) => {
5430
+ expectedOutcome[index] = updatedField;
5431
+ return { ...testCase, expectedOutcome };
5432
+ };
5191
5433
  switch (change.operation) {
5192
5434
  case 'set-value': {
5193
- if (target.type === 'chips-input') {
5435
+ if (isChipsInputField(target)) {
5194
5436
  return testCase;
5195
5437
  }
5196
- if (target.type === 'textarea' && target.outcomeMode === 'dynamic') {
5438
+ if (isDynamicTextareaField(target)) {
5197
5439
  return testCase;
5198
5440
  }
5199
- expectedOutcome[index] = {
5441
+ return commit({
5200
5442
  ...target,
5201
5443
  value: change.value,
5202
- };
5203
- return { ...testCase, expectedOutcome };
5444
+ });
5204
5445
  }
5205
5446
  case 'add-chip': {
5206
- if (target.type !== 'chips-input') {
5447
+ if (!isChipsInputField(target)) {
5207
5448
  return testCase;
5208
5449
  }
5209
- expectedOutcome[index] = {
5450
+ return commit({
5210
5451
  ...target,
5211
5452
  value: [...target.value, change.value],
5212
- };
5213
- return { ...testCase, expectedOutcome };
5453
+ });
5214
5454
  }
5215
5455
  case 'remove-chip': {
5216
- if (target.type !== 'chips-input') {
5456
+ if (!isChipsInputField(target)) {
5217
5457
  return testCase;
5218
5458
  }
5219
- expectedOutcome[index] = {
5459
+ return commit({
5220
5460
  ...target,
5221
5461
  value: target.value.filter(chip => chip !== change.value),
5222
- };
5223
- return { ...testCase, expectedOutcome };
5462
+ });
5224
5463
  }
5225
5464
  case 'set-evaluation-approach':
5226
5465
  return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
5227
5466
  case 'set-outcome-mode': {
5228
- if (target.type !== 'textarea') {
5467
+ if (!isTextareaField(target)) {
5229
5468
  return testCase;
5230
5469
  }
5231
5470
  const mode = change.value;
5232
5471
  if (mode === 'static') {
5233
5472
  const { resolutionQuery: _, ...rest } = target;
5234
- expectedOutcome[index] = {
5473
+ return commit({
5235
5474
  ...rest,
5236
5475
  outcomeMode: 'static',
5237
5476
  value: '',
5238
- };
5477
+ });
5239
5478
  }
5240
5479
  else {
5241
- expectedOutcome[index] = {
5480
+ return commit({
5242
5481
  ...target,
5243
5482
  outcomeMode: 'dynamic',
5244
5483
  value: '',
5245
- };
5484
+ });
5246
5485
  }
5247
- return { ...testCase, expectedOutcome };
5248
5486
  }
5249
5487
  case 'set-resolution-query': {
5250
- if (target.type !== 'textarea' || target.outcomeMode !== 'dynamic') {
5488
+ if (!isDynamicTextareaField(target)) {
5251
5489
  return testCase;
5252
5490
  }
5253
- expectedOutcome[index] = {
5491
+ return commit({
5254
5492
  ...target,
5255
5493
  resolutionQuery: change.value,
5256
- };
5257
- return { ...testCase, expectedOutcome };
5494
+ });
5495
+ }
5496
+ case 'set-evaluation-source-type': {
5497
+ if (change.value === 'text') {
5498
+ return commit({
5499
+ ...target,
5500
+ evaluationSource: { type: 'text' },
5501
+ });
5502
+ }
5503
+ const extractorId = target.evaluationSource?.type === 'custom'
5504
+ ? target.evaluationSource.extractorId
5505
+ : (change.fallbackExtractorId ?? '');
5506
+ return commit({
5507
+ ...target,
5508
+ evaluationSource: {
5509
+ type: 'custom',
5510
+ extractorId,
5511
+ },
5512
+ });
5513
+ }
5514
+ case 'set-evaluation-source-extractor': {
5515
+ return commit({
5516
+ ...target,
5517
+ evaluationSource: {
5518
+ type: 'custom',
5519
+ extractorId: change.value,
5520
+ },
5521
+ });
5258
5522
  }
5259
5523
  }
5260
5524
  }
@@ -30035,7 +30299,7 @@ class LLMEvaluationEngine {
30035
30299
  const fieldRequest = {
30036
30300
  testCaseId: request.testCaseId,
30037
30301
  question: request.question,
30038
- actualResponse: request.actualResponse,
30302
+ actualResponse: field.actualResponse,
30039
30303
  expectedOutcome: field.expectedValue,
30040
30304
  evaluationParameters: field.evaluationParameters,
30041
30305
  };
@@ -30105,6 +30369,58 @@ class LLMEvaluationEngine {
30105
30369
  }
30106
30370
  }
30107
30371
 
30372
+ function toTextSource() {
30373
+ return { type: 'text' };
30374
+ }
30375
+ async function resolveActualValue(field, output, extractors) {
30376
+ const source = field.evaluationSource || toTextSource();
30377
+ if (source.type === 'text') {
30378
+ const text = output?.text?.trim();
30379
+ if (!text) {
30380
+ return {
30381
+ success: false,
30382
+ error: 'Model response text is empty.',
30383
+ };
30384
+ }
30385
+ return { success: true, value: text };
30386
+ }
30387
+ const extractor = extractors?.[source.extractorId];
30388
+ if (!extractor) {
30389
+ return {
30390
+ success: false,
30391
+ error: `Extractor "${source.extractorId}" is not registered.`,
30392
+ };
30393
+ }
30394
+ try {
30395
+ const extractedRaw = await extractor(output || {});
30396
+ if (typeof extractedRaw !== 'string') {
30397
+ return {
30398
+ success: false,
30399
+ error: `Extractor "${source.extractorId}" must return a string.`,
30400
+ };
30401
+ }
30402
+ const extracted = extractedRaw.trim();
30403
+ if (!extracted) {
30404
+ return {
30405
+ success: false,
30406
+ error: `Extractor "${source.extractorId}" returned an empty value.`,
30407
+ };
30408
+ }
30409
+ return {
30410
+ success: true,
30411
+ value: extracted,
30412
+ };
30413
+ }
30414
+ catch (error) {
30415
+ return {
30416
+ success: false,
30417
+ error: error instanceof Error
30418
+ ? error.message
30419
+ : `Extractor "${source.extractorId}" failed.`,
30420
+ };
30421
+ }
30422
+ }
30423
+
30108
30424
  /**
30109
30425
  * Service for evaluating test case responses
30110
30426
  */
@@ -30118,34 +30434,71 @@ class EvaluationService {
30118
30434
  * @param testCase - The test case to evaluate
30119
30435
  * @param onResult - Callback to handle the evaluation result
30120
30436
  */
30121
- async evaluateTestCase(testCase, onResult) {
30122
- if (!testCase.output) {
30123
- console.warn('⚠️ No output to evaluate for test case:', testCase.id);
30124
- return;
30125
- }
30126
- const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
30437
+ async evaluateTestCase(testCase, onResult, extractors) {
30438
+ const fields = [];
30439
+ const failedFields = [];
30440
+ for (const [index, field] of (testCase.expectedOutcome || []).entries()) {
30127
30441
  if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
30128
- return [];
30442
+ continue;
30129
30443
  }
30130
- return [
30131
- {
30444
+ const evaluationParameters = normalizeEvaluationParametersForField(field.type, field.evaluationParameters);
30445
+ const expectedValue = getFieldExpectedValue(field);
30446
+ const resolvedActualValue = await resolveActualValue(field, testCase.output, extractors);
30447
+ if (resolvedActualValue.success) {
30448
+ fields.push({
30132
30449
  index,
30133
30450
  label: field.label,
30134
30451
  type: field.type,
30135
- expectedValue: getFieldExpectedValue(field),
30136
- evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
30137
- },
30138
- ];
30139
- });
30452
+ expectedValue,
30453
+ actualResponse: resolvedActualValue.value,
30454
+ evaluationParameters,
30455
+ });
30456
+ }
30457
+ else {
30458
+ failedFields.push({
30459
+ index,
30460
+ label: field.label,
30461
+ type: field.type,
30462
+ expectedValue,
30463
+ passed: false,
30464
+ keywordMatches: [],
30465
+ evaluationParameters,
30466
+ evaluationApproachResult: {
30467
+ score: 0,
30468
+ approachUsed: evaluationParameters.approach,
30469
+ },
30470
+ error: 'error' in resolvedActualValue
30471
+ ? resolvedActualValue.error
30472
+ : 'Failed to resolve actual value.',
30473
+ });
30474
+ }
30475
+ }
30476
+ if (fields.length === 0) {
30477
+ if (failedFields.length === 0) {
30478
+ console.warn('⚠️ No evaluable fields for test case:', testCase.id);
30479
+ return;
30480
+ }
30481
+ onResult({
30482
+ testCaseId: testCase.id,
30483
+ passed: false,
30484
+ keywordMatches: [],
30485
+ fieldResults: failedFields,
30486
+ timestamp: new Date().toISOString(),
30487
+ });
30488
+ return;
30489
+ }
30140
30490
  const evaluationRequest = {
30141
30491
  testCaseId: testCase.id,
30142
30492
  question: testCase.question,
30143
- actualResponse: testCase.output,
30144
30493
  fields,
30145
30494
  };
30146
30495
  await this.engine.evaluateResponse(evaluationRequest, (result) => {
30147
- console.log('📊 Evaluation result received:', result);
30148
- onResult(result);
30496
+ const combinedResults = [...(result.fieldResults || []), ...failedFields].sort((a, b) => a.index - b.index);
30497
+ onResult({
30498
+ ...result,
30499
+ passed: combinedResults.every(field => field.passed && !field.error),
30500
+ fieldResults: combinedResults,
30501
+ });
30149
30502
  });
30150
30503
  }
30151
30504
  }
@@ -30193,7 +30546,7 @@ const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isR
30193
30546
  };
30194
30547
 
30195
30548
  const ResponseOutput = ({ output, isRunning, }) => {
30196
- return (index.h("div", { class: "response-output" }, output ? (index.h("div", { class: "response-output__content" }, output)) : (index.h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : ''))));
30549
+ return (index.h("div", { class: "response-output" }, output?.text ? (index.h("div", { class: "response-output__content" }, output.text)) : (index.h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : ''))));
30197
30550
  };
30198
30551
 
30199
30552
  const EvaluationSummary = ({ result, isRunning, }) => {
@@ -30231,7 +30584,9 @@ var FormFieldType;
30231
30584
  FormFieldType["SELECT"] = "select";
30232
30585
  })(FormFieldType || (FormFieldType = {}));
30233
30586
 
30234
- const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, onExpectedOutcomeChange, }) => {
30587
+ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, extractorIds = [], onExpectedOutcomeChange, }) => {
30588
+ const hasExtractorOptions = extractorIds.length > 0;
30589
+ const firstExtractorId = extractorIds[0];
30235
30590
  const emit = (detail) => onExpectedOutcomeChange({
30236
30591
  detail,
30237
30592
  });
@@ -30261,6 +30616,23 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30261
30616
  required: false,
30262
30617
  rows: 2,
30263
30618
  });
30619
+ const buildEvaluationSourceConfig = (index) => ({
30620
+ name: `expectedOutcomeEvaluationSource-${index}`,
30621
+ fieldType: FormFieldType.SELECT,
30622
+ label: 'Evaluation Source',
30623
+ placeholder: 'Select evaluation source',
30624
+ required: true,
30625
+ optionList: ['text', 'custom'],
30626
+ defaultValue: 'text',
30627
+ });
30628
+ const buildExtractorConfig = (index) => ({
30629
+ name: `expectedOutcomeEvaluationSourceExtractor-${index}`,
30630
+ fieldType: FormFieldType.SELECT,
30631
+ label: 'Extractor',
30632
+ placeholder: 'Select extractor',
30633
+ required: true,
30634
+ optionList: extractorIds,
30635
+ });
30264
30636
  const renderEvaluationSelector = (field, index$1) => {
30265
30637
  const optionList = getAllowedApproachesForFieldType(field.type);
30266
30638
  return (index.h("app-select", { config: buildEvaluationConfig(index$1, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
@@ -30270,6 +30642,27 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30270
30642
  value: e.detail.value,
30271
30643
  }) }));
30272
30644
  };
30645
+ const renderEvaluationSourceSelector = (field, index$1) => {
30646
+ if (!hasExtractorOptions) {
30647
+ return null;
30648
+ }
30649
+ const sourceType = field.evaluationSource?.type || 'text';
30650
+ return (index.h("div", null, index.h("app-select", { config: buildEvaluationSourceConfig(index$1), value: sourceType, onValueChange: (e) => emit({
30651
+ testCaseId,
30652
+ index: index$1,
30653
+ operation: 'set-evaluation-source-type',
30654
+ value: e.detail.value,
30655
+ fallbackExtractorId: firstExtractorId,
30656
+ }) }), sourceType === 'custom' && (index.h("app-select", { config: buildExtractorConfig(index$1), value: field.evaluationSource?.type === 'custom'
30657
+ ? field.evaluationSource.extractorId
30658
+ : '', onValueChange: (e) => emit({
30659
+ testCaseId,
30660
+ index: index$1,
30661
+ operation: 'set-evaluation-source-extractor',
30662
+ value: e.detail.value,
30663
+ }) }))));
30664
+ };
30665
+ const renderEvaluationOptions = (field, index$1) => (index.h("details", { class: "expected-outcome-renderer__options" }, index.h("summary", { class: "expected-outcome-renderer__options-summary" }, "More options"), index.h("div", { class: "expected-outcome-renderer__options-content" }, renderEvaluationSelector(field, index$1), renderEvaluationSourceSelector(field, index$1))));
30273
30666
  return (index.h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index$1) => {
30274
30667
  if (field.type === 'textarea') {
30275
30668
  const isDynamic = dynamicResolutionSupported && field.outcomeMode === 'dynamic';
@@ -30301,7 +30694,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30301
30694
  index: index$1,
30302
30695
  operation: 'set-resolution-query',
30303
30696
  value: e.detail.value,
30304
- }) })), !isDynamic && renderEvaluationSelector(field, index$1)));
30697
+ }) })), !isDynamic && renderEvaluationOptions(field, index$1)));
30305
30698
  }
30306
30699
  if (field.type === 'chips-input') {
30307
30700
  const config = {
@@ -30321,7 +30714,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30321
30714
  index: index$1,
30322
30715
  operation: 'remove-chip',
30323
30716
  value: e.detail.value,
30324
- }) }), renderEvaluationSelector(field, index$1)));
30717
+ }) }), renderEvaluationOptions(field, index$1)));
30325
30718
  }
30326
30719
  if (field.type === 'select') {
30327
30720
  const config = {
@@ -30337,18 +30730,18 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30337
30730
  index: index$1,
30338
30731
  operation: 'set-value',
30339
30732
  value: e.detail.value,
30340
- }) }), renderEvaluationSelector(field, index$1)));
30733
+ }) }), renderEvaluationOptions(field, index$1)));
30341
30734
  }
30342
30735
  return (index.h("div", { class: "expected-outcome-renderer__group" }, index.h("div", { class: "expected-outcome-renderer__text" }, index.h("label", null, field.label), index.h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
30343
30736
  testCaseId,
30344
30737
  index: index$1,
30345
30738
  operation: 'set-value',
30346
30739
  value: e.target.value,
30347
- }) })), renderEvaluationSelector(field, index$1)));
30740
+ }) })), renderEvaluationOptions(field, index$1)));
30348
30741
  })));
30349
30742
  };
30350
30743
 
30351
- const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30744
+ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30352
30745
  const questionConfig = {
30353
30746
  name: 'question',
30354
30747
  fieldType: FormFieldType.TEXT_AREA,
@@ -30374,11 +30767,11 @@ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, o
30374
30767
  value,
30375
30768
  },
30376
30769
  });
30377
- } }), index.h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, onExpectedOutcomeChange: onExpectedOutcomeChange })), index.h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), index.h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), index.h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
30770
+ } }), index.h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onExpectedOutcomeChange: onExpectedOutcomeChange })), index.h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), index.h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), index.h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
30378
30771
  };
30379
30772
 
30380
- const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30381
- return (index.h("div", { class: "test-cases" }, index.h("div", { class: "test-cases__column-headers" }, index.h("div", { class: "test-cases__column-header" }, "Input"), index.h("div", { class: "test-cases__column-header" }, "Output"), index.h("div", { class: "test-cases__column-header" }, "Evaluation"), index.h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (index.h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange, onChatHistoryChange: onChatHistoryChange }))), index.h("div", { class: "test-cases__add-section" }, index.h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
30773
+ const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30774
+ return (index.h("div", { class: "test-cases" }, index.h("div", { class: "test-cases__column-headers" }, index.h("div", { class: "test-cases__column-header" }, "Input"), index.h("div", { class: "test-cases__column-header" }, "Output"), index.h("div", { class: "test-cases__column-header" }, "Evaluation"), index.h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (index.h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange, onChatHistoryChange: onChatHistoryChange }))), index.h("div", { class: "test-cases__add-section" }, index.h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
30382
30775
  };
30383
30776
 
30384
30777
  const tokensCss = () => `:host{--spacing:0.25rem;--spacing-1:calc(var(--spacing) * 1);--spacing-2:calc(var(--spacing) * 2);--spacing-3:calc(var(--spacing) * 3);--spacing-4:calc(var(--spacing) * 4);--spacing-5:calc(var(--spacing) * 5);--spacing-6:calc(var(--spacing) * 6);--spacing-8:calc(var(--spacing) * 8);--spacing-10:calc(var(--spacing) * 10);--spacing-12:calc(var(--spacing) * 12);--spacing-16:calc(var(--spacing) * 16);--spacing-20:calc(var(--spacing) * 20);--spacing-24:calc(var(--spacing) * 24);--radius-none:0;--radius-sm:0.125rem;--radius-md:0.375rem;--radius-lg:0.5rem;--radius-xl:0.75rem;--radius-2xl:1rem;--radius-3xl:1.5rem;--radius-full:9999px;--radius:var(--radius-lg);--font-size-xs:0.75rem;--font-size-sm:0.875rem;--font-size-base:1rem;--font-size-lg:1.125rem;--font-size-xl:1.25rem;--font-size-2xl:1.5rem;--font-size-3xl:1.875rem;--font-size-4xl:2.25rem;--font-weight-normal:400;--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--line-height-none:1;--line-height-tight:1.25;--line-height-snug:1.375;--line-height-normal:1.5;--line-height-relaxed:1.625;--line-height-loose:2;--letter-spacing-tight:-0.025em;--letter-spacing-normal:0;--letter-spacing-wide:0.05em;--shadow-sm:0 1px 2px 0 rgba(0, 0, 0, 0.05);--shadow-md:0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);--shadow-lg:0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);--shadow-xl:0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);--shadow-2xl:0 25px 50px -12px rgba(0, 0, 0, 
0.25);--border-width:1px;--z-base:0;--z-dropdown:1000;--z-sticky:1100;--z-modal:1200;--z-popover:1300;--z-tooltip:1400;--opacity-disabled:0.5;--opacity-hover:0.8;--opacity-muted:0.6;--max-w-sm:24rem;--max-w-md:28rem;--max-w-lg:32rem;--max-w-xl:42rem;--max-w-2xl:48rem;--max-w-full:100%;--breakpoint-sm:640px;--breakpoint-md:768px;--breakpoint-lg:1024px;--breakpoint-xl:1280px;--breakpoint-2xl:1536px;--background:#ffffff;--foreground:#0a0a0a;--card:#ffffff;--card-foreground:#0a0a0a;--popover:#ffffff;--popover-foreground:#0a0a0a;--primary:#0a0a0a;--primary-foreground:#fafafa;--secondary:#f4f4f5;--secondary-foreground:#0a0a0a;--muted:#f4f4f5;--muted-foreground:#71717a;--accent:#f4f4f5;--accent-foreground:#0a0a0a;--destructive:#ef4444;--destructive-foreground:#fafafa;--border:#e4e4e7;--input:#e4e4e7;--ring:#3b82f6;--success:#10b981;--success-foreground:#fafafa;--warning:#f59e0b;--warning-foreground:#fafafa;--info:#3b82f6;--info-foreground:#fafafa}:host([data-theme='dark']){--background:#0a0a0a;--foreground:#fafafa;--card:#171717;--card-foreground:#fafafa;--popover:#171717;--popover-foreground:#fafafa;--primary:#fafafa;--primary-foreground:#0a0a0a;--secondary:#27272a;--secondary-foreground:#fafafa;--muted:#27272a;--muted-foreground:#a1a1aa;--accent:#27272a;--accent-foreground:#fafafa;--destructive:#dc2626;--destructive-foreground:#fafafa;--border:#27272a;--input:#27272a;--ring:#3b82f6;--success:#059669;--success-foreground:#fafafa;--warning:#d97706;--warning-foreground:#fafafa;--info:#2563eb;--info-foreground:#fafafa}`;
@@ -30389,7 +30782,7 @@ const llmTestRunnerHeaderCss = () => `.test-runner-header{display:flex;justify-c
30389
30782
 
30390
30783
  const llmTestCasesCss = () => `.test-cases{background:var(--background)}.test-cases__column-headers{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);background:var(--border);border-bottom:2px solid var(--border)}.test-cases__column-header{background:var(--muted);padding:var(--spacing-4) var(--spacing-5);font-weight:var(--font-weight-semibold);color:var(--foreground);font-size:var(--font-size-sm);text-transform:uppercase;letter-spacing:var(--letter-spacing-wide)}.test-cases__add-section{padding:var(--spacing-6);text-align:center;background:var(--muted);border-top:var(--border-width) solid var(--border)}@media (max-width: 1200px){.test-cases__column-headers{display:none}}`;
30391
30784
 
30392
- const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
30785
+ const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}.expected-outcome-renderer__options{border:var(--border-width) solid var(--border);border-radius:var(--radius-sm);background:var(--muted)}.expected-outcome-renderer__options-summary{cursor:pointer;font-size:var(--font-size-sm);color:var(--foreground);padding:var(--spacing-2) var(--spacing-3);user-select:none}.expected-outcome-renderer__options-content{display:flex;flex-direction:column;gap:var(--spacing-2);padding:0 var(--spacing-3) var(--spacing-3)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
30393
30786
 
30394
30787
  const rowActionsCss = () => `.row-actions{height:100%;padding:var(--spacing-5);background:var(--background);display:flex;flex-direction:column;gap:var(--spacing-3);align-items:center;justify-content:flex-start;align-self:flex-start}@media (max-width: 1200px){.row-actions{border-right:none;border-bottom:var(--border-width) solid var(--border);flex-direction:row;justify-content:center}}@media (max-width: 768px){.row-actions{padding:var(--spacing-4)}}`;
30395
30788
 
@@ -30415,6 +30808,7 @@ const LLMTestRunner = class {
30415
30808
  useSave = false;
30416
30809
  usePromptEditor = false;
30417
30810
  resolveExpectedOutcome;
30811
+ evaluationSourceExtractors;
30418
30812
  initialTestCases;
30419
30813
  defaultExpectedOutcomeSchema;
30420
30814
  testCases = [
@@ -30451,6 +30845,12 @@ const LLMTestRunner = class {
30451
30845
  // Initialize testCases from prop if provided
30452
30846
  if (this.initialTestCases !== undefined) {
30453
30847
  validateTestCaseInputArray(this.initialTestCases);
30848
+ const extractorIds = getExtractorIds(this.evaluationSourceExtractors);
30849
+ if (extractorIds.length > 0) {
30850
+ this.initialTestCases.forEach(testCase => {
30851
+ validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, extractorIds);
30852
+ });
30853
+ }
30454
30854
  this.testCases = this.initialTestCases.map((rawTestCase, index) => {
30455
30855
  try {
30456
30856
  return createTestCaseFromInput(rawTestCase);
@@ -30474,8 +30874,6 @@ const LLMTestRunner = class {
30474
30874
  this.testCases = [];
30475
30875
  }
30476
30876
  }
30477
- componentDidLoad() { }
30478
- disconnectedCallback() { }
30479
30877
  async resetSavingState() {
30480
30878
  this.isSaving = false;
30481
30879
  }
@@ -30508,7 +30906,7 @@ const LLMTestRunner = class {
30508
30906
  updateTestCase(id, updates) {
30509
30907
  this.testCases = this.testCases.map(tc => tc.id === id ? { ...tc, ...updates } : tc);
30510
30908
  }
30511
- requestLlmText(testCase) {
30909
+ requestLlmResponse(testCase) {
30512
30910
  return new Promise((resolve, reject) => {
30513
30911
  const payload = {
30514
30912
  prompt: testCase.question,
@@ -30531,14 +30929,14 @@ const LLMTestRunner = class {
30531
30929
  const startTime = Date.now();
30532
30930
  this.updateTestCase(testCase.id, { isRunning: true });
30533
30931
  const [llmSettled, resolutionSettled] = await Promise.allSettled([
30534
- this.requestLlmText(testCase),
30932
+ this.requestLlmResponse(testCase),
30535
30933
  resolveDynamicExpectedOutcomes(testCase, this.resolveExpectedOutcome),
30536
30934
  ]);
30537
30935
  const responseTime = Date.now() - startTime;
30538
30936
  if (llmSettled.status === 'rejected') {
30539
30937
  this.updateTestCase(testCase.id, {
30540
30938
  isRunning: false,
30541
- output: null,
30939
+ output: undefined,
30542
30940
  error: this.addErrorMessage(llmSettled.reason, 'Unknown error'),
30543
30941
  responseTime,
30544
30942
  });
@@ -30586,7 +30984,7 @@ const LLMTestRunner = class {
30586
30984
  this.updateTestCase(testCase.id, {
30587
30985
  evaluationResult: result,
30588
30986
  });
30589
- });
30987
+ }, this.evaluationSourceExtractors);
30590
30988
  }
30591
30989
  async runAllTests() {
30592
30990
  this.isRunningAll = true;
@@ -30617,7 +31015,7 @@ const LLMTestRunner = class {
30617
31015
  this.error = '';
30618
31016
  try {
30619
31017
  const content = await readFileAsync(file);
30620
- const result = importTestSuite(content);
31018
+ const result = importTestSuite(content, getExtractorIds(this.evaluationSourceExtractors));
30621
31019
  if (!result.success) {
30622
31020
  this.error = result.error || 'Unknown error occurred during import.';
30623
31021
  return;
@@ -30678,7 +31076,7 @@ const LLMTestRunner = class {
30678
31076
  }
30679
31077
  }
30680
31078
  render() {
30681
- return (index.h("div", { key: 'cc808096f929b2e1c570c53144aab195d177c187', class: "test-runner-container" }, index.h(LLMTestRunnerHeader, { key: 'b91cf3df7df0e95bfd4908a2f91c7310b5b7a09a', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), index.h(ErrorMessage, { key: 'c7991497173fa9843e7aa42f5283d0897ddff2e2', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: '2b57132564442b8047d8672c6adcba62cdc9ae87', class: "test-runner-container__content" }, index.h(LLMTestCases, { key: '146e9d8c76a34980a2a274dd856887c22e1ed0e9', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
31079
+ return (index.h("div", { key: '7433beaa1d60d48f65600c43e11b302b892a7bca', class: "test-runner-container" }, index.h(LLMTestRunnerHeader, { key: '8083cc39376e7a710bd3f52efb184b959e885a87', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), index.h(ErrorMessage, { key: 'ddced98c13cd595c4cfb6eef11b27cb173769518', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: '8d6f65c4d68d34869b644709eacb97fec93683c6', class: "test-runner-container__content" }, index.h(LLMTestCases, { key: '5ccb186132b23af6209209b0a14086e03cf790af', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, extractorIds: getExtractorIds(this.evaluationSourceExtractors), onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
30682
31080
  }
30683
31081
  };
30684
31082
  LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));