llm-testrunner-components 1.2.4 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/README.md +9 -5
  2. package/dist/cjs/{app-chips_5.cjs.entry.js → app-chips_4.cjs.entry.js} +20 -22
  3. package/dist/cjs/app-chips_4.cjs.entry.js.map +1 -0
  4. package/dist/cjs/index.cjs.js +464 -66
  5. package/dist/cjs/index.cjs.js.map +1 -1
  6. package/dist/cjs/llm-test-runner.cjs.entry.js +11 -0
  7. package/dist/cjs/llm-test-runner.cjs.entry.js.map +1 -0
  8. package/dist/cjs/llm-testrunner.cjs.js +1 -1
  9. package/dist/cjs/loader.cjs.js +1 -1
  10. package/dist/collection/components/llm-test-runner/llm-test-runner.js +46 -13
  11. package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
  12. package/dist/collection/components/llm-test-runner/test-cases/chat-history.css +5 -5
  13. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +45 -5
  14. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
  15. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +21 -0
  16. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +2 -2
  17. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
  18. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
  19. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
  20. package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js +1 -1
  21. package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js.map +1 -1
  22. package/dist/collection/demo/demo-modes.js +130 -0
  23. package/dist/collection/demo/vanilla-demo.js +56 -0
  24. package/dist/collection/lib/evaluation/actual-value-resolver.js +52 -0
  25. package/dist/collection/lib/evaluation/actual-value-resolver.js.map +1 -0
  26. package/dist/collection/lib/evaluation/evaluation-engine.js +1 -1
  27. package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
  28. package/dist/collection/lib/evaluation/evaluation-service.js +55 -17
  29. package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
  30. package/dist/collection/lib/evaluation/types.js.map +1 -1
  31. package/dist/collection/lib/form/components/app-textarea.css +2 -2
  32. package/dist/collection/lib/import-export/test-suite-importer.js +7 -1
  33. package/dist/collection/lib/import-export/test-suite-importer.js.map +1 -1
  34. package/dist/collection/lib/test-cases/test-case-factory.js +5 -0
  35. package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
  36. package/dist/collection/lib/test-cases/test-case-mutations.js +58 -23
  37. package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
  38. package/dist/collection/schemas/expected-outcome.js +39 -0
  39. package/dist/collection/schemas/expected-outcome.js.map +1 -1
  40. package/dist/collection/schemas/model-response.js +7 -0
  41. package/dist/collection/schemas/model-response.js.map +1 -0
  42. package/dist/collection/schemas/test-case.js +2 -1
  43. package/dist/collection/schemas/test-case.js.map +1 -1
  44. package/dist/collection/types/expected-outcome.js.map +1 -1
  45. package/dist/collection/types/llm-test-runner.js.map +1 -1
  46. package/dist/components/app-textarea.js +1 -1
  47. package/dist/components/chat-history.js +1 -1
  48. package/dist/components/index.js +1 -1
  49. package/dist/components/llm-test-runner.js +1 -1
  50. package/dist/components/{p-B87Lt3z4.js → p-D3eincg_.js} +3 -3
  51. package/dist/components/p-D3eincg_.js.map +1 -0
  52. package/dist/components/{p-D2qDAxFN.js → p-D6BL2E3J.js} +2 -2
  53. package/dist/components/{p-D2qDAxFN.js.map → p-D6BL2E3J.js.map} +1 -1
  54. package/dist/components/p-kmtfMXcQ.js +2 -0
  55. package/dist/components/p-kmtfMXcQ.js.map +1 -0
  56. package/dist/esm/{app-chips_5.entry.js → app-chips_4.entry.js} +4 -5
  57. package/dist/esm/app-chips_4.entry.js.map +1 -0
  58. package/dist/esm/index.js +464 -66
  59. package/dist/esm/index.js.map +1 -1
  60. package/dist/esm/llm-test-runner.entry.js +5 -0
  61. package/dist/esm/llm-test-runner.entry.js.map +1 -0
  62. package/dist/esm/llm-testrunner.js +1 -1
  63. package/dist/esm/loader.js +1 -1
  64. package/dist/llm-testrunner/index.esm.js +2 -2
  65. package/dist/llm-testrunner/index.esm.js.map +1 -1
  66. package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
  67. package/dist/llm-testrunner/p-c3fec0bb.entry.js +2 -0
  68. package/dist/llm-testrunner/{p-21202f12.entry.js.map → p-c3fec0bb.entry.js.map} +1 -1
  69. package/dist/llm-testrunner/p-caccdb4b.entry.js +2 -0
  70. package/dist/llm-testrunner/p-caccdb4b.entry.js.map +1 -0
  71. package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +3 -4
  72. package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +1 -0
  73. package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +1 -0
  74. package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +1 -0
  75. package/dist/types/components/llm-test-runner/test-cases/output/response-output.d.ts +2 -1
  76. package/dist/types/components.d.ts +4 -2
  77. package/dist/types/lib/evaluation/actual-value-resolver.d.ts +9 -0
  78. package/dist/types/lib/evaluation/evaluation-service.d.ts +2 -2
  79. package/dist/types/lib/evaluation/types.d.ts +1 -1
  80. package/dist/types/lib/import-export/test-suite-importer.d.ts +1 -1
  81. package/dist/types/lib/test-cases/test-case-mutations.d.ts +10 -1
  82. package/dist/types/schemas/expected-outcome.d.ts +116 -0
  83. package/dist/types/schemas/model-response.d.ts +7 -0
  84. package/dist/types/schemas/test-case.d.ts +76 -1
  85. package/dist/types/types/expected-outcome.d.ts +1 -1
  86. package/dist/types/types/llm-test-runner.d.ts +4 -2
  87. package/package.json +1 -1
  88. package/dist/cjs/app-chips_5.cjs.entry.js.map +0 -1
  89. package/dist/components/p-B87Lt3z4.js.map +0 -1
  90. package/dist/components/p-Bx2jqguC.js +0 -2
  91. package/dist/components/p-Bx2jqguC.js.map +0 -1
  92. package/dist/esm/app-chips_5.entry.js.map +0 -1
  93. package/dist/llm-testrunner/p-21202f12.entry.js +0 -2
package/dist/esm/index.js CHANGED
@@ -292,6 +292,7 @@ const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
292
292
  function normalizeExpectedOutcomeField(field) {
293
293
  return {
294
294
  ...field,
295
+ evaluationSource: field.evaluationSource || { type: 'text' },
295
296
  evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
296
297
  };
297
298
  }
@@ -315,6 +316,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
315
316
  type: 'text',
316
317
  label: schemaField.label,
317
318
  placeholder: schemaField.placeholder,
319
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
318
320
  value: '',
319
321
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
320
322
  };
@@ -323,6 +325,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
323
325
  type: 'textarea',
324
326
  label: schemaField.label,
325
327
  placeholder: schemaField.placeholder,
328
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
326
329
  rows: schemaField.rows,
327
330
  value: '',
328
331
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -332,6 +335,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
332
335
  type: 'chips-input',
333
336
  label: schemaField.label,
334
337
  placeholder: schemaField.placeholder,
338
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
335
339
  value: [],
336
340
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
337
341
  };
@@ -340,6 +344,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
340
344
  type: 'select',
341
345
  label: schemaField.label,
342
346
  placeholder: schemaField.placeholder,
347
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
343
348
  value: schemaField.options[0],
344
349
  options: schemaField.options,
345
350
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -2569,6 +2574,122 @@ function handleIntersectionResults(result, left, right) {
2569
2574
  result.value = merged.data;
2570
2575
  return result;
2571
2576
  }
2577
+ const $ZodRecord = /*@__PURE__*/ $constructor("$ZodRecord", (inst, def) => {
2578
+ $ZodType.init(inst, def);
2579
+ inst._zod.parse = (payload, ctx) => {
2580
+ const input = payload.value;
2581
+ if (!isPlainObject(input)) {
2582
+ payload.issues.push({
2583
+ expected: "record",
2584
+ code: "invalid_type",
2585
+ input,
2586
+ inst,
2587
+ });
2588
+ return payload;
2589
+ }
2590
+ const proms = [];
2591
+ const values = def.keyType._zod.values;
2592
+ if (values) {
2593
+ payload.value = {};
2594
+ const recordKeys = new Set();
2595
+ for (const key of values) {
2596
+ if (typeof key === "string" || typeof key === "number" || typeof key === "symbol") {
2597
+ recordKeys.add(typeof key === "number" ? key.toString() : key);
2598
+ const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
2599
+ if (result instanceof Promise) {
2600
+ proms.push(result.then((result) => {
2601
+ if (result.issues.length) {
2602
+ payload.issues.push(...prefixIssues(key, result.issues));
2603
+ }
2604
+ payload.value[key] = result.value;
2605
+ }));
2606
+ }
2607
+ else {
2608
+ if (result.issues.length) {
2609
+ payload.issues.push(...prefixIssues(key, result.issues));
2610
+ }
2611
+ payload.value[key] = result.value;
2612
+ }
2613
+ }
2614
+ }
2615
+ let unrecognized;
2616
+ for (const key in input) {
2617
+ if (!recordKeys.has(key)) {
2618
+ unrecognized = unrecognized ?? [];
2619
+ unrecognized.push(key);
2620
+ }
2621
+ }
2622
+ if (unrecognized && unrecognized.length > 0) {
2623
+ payload.issues.push({
2624
+ code: "unrecognized_keys",
2625
+ input,
2626
+ inst,
2627
+ keys: unrecognized,
2628
+ });
2629
+ }
2630
+ }
2631
+ else {
2632
+ payload.value = {};
2633
+ for (const key of Reflect.ownKeys(input)) {
2634
+ if (key === "__proto__")
2635
+ continue;
2636
+ let keyResult = def.keyType._zod.run({ value: key, issues: [] }, ctx);
2637
+ if (keyResult instanceof Promise) {
2638
+ throw new Error("Async schemas not supported in object keys currently");
2639
+ }
2640
+ // Numeric string fallback: if key is a numeric string and failed, retry with Number(key)
2641
+ // This handles z.number(), z.literal([1, 2, 3]), and unions containing numeric literals
2642
+ const checkNumericKey = typeof key === "string" && number$1.test(key) && keyResult.issues.length;
2643
+ if (checkNumericKey) {
2644
+ const retryResult = def.keyType._zod.run({ value: Number(key), issues: [] }, ctx);
2645
+ if (retryResult instanceof Promise) {
2646
+ throw new Error("Async schemas not supported in object keys currently");
2647
+ }
2648
+ if (retryResult.issues.length === 0) {
2649
+ keyResult = retryResult;
2650
+ }
2651
+ }
2652
+ if (keyResult.issues.length) {
2653
+ if (def.mode === "loose") {
2654
+ // Pass through unchanged
2655
+ payload.value[key] = input[key];
2656
+ }
2657
+ else {
2658
+ // Default "strict" behavior: error on invalid key
2659
+ payload.issues.push({
2660
+ code: "invalid_key",
2661
+ origin: "record",
2662
+ issues: keyResult.issues.map((iss) => finalizeIssue(iss, ctx, config())),
2663
+ input: key,
2664
+ path: [key],
2665
+ inst,
2666
+ });
2667
+ }
2668
+ continue;
2669
+ }
2670
+ const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
2671
+ if (result instanceof Promise) {
2672
+ proms.push(result.then((result) => {
2673
+ if (result.issues.length) {
2674
+ payload.issues.push(...prefixIssues(key, result.issues));
2675
+ }
2676
+ payload.value[keyResult.value] = result.value;
2677
+ }));
2678
+ }
2679
+ else {
2680
+ if (result.issues.length) {
2681
+ payload.issues.push(...prefixIssues(key, result.issues));
2682
+ }
2683
+ payload.value[keyResult.value] = result.value;
2684
+ }
2685
+ }
2686
+ }
2687
+ if (proms.length) {
2688
+ return Promise.all(proms).then(() => payload);
2689
+ }
2690
+ return payload;
2691
+ };
2692
+ });
2572
2693
  const $ZodEnum = /*@__PURE__*/ $constructor("$ZodEnum", (inst, def) => {
2573
2694
  $ZodType.init(inst, def);
2574
2695
  const values = getEnumValues(def.entries);
@@ -4152,6 +4273,49 @@ const intersectionProcessor = (schema, ctx, json, params) => {
4152
4273
  ];
4153
4274
  json.allOf = allOf;
4154
4275
  };
4276
+ const recordProcessor = (schema, ctx, _json, params) => {
4277
+ const json = _json;
4278
+ const def = schema._zod.def;
4279
+ json.type = "object";
4280
+ // For looseRecord with regex patterns, use patternProperties
4281
+ // This correctly represents "only validate keys matching the pattern" semantics
4282
+ // and composes well with allOf (intersections)
4283
+ const keyType = def.keyType;
4284
+ const keyBag = keyType._zod.bag;
4285
+ const patterns = keyBag?.patterns;
4286
+ if (def.mode === "loose" && patterns && patterns.size > 0) {
4287
+ // Use patternProperties for looseRecord with regex patterns
4288
+ const valueSchema = process$1(def.valueType, ctx, {
4289
+ ...params,
4290
+ path: [...params.path, "patternProperties", "*"],
4291
+ });
4292
+ json.patternProperties = {};
4293
+ for (const pattern of patterns) {
4294
+ json.patternProperties[pattern.source] = valueSchema;
4295
+ }
4296
+ }
4297
+ else {
4298
+ // Default behavior: use propertyNames + additionalProperties
4299
+ if (ctx.target === "draft-07" || ctx.target === "draft-2020-12") {
4300
+ json.propertyNames = process$1(def.keyType, ctx, {
4301
+ ...params,
4302
+ path: [...params.path, "propertyNames"],
4303
+ });
4304
+ }
4305
+ json.additionalProperties = process$1(def.valueType, ctx, {
4306
+ ...params,
4307
+ path: [...params.path, "additionalProperties"],
4308
+ });
4309
+ }
4310
+ // Add required for keys with discrete values (enum, literal, etc.)
4311
+ const keyValues = keyType._zod.values;
4312
+ if (keyValues) {
4313
+ const validKeyValues = [...keyValues].filter((v) => typeof v === "string" || typeof v === "number");
4314
+ if (validKeyValues.length > 0) {
4315
+ json.required = validKeyValues;
4316
+ }
4317
+ }
4318
+ };
4155
4319
  const nullableProcessor = (schema, ctx, json, params) => {
4156
4320
  const def = schema._zod.def;
4157
4321
  const inner = process$1(def.innerType, ctx, params);
@@ -4706,6 +4870,21 @@ function intersection(left, right) {
4706
4870
  right: right,
4707
4871
  });
4708
4872
  }
4873
+ const ZodRecord = /*@__PURE__*/ $constructor("ZodRecord", (inst, def) => {
4874
+ $ZodRecord.init(inst, def);
4875
+ ZodType.init(inst, def);
4876
+ inst._zod.processJSONSchema = (ctx, json, params) => recordProcessor(inst, ctx, json, params);
4877
+ inst.keyType = def.keyType;
4878
+ inst.valueType = def.valueType;
4879
+ });
4880
+ function record(keyType, valueType, params) {
4881
+ return new ZodRecord({
4882
+ type: "record",
4883
+ keyType,
4884
+ valueType: valueType,
4885
+ ...normalizeParams(params),
4886
+ });
4887
+ }
4709
4888
  const ZodEnum = /*@__PURE__*/ $constructor("ZodEnum", (inst, def) => {
4710
4889
  $ZodEnum.init(inst, def);
4711
4890
  ZodType.init(inst, def);
@@ -4943,7 +5122,7 @@ const ZodCustom = /*@__PURE__*/ $constructor("ZodCustom", (inst, def) => {
4943
5122
  inst._zod.processJSONSchema = (ctx, json, params) => customProcessor(inst, ctx);
4944
5123
  });
4945
5124
  function custom(fn, _params) {
4946
- return _custom(ZodCustom, (() => true), _params);
5125
+ return _custom(ZodCustom, fn ?? (() => true), _params);
4947
5126
  }
4948
5127
  function refine(fn, _params = {}) {
4949
5128
  return _refine(ZodCustom, fn, _params);
@@ -4958,6 +5137,19 @@ const optionalPositiveInt = number().int().positive().optional();
4958
5137
  const optionalString = string().optional();
4959
5138
  const selectOptionsSchema = array(nonEmptyString).min(1);
4960
5139
  const optionalNumber = number().optional();
5140
+ const textEvaluationSourceSchema = object({
5141
+ type: literal('text'),
5142
+ });
5143
+ const customEvaluationSourceSchema = object({
5144
+ type: literal('custom'),
5145
+ extractorId: nonEmptyString,
5146
+ });
5147
+ const evaluationSourceExtractorSchema = custom(value => typeof value === 'function', 'Extractor must be a function.');
5148
+ record(string().min(1), evaluationSourceExtractorSchema);
5149
+ const evaluationSourceSchema = discriminatedUnion('type', [
5150
+ textEvaluationSourceSchema,
5151
+ customEvaluationSourceSchema,
5152
+ ]);
4961
5153
  const expectedOutcomeModeSchema = _enum(['static', 'dynamic']);
4962
5154
  const evaluationParametersSchema = object({
4963
5155
  approach: _enum(EvaluationApproach),
@@ -4975,6 +5167,7 @@ const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine(
4975
5167
  const defaultExpectedOutcomeBaseSchema = object({
4976
5168
  label: nonEmptyString,
4977
5169
  placeholder: optionalString,
5170
+ evaluationSource: evaluationSourceSchema.optional(),
4978
5171
  });
4979
5172
  const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
4980
5173
  text: baseSchema.extend({
@@ -5067,6 +5260,37 @@ function validateExpectedOutcomeSchema(schema) {
5067
5260
  throw new Error(`Invalid expectedOutcomeSchema: ${parsed.error.issues[0].message}`);
5068
5261
  }
5069
5262
  }
5263
+ function validateExpectedOutcomeArrayWithExtractors(expectedOutcome, allowedExtractorIds) {
5264
+ const allowed = new Set(allowedExtractorIds);
5265
+ const schema = expectedOutcomeArraySchema.superRefine((fields, ctx) => {
5266
+ fields.forEach((field, index) => {
5267
+ if (field.evaluationSource?.type !== 'custom') {
5268
+ return;
5269
+ }
5270
+ if (allowed.has(field.evaluationSource.extractorId)) {
5271
+ return;
5272
+ }
5273
+ ctx.addIssue({
5274
+ code: 'custom',
5275
+ path: [index, 'evaluationSource', 'extractorId'],
5276
+ message: `Invalid expectedOutcome: Extractor "${field.evaluationSource.extractorId}" is not registered.`,
5277
+ });
5278
+ });
5279
+ });
5280
+ const parsed = schema.safeParse(expectedOutcome);
5281
+ if (!parsed.success) {
5282
+ throw new Error(parsed.error.issues[0].message);
5283
+ }
5284
+ }
5285
+ function getExtractorIds(extractors) {
5286
+ return Object.keys(extractors || {});
5287
+ }
5288
+
5289
+ const modelResponseMetadataSchema = record(string(), unknown());
5290
+ const modelResponsePayloadSchema = object({
5291
+ text: string().optional(),
5292
+ metadata: modelResponseMetadataSchema.optional(),
5293
+ });
5070
5294
 
5071
5295
  const testCaseChatHistorySchema = object({
5072
5296
  enabled: boolean(),
@@ -5083,8 +5307,8 @@ object({
5083
5307
  id: string(),
5084
5308
  question: string(),
5085
5309
  expectedOutcome: expectedOutcomeArraySchema,
5310
+ output: modelResponsePayloadSchema.optional(),
5086
5311
  chatHistory: testCaseChatHistorySchema,
5087
- output: string().optional(),
5088
5312
  isRunning: boolean().optional(),
5089
5313
  error: string().optional(),
5090
5314
  evaluationResult: custom().optional(),
@@ -5106,10 +5330,15 @@ function validateTestCaseInputArray(data) {
5106
5330
  * @param jsonContent - The JSON string to parse and validate
5107
5331
  * @returns Validation result with test cases or error message
5108
5332
  */
5109
- function importTestSuite(jsonContent) {
5333
+ function importTestSuite(jsonContent, allowedExtractorIds = []) {
5110
5334
  try {
5111
5335
  const parsed = JSON.parse(jsonContent);
5112
5336
  validateTestCaseInputArray(parsed);
5337
+ if (allowedExtractorIds.length > 0) {
5338
+ parsed.forEach((testCase) => {
5339
+ validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, allowedExtractorIds);
5340
+ });
5341
+ }
5113
5342
  const testCases = parsed.map((item, index) => {
5114
5343
  try {
5115
5344
  return createTestCaseFromInput(item);
@@ -5135,7 +5364,7 @@ function importTestSuite(jsonContent) {
5135
5364
  }
5136
5365
 
5137
5366
  const MISSING_RESOLVER_MESSAGE = 'resolveExpectedOutcome is required when a test case has dynamic expected outcomes.';
5138
- function isDynamicTextareaField(field) {
5367
+ function isDynamicTextareaField$1(field) {
5139
5368
  return field.type === 'textarea' && field.outcomeMode === 'dynamic';
5140
5369
  }
5141
5370
  function applyResolvedDynamicValues(testCase, resolvedValues) {
@@ -5145,7 +5374,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
5145
5374
  const expectedOutcome = [...(testCase.expectedOutcome || [])];
5146
5375
  for (const resolved of resolvedValues) {
5147
5376
  const field = expectedOutcome[resolved.index];
5148
- if (!field || !isDynamicTextareaField(field)) {
5377
+ if (!field || !isDynamicTextareaField$1(field)) {
5149
5378
  continue;
5150
5379
  }
5151
5380
  expectedOutcome[resolved.index] = {
@@ -5160,7 +5389,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
5160
5389
  }
5161
5390
  async function resolveDynamicExpectedOutcomes(testCase, resolver) {
5162
5391
  const dynamicFields = (testCase.expectedOutcome || []).flatMap((field, index) => {
5163
- if (!isDynamicTextareaField(field)) {
5392
+ if (!isDynamicTextareaField$1(field)) {
5164
5393
  return [];
5165
5394
  }
5166
5395
  return [{ field, index }];
@@ -5178,6 +5407,15 @@ async function resolveDynamicExpectedOutcomes(testCase, resolver) {
5178
5407
  return applyResolvedDynamicValues(testCase, resolvedValues);
5179
5408
  }
5180
5409
 
5410
+ function isChipsInputField(field) {
5411
+ return field.type === 'chips-input';
5412
+ }
5413
+ function isTextareaField(field) {
5414
+ return field.type === 'textarea';
5415
+ }
5416
+ function isDynamicTextareaField(field) {
5417
+ return isTextareaField(field) && field.outcomeMode === 'dynamic';
5418
+ }
5181
5419
  function applyExpectedOutcomeChange(testCase, change) {
5182
5420
  const { index } = change;
5183
5421
  const expectedOutcome = [...(testCase.expectedOutcome || [])];
@@ -5185,73 +5423,99 @@ function applyExpectedOutcomeChange(testCase, change) {
5185
5423
  if (!target) {
5186
5424
  return testCase;
5187
5425
  }
5426
+ const commit = (updatedField) => {
5427
+ expectedOutcome[index] = updatedField;
5428
+ return { ...testCase, expectedOutcome };
5429
+ };
5188
5430
  switch (change.operation) {
5189
5431
  case 'set-value': {
5190
- if (target.type === 'chips-input') {
5432
+ if (isChipsInputField(target)) {
5191
5433
  return testCase;
5192
5434
  }
5193
- if (target.type === 'textarea' && target.outcomeMode === 'dynamic') {
5435
+ if (isDynamicTextareaField(target)) {
5194
5436
  return testCase;
5195
5437
  }
5196
- expectedOutcome[index] = {
5438
+ return commit({
5197
5439
  ...target,
5198
5440
  value: change.value,
5199
- };
5200
- return { ...testCase, expectedOutcome };
5441
+ });
5201
5442
  }
5202
5443
  case 'add-chip': {
5203
- if (target.type !== 'chips-input') {
5444
+ if (!isChipsInputField(target)) {
5204
5445
  return testCase;
5205
5446
  }
5206
- expectedOutcome[index] = {
5447
+ return commit({
5207
5448
  ...target,
5208
5449
  value: [...target.value, change.value],
5209
- };
5210
- return { ...testCase, expectedOutcome };
5450
+ });
5211
5451
  }
5212
5452
  case 'remove-chip': {
5213
- if (target.type !== 'chips-input') {
5453
+ if (!isChipsInputField(target)) {
5214
5454
  return testCase;
5215
5455
  }
5216
- expectedOutcome[index] = {
5456
+ return commit({
5217
5457
  ...target,
5218
5458
  value: target.value.filter(chip => chip !== change.value),
5219
- };
5220
- return { ...testCase, expectedOutcome };
5459
+ });
5221
5460
  }
5222
5461
  case 'set-evaluation-approach':
5223
5462
  return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
5224
5463
  case 'set-outcome-mode': {
5225
- if (target.type !== 'textarea') {
5464
+ if (!isTextareaField(target)) {
5226
5465
  return testCase;
5227
5466
  }
5228
5467
  const mode = change.value;
5229
5468
  if (mode === 'static') {
5230
5469
  const { resolutionQuery: _, ...rest } = target;
5231
- expectedOutcome[index] = {
5470
+ return commit({
5232
5471
  ...rest,
5233
5472
  outcomeMode: 'static',
5234
5473
  value: '',
5235
- };
5474
+ });
5236
5475
  }
5237
5476
  else {
5238
- expectedOutcome[index] = {
5477
+ return commit({
5239
5478
  ...target,
5240
5479
  outcomeMode: 'dynamic',
5241
5480
  value: '',
5242
- };
5481
+ });
5243
5482
  }
5244
- return { ...testCase, expectedOutcome };
5245
5483
  }
5246
5484
  case 'set-resolution-query': {
5247
- if (target.type !== 'textarea' || target.outcomeMode !== 'dynamic') {
5485
+ if (!isDynamicTextareaField(target)) {
5248
5486
  return testCase;
5249
5487
  }
5250
- expectedOutcome[index] = {
5488
+ return commit({
5251
5489
  ...target,
5252
5490
  resolutionQuery: change.value,
5253
- };
5254
- return { ...testCase, expectedOutcome };
5491
+ });
5492
+ }
5493
+ case 'set-evaluation-source-type': {
5494
+ if (change.value === 'text') {
5495
+ return commit({
5496
+ ...target,
5497
+ evaluationSource: { type: 'text' },
5498
+ });
5499
+ }
5500
+ const extractorId = target.evaluationSource?.type === 'custom'
5501
+ ? target.evaluationSource.extractorId
5502
+ : (change.fallbackExtractorId ?? '');
5503
+ return commit({
5504
+ ...target,
5505
+ evaluationSource: {
5506
+ type: 'custom',
5507
+ extractorId,
5508
+ },
5509
+ });
5510
+ }
5511
+ case 'set-evaluation-source-extractor': {
5512
+ return commit({
5513
+ ...target,
5514
+ evaluationSource: {
5515
+ type: 'custom',
5516
+ extractorId: change.value,
5517
+ },
5518
+ });
5255
5519
  }
5256
5520
  }
5257
5521
  }
@@ -30032,7 +30296,7 @@ class LLMEvaluationEngine {
30032
30296
  const fieldRequest = {
30033
30297
  testCaseId: request.testCaseId,
30034
30298
  question: request.question,
30035
- actualResponse: request.actualResponse,
30299
+ actualResponse: field.actualResponse,
30036
30300
  expectedOutcome: field.expectedValue,
30037
30301
  evaluationParameters: field.evaluationParameters,
30038
30302
  };
@@ -30102,6 +30366,58 @@ class LLMEvaluationEngine {
30102
30366
  }
30103
30367
  }
30104
30368
 
30369
+ function toTextSource() {
30370
+ return { type: 'text' };
30371
+ }
30372
+ async function resolveActualValue(field, output, extractors) {
30373
+ const source = field.evaluationSource || toTextSource();
30374
+ if (source.type === 'text') {
30375
+ const text = output?.text?.trim();
30376
+ if (!text) {
30377
+ return {
30378
+ success: false,
30379
+ error: 'Model response text is empty.',
30380
+ };
30381
+ }
30382
+ return { success: true, value: text };
30383
+ }
30384
+ const extractor = extractors?.[source.extractorId];
30385
+ if (!extractor) {
30386
+ return {
30387
+ success: false,
30388
+ error: `Extractor "${source.extractorId}" is not registered.`,
30389
+ };
30390
+ }
30391
+ try {
30392
+ const extractedRaw = await extractor(output || {});
30393
+ if (typeof extractedRaw !== 'string') {
30394
+ return {
30395
+ success: false,
30396
+ error: `Extractor "${source.extractorId}" must return a string.`,
30397
+ };
30398
+ }
30399
+ const extracted = extractedRaw.trim();
30400
+ if (!extracted) {
30401
+ return {
30402
+ success: false,
30403
+ error: `Extractor "${source.extractorId}" returned an empty value.`,
30404
+ };
30405
+ }
30406
+ return {
30407
+ success: true,
30408
+ value: extracted,
30409
+ };
30410
+ }
30411
+ catch (error) {
30412
+ return {
30413
+ success: false,
30414
+ error: error instanceof Error
30415
+ ? error.message
30416
+ : `Extractor "${source.extractorId}" failed.`,
30417
+ };
30418
+ }
30419
+ }
30420
+
30105
30421
  /**
30106
30422
  * Service for evaluating test case responses
30107
30423
  */
@@ -30115,34 +30431,71 @@ class EvaluationService {
30115
30431
  * @param testCase - The test case to evaluate
30116
30432
  * @param onResult - Callback to handle the evaluation result
30117
30433
  */
30118
- async evaluateTestCase(testCase, onResult) {
30119
- if (!testCase.output) {
30120
- console.warn('⚠️ No output to evaluate for test case:', testCase.id);
30121
- return;
30122
- }
30123
- const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
30434
+ async evaluateTestCase(testCase, onResult, extractors) {
30435
+ const fields = [];
30436
+ const failedFields = [];
30437
+ for (const [index, field] of (testCase.expectedOutcome || []).entries()) {
30124
30438
  if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
30125
- return [];
30439
+ continue;
30126
30440
  }
30127
- return [
30128
- {
30441
+ const evaluationParameters = normalizeEvaluationParametersForField(field.type, field.evaluationParameters);
30442
+ const expectedValue = getFieldExpectedValue(field);
30443
+ const resolvedActualValue = await resolveActualValue(field, testCase.output, extractors);
30444
+ if (resolvedActualValue.success) {
30445
+ fields.push({
30129
30446
  index,
30130
30447
  label: field.label,
30131
30448
  type: field.type,
30132
- expectedValue: getFieldExpectedValue(field),
30133
- evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
30134
- },
30135
- ];
30136
- });
30449
+ expectedValue,
30450
+ actualResponse: resolvedActualValue.value,
30451
+ evaluationParameters,
30452
+ });
30453
+ }
30454
+ else {
30455
+ failedFields.push({
30456
+ index,
30457
+ label: field.label,
30458
+ type: field.type,
30459
+ expectedValue,
30460
+ passed: false,
30461
+ keywordMatches: [],
30462
+ evaluationParameters,
30463
+ evaluationApproachResult: {
30464
+ score: 0,
30465
+ approachUsed: evaluationParameters.approach,
30466
+ },
30467
+ error: 'error' in resolvedActualValue
30468
+ ? resolvedActualValue.error
30469
+ : 'Failed to resolve actual value.',
30470
+ });
30471
+ }
30472
+ }
30473
+ if (fields.length === 0) {
30474
+ if (failedFields.length === 0) {
30475
+ console.warn('⚠️ No evaluable fields for test case:', testCase.id);
30476
+ return;
30477
+ }
30478
+ onResult({
30479
+ testCaseId: testCase.id,
30480
+ passed: false,
30481
+ keywordMatches: [],
30482
+ fieldResults: failedFields,
30483
+ timestamp: new Date().toISOString(),
30484
+ });
30485
+ return;
30486
+ }
30137
30487
  const evaluationRequest = {
30138
30488
  testCaseId: testCase.id,
30139
30489
  question: testCase.question,
30140
- actualResponse: testCase.output,
30141
30490
  fields,
30142
30491
  };
30143
30492
  await this.engine.evaluateResponse(evaluationRequest, (result) => {
30144
- console.log('📊 Evaluation result received:', result);
30145
- onResult(result);
30493
+ const combinedResults = [...(result.fieldResults || []), ...failedFields].sort((a, b) => a.index - b.index);
30494
+ onResult({
30495
+ ...result,
30496
+ passed: combinedResults.every(field => field.passed && !field.error),
30497
+ fieldResults: combinedResults,
30498
+ });
30146
30499
  });
30147
30500
  }
30148
30501
  }
@@ -30190,7 +30543,7 @@ const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isR
30190
30543
  };
30191
30544
 
30192
30545
  const ResponseOutput = ({ output, isRunning, }) => {
30193
- return (h("div", { class: "response-output" }, output ? (h("div", { class: "response-output__content" }, output)) : (h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : ''))));
30546
+ return (h("div", { class: "response-output" }, output?.text ? (h("div", { class: "response-output__content" }, output.text)) : (h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : ''))));
30194
30547
  };
30195
30548
 
30196
30549
  const EvaluationSummary = ({ result, isRunning, }) => {
@@ -30228,7 +30581,9 @@ var FormFieldType;
30228
30581
  FormFieldType["SELECT"] = "select";
30229
30582
  })(FormFieldType || (FormFieldType = {}));
30230
30583
 
30231
- const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, onExpectedOutcomeChange, }) => {
30584
+ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, extractorIds = [], onExpectedOutcomeChange, }) => {
30585
+ const hasExtractorOptions = extractorIds.length > 0;
30586
+ const firstExtractorId = extractorIds[0];
30232
30587
  const emit = (detail) => onExpectedOutcomeChange({
30233
30588
  detail,
30234
30589
  });
@@ -30258,6 +30613,23 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30258
30613
  required: false,
30259
30614
  rows: 2,
30260
30615
  });
30616
+ const buildEvaluationSourceConfig = (index) => ({
30617
+ name: `expectedOutcomeEvaluationSource-${index}`,
30618
+ fieldType: FormFieldType.SELECT,
30619
+ label: 'Evaluation Source',
30620
+ placeholder: 'Select evaluation source',
30621
+ required: true,
30622
+ optionList: ['text', 'custom'],
30623
+ defaultValue: 'text',
30624
+ });
30625
+ const buildExtractorConfig = (index) => ({
30626
+ name: `expectedOutcomeEvaluationSourceExtractor-${index}`,
30627
+ fieldType: FormFieldType.SELECT,
30628
+ label: 'Extractor',
30629
+ placeholder: 'Select extractor',
30630
+ required: true,
30631
+ optionList: extractorIds,
30632
+ });
30261
30633
  const renderEvaluationSelector = (field, index) => {
30262
30634
  const optionList = getAllowedApproachesForFieldType(field.type);
30263
30635
  return (h("app-select", { config: buildEvaluationConfig(index, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
@@ -30267,6 +30639,27 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30267
30639
  value: e.detail.value,
30268
30640
  }) }));
30269
30641
  };
30642
+ const renderEvaluationSourceSelector = (field, index) => {
30643
+ if (!hasExtractorOptions) {
30644
+ return null;
30645
+ }
30646
+ const sourceType = field.evaluationSource?.type || 'text';
30647
+ return (h("div", null, h("app-select", { config: buildEvaluationSourceConfig(index), value: sourceType, onValueChange: (e) => emit({
30648
+ testCaseId,
30649
+ index,
30650
+ operation: 'set-evaluation-source-type',
30651
+ value: e.detail.value,
30652
+ fallbackExtractorId: firstExtractorId,
30653
+ }) }), sourceType === 'custom' && (h("app-select", { config: buildExtractorConfig(index), value: field.evaluationSource?.type === 'custom'
30654
+ ? field.evaluationSource.extractorId
30655
+ : '', onValueChange: (e) => emit({
30656
+ testCaseId,
30657
+ index,
30658
+ operation: 'set-evaluation-source-extractor',
30659
+ value: e.detail.value,
30660
+ }) }))));
30661
+ };
30662
+ const renderEvaluationOptions = (field, index) => (h("details", { class: "expected-outcome-renderer__options" }, h("summary", { class: "expected-outcome-renderer__options-summary" }, "More options"), h("div", { class: "expected-outcome-renderer__options-content" }, renderEvaluationSelector(field, index), renderEvaluationSourceSelector(field, index))));
30270
30663
  return (h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index) => {
30271
30664
  if (field.type === 'textarea') {
30272
30665
  const isDynamic = dynamicResolutionSupported && field.outcomeMode === 'dynamic';
@@ -30298,7 +30691,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30298
30691
  index,
30299
30692
  operation: 'set-resolution-query',
30300
30693
  value: e.detail.value,
30301
- }) })), !isDynamic && renderEvaluationSelector(field, index)));
30694
+ }) })), !isDynamic && renderEvaluationOptions(field, index)));
30302
30695
  }
30303
30696
  if (field.type === 'chips-input') {
30304
30697
  const config = {
@@ -30318,7 +30711,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30318
30711
  index,
30319
30712
  operation: 'remove-chip',
30320
30713
  value: e.detail.value,
30321
- }) }), renderEvaluationSelector(field, index)));
30714
+ }) }), renderEvaluationOptions(field, index)));
30322
30715
  }
30323
30716
  if (field.type === 'select') {
30324
30717
  const config = {
@@ -30334,18 +30727,18 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30334
30727
  index,
30335
30728
  operation: 'set-value',
30336
30729
  value: e.detail.value,
30337
- }) }), renderEvaluationSelector(field, index)));
30730
+ }) }), renderEvaluationOptions(field, index)));
30338
30731
  }
30339
30732
  return (h("div", { class: "expected-outcome-renderer__group" }, h("div", { class: "expected-outcome-renderer__text" }, h("label", null, field.label), h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
30340
30733
  testCaseId,
30341
30734
  index,
30342
30735
  operation: 'set-value',
30343
30736
  value: e.target.value,
30344
- }) })), renderEvaluationSelector(field, index)));
30737
+ }) })), renderEvaluationOptions(field, index)));
30345
30738
  })));
30346
30739
  };
30347
30740
 
30348
- const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30741
+ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30349
30742
  const questionConfig = {
30350
30743
  name: 'question',
30351
30744
  fieldType: FormFieldType.TEXT_AREA,
@@ -30371,11 +30764,11 @@ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, o
30371
30764
  value,
30372
30765
  },
30373
30766
  });
30374
- } }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
30767
+ } }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
30375
30768
  };
30376
30769
 
30377
- const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30378
- return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange, onChatHistoryChange: onChatHistoryChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
30770
+ const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30771
+ return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange, onChatHistoryChange: onChatHistoryChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
30379
30772
  };
30380
30773
 
30381
30774
  const tokensCss = () => `:host{--spacing:0.25rem;--spacing-1:calc(var(--spacing) * 1);--spacing-2:calc(var(--spacing) * 2);--spacing-3:calc(var(--spacing) * 3);--spacing-4:calc(var(--spacing) * 4);--spacing-5:calc(var(--spacing) * 5);--spacing-6:calc(var(--spacing) * 6);--spacing-8:calc(var(--spacing) * 8);--spacing-10:calc(var(--spacing) * 10);--spacing-12:calc(var(--spacing) * 12);--spacing-16:calc(var(--spacing) * 16);--spacing-20:calc(var(--spacing) * 20);--spacing-24:calc(var(--spacing) * 24);--radius-none:0;--radius-sm:0.125rem;--radius-md:0.375rem;--radius-lg:0.5rem;--radius-xl:0.75rem;--radius-2xl:1rem;--radius-3xl:1.5rem;--radius-full:9999px;--radius:var(--radius-lg);--font-size-xs:0.75rem;--font-size-sm:0.875rem;--font-size-base:1rem;--font-size-lg:1.125rem;--font-size-xl:1.25rem;--font-size-2xl:1.5rem;--font-size-3xl:1.875rem;--font-size-4xl:2.25rem;--font-weight-normal:400;--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--line-height-none:1;--line-height-tight:1.25;--line-height-snug:1.375;--line-height-normal:1.5;--line-height-relaxed:1.625;--line-height-loose:2;--letter-spacing-tight:-0.025em;--letter-spacing-normal:0;--letter-spacing-wide:0.05em;--shadow-sm:0 1px 2px 0 rgba(0, 0, 0, 0.05);--shadow-md:0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);--shadow-lg:0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);--shadow-xl:0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);--shadow-2xl:0 25px 50px -12px rgba(0, 0, 0, 0.25);--border-width:1px;--z-base:0;--z-dropdown:1000;--z-sticky:1100;--z-modal:1200;--z-popover:1300;--z-tooltip:1400;--opacity-disabled:0.5;--opacity-hover:0.8;--opacity-muted:0.6;--max-w-sm:24rem;--max-w-md:28rem;--max-w-lg:32rem;--max-w-xl:42rem;--max-w-2xl:48rem;--max-w-full:100%;--breakpoint-sm:640px;--breakpoint-md:768px;--breakpoint-lg:1024px;--breakpoint-xl:1280px;--breakpoint-2xl:1536px;--background:#ffffff;--foreground:#0a0a0a;--card:#ffffff;--card-foreground:#0a0a0a;--popover:#ffffff;--popover-foreground:#0a0a0a;--primary:#0a0a0a;--primary-foreground:#fafafa;--secondary:#f4f4f5;--secondary-foreground:#0a0a0a;--muted:#f4f4f5;--muted-foreground:#71717a;--accent:#f4f4f5;--accent-foreground:#0a0a0a;--destructive:#ef4444;--destructive-foreground:#fafafa;--border:#e4e4e7;--input:#e4e4e7;--ring:#3b82f6;--success:#10b981;--success-foreground:#fafafa;--warning:#f59e0b;--warning-foreground:#fafafa;--info:#3b82f6;--info-foreground:#fafafa}:host([data-theme='dark']){--background:#0a0a0a;--foreground:#fafafa;--card:#171717;--card-foreground:#fafafa;--popover:#171717;--popover-foreground:#fafafa;--primary:#fafafa;--primary-foreground:#0a0a0a;--secondary:#27272a;--secondary-foreground:#fafafa;--muted:#27272a;--muted-foreground:#a1a1aa;--accent:#27272a;--accent-foreground:#fafafa;--destructive:#dc2626;--destructive-foreground:#fafafa;--border:#27272a;--input:#27272a;--ring:#3b82f6;--success:#059669;--success-foreground:#fafafa;--warning:#d97706;--warning-foreground:#fafafa;--info:#2563eb;--info-foreground:#fafafa}`;
@@ -30386,7 +30779,7 @@ const llmTestRunnerHeaderCss = () => `.test-runner-header{display:flex;justify-c
30386
30779
 
30387
30780
  const llmTestCasesCss = () => `.test-cases{background:var(--background)}.test-cases__column-headers{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);background:var(--border);border-bottom:2px solid var(--border)}.test-cases__column-header{background:var(--muted);padding:var(--spacing-4) var(--spacing-5);font-weight:var(--font-weight-semibold);color:var(--foreground);font-size:var(--font-size-sm);text-transform:uppercase;letter-spacing:var(--letter-spacing-wide)}.test-cases__add-section{padding:var(--spacing-6);text-align:center;background:var(--muted);border-top:var(--border-width) solid var(--border)}@media (max-width: 1200px){.test-cases__column-headers{display:none}}`;
30388
30781
 
30389
- const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
30782
+ const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}.expected-outcome-renderer__options{border:var(--border-width) solid var(--border);border-radius:var(--radius-sm);background:var(--muted)}.expected-outcome-renderer__options-summary{cursor:pointer;font-size:var(--font-size-sm);color:var(--foreground);padding:var(--spacing-2) var(--spacing-3);user-select:none}.expected-outcome-renderer__options-content{display:flex;flex-direction:column;gap:var(--spacing-2);padding:0 var(--spacing-3) var(--spacing-3)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
30390
30783
 
30391
30784
  const rowActionsCss = () => `.row-actions{height:100%;padding:var(--spacing-5);background:var(--background);display:flex;flex-direction:column;gap:var(--spacing-3);align-items:center;justify-content:flex-start;align-self:flex-start}@media (max-width: 1200px){.row-actions{border-right:none;border-bottom:var(--border-width) solid var(--border);flex-direction:row;justify-content:center}}@media (max-width: 768px){.row-actions{padding:var(--spacing-4)}}`;
30392
30785
 
@@ -30412,6 +30805,7 @@ const LLMTestRunner = class {
30412
30805
  useSave = false;
30413
30806
  usePromptEditor = false;
30414
30807
  resolveExpectedOutcome;
30808
+ evaluationSourceExtractors;
30415
30809
  initialTestCases;
30416
30810
  defaultExpectedOutcomeSchema;
30417
30811
  testCases = [
@@ -30448,6 +30842,12 @@ const LLMTestRunner = class {
30448
30842
  // Initialize testCases from prop if provided
30449
30843
  if (this.initialTestCases !== undefined) {
30450
30844
  validateTestCaseInputArray(this.initialTestCases);
30845
+ const extractorIds = getExtractorIds(this.evaluationSourceExtractors);
30846
+ if (extractorIds.length > 0) {
30847
+ this.initialTestCases.forEach(testCase => {
30848
+ validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, extractorIds);
30849
+ });
30850
+ }
30451
30851
  this.testCases = this.initialTestCases.map((rawTestCase, index) => {
30452
30852
  try {
30453
30853
  return createTestCaseFromInput(rawTestCase);
@@ -30471,8 +30871,6 @@ const LLMTestRunner = class {
30471
30871
  this.testCases = [];
30472
30872
  }
30473
30873
  }
30474
- componentDidLoad() { }
30475
- disconnectedCallback() { }
30476
30874
  async resetSavingState() {
30477
30875
  this.isSaving = false;
30478
30876
  }
@@ -30505,7 +30903,7 @@ const LLMTestRunner = class {
30505
30903
  updateTestCase(id, updates) {
30506
30904
  this.testCases = this.testCases.map(tc => tc.id === id ? { ...tc, ...updates } : tc);
30507
30905
  }
30508
- requestLlmText(testCase) {
30906
+ requestLlmResponse(testCase) {
30509
30907
  return new Promise((resolve, reject) => {
30510
30908
  const payload = {
30511
30909
  prompt: testCase.question,
@@ -30528,14 +30926,14 @@ const LLMTestRunner = class {
30528
30926
  const startTime = Date.now();
30529
30927
  this.updateTestCase(testCase.id, { isRunning: true });
30530
30928
  const [llmSettled, resolutionSettled] = await Promise.allSettled([
30531
- this.requestLlmText(testCase),
30929
+ this.requestLlmResponse(testCase),
30532
30930
  resolveDynamicExpectedOutcomes(testCase, this.resolveExpectedOutcome),
30533
30931
  ]);
30534
30932
  const responseTime = Date.now() - startTime;
30535
30933
  if (llmSettled.status === 'rejected') {
30536
30934
  this.updateTestCase(testCase.id, {
30537
30935
  isRunning: false,
30538
- output: null,
30936
+ output: undefined,
30539
30937
  error: this.addErrorMessage(llmSettled.reason, 'Unknown error'),
30540
30938
  responseTime,
30541
30939
  });
@@ -30583,7 +30981,7 @@ const LLMTestRunner = class {
30583
30981
  this.updateTestCase(testCase.id, {
30584
30982
  evaluationResult: result,
30585
30983
  });
30586
- });
30984
+ }, this.evaluationSourceExtractors);
30587
30985
  }
30588
30986
  async runAllTests() {
30589
30987
  this.isRunningAll = true;
@@ -30614,7 +31012,7 @@ const LLMTestRunner = class {
30614
31012
  this.error = '';
30615
31013
  try {
30616
31014
  const content = await readFileAsync(file);
30617
- const result = importTestSuite(content);
31015
+ const result = importTestSuite(content, getExtractorIds(this.evaluationSourceExtractors));
30618
31016
  if (!result.success) {
30619
31017
  this.error = result.error || 'Unknown error occurred during import.';
30620
31018
  return;
@@ -30675,7 +31073,7 @@ const LLMTestRunner = class {
30675
31073
  }
30676
31074
  }
30677
31075
  render() {
30678
- return (h("div", { key: 'cc808096f929b2e1c570c53144aab195d177c187', class: "test-runner-container" }, h(LLMTestRunnerHeader, { key: 'b91cf3df7df0e95bfd4908a2f91c7310b5b7a09a', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), h(ErrorMessage, { key: 'c7991497173fa9843e7aa42f5283d0897ddff2e2', message: this.error, onClear: () => (this.error = '') }), h("div", { key: '2b57132564442b8047d8672c6adcba62cdc9ae87', class: "test-runner-container__content" }, h(LLMTestCases, { key: '146e9d8c76a34980a2a274dd856887c22e1ed0e9', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
31076
+ return (h("div", { key: '7433beaa1d60d48f65600c43e11b302b892a7bca', class: "test-runner-container" }, h(LLMTestRunnerHeader, { key: '8083cc39376e7a710bd3f52efb184b959e885a87', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), h(ErrorMessage, { key: 'ddced98c13cd595c4cfb6eef11b27cb173769518', message: this.error, onClear: () => (this.error = '') }), h("div", { key: '8d6f65c4d68d34869b644709eacb97fec93683c6', class: "test-runner-container__content" }, h(LLMTestCases, { key: '5ccb186132b23af6209209b0a14086e03cf790af', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, extractorIds: getExtractorIds(this.evaluationSourceExtractors), onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
30679
31077
  }
30680
31078
  };
30681
31079
  LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));