llm-testrunner-components 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/README.md +9 -5
  2. package/dist/cjs/{app-chips_4.cjs.entry.js → app-chips_5.cjs.entry.js} +38 -6
  3. package/dist/cjs/app-chips_5.cjs.entry.js.map +1 -0
  4. package/dist/cjs/index.cjs.js +499 -68
  5. package/dist/cjs/index.cjs.js.map +1 -1
  6. package/dist/cjs/llm-testrunner.cjs.js +1 -1
  7. package/dist/cjs/loader.cjs.js +1 -1
  8. package/dist/collection/collection-manifest.json +1 -0
  9. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +22 -12
  10. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
  11. package/dist/collection/components/llm-test-runner/llm-test-runner.js +59 -15
  12. package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
  13. package/dist/collection/components/llm-test-runner/test-cases/chat-history.css +101 -0
  14. package/dist/collection/components/llm-test-runner/test-cases/chat-history.js +105 -0
  15. package/dist/collection/components/llm-test-runner/test-cases/chat-history.js.map +1 -0
  16. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +45 -5
  17. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
  18. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +21 -0
  19. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +12 -2
  20. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
  21. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
  22. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
  23. package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js +1 -1
  24. package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js.map +1 -1
  25. package/dist/collection/index.js.map +1 -1
  26. package/dist/collection/lib/evaluation/actual-value-resolver.js +52 -0
  27. package/dist/collection/lib/evaluation/actual-value-resolver.js.map +1 -0
  28. package/dist/collection/lib/evaluation/evaluation-engine.js +1 -1
  29. package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
  30. package/dist/collection/lib/evaluation/evaluation-service.js +55 -17
  31. package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
  32. package/dist/collection/lib/evaluation/types.js.map +1 -1
  33. package/dist/collection/lib/form/components/app-chips.js +1 -1
  34. package/dist/collection/lib/form/components/app-select.js +1 -1
  35. package/dist/collection/lib/form/components/app-textarea.js +2 -2
  36. package/dist/collection/lib/import-export/test-suite-exporter.js +4 -0
  37. package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
  38. package/dist/collection/lib/import-export/test-suite-importer.js +7 -1
  39. package/dist/collection/lib/import-export/test-suite-importer.js.map +1 -1
  40. package/dist/collection/lib/test-cases/test-case-factory.js +7 -0
  41. package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
  42. package/dist/collection/lib/test-cases/test-case-mutations.js +58 -23
  43. package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
  44. package/dist/collection/schemas/expected-outcome.js +39 -0
  45. package/dist/collection/schemas/expected-outcome.js.map +1 -1
  46. package/dist/collection/schemas/model-response.js +7 -0
  47. package/dist/collection/schemas/model-response.js.map +1 -0
  48. package/dist/collection/schemas/test-case.js +8 -1
  49. package/dist/collection/schemas/test-case.js.map +1 -1
  50. package/dist/collection/types/expected-outcome.js.map +1 -1
  51. package/dist/collection/types/llm-test-runner.js.map +1 -1
  52. package/dist/collection/types/test-case.js.map +1 -1
  53. package/dist/components/app-chips.js +1 -1
  54. package/dist/components/app-select.js +1 -1
  55. package/dist/components/app-textarea.js +1 -1
  56. package/dist/components/chat-history.d.ts +11 -0
  57. package/dist/components/chat-history.js +2 -0
  58. package/dist/components/chat-history.js.map +1 -0
  59. package/dist/components/index.js +1 -1
  60. package/dist/components/llm-test-runner.js +1 -1
  61. package/dist/components/{p-CVtKFBJl.js → p-D2qDAxFN.js} +2 -2
  62. package/dist/components/{p-Dv7cB5FU.js → p-D4dHUFN9.js} +2 -2
  63. package/dist/components/{p-CE5-1jfZ.js → p-eN2dLrsr.js} +2 -2
  64. package/dist/components/p-kmtfMXcQ.js +2 -0
  65. package/dist/components/p-kmtfMXcQ.js.map +1 -0
  66. package/dist/components/{p-BcygfrMf.js → p-wzA48RFK.js} +3 -3
  67. package/dist/components/p-wzA48RFK.js.map +1 -0
  68. package/dist/esm/{app-chips_4.entry.js → app-chips_5.entry.js} +38 -7
  69. package/dist/esm/app-chips_5.entry.js.map +1 -0
  70. package/dist/esm/index.js +499 -68
  71. package/dist/esm/index.js.map +1 -1
  72. package/dist/esm/llm-testrunner.js +1 -1
  73. package/dist/esm/loader.js +1 -1
  74. package/dist/llm-testrunner/index.esm.js +2 -2
  75. package/dist/llm-testrunner/index.esm.js.map +1 -1
  76. package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
  77. package/dist/llm-testrunner/p-5bf1fc78.entry.js +2 -0
  78. package/dist/llm-testrunner/p-5bf1fc78.entry.js.map +1 -0
  79. package/dist/react/components.d.ts +6 -1
  80. package/dist/react/components.d.ts.map +1 -1
  81. package/dist/react/components.js +9 -0
  82. package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +4 -4
  83. package/dist/types/components/llm-test-runner/test-cases/chat-history.d.ts +14 -0
  84. package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +1 -0
  85. package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +6 -0
  86. package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +3 -0
  87. package/dist/types/components/llm-test-runner/test-cases/output/response-output.d.ts +2 -1
  88. package/dist/types/components.d.ts +55 -2
  89. package/dist/types/index.d.ts +1 -1
  90. package/dist/types/lib/evaluation/actual-value-resolver.d.ts +9 -0
  91. package/dist/types/lib/evaluation/evaluation-service.d.ts +2 -2
  92. package/dist/types/lib/evaluation/types.d.ts +1 -1
  93. package/dist/types/lib/import-export/test-suite-exporter.d.ts +4 -0
  94. package/dist/types/lib/import-export/test-suite-importer.d.ts +1 -1
  95. package/dist/types/lib/test-cases/test-case-mutations.d.ts +10 -1
  96. package/dist/types/schemas/expected-outcome.d.ts +116 -0
  97. package/dist/types/schemas/model-response.d.ts +7 -0
  98. package/dist/types/schemas/test-case.d.ts +93 -1
  99. package/dist/types/types/expected-outcome.d.ts +1 -1
  100. package/dist/types/types/llm-test-runner.d.ts +6 -3
  101. package/dist/types/types/test-case.d.ts +1 -1
  102. package/package.json +1 -1
  103. package/dist/cjs/app-chips_4.cjs.entry.js.map +0 -1
  104. package/dist/components/p-BcygfrMf.js.map +0 -1
  105. package/dist/esm/app-chips_4.entry.js.map +0 -1
  106. package/dist/llm-testrunner/p-5df053b4.entry.js +0 -2
  107. package/dist/llm-testrunner/p-5df053b4.entry.js.map +0 -1
  108. /package/dist/components/{p-CVtKFBJl.js.map → p-D2qDAxFN.js.map} +0 -0
  109. /package/dist/components/{p-Dv7cB5FU.js.map → p-D4dHUFN9.js.map} +0 -0
  110. /package/dist/components/{p-CE5-1jfZ.js.map → p-eN2dLrsr.js.map} +0 -0
@@ -106,6 +106,10 @@ function formatTestSuiteAsJson(testCases) {
106
106
  id: testCase.id,
107
107
  question: testCase.question,
108
108
  expectedOutcome: testCase.expectedOutcome,
109
+ chatHistory: {
110
+ enabled: testCase.chatHistory.enabled,
111
+ value: testCase.chatHistory.value,
112
+ },
109
113
  }));
110
114
  return JSON.stringify(exportData, null, 2);
111
115
  }
@@ -291,6 +295,7 @@ const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
291
295
  function normalizeExpectedOutcomeField(field) {
292
296
  return {
293
297
  ...field,
298
+ evaluationSource: field.evaluationSource || { type: 'text' },
294
299
  evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
295
300
  };
296
301
  }
@@ -303,6 +308,7 @@ function createTestCase(expectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA)
303
308
  id: v4(),
304
309
  question: '',
305
310
  expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),
311
+ chatHistory: { enabled: false, value: '' },
306
312
  isRunning: false,
307
313
  };
308
314
  }
@@ -313,6 +319,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
313
319
  type: 'text',
314
320
  label: schemaField.label,
315
321
  placeholder: schemaField.placeholder,
322
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
316
323
  value: '',
317
324
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
318
325
  };
@@ -321,6 +328,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
321
328
  type: 'textarea',
322
329
  label: schemaField.label,
323
330
  placeholder: schemaField.placeholder,
331
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
324
332
  rows: schemaField.rows,
325
333
  value: '',
326
334
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -330,6 +338,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
330
338
  type: 'chips-input',
331
339
  label: schemaField.label,
332
340
  placeholder: schemaField.placeholder,
341
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
333
342
  value: [],
334
343
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
335
344
  };
@@ -338,6 +347,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
338
347
  type: 'select',
339
348
  label: schemaField.label,
340
349
  placeholder: schemaField.placeholder,
350
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
341
351
  value: schemaField.options[0],
342
352
  options: schemaField.options,
343
353
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -362,6 +372,7 @@ function createExpectedOutcomeFromSchema(expectedOutcomeSchema) {
362
372
  function createTestCaseFromInput(data) {
363
373
  return {
364
374
  ...data,
375
+ chatHistory: data.chatHistory ?? { enabled: false, value: '' },
365
376
  expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),
366
377
  };
367
378
  }
@@ -2566,6 +2577,122 @@ function handleIntersectionResults(result, left, right) {
2566
2577
  result.value = merged.data;
2567
2578
  return result;
2568
2579
  }
2580
+ const $ZodRecord = /*@__PURE__*/ $constructor("$ZodRecord", (inst, def) => {
2581
+ $ZodType.init(inst, def);
2582
+ inst._zod.parse = (payload, ctx) => {
2583
+ const input = payload.value;
2584
+ if (!isPlainObject(input)) {
2585
+ payload.issues.push({
2586
+ expected: "record",
2587
+ code: "invalid_type",
2588
+ input,
2589
+ inst,
2590
+ });
2591
+ return payload;
2592
+ }
2593
+ const proms = [];
2594
+ const values = def.keyType._zod.values;
2595
+ if (values) {
2596
+ payload.value = {};
2597
+ const recordKeys = new Set();
2598
+ for (const key of values) {
2599
+ if (typeof key === "string" || typeof key === "number" || typeof key === "symbol") {
2600
+ recordKeys.add(typeof key === "number" ? key.toString() : key);
2601
+ const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
2602
+ if (result instanceof Promise) {
2603
+ proms.push(result.then((result) => {
2604
+ if (result.issues.length) {
2605
+ payload.issues.push(...prefixIssues(key, result.issues));
2606
+ }
2607
+ payload.value[key] = result.value;
2608
+ }));
2609
+ }
2610
+ else {
2611
+ if (result.issues.length) {
2612
+ payload.issues.push(...prefixIssues(key, result.issues));
2613
+ }
2614
+ payload.value[key] = result.value;
2615
+ }
2616
+ }
2617
+ }
2618
+ let unrecognized;
2619
+ for (const key in input) {
2620
+ if (!recordKeys.has(key)) {
2621
+ unrecognized = unrecognized ?? [];
2622
+ unrecognized.push(key);
2623
+ }
2624
+ }
2625
+ if (unrecognized && unrecognized.length > 0) {
2626
+ payload.issues.push({
2627
+ code: "unrecognized_keys",
2628
+ input,
2629
+ inst,
2630
+ keys: unrecognized,
2631
+ });
2632
+ }
2633
+ }
2634
+ else {
2635
+ payload.value = {};
2636
+ for (const key of Reflect.ownKeys(input)) {
2637
+ if (key === "__proto__")
2638
+ continue;
2639
+ let keyResult = def.keyType._zod.run({ value: key, issues: [] }, ctx);
2640
+ if (keyResult instanceof Promise) {
2641
+ throw new Error("Async schemas not supported in object keys currently");
2642
+ }
2643
+ // Numeric string fallback: if key is a numeric string and failed, retry with Number(key)
2644
+ // This handles z.number(), z.literal([1, 2, 3]), and unions containing numeric literals
2645
+ const checkNumericKey = typeof key === "string" && number$1.test(key) && keyResult.issues.length;
2646
+ if (checkNumericKey) {
2647
+ const retryResult = def.keyType._zod.run({ value: Number(key), issues: [] }, ctx);
2648
+ if (retryResult instanceof Promise) {
2649
+ throw new Error("Async schemas not supported in object keys currently");
2650
+ }
2651
+ if (retryResult.issues.length === 0) {
2652
+ keyResult = retryResult;
2653
+ }
2654
+ }
2655
+ if (keyResult.issues.length) {
2656
+ if (def.mode === "loose") {
2657
+ // Pass through unchanged
2658
+ payload.value[key] = input[key];
2659
+ }
2660
+ else {
2661
+ // Default "strict" behavior: error on invalid key
2662
+ payload.issues.push({
2663
+ code: "invalid_key",
2664
+ origin: "record",
2665
+ issues: keyResult.issues.map((iss) => finalizeIssue(iss, ctx, config())),
2666
+ input: key,
2667
+ path: [key],
2668
+ inst,
2669
+ });
2670
+ }
2671
+ continue;
2672
+ }
2673
+ const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
2674
+ if (result instanceof Promise) {
2675
+ proms.push(result.then((result) => {
2676
+ if (result.issues.length) {
2677
+ payload.issues.push(...prefixIssues(key, result.issues));
2678
+ }
2679
+ payload.value[keyResult.value] = result.value;
2680
+ }));
2681
+ }
2682
+ else {
2683
+ if (result.issues.length) {
2684
+ payload.issues.push(...prefixIssues(key, result.issues));
2685
+ }
2686
+ payload.value[keyResult.value] = result.value;
2687
+ }
2688
+ }
2689
+ }
2690
+ if (proms.length) {
2691
+ return Promise.all(proms).then(() => payload);
2692
+ }
2693
+ return payload;
2694
+ };
2695
+ });
2569
2696
  const $ZodEnum = /*@__PURE__*/ $constructor("$ZodEnum", (inst, def) => {
2570
2697
  $ZodType.init(inst, def);
2571
2698
  const values = getEnumValues(def.entries);
@@ -4149,6 +4276,49 @@ const intersectionProcessor = (schema, ctx, json, params) => {
4149
4276
  ];
4150
4277
  json.allOf = allOf;
4151
4278
  };
4279
+ const recordProcessor = (schema, ctx, _json, params) => {
4280
+ const json = _json;
4281
+ const def = schema._zod.def;
4282
+ json.type = "object";
4283
+ // For looseRecord with regex patterns, use patternProperties
4284
+ // This correctly represents "only validate keys matching the pattern" semantics
4285
+ // and composes well with allOf (intersections)
4286
+ const keyType = def.keyType;
4287
+ const keyBag = keyType._zod.bag;
4288
+ const patterns = keyBag?.patterns;
4289
+ if (def.mode === "loose" && patterns && patterns.size > 0) {
4290
+ // Use patternProperties for looseRecord with regex patterns
4291
+ const valueSchema = process$1(def.valueType, ctx, {
4292
+ ...params,
4293
+ path: [...params.path, "patternProperties", "*"],
4294
+ });
4295
+ json.patternProperties = {};
4296
+ for (const pattern of patterns) {
4297
+ json.patternProperties[pattern.source] = valueSchema;
4298
+ }
4299
+ }
4300
+ else {
4301
+ // Default behavior: use propertyNames + additionalProperties
4302
+ if (ctx.target === "draft-07" || ctx.target === "draft-2020-12") {
4303
+ json.propertyNames = process$1(def.keyType, ctx, {
4304
+ ...params,
4305
+ path: [...params.path, "propertyNames"],
4306
+ });
4307
+ }
4308
+ json.additionalProperties = process$1(def.valueType, ctx, {
4309
+ ...params,
4310
+ path: [...params.path, "additionalProperties"],
4311
+ });
4312
+ }
4313
+ // Add required for keys with discrete values (enum, literal, etc.)
4314
+ const keyValues = keyType._zod.values;
4315
+ if (keyValues) {
4316
+ const validKeyValues = [...keyValues].filter((v) => typeof v === "string" || typeof v === "number");
4317
+ if (validKeyValues.length > 0) {
4318
+ json.required = validKeyValues;
4319
+ }
4320
+ }
4321
+ };
4152
4322
  const nullableProcessor = (schema, ctx, json, params) => {
4153
4323
  const def = schema._zod.def;
4154
4324
  const inner = process$1(def.innerType, ctx, params);
@@ -4703,6 +4873,21 @@ function intersection(left, right) {
4703
4873
  right: right,
4704
4874
  });
4705
4875
  }
4876
+ const ZodRecord = /*@__PURE__*/ $constructor("ZodRecord", (inst, def) => {
4877
+ $ZodRecord.init(inst, def);
4878
+ ZodType.init(inst, def);
4879
+ inst._zod.processJSONSchema = (ctx, json, params) => recordProcessor(inst, ctx, json, params);
4880
+ inst.keyType = def.keyType;
4881
+ inst.valueType = def.valueType;
4882
+ });
4883
+ function record(keyType, valueType, params) {
4884
+ return new ZodRecord({
4885
+ type: "record",
4886
+ keyType,
4887
+ valueType: valueType,
4888
+ ...normalizeParams(params),
4889
+ });
4890
+ }
4706
4891
  const ZodEnum = /*@__PURE__*/ $constructor("ZodEnum", (inst, def) => {
4707
4892
  $ZodEnum.init(inst, def);
4708
4893
  ZodType.init(inst, def);
@@ -4940,7 +5125,7 @@ const ZodCustom = /*@__PURE__*/ $constructor("ZodCustom", (inst, def) => {
4940
5125
  inst._zod.processJSONSchema = (ctx, json, params) => customProcessor(inst, ctx);
4941
5126
  });
4942
5127
  function custom(fn, _params) {
4943
- return _custom(ZodCustom, (() => true), _params);
5128
+ return _custom(ZodCustom, fn ?? (() => true), _params);
4944
5129
  }
4945
5130
  function refine(fn, _params = {}) {
4946
5131
  return _refine(ZodCustom, fn, _params);
@@ -4955,6 +5140,19 @@ const optionalPositiveInt = number().int().positive().optional();
4955
5140
  const optionalString = string().optional();
4956
5141
  const selectOptionsSchema = array(nonEmptyString).min(1);
4957
5142
  const optionalNumber = number().optional();
5143
+ const textEvaluationSourceSchema = object({
5144
+ type: literal('text'),
5145
+ });
5146
+ const customEvaluationSourceSchema = object({
5147
+ type: literal('custom'),
5148
+ extractorId: nonEmptyString,
5149
+ });
5150
+ const evaluationSourceExtractorSchema = custom(value => typeof value === 'function', 'Extractor must be a function.');
5151
+ record(string().min(1), evaluationSourceExtractorSchema);
5152
+ const evaluationSourceSchema = discriminatedUnion('type', [
5153
+ textEvaluationSourceSchema,
5154
+ customEvaluationSourceSchema,
5155
+ ]);
4958
5156
  const expectedOutcomeModeSchema = _enum(['static', 'dynamic']);
4959
5157
  const evaluationParametersSchema = object({
4960
5158
  approach: _enum(EvaluationApproach),
@@ -4972,6 +5170,7 @@ const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine(
4972
5170
  const defaultExpectedOutcomeBaseSchema = object({
4973
5171
  label: nonEmptyString,
4974
5172
  placeholder: optionalString,
5173
+ evaluationSource: evaluationSourceSchema.optional(),
4975
5174
  });
4976
5175
  const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
4977
5176
  text: baseSchema.extend({
@@ -5064,18 +5263,55 @@ function validateExpectedOutcomeSchema(schema) {
5064
5263
  throw new Error(`Invalid expectedOutcomeSchema: ${parsed.error.issues[0].message}`);
5065
5264
  }
5066
5265
  }
5266
+ function validateExpectedOutcomeArrayWithExtractors(expectedOutcome, allowedExtractorIds) {
5267
+ const allowed = new Set(allowedExtractorIds);
5268
+ const schema = expectedOutcomeArraySchema.superRefine((fields, ctx) => {
5269
+ fields.forEach((field, index) => {
5270
+ if (field.evaluationSource?.type !== 'custom') {
5271
+ return;
5272
+ }
5273
+ if (allowed.has(field.evaluationSource.extractorId)) {
5274
+ return;
5275
+ }
5276
+ ctx.addIssue({
5277
+ code: 'custom',
5278
+ path: [index, 'evaluationSource', 'extractorId'],
5279
+ message: `Invalid expectedOutcome: Extractor "${field.evaluationSource.extractorId}" is not registered.`,
5280
+ });
5281
+ });
5282
+ });
5283
+ const parsed = schema.safeParse(expectedOutcome);
5284
+ if (!parsed.success) {
5285
+ throw new Error(parsed.error.issues[0].message);
5286
+ }
5287
+ }
5288
+ function getExtractorIds(extractors) {
5289
+ return Object.keys(extractors || {});
5290
+ }
5067
5291
 
5292
+ const modelResponseMetadataSchema = record(string(), unknown());
5293
+ const modelResponsePayloadSchema = object({
5294
+ text: string().optional(),
5295
+ metadata: modelResponseMetadataSchema.optional(),
5296
+ });
5297
+
5298
+ const testCaseChatHistorySchema = object({
5299
+ enabled: boolean(),
5300
+ value: string(),
5301
+ });
5068
5302
  const testCaseInputSchema = object({
5069
5303
  id: string(),
5070
5304
  question: string(),
5071
5305
  expectedOutcome: expectedOutcomeArraySchema,
5306
+ chatHistory: testCaseChatHistorySchema.optional(),
5072
5307
  });
5073
5308
  const testCaseInputArraySchema = array(testCaseInputSchema);
5074
5309
  object({
5075
5310
  id: string(),
5076
5311
  question: string(),
5077
5312
  expectedOutcome: expectedOutcomeArraySchema,
5078
- output: string().optional(),
5313
+ output: modelResponsePayloadSchema.optional(),
5314
+ chatHistory: testCaseChatHistorySchema,
5079
5315
  isRunning: boolean().optional(),
5080
5316
  error: string().optional(),
5081
5317
  evaluationResult: custom().optional(),
@@ -5097,10 +5333,15 @@ function validateTestCaseInputArray(data) {
5097
5333
  * @param jsonContent - The JSON string to parse and validate
5098
5334
  * @returns Validation result with test cases or error message
5099
5335
  */
5100
- function importTestSuite(jsonContent) {
5336
+ function importTestSuite(jsonContent, allowedExtractorIds = []) {
5101
5337
  try {
5102
5338
  const parsed = JSON.parse(jsonContent);
5103
5339
  validateTestCaseInputArray(parsed);
5340
+ if (allowedExtractorIds.length > 0) {
5341
+ parsed.forEach((testCase) => {
5342
+ validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, allowedExtractorIds);
5343
+ });
5344
+ }
5104
5345
  const testCases = parsed.map((item, index) => {
5105
5346
  try {
5106
5347
  return createTestCaseFromInput(item);
@@ -5126,7 +5367,7 @@ function importTestSuite(jsonContent) {
5126
5367
  }
5127
5368
 
5128
5369
  const MISSING_RESOLVER_MESSAGE = 'resolveExpectedOutcome is required when a test case has dynamic expected outcomes.';
5129
- function isDynamicTextareaField(field) {
5370
+ function isDynamicTextareaField$1(field) {
5130
5371
  return field.type === 'textarea' && field.outcomeMode === 'dynamic';
5131
5372
  }
5132
5373
  function applyResolvedDynamicValues(testCase, resolvedValues) {
@@ -5136,7 +5377,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
5136
5377
  const expectedOutcome = [...(testCase.expectedOutcome || [])];
5137
5378
  for (const resolved of resolvedValues) {
5138
5379
  const field = expectedOutcome[resolved.index];
5139
- if (!field || !isDynamicTextareaField(field)) {
5380
+ if (!field || !isDynamicTextareaField$1(field)) {
5140
5381
  continue;
5141
5382
  }
5142
5383
  expectedOutcome[resolved.index] = {
@@ -5151,7 +5392,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
5151
5392
  }
5152
5393
  async function resolveDynamicExpectedOutcomes(testCase, resolver) {
5153
5394
  const dynamicFields = (testCase.expectedOutcome || []).flatMap((field, index) => {
5154
- if (!isDynamicTextareaField(field)) {
5395
+ if (!isDynamicTextareaField$1(field)) {
5155
5396
  return [];
5156
5397
  }
5157
5398
  return [{ field, index }];
@@ -5169,6 +5410,15 @@ async function resolveDynamicExpectedOutcomes(testCase, resolver) {
5169
5410
  return applyResolvedDynamicValues(testCase, resolvedValues);
5170
5411
  }
5171
5412
 
5413
+ function isChipsInputField(field) {
5414
+ return field.type === 'chips-input';
5415
+ }
5416
+ function isTextareaField(field) {
5417
+ return field.type === 'textarea';
5418
+ }
5419
+ function isDynamicTextareaField(field) {
5420
+ return isTextareaField(field) && field.outcomeMode === 'dynamic';
5421
+ }
5172
5422
  function applyExpectedOutcomeChange(testCase, change) {
5173
5423
  const { index } = change;
5174
5424
  const expectedOutcome = [...(testCase.expectedOutcome || [])];
@@ -5176,73 +5426,99 @@ function applyExpectedOutcomeChange(testCase, change) {
5176
5426
  if (!target) {
5177
5427
  return testCase;
5178
5428
  }
5429
+ const commit = (updatedField) => {
5430
+ expectedOutcome[index] = updatedField;
5431
+ return { ...testCase, expectedOutcome };
5432
+ };
5179
5433
  switch (change.operation) {
5180
5434
  case 'set-value': {
5181
- if (target.type === 'chips-input') {
5435
+ if (isChipsInputField(target)) {
5182
5436
  return testCase;
5183
5437
  }
5184
- if (target.type === 'textarea' && target.outcomeMode === 'dynamic') {
5438
+ if (isDynamicTextareaField(target)) {
5185
5439
  return testCase;
5186
5440
  }
5187
- expectedOutcome[index] = {
5441
+ return commit({
5188
5442
  ...target,
5189
5443
  value: change.value,
5190
- };
5191
- return { ...testCase, expectedOutcome };
5444
+ });
5192
5445
  }
5193
5446
  case 'add-chip': {
5194
- if (target.type !== 'chips-input') {
5447
+ if (!isChipsInputField(target)) {
5195
5448
  return testCase;
5196
5449
  }
5197
- expectedOutcome[index] = {
5450
+ return commit({
5198
5451
  ...target,
5199
5452
  value: [...target.value, change.value],
5200
- };
5201
- return { ...testCase, expectedOutcome };
5453
+ });
5202
5454
  }
5203
5455
  case 'remove-chip': {
5204
- if (target.type !== 'chips-input') {
5456
+ if (!isChipsInputField(target)) {
5205
5457
  return testCase;
5206
5458
  }
5207
- expectedOutcome[index] = {
5459
+ return commit({
5208
5460
  ...target,
5209
5461
  value: target.value.filter(chip => chip !== change.value),
5210
- };
5211
- return { ...testCase, expectedOutcome };
5462
+ });
5212
5463
  }
5213
5464
  case 'set-evaluation-approach':
5214
5465
  return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
5215
5466
  case 'set-outcome-mode': {
5216
- if (target.type !== 'textarea') {
5467
+ if (!isTextareaField(target)) {
5217
5468
  return testCase;
5218
5469
  }
5219
5470
  const mode = change.value;
5220
5471
  if (mode === 'static') {
5221
5472
  const { resolutionQuery: _, ...rest } = target;
5222
- expectedOutcome[index] = {
5473
+ return commit({
5223
5474
  ...rest,
5224
5475
  outcomeMode: 'static',
5225
5476
  value: '',
5226
- };
5477
+ });
5227
5478
  }
5228
5479
  else {
5229
- expectedOutcome[index] = {
5480
+ return commit({
5230
5481
  ...target,
5231
5482
  outcomeMode: 'dynamic',
5232
5483
  value: '',
5233
- };
5484
+ });
5234
5485
  }
5235
- return { ...testCase, expectedOutcome };
5236
5486
  }
5237
5487
  case 'set-resolution-query': {
5238
- if (target.type !== 'textarea' || target.outcomeMode !== 'dynamic') {
5488
+ if (!isDynamicTextareaField(target)) {
5239
5489
  return testCase;
5240
5490
  }
5241
- expectedOutcome[index] = {
5491
+ return commit({
5242
5492
  ...target,
5243
5493
  resolutionQuery: change.value,
5244
- };
5245
- return { ...testCase, expectedOutcome };
5494
+ });
5495
+ }
5496
+ case 'set-evaluation-source-type': {
5497
+ if (change.value === 'text') {
5498
+ return commit({
5499
+ ...target,
5500
+ evaluationSource: { type: 'text' },
5501
+ });
5502
+ }
5503
+ const extractorId = target.evaluationSource?.type === 'custom'
5504
+ ? target.evaluationSource.extractorId
5505
+ : (change.fallbackExtractorId ?? '');
5506
+ return commit({
5507
+ ...target,
5508
+ evaluationSource: {
5509
+ type: 'custom',
5510
+ extractorId,
5511
+ },
5512
+ });
5513
+ }
5514
+ case 'set-evaluation-source-extractor': {
5515
+ return commit({
5516
+ ...target,
5517
+ evaluationSource: {
5518
+ type: 'custom',
5519
+ extractorId: change.value,
5520
+ },
5521
+ });
5246
5522
  }
5247
5523
  }
5248
5524
  }
@@ -30023,7 +30299,7 @@ class LLMEvaluationEngine {
30023
30299
  const fieldRequest = {
30024
30300
  testCaseId: request.testCaseId,
30025
30301
  question: request.question,
30026
- actualResponse: request.actualResponse,
30302
+ actualResponse: field.actualResponse,
30027
30303
  expectedOutcome: field.expectedValue,
30028
30304
  evaluationParameters: field.evaluationParameters,
30029
30305
  };
@@ -30093,6 +30369,58 @@ class LLMEvaluationEngine {
30093
30369
  }
30094
30370
  }
30095
30371
 
30372
+ function toTextSource() {
30373
+ return { type: 'text' };
30374
+ }
30375
+ async function resolveActualValue(field, output, extractors) {
30376
+ const source = field.evaluationSource || toTextSource();
30377
+ if (source.type === 'text') {
30378
+ const text = output?.text?.trim();
30379
+ if (!text) {
30380
+ return {
30381
+ success: false,
30382
+ error: 'Model response text is empty.',
30383
+ };
30384
+ }
30385
+ return { success: true, value: text };
30386
+ }
30387
+ const extractor = extractors?.[source.extractorId];
30388
+ if (!extractor) {
30389
+ return {
30390
+ success: false,
30391
+ error: `Extractor "${source.extractorId}" is not registered.`,
30392
+ };
30393
+ }
30394
+ try {
30395
+ const extractedRaw = await extractor(output || {});
30396
+ if (typeof extractedRaw !== 'string') {
30397
+ return {
30398
+ success: false,
30399
+ error: `Extractor "${source.extractorId}" must return a string.`,
30400
+ };
30401
+ }
30402
+ const extracted = extractedRaw.trim();
30403
+ if (!extracted) {
30404
+ return {
30405
+ success: false,
30406
+ error: `Extractor "${source.extractorId}" returned an empty value.`,
30407
+ };
30408
+ }
30409
+ return {
30410
+ success: true,
30411
+ value: extracted,
30412
+ };
30413
+ }
30414
+ catch (error) {
30415
+ return {
30416
+ success: false,
30417
+ error: error instanceof Error
30418
+ ? error.message
30419
+ : `Extractor "${source.extractorId}" failed.`,
30420
+ };
30421
+ }
30422
+ }
30423
+
30096
30424
  /**
30097
30425
  * Service for evaluating test case responses
30098
30426
  */
@@ -30106,34 +30434,71 @@ class EvaluationService {
30106
30434
  * @param testCase - The test case to evaluate
30107
30435
  * @param onResult - Callback to handle the evaluation result
30108
30436
  */
30109
- async evaluateTestCase(testCase, onResult) {
30110
- if (!testCase.output) {
30111
- console.warn('⚠️ No output to evaluate for test case:', testCase.id);
30112
- return;
30113
- }
30114
- const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
30437
+ async evaluateTestCase(testCase, onResult, extractors) {
30438
+ const fields = [];
30439
+ const failedFields = [];
30440
+ for (const [index, field] of (testCase.expectedOutcome || []).entries()) {
30115
30441
  if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
30116
- return [];
30442
+ continue;
30117
30443
  }
30118
- return [
30119
- {
30444
+ const evaluationParameters = normalizeEvaluationParametersForField(field.type, field.evaluationParameters);
30445
+ const expectedValue = getFieldExpectedValue(field);
30446
+ const resolvedActualValue = await resolveActualValue(field, testCase.output, extractors);
30447
+ if (resolvedActualValue.success) {
30448
+ fields.push({
30120
30449
  index,
30121
30450
  label: field.label,
30122
30451
  type: field.type,
30123
- expectedValue: getFieldExpectedValue(field),
30124
- evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
30125
- },
30126
- ];
30127
- });
30452
+ expectedValue,
30453
+ actualResponse: resolvedActualValue.value,
30454
+ evaluationParameters,
30455
+ });
30456
+ }
30457
+ else {
30458
+ failedFields.push({
30459
+ index,
30460
+ label: field.label,
30461
+ type: field.type,
30462
+ expectedValue,
30463
+ passed: false,
30464
+ keywordMatches: [],
30465
+ evaluationParameters,
30466
+ evaluationApproachResult: {
30467
+ score: 0,
30468
+ approachUsed: evaluationParameters.approach,
30469
+ },
30470
+ error: 'error' in resolvedActualValue
30471
+ ? resolvedActualValue.error
30472
+ : 'Failed to resolve actual value.',
30473
+ });
30474
+ }
30475
+ }
30476
+ if (fields.length === 0) {
30477
+ if (failedFields.length === 0) {
30478
+ console.warn('⚠️ No evaluable fields for test case:', testCase.id);
30479
+ return;
30480
+ }
30481
+ onResult({
30482
+ testCaseId: testCase.id,
30483
+ passed: false,
30484
+ keywordMatches: [],
30485
+ fieldResults: failedFields,
30486
+ timestamp: new Date().toISOString(),
30487
+ });
30488
+ return;
30489
+ }
30128
30490
  const evaluationRequest = {
30129
30491
  testCaseId: testCase.id,
30130
30492
  question: testCase.question,
30131
- actualResponse: testCase.output,
30132
30493
  fields,
30133
30494
  };
30134
30495
  await this.engine.evaluateResponse(evaluationRequest, (result) => {
30135
- console.log('📊 Evaluation result received:', result);
30136
- onResult(result);
30496
+ const combinedResults = [...(result.fieldResults || []), ...failedFields].sort((a, b) => a.index - b.index);
30497
+ onResult({
30498
+ ...result,
30499
+ passed: combinedResults.every(field => field.passed && !field.error),
30500
+ fieldResults: combinedResults,
30501
+ });
30137
30502
  });
30138
30503
  }
30139
30504
  }
@@ -30181,7 +30546,7 @@ const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isR
30181
30546
  };
30182
30547
 
30183
30548
  const ResponseOutput = ({ output, isRunning, }) => {
30184
- return (index.h("div", { class: "response-output" }, output ? (index.h("div", { class: "response-output__content" }, output)) : (index.h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : ''))));
30549
+ return (index.h("div", { class: "response-output" }, output?.text ? (index.h("div", { class: "response-output__content" }, output.text)) : (index.h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : ''))));
30185
30550
  };
30186
30551
 
30187
30552
  const EvaluationSummary = ({ result, isRunning, }) => {
@@ -30219,7 +30584,9 @@ var FormFieldType;
30219
30584
  FormFieldType["SELECT"] = "select";
30220
30585
  })(FormFieldType || (FormFieldType = {}));
30221
30586
 
30222
- const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, onExpectedOutcomeChange, }) => {
30587
+ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, extractorIds = [], onExpectedOutcomeChange, }) => {
30588
+ const hasExtractorOptions = extractorIds.length > 0;
30589
+ const firstExtractorId = extractorIds[0];
30223
30590
  const emit = (detail) => onExpectedOutcomeChange({
30224
30591
  detail,
30225
30592
  });
@@ -30249,6 +30616,23 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30249
30616
  required: false,
30250
30617
  rows: 2,
30251
30618
  });
30619
+ const buildEvaluationSourceConfig = (index) => ({
30620
+ name: `expectedOutcomeEvaluationSource-${index}`,
30621
+ fieldType: FormFieldType.SELECT,
30622
+ label: 'Evaluation Source',
30623
+ placeholder: 'Select evaluation source',
30624
+ required: true,
30625
+ optionList: ['text', 'custom'],
30626
+ defaultValue: 'text',
30627
+ });
30628
+ const buildExtractorConfig = (index) => ({
30629
+ name: `expectedOutcomeEvaluationSourceExtractor-${index}`,
30630
+ fieldType: FormFieldType.SELECT,
30631
+ label: 'Extractor',
30632
+ placeholder: 'Select extractor',
30633
+ required: true,
30634
+ optionList: extractorIds,
30635
+ });
30252
30636
  const renderEvaluationSelector = (field, index$1) => {
30253
30637
  const optionList = getAllowedApproachesForFieldType(field.type);
30254
30638
  return (index.h("app-select", { config: buildEvaluationConfig(index$1, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
@@ -30258,6 +30642,27 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30258
30642
  value: e.detail.value,
30259
30643
  }) }));
30260
30644
  };
30645
+ const renderEvaluationSourceSelector = (field, index$1) => {
30646
+ if (!hasExtractorOptions) {
30647
+ return null;
30648
+ }
30649
+ const sourceType = field.evaluationSource?.type || 'text';
30650
+ return (index.h("div", null, index.h("app-select", { config: buildEvaluationSourceConfig(index$1), value: sourceType, onValueChange: (e) => emit({
30651
+ testCaseId,
30652
+ index: index$1,
30653
+ operation: 'set-evaluation-source-type',
30654
+ value: e.detail.value,
30655
+ fallbackExtractorId: firstExtractorId,
30656
+ }) }), sourceType === 'custom' && (index.h("app-select", { config: buildExtractorConfig(index$1), value: field.evaluationSource?.type === 'custom'
30657
+ ? field.evaluationSource.extractorId
30658
+ : '', onValueChange: (e) => emit({
30659
+ testCaseId,
30660
+ index: index$1,
30661
+ operation: 'set-evaluation-source-extractor',
30662
+ value: e.detail.value,
30663
+ }) }))));
30664
+ };
30665
+ const renderEvaluationOptions = (field, index$1) => (index.h("details", { class: "expected-outcome-renderer__options" }, index.h("summary", { class: "expected-outcome-renderer__options-summary" }, "More options"), index.h("div", { class: "expected-outcome-renderer__options-content" }, renderEvaluationSelector(field, index$1), renderEvaluationSourceSelector(field, index$1))));
30261
30666
  return (index.h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index$1) => {
30262
30667
  if (field.type === 'textarea') {
30263
30668
  const isDynamic = dynamicResolutionSupported && field.outcomeMode === 'dynamic';
@@ -30289,7 +30694,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30289
30694
  index: index$1,
30290
30695
  operation: 'set-resolution-query',
30291
30696
  value: e.detail.value,
30292
- }) })), !isDynamic && renderEvaluationSelector(field, index$1)));
30697
+ }) })), !isDynamic && renderEvaluationOptions(field, index$1)));
30293
30698
  }
30294
30699
  if (field.type === 'chips-input') {
30295
30700
  const config = {
@@ -30309,7 +30714,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30309
30714
  index: index$1,
30310
30715
  operation: 'remove-chip',
30311
30716
  value: e.detail.value,
30312
- }) }), renderEvaluationSelector(field, index$1)));
30717
+ }) }), renderEvaluationOptions(field, index$1)));
30313
30718
  }
30314
30719
  if (field.type === 'select') {
30315
30720
  const config = {
@@ -30325,18 +30730,18 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30325
30730
  index: index$1,
30326
30731
  operation: 'set-value',
30327
30732
  value: e.detail.value,
30328
- }) }), renderEvaluationSelector(field, index$1)));
30733
+ }) }), renderEvaluationOptions(field, index$1)));
30329
30734
  }
30330
30735
  return (index.h("div", { class: "expected-outcome-renderer__group" }, index.h("div", { class: "expected-outcome-renderer__text" }, index.h("label", null, field.label), index.h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
30331
30736
  testCaseId,
30332
30737
  index: index$1,
30333
30738
  operation: 'set-value',
30334
30739
  value: e.target.value,
30335
- }) })), renderEvaluationSelector(field, index$1)));
30740
+ }) })), renderEvaluationOptions(field, index$1)));
30336
30741
  })));
30337
30742
  };
30338
30743
 
30339
- const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, }) => {
30744
+ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30340
30745
  const questionConfig = {
30341
30746
  name: 'question',
30342
30747
  fieldType: FormFieldType.TEXT_AREA,
@@ -30352,11 +30757,21 @@ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, o
30352
30757
  key: 'question',
30353
30758
  value: e.detail.value,
30354
30759
  },
30355
- }) }), index.h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, onExpectedOutcomeChange: onExpectedOutcomeChange })), index.h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), index.h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), index.h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
30760
+ }) }), index.h("chat-history", { chatHistoryEnabled: testCase.chatHistory?.enabled ?? false, chatHistoryValue: testCase.chatHistory?.value ?? '', onChatHistoryChange: (e) => {
30761
+ const { enabled, value } = e
30762
+ .detail;
30763
+ onChatHistoryChange({
30764
+ detail: {
30765
+ testCaseId: testCase.id,
30766
+ enabled,
30767
+ value,
30768
+ },
30769
+ });
30770
+ } }), index.h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onExpectedOutcomeChange: onExpectedOutcomeChange })), index.h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), index.h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), index.h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
30356
30771
  };
30357
30772
 
30358
- const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
30359
- return (index.h("div", { class: "test-cases" }, index.h("div", { class: "test-cases__column-headers" }, index.h("div", { class: "test-cases__column-header" }, "Input"), index.h("div", { class: "test-cases__column-header" }, "Output"), index.h("div", { class: "test-cases__column-header" }, "Evaluation"), index.h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (index.h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), index.h("div", { class: "test-cases__add-section" }, index.h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
30773
+ const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30774
+ return (index.h("div", { class: "test-cases" }, index.h("div", { class: "test-cases__column-headers" }, index.h("div", { class: "test-cases__column-header" }, "Input"), index.h("div", { class: "test-cases__column-header" }, "Output"), index.h("div", { class: "test-cases__column-header" }, "Evaluation"), index.h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (index.h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange, onChatHistoryChange: onChatHistoryChange }))), index.h("div", { class: "test-cases__add-section" }, index.h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
30360
30775
  };
30361
30776
 
30362
30777
  const tokensCss = () => `:host{--spacing:0.25rem;--spacing-1:calc(var(--spacing) * 1);--spacing-2:calc(var(--spacing) * 2);--spacing-3:calc(var(--spacing) * 3);--spacing-4:calc(var(--spacing) * 4);--spacing-5:calc(var(--spacing) * 5);--spacing-6:calc(var(--spacing) * 6);--spacing-8:calc(var(--spacing) * 8);--spacing-10:calc(var(--spacing) * 10);--spacing-12:calc(var(--spacing) * 12);--spacing-16:calc(var(--spacing) * 16);--spacing-20:calc(var(--spacing) * 20);--spacing-24:calc(var(--spacing) * 24);--radius-none:0;--radius-sm:0.125rem;--radius-md:0.375rem;--radius-lg:0.5rem;--radius-xl:0.75rem;--radius-2xl:1rem;--radius-3xl:1.5rem;--radius-full:9999px;--radius:var(--radius-lg);--font-size-xs:0.75rem;--font-size-sm:0.875rem;--font-size-base:1rem;--font-size-lg:1.125rem;--font-size-xl:1.25rem;--font-size-2xl:1.5rem;--font-size-3xl:1.875rem;--font-size-4xl:2.25rem;--font-weight-normal:400;--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--line-height-none:1;--line-height-tight:1.25;--line-height-snug:1.375;--line-height-normal:1.5;--line-height-relaxed:1.625;--line-height-loose:2;--letter-spacing-tight:-0.025em;--letter-spacing-normal:0;--letter-spacing-wide:0.05em;--shadow-sm:0 1px 2px 0 rgba(0, 0, 0, 0.05);--shadow-md:0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);--shadow-lg:0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);--shadow-xl:0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);--shadow-2xl:0 25px 50px -12px rgba(0, 0, 0, 0.25);--border-width:1px;--z-base:0;--z-dropdown:1000;--z-sticky:1100;--z-modal:1200;--z-popover:1300;--z-tooltip:1400;--opacity-disabled:0.5;--opacity-hover:0.8;--opacity-muted:0.6;--max-w-sm:24rem;--max-w-md:28rem;--max-w-lg:32rem;--max-w-xl:42rem;--max-w-2xl:48rem;--max-w-full:100%;--breakpoint-sm:640px;--breakpoint-md:768px;--breakpoint-lg:1024px;--breakpoint-xl:1280px;--breakpoint-2xl:1536px;--background:#ffffff;--foreground:#0a0a0a;--card:#ffffff;--card-foreground:#0a0a0a;--popover:#ffffff;--popover-foreground:#0a0a0a;--primary:#0a0a0a;--primary-foreground:#fafafa;--secondary:#f4f4f5;--secondary-foreground:#0a0a0a;--muted:#f4f4f5;--muted-foreground:#71717a;--accent:#f4f4f5;--accent-foreground:#0a0a0a;--destructive:#ef4444;--destructive-foreground:#fafafa;--border:#e4e4e7;--input:#e4e4e7;--ring:#3b82f6;--success:#10b981;--success-foreground:#fafafa;--warning:#f59e0b;--warning-foreground:#fafafa;--info:#3b82f6;--info-foreground:#fafafa}:host([data-theme='dark']){--background:#0a0a0a;--foreground:#fafafa;--card:#171717;--card-foreground:#fafafa;--popover:#171717;--popover-foreground:#fafafa;--primary:#fafafa;--primary-foreground:#0a0a0a;--secondary:#27272a;--secondary-foreground:#fafafa;--muted:#27272a;--muted-foreground:#a1a1aa;--accent:#27272a;--accent-foreground:#fafafa;--destructive:#dc2626;--destructive-foreground:#fafafa;--border:#27272a;--input:#27272a;--ring:#3b82f6;--success:#059669;--success-foreground:#fafafa;--warning:#d97706;--warning-foreground:#fafafa;--info:#2563eb;--info-foreground:#fafafa}`;
@@ -30367,7 +30782,7 @@ const llmTestRunnerHeaderCss = () => `.test-runner-header{display:flex;justify-c
30367
30782
 
30368
30783
  const llmTestCasesCss = () => `.test-cases{background:var(--background)}.test-cases__column-headers{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);background:var(--border);border-bottom:2px solid var(--border)}.test-cases__column-header{background:var(--muted);padding:var(--spacing-4) var(--spacing-5);font-weight:var(--font-weight-semibold);color:var(--foreground);font-size:var(--font-size-sm);text-transform:uppercase;letter-spacing:var(--letter-spacing-wide)}.test-cases__add-section{padding:var(--spacing-6);text-align:center;background:var(--muted);border-top:var(--border-width) solid var(--border)}@media (max-width: 1200px){.test-cases__column-headers{display:none}}`;
30369
30784
 
30370
- const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
30785
+ const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}.expected-outcome-renderer__options{border:var(--border-width) solid var(--border);border-radius:var(--radius-sm);background:var(--muted)}.expected-outcome-renderer__options-summary{cursor:pointer;font-size:var(--font-size-sm);color:var(--foreground);padding:var(--spacing-2) var(--spacing-3);user-select:none}.expected-outcome-renderer__options-content{display:flex;flex-direction:column;gap:var(--spacing-2);padding:0 var(--spacing-3) var(--spacing-3)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
30371
30786
 
30372
30787
  const rowActionsCss = () => `.row-actions{height:100%;padding:var(--spacing-5);background:var(--background);display:flex;flex-direction:column;gap:var(--spacing-3);align-items:center;justify-content:flex-start;align-self:flex-start}@media (max-width: 1200px){.row-actions{border-right:none;border-bottom:var(--border-width) solid var(--border);flex-direction:row;justify-content:center}}@media (max-width: 768px){.row-actions{padding:var(--spacing-4)}}`;
30373
30788
 
@@ -30393,6 +30808,7 @@ const LLMTestRunner = class {
30393
30808
  useSave = false;
30394
30809
  usePromptEditor = false;
30395
30810
  resolveExpectedOutcome;
30811
+ evaluationSourceExtractors;
30396
30812
  initialTestCases;
30397
30813
  defaultExpectedOutcomeSchema;
30398
30814
  testCases = [
@@ -30406,6 +30822,7 @@ const LLMTestRunner = class {
30406
30822
  value: '',
30407
30823
  },
30408
30824
  ],
30825
+ chatHistory: { enabled: false, value: '' },
30409
30826
  isRunning: false,
30410
30827
  },
30411
30828
  ];
@@ -30428,6 +30845,12 @@ const LLMTestRunner = class {
30428
30845
  // Initialize testCases from prop if provided
30429
30846
  if (this.initialTestCases !== undefined) {
30430
30847
  validateTestCaseInputArray(this.initialTestCases);
30848
+ const extractorIds = getExtractorIds(this.evaluationSourceExtractors);
30849
+ if (extractorIds.length > 0) {
30850
+ this.initialTestCases.forEach(testCase => {
30851
+ validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, extractorIds);
30852
+ });
30853
+ }
30431
30854
  this.testCases = this.initialTestCases.map((rawTestCase, index) => {
30432
30855
  try {
30433
30856
  return createTestCaseFromInput(rawTestCase);
@@ -30451,8 +30874,6 @@ const LLMTestRunner = class {
30451
30874
  this.testCases = [];
30452
30875
  }
30453
30876
  }
30454
- componentDidLoad() { }
30455
- disconnectedCallback() { }
30456
30877
  async resetSavingState() {
30457
30878
  this.isSaving = false;
30458
30879
  }
@@ -30463,6 +30884,12 @@ const LLMTestRunner = class {
30463
30884
  const { testCaseId, key, value } = event.detail;
30464
30885
  this.testCases = this.testCases.map(tc => tc.id === testCaseId ? { ...tc, [key]: value } : tc);
30465
30886
  };
30887
+ handleChatHistoryChange = (event) => {
30888
+ const { testCaseId, enabled, value } = event.detail;
30889
+ this.updateTestCase(testCaseId, {
30890
+ chatHistory: { enabled, value },
30891
+ });
30892
+ };
30466
30893
  addNewTestCase() {
30467
30894
  try {
30468
30895
  const schema = this.getResolvedExpectedOutcomeSchema();
@@ -30479,13 +30906,17 @@ const LLMTestRunner = class {
30479
30906
  updateTestCase(id, updates) {
30480
30907
  this.testCases = this.testCases.map(tc => tc.id === id ? { ...tc, ...updates } : tc);
30481
30908
  }
30482
- requestLlmText(testCase) {
30909
+ requestLlmResponse(testCase) {
30483
30910
  return new Promise((resolve, reject) => {
30484
- this.llmRequest.emit({
30911
+ const payload = {
30485
30912
  prompt: testCase.question,
30486
30913
  resolve,
30487
30914
  reject,
30488
- });
30915
+ };
30916
+ if (testCase.chatHistory?.enabled) {
30917
+ payload.chatHistory = testCase.chatHistory.value;
30918
+ }
30919
+ this.llmRequest.emit(payload);
30489
30920
  });
30490
30921
  }
30491
30922
  throwError(reason) {
@@ -30498,14 +30929,14 @@ const LLMTestRunner = class {
30498
30929
  const startTime = Date.now();
30499
30930
  this.updateTestCase(testCase.id, { isRunning: true });
30500
30931
  const [llmSettled, resolutionSettled] = await Promise.allSettled([
30501
- this.requestLlmText(testCase),
30932
+ this.requestLlmResponse(testCase),
30502
30933
  resolveDynamicExpectedOutcomes(testCase, this.resolveExpectedOutcome),
30503
30934
  ]);
30504
30935
  const responseTime = Date.now() - startTime;
30505
30936
  if (llmSettled.status === 'rejected') {
30506
30937
  this.updateTestCase(testCase.id, {
30507
30938
  isRunning: false,
30508
- output: null,
30939
+ output: undefined,
30509
30940
  error: this.addErrorMessage(llmSettled.reason, 'Unknown error'),
30510
30941
  responseTime,
30511
30942
  });
@@ -30553,7 +30984,7 @@ const LLMTestRunner = class {
30553
30984
  this.updateTestCase(testCase.id, {
30554
30985
  evaluationResult: result,
30555
30986
  });
30556
- });
30987
+ }, this.evaluationSourceExtractors);
30557
30988
  }
30558
30989
  async runAllTests() {
30559
30990
  this.isRunningAll = true;
@@ -30584,7 +31015,7 @@ const LLMTestRunner = class {
30584
31015
  this.error = '';
30585
31016
  try {
30586
31017
  const content = await readFileAsync(file);
30587
- const result = importTestSuite(content);
31018
+ const result = importTestSuite(content, getExtractorIds(this.evaluationSourceExtractors));
30588
31019
  if (!result.success) {
30589
31020
  this.error = result.error || 'Unknown error occurred during import.';
30590
31021
  return;
@@ -30645,7 +31076,7 @@ const LLMTestRunner = class {
30645
31076
  }
30646
31077
  }
30647
31078
  render() {
30648
- return (index.h("div", { key: '5536c02dcbf03e1d21de2df307f5255a17c000c7', class: "test-runner-container" }, index.h(LLMTestRunnerHeader, { key: 'b6b7db13d7b5576986f4de469c4eeff62b9be873', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), index.h(ErrorMessage, { key: '48407658a40dd79864ddf24426df743df9206b30', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: '2b1db34dbb4defc98c255ca7bcb9318ca1c52cba', class: "test-runner-container__content" }, index.h(LLMTestCases, { key: '90b4d25f51d5de43790cabeb4fa6fc1c90c246cd', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange }))));
31079
+ return (index.h("div", { key: '7433beaa1d60d48f65600c43e11b302b892a7bca', class: "test-runner-container" }, index.h(LLMTestRunnerHeader, { key: '8083cc39376e7a710bd3f52efb184b959e885a87', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), index.h(ErrorMessage, { key: 'ddced98c13cd595c4cfb6eef11b27cb173769518', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: '8d6f65c4d68d34869b644709eacb97fec93683c6', class: "test-runner-container__content" }, index.h(LLMTestCases, { key: '5ccb186132b23af6209209b0a14086e03cf790af', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, extractorIds: getExtractorIds(this.evaluationSourceExtractors), onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
30649
31080
  }
30650
31081
  };
30651
31082
  LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));